Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 79f7679

Browse files
nguyenhoangthuan99, luke-nguyen990, sangjanai, vansangpfiev
authored
feat: remote engine (#1666)
* Init remote engine
* Fix: CI build windows
* Fix: CI build windows
* Fix: CI build windows
* Fix: CI build windows
* feat: new db schema for model and template for engine
* Add remote model
* Add Get, List, Update support for remote models
* change model_id to model in remote engine
* fix: mac compatibility
* chore: some refactors before making big changes
* feat: db ops for engines
* chore: small refactor before more changes
* Update engine
* refine db schema, composite key for engines
* add entry definition for engine at db layer
* complete add, get engine operations
* engine managements
* Integrate with remote engine to run remote model
* error handling and response transform
* Support for stream request
* chore: fix conflicts
* feat: anthropic
* feat: support anthropic
* feat: support anthropic
* chore: rename
* chore: cleanup and fix unit tests
* fix: issue with db
* chore: refactor remote engine
* fix: e2e tests
* fix: e2e tests
* chore: API docs
* fix: use different interface for remote engine
---------
Co-authored-by: Luke Nguyen <luke.nguyen990@gmail.com>
Co-authored-by: vansangpfiev <sang@jan.ai>
Co-authored-by: vansangpfiev <vansangpfiev@gmail.com>
1 parent 2b74824 commit 79f7679

38 files changed

+2934
-125
lines changed

docs/static/openapi/cortex.json

Lines changed: 229 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -512,6 +512,73 @@
512512
}
513513
}
514514
},
515+
"/v1/models/add": {
516+
"post": {
517+
"operationId": "ModelsController_addModel",
518+
"summary": "Add a remote model",
519+
"description": "Add a new remote model configuration to the system.",
520+
"requestBody": {
521+
"required": true,
522+
"content": {
523+
"application/json": {
524+
"schema": {
525+
"$ref": "#/components/schemas/AddModelRequest"
526+
}
527+
}
528+
}
529+
},
530+
"responses": {
531+
"200": {
532+
"description": "Successful response",
533+
"content": {
534+
"application/json": {
535+
"schema": {
536+
"type": "object",
537+
"properties": {
538+
"message": {
539+
"type": "string"
540+
},
541+
"model": {
542+
"type": "object",
543+
"properties": {
544+
"model": {
545+
"type": "string"
546+
},
547+
"engine": {
548+
"type": "string"
549+
},
550+
"version": {
551+
"type": "string"
552+
}
553+
}
554+
}
555+
}
556+
},
557+
"example": {
558+
"message": "Model added successfully!",
559+
"model": {
560+
"model": "claude-3-5-sonnet-20241022",
561+
"engine": "anthropic",
562+
"version": "2023-06-01"
563+
}
564+
}
565+
}
566+
}
567+
},
568+
"400": {
569+
"description": "Bad request",
570+
"content": {
571+
"application/json": {
572+
"schema": {
573+
"$ref": "#/components/schemas/SimpleErrorResponse"
574+
}
575+
}
576+
}
577+
}
578+
},
579+
"tags": ["Pulling Models"]
580+
}
581+
},
515582
"/v1/models": {
516583
"get": {
517584
"operationId": "ModelsController_findAll",
@@ -1417,7 +1484,7 @@
14171484
"required": true,
14181485
"schema": {
14191486
"type": "string",
1420-
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"],
1487+
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm", "openai", "anthropic"],
14211488
"default": "llama-cpp"
14221489
},
14231490
"description": "The type of engine"
@@ -1439,6 +1506,31 @@
14391506
"type": "string",
14401507
"description": "The variant of the engine to install (optional)",
14411508
"example": "mac-arm64"
1509+
},
1510+
"type": {
1511+
"type": "string",
1512+
"description": "The type of connection, remote or local",
1513+
"example": "remote"
1514+
},
1515+
"url": {
1516+
"type": "string",
1517+
"description": "The URL for the API endpoint for remote engine",
1518+
"example": "https://api.openai.com"
1519+
},
1520+
"api_key": {
1521+
"type": "string",
1522+
"description": "The API key for authentication for remote engine",
1523+
"example": ""
1524+
},
1525+
"metadata": {
1526+
"type": "object",
1527+
"properties": {
1528+
"get_models_url": {
1529+
"type": "string",
1530+
"description": "The URL to get models",
1531+
"example": "https://api.openai.com/v1/models"
1532+
}
1533+
}
14421534
}
14431535
}
14441536
}
@@ -1475,7 +1567,7 @@
14751567
"required": true,
14761568
"schema": {
14771569
"type": "string",
1478-
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"],
1570+
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm", "openai", "anthropic"],
14791571
"default": "llama-cpp"
14801572
},
14811573
"description": "The type of engine"
@@ -1690,7 +1782,7 @@
16901782
"required": true,
16911783
"schema": {
16921784
"type": "string",
1693-
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"],
1785+
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm", "openai", "anthropic"],
16941786
"default": "llama-cpp"
16951787
},
16961788
"description": "The name of the engine to update"
@@ -3636,6 +3728,109 @@
36363728
}
36373729
}
36383730
},
3731+
"AddModelRequest": {
3732+
"type": "object",
3733+
"required": ["model", "engine", "version", "inference_params", "TransformReq", "TransformResp", "metadata"],
3734+
"properties": {
3735+
"model": {
3736+
"type": "string",
3737+
"description": "The identifier of the model."
3738+
},
3739+
"api_key_template": {
3740+
"type": "string",
3741+
"description": "Template for the API key header."
3742+
},
3743+
"engine": {
3744+
"type": "string",
3745+
"description": "The engine used for the model."
3746+
},
3747+
"version": {
3748+
"type": "string",
3749+
"description": "The version of the model."
3750+
},
3751+
"inference_params": {
3752+
"type": "object",
3753+
"properties": {
3754+
"temperature": {
3755+
"type": "number"
3756+
},
3757+
"top_p": {
3758+
"type": "number"
3759+
},
3760+
"frequency_penalty": {
3761+
"type": "number"
3762+
},
3763+
"presence_penalty": {
3764+
"type": "number"
3765+
},
3766+
"max_tokens": {
3767+
"type": "integer"
3768+
},
3769+
"stream": {
3770+
"type": "boolean"
3771+
}
3772+
}
3773+
},
3774+
"TransformReq": {
3775+
"type": "object",
3776+
"properties": {
3777+
"get_models": {
3778+
"type": "object"
3779+
},
3780+
"chat_completions": {
3781+
"type": "object",
3782+
"properties": {
3783+
"url": {
3784+
"type": "string"
3785+
},
3786+
"template": {
3787+
"type": "string"
3788+
}
3789+
}
3790+
},
3791+
"embeddings": {
3792+
"type": "object"
3793+
}
3794+
}
3795+
},
3796+
"TransformResp": {
3797+
"type": "object",
3798+
"properties": {
3799+
"chat_completions": {
3800+
"type": "object",
3801+
"properties": {
3802+
"template": {
3803+
"type": "string"
3804+
}
3805+
}
3806+
},
3807+
"embeddings": {
3808+
"type": "object"
3809+
}
3810+
}
3811+
},
3812+
"metadata": {
3813+
"type": "object",
3814+
"properties": {
3815+
"author": {
3816+
"type": "string"
3817+
},
3818+
"description": {
3819+
"type": "string"
3820+
},
3821+
"end_point": {
3822+
"type": "string"
3823+
},
3824+
"logo": {
3825+
"type": "string"
3826+
},
3827+
"api_key_url": {
3828+
"type": "string"
3829+
}
3830+
}
3831+
}
3832+
}
3833+
},
36393834
"CreateModelDto": {
36403835
"type": "object",
36413836
"properties": {
@@ -4305,6 +4500,37 @@
43054500
"type": "integer",
43064501
"description": "Number of GPU layers.",
43074502
"example": 33
4503+
},
4504+
"api_key_template": {
4505+
"type": "string",
4506+
"description": "Template for the API key header."
4507+
},
4508+
"version": {
4509+
"type": "string",
4510+
"description": "The version of the model."
4511+
},
4512+
"inference_params": {
4513+
"type": "object",
4514+
"properties": {
4515+
"temperature": {
4516+
"type": "number"
4517+
},
4518+
"top_p": {
4519+
"type": "number"
4520+
},
4521+
"frequency_penalty": {
4522+
"type": "number"
4523+
},
4524+
"presence_penalty": {
4525+
"type": "number"
4526+
},
4527+
"max_tokens": {
4528+
"type": "integer"
4529+
},
4530+
"stream": {
4531+
"type": "boolean"
4532+
}
4533+
}
43084534
}
43094535
}
43104536
},

engine/CMakeLists.txt

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,10 @@ file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/cortex_openapi.h"
142142
add_executable(${TARGET_NAME} main.cc
143143
${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc
144144
${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc
145+
${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc
146+
${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/openai_engine.cc
147+
${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/anthropic_engine.cc
148+
${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/template_renderer.cc
145149
)
146150

147151
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
@@ -171,17 +175,17 @@ endif()
171175
aux_source_directory(controllers CTL_SRC)
172176
aux_source_directory(repositories REPO_SRC)
173177
aux_source_directory(services SERVICES_SRC)
174-
aux_source_directory(common COMMON_SRC)
175178
aux_source_directory(models MODEL_SRC)
176179
aux_source_directory(cortex-common CORTEX_COMMON)
177180
aux_source_directory(config CONFIG_SRC)
178181
aux_source_directory(database DB_SRC)
182+
aux_source_directory(extensions EX_SRC)
179183
aux_source_directory(migrations MIGR_SRC)
180184
aux_source_directory(utils UTILS_SRC)
181185

182186
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} )
183187

184-
target_sources(${TARGET_NAME} PRIVATE ${UTILS_SRC} ${CONFIG_SRC} ${CTL_SRC} ${COMMON_SRC} ${SERVICES_SRC} ${DB_SRC} ${MIGR_SRC} ${REPO_SRC})
188+
target_sources(${TARGET_NAME} PRIVATE ${UTILS_SRC} ${CONFIG_SRC} ${CTL_SRC} ${COMMON_SRC} ${SERVICES_SRC} ${DB_SRC} ${EX_SRC} ${MIGR_SRC} ${REPO_SRC})
185189

186190
set_target_properties(${TARGET_NAME} PROPERTIES
187191
RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}

engine/cli/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,10 @@ add_executable(${TARGET_NAME} main.cc
8282
${CMAKE_CURRENT_SOURCE_DIR}/../services/model_service.cc
8383
${CMAKE_CURRENT_SOURCE_DIR}/../services/inference_service.cc
8484
${CMAKE_CURRENT_SOURCE_DIR}/../services/hardware_service.cc
85+
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/remote_engine.cc
86+
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/openai_engine.cc
87+
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/anthropic_engine.cc
88+
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/template_renderer.cc
8589
${CMAKE_CURRENT_SOURCE_DIR}/utils/easywsclient.cc
8690
${CMAKE_CURRENT_SOURCE_DIR}/utils/download_progress.cc
8791
${CMAKE_CURRENT_SOURCE_DIR}/../utils/config_yaml_utils.cc
@@ -121,11 +125,12 @@ aux_source_directory(../cortex-common CORTEX_COMMON)
121125
aux_source_directory(../config CONFIG_SRC)
122126
aux_source_directory(commands COMMANDS_SRC)
123127
aux_source_directory(../database DB_SRC)
128+
aux_source_directory(../extensions EX_SRC)
124129

125130
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/.. )
126131
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
127132

128-
target_sources(${TARGET_NAME} PRIVATE ${COMMANDS_SRC} ${CONFIG_SRC} ${COMMON_SRC} ${DB_SRC})
133+
target_sources(${TARGET_NAME} PRIVATE ${COMMANDS_SRC} ${CONFIG_SRC} ${COMMON_SRC} ${DB_SRC} ${EX_SRC})
129134

130135
set_target_properties(${TARGET_NAME} PROPERTIES
131136
RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}

engine/common/engine_servicei.h

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@
33
#include <json/value.h>
44
#include <string>
55
#include <vector>
6+
#include "database/engines.h"
67
#include "utils/result.hpp"
7-
88
// TODO: namh think of the other name
99
struct DefaultEngineVariant {
1010
std::string engine;
@@ -54,4 +54,8 @@ class EngineServiceI {
5454

5555
virtual cpp::result<void, std::string> UnloadEngine(
5656
const std::string& engine_name) = 0;
57+
virtual cpp::result<cortex::db::EngineEntry, std::string>
58+
GetEngineByNameAndVariant(
59+
const std::string& engine_name,
60+
const std::optional<std::string> variant = std::nullopt) = 0;
5761
};

0 commit comments

Comments
 (0)