Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit cc2e093

Browse files
sangjanai authored and vansangpfiev committed
feat: use llama.cpp server
1 parent 7c3788d commit cc2e093

22 files changed

+790
-119
lines changed

engine/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ add_executable(${TARGET_NAME} main.cc
178178
${CMAKE_CURRENT_SOURCE_DIR}/utils/process/utils.cc
179179

180180
${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc
181+
${CMAKE_CURRENT_SOURCE_DIR}/extensions/local-engine/local_engine.cc
181182

182183
)
183184

engine/cli/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ add_executable(${TARGET_NAME} main.cc
8686
${CMAKE_CURRENT_SOURCE_DIR}/../services/hardware_service.cc
8787
${CMAKE_CURRENT_SOURCE_DIR}/../services/database_service.cc
8888
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/remote_engine.cc
89+
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/local-engine/local_engine.cc
8990

9091
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/python-engine/python_engine.cc
9192
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/template_renderer.cc

engine/cli/command_line_parser.cc

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "services/engine_service.h"
3434
#include "utils/file_manager_utils.h"
3535
#include "utils/logging_utils.h"
36+
#include "utils/task_queue.h"
3637

3738
namespace {
3839
constexpr const auto kCommonCommandsGroup = "Common Commands";
@@ -51,7 +52,8 @@ CommandLineParser::CommandLineParser()
5152
dylib_path_manager_{std::make_shared<cortex::DylibPathManager>()},
5253
db_service_{std::make_shared<DatabaseService>()},
5354
engine_service_{std::make_shared<EngineService>(
54-
download_service_, dylib_path_manager_, db_service_)} {
55+
download_service_, dylib_path_manager_, db_service_,
56+
std::make_shared<cortex::TaskQueue>(1, "q"))} {
5557
supported_engines_ = engine_service_->GetSupportedEngineNames().value();
5658
}
5759

@@ -124,14 +126,14 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
124126
}
125127
}
126128
#endif
127-
auto config = file_manager_utils::GetCortexConfig();
128-
if (!config.llamacppVersion.empty() &&
129-
config.latestLlamacppRelease != config.llamacppVersion) {
130-
CLI_LOG(
131-
"\nNew llama.cpp version available: " << config.latestLlamacppRelease);
132-
CLI_LOG("To update, run: " << commands::GetCortexBinary()
133-
<< " engines update llama-cpp");
134-
}
129+
// auto config = file_manager_utils::GetCortexConfig();
130+
// if (!config.llamacppVersion.empty() &&
131+
// config.latestLlamacppRelease != config.llamacppVersion) {
132+
// CLI_LOG(
133+
// "\nNew llama.cpp version available: " << config.latestLlamacppRelease);
134+
// CLI_LOG("To update, run: " << commands::GetCortexBinary()
135+
// << " engines update llama-cpp");
136+
// }
135137

136138
return true;
137139
}

engine/cli/commands/engine_install_cmd.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,10 @@ bool EngineInstallCmd::Exec(const std::string& engine,
9090
std::vector<std::string> variant_selections;
9191
for (const auto& variant : variant_result.value()) {
9292
auto v_name = variant["name"].asString();
93-
if (string_utils::StringContainsIgnoreCase(v_name, hw_inf_.sys_inf->os) &&
93+
if ((string_utils::StringContainsIgnoreCase(v_name,
94+
hw_inf_.sys_inf->os) ||
95+
(hw_inf_.sys_inf->os == "linux" &&
96+
string_utils::StringContainsIgnoreCase(v_name, "ubuntu"))) &&
9497
string_utils::StringContainsIgnoreCase(v_name,
9598
hw_inf_.sys_inf->arch)) {
9699
variant_selections.push_back(variant["name"].asString());

engine/cli/commands/server_start_cmd.cc

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ bool ServerStartCmd::Exec(const std::string& host, int port,
6666
si.cb = sizeof(si);
6767
ZeroMemory(&pi, sizeof(pi));
6868
std::wstring params = L"--start-server";
69-
params += L" --config_file_path \"" +
69+
params += L" --config_file_path \"" +
7070
file_manager_utils::GetConfigurationPath().wstring() + L"\"";
7171
params += L" --data_folder_path \"" +
7272
file_manager_utils::GetCortexDataPath().wstring() + L"\"";
@@ -80,17 +80,17 @@ bool ServerStartCmd::Exec(const std::string& host, int port,
8080
mutable_cmds.push_back(L'\0');
8181
// Create child process
8282
if (!CreateProcess(
83-
NULL, // No module name (use command line)
83+
NULL, // No module name (use command line)
8484
mutable_cmds
85-
.data(), // Command line (replace with your actual executable)
86-
NULL, // Process handle not inheritable
87-
NULL, // Thread handle not inheritable
88-
FALSE, // Set handle inheritance
89-
CREATE_NO_WINDOW, // No new console
90-
NULL, // Use parent's environment block
91-
NULL, // Use parent's starting directory
92-
&si, // Pointer to STARTUPINFO structure
93-
&pi)) // Pointer to PROCESS_INFORMATION structure
85+
.data(), // Command line (replace with your actual executable)
86+
NULL, // Process handle not inheritable
87+
NULL, // Thread handle not inheritable
88+
FALSE, // Set handle inheritance
89+
CREATE_NO_WINDOW, // No new console
90+
NULL, // Use parent's environment block
91+
NULL, // Use parent's starting directory
92+
&si, // Pointer to STARTUPINFO structure
93+
&pi)) // Pointer to PROCESS_INFORMATION structure
9494
{
9595
std::cout << "Could not start server: " << GetLastError() << std::endl;
9696
return false;
@@ -109,7 +109,9 @@ bool ServerStartCmd::Exec(const std::string& host, int port,
109109
auto download_srv = std::make_shared<DownloadService>();
110110
auto dylib_path_mng = std::make_shared<cortex::DylibPathManager>();
111111
auto db_srv = std::make_shared<DatabaseService>();
112-
EngineService(download_srv, dylib_path_mng, db_srv).RegisterEngineLibPath();
112+
EngineService(download_srv, dylib_path_mng, db_srv,
113+
std::make_shared<cortex::TaskQueue>(1, "task_queue"))
114+
.RegisterEngineLibPath();
113115

114116
std::string p = cortex_utils::GetCurrentPath() + "/" + exe;
115117
commands.push_back(p);

engine/cli/main.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ int main(int argc, char* argv[]) {
147147
should_check_for_latest_llamacpp_version = now > last_check;
148148
}
149149

150-
if (should_check_for_latest_llamacpp_version) {
150+
if (false) {
151151
std::thread t1([]() {
152152
// TODO: namh current we only check for llamacpp. Need to add support for other engine
153153
auto get_latest_version = []() -> cpp::result<std::string, std::string> {

engine/cli/utils/download_progress.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ bool DownloadProgress::Handle(
8383
size_t max_length = 20) -> std::string {
8484
// Check the length of the input string
8585
if (str.length() >= max_length) {
86-
return str.substr(
87-
0, max_length); // Return truncated string if it's too long
86+
return str.substr(0, max_length - 3) +
87+
".. "; // Return truncated string if it's too long
8888
}
8989

9090
// Calculate the number of spaces needed

engine/controllers/engines.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
#include "utils/engine_constants.h"
66
#include "utils/http_util.h"
77
#include "utils/logging_utils.h"
8+
#include "utils/normalize_engine.h"
89
#include "utils/scope_exit.h"
910
#include "utils/string_utils.h"
10-
#include "utils/normalize_engine.h"
1111

1212
void Engines::ListEngine(
1313
const HttpRequestPtr& req,
@@ -146,11 +146,13 @@ void Engines::GetEngineVariants(
146146
auto normalize_version = string_utils::RemoveSubstring(version, "v");
147147
Json::Value releases(Json::arrayValue);
148148
for (const auto& release : result.value()) {
149-
auto json = release.ToApiJson(cortex::engine::NormalizeEngine(engine), normalize_version);
149+
auto json = release.ToApiJson(cortex::engine::NormalizeEngine(engine),
150+
normalize_version);
150151
if (json != std::nullopt) {
151152
releases.append(json.value());
152153
}
153154
}
155+
CTL_INF(releases.toStyledString());
154156
auto resp = cortex_utils::CreateCortexHttpJsonResponse(releases);
155157
resp->setStatusCode(k200OK);
156158
callback(resp);
@@ -173,6 +175,8 @@ void Engines::InstallEngine(
173175
}
174176
norm_version = version;
175177
}
178+
CTL_INF("version: " << norm_version
179+
<< ", norm_variant: " << norm_variant.value_or(""));
176180

177181
auto result =
178182
engine_service_->InstallEngineAsync(engine, norm_version, norm_variant);
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#pragma once
2+
3+
#include <functional>
4+
#include <memory>
5+
6+
#include "json/value.h"
7+
#include "trantor/utils/Logger.h"
8+
class LocalEngineI {
9+
public:
10+
virtual ~LocalEngineI() {}
11+
12+
virtual void HandleChatCompletion(
13+
std::shared_ptr<Json::Value> json_body,
14+
std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
15+
virtual void HandleEmbedding(
16+
std::shared_ptr<Json::Value> json_body,
17+
std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
18+
virtual void LoadModel(
19+
std::shared_ptr<Json::Value> json_body,
20+
std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
21+
virtual void UnloadModel(
22+
std::shared_ptr<Json::Value> json_body,
23+
std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
24+
virtual void GetModelStatus(
25+
std::shared_ptr<Json::Value> json_body,
26+
std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
27+
28+
// Get list of running models
29+
virtual void GetModels(
30+
std::shared_ptr<Json::Value> jsonBody,
31+
std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
32+
};

engine/cortex-common/remote_enginei.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
#pragma once
22

3-
#pragma once
4-
53
#include <functional>
64
#include <memory>
75

0 commit comments

Comments
 (0)