This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 48b09fe: clean up
1 parent: b4cf3cb

File tree: 11 files changed (+62, -76 lines)

engine/cli/commands/model_del_cmd.cc
Lines changed: 1 addition & 1 deletion

@@ -15,7 +15,7 @@ void ModelDelCmd::Exec(const std::string& host, int port,
       return;
     }
   }
-
+
   // Call API to delete model
   httplib::Client cli(host + ":" + std::to_string(port));
   auto res = cli.Delete("/v1/models/" + model_handle);
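
For orientation: the CLI talks to the local API server over plain HTTP. A minimal standalone sketch of the delete call shown above, using cpp-httplib as the file does; the host, port, and model handle are illustrative placeholders, not values from this commit:

#include <httplib.h>

#include <iostream>
#include <string>

int main() {
  // Illustrative values; the real CLI receives host and port from its config.
  std::string host = "127.0.0.1";
  int port = 39281;
  std::string model_handle = "tinyllama";

  // Same call shape as model_del_cmd.cc above: DELETE /v1/models/<handle>.
  httplib::Client cli(host + ":" + std::to_string(port));
  auto res = cli.Delete("/v1/models/" + model_handle);
  if (res && res->status == 200) {
    std::cout << "Model deleted" << std::endl;
  } else {
    std::cout << "Delete failed" << std::endl;
  }
  return 0;
}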

engine/cli/commands/model_stop_cmd.cc
Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 #include "model_stop_cmd.h"
-#include "utils/logging_utils.h"
 #include "httplib.h"
+#include "utils/logging_utils.h"

 namespace commands {

engine/controllers/models.cc
Lines changed: 7 additions & 14 deletions

@@ -348,8 +348,7 @@ void Models::ImportModel(
         std::filesystem::path(model_yaml_path).parent_path() /
         std::filesystem::path(modelPath).filename();
     std::filesystem::copy_file(
-        modelPath, file_path,
-        std::filesystem::copy_options::update_existing);
+        modelPath, file_path, std::filesystem::copy_options::update_existing);
     model_config.files.push_back(file_path.string());
     auto size = std::filesystem::file_size(file_path);
     model_config.size = size;
@@ -409,7 +408,6 @@ void Models::StartModel(
     std::function<void(const HttpResponsePtr&)>&& callback) {
   if (!http_util::HasFieldInReq(req, callback, "model"))
     return;
-  auto config = file_manager_utils::GetCortexConfig();
   auto model_handle = (*(req->getJsonObject())).get("model", "").asString();
   StartParameterOverride params_override;
   if (auto& o = (*(req->getJsonObject()))["prompt_template"]; !o.isNull()) {
@@ -480,9 +478,7 @@ void Models::StartModel(
     return;
   }

-  auto result = model_service_->StartModel(config.apiServerHost,
-                                           std::stoi(config.apiServerPort),
-                                           model_handle, params_override);
+  auto result = model_service_->StartModel(model_handle, params_override);
   if (result.has_error()) {
     Json::Value ret;
     ret["message"] = result.error();
@@ -500,12 +496,12 @@ void Models::StartModel(

 void Models::StopModel(const HttpRequestPtr& req,
                        std::function<void(const HttpResponsePtr&)>&& callback) {
-  if (!http_util::HasFieldInReq(req, callback, "model"))
+  if (!http_util::HasFieldInReq(req, callback, "model")) {
     return;
-  auto config = file_manager_utils::GetCortexConfig();
+  }
+
   auto model_handle = (*(req->getJsonObject())).get("model", "").asString();
-  auto result = model_service_->StopModel(
-      config.apiServerHost, std::stoi(config.apiServerPort), model_handle);
+  auto result = model_service_->StopModel(model_handle);
   if (result.has_error()) {
     Json::Value ret;
     ret["message"] = result.error();
@@ -525,10 +521,7 @@ void Models::GetModelStatus(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback,
     const std::string& model_id) {
-  auto config = file_manager_utils::GetCortexConfig();
-
-  auto result = model_service_->GetModelStatus(
-      config.apiServerHost, std::stoi(config.apiServerPort), model_id);
+  auto result = model_service_->GetModelStatus(model_id);
   if (result.has_error()) {
     Json::Value ret;
     ret["message"] = result.error();

engine/controllers/server.cc
Lines changed: 4 additions & 1 deletion

@@ -67,7 +67,10 @@ void server::Embedding(const HttpRequestPtr& req,
 void server::UnloadModel(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback) {
-  auto ir = inference_svc_->UnloadModel(req->getJsonObject());
+  auto engine = (*req->getJsonObject())["engine"].asString();
+  auto model = (*req->getJsonObject())["model_id"].asString();
+  CTL_INF("Unloading model: " + model + ", engine: " + engine);
+  auto ir = inference_svc_->UnloadModel(engine, model);
   auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(ir));
   resp->setStatusCode(
       static_cast<HttpStatusCode>(std::get<0>(ir)["status_code"].asInt()));
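
The handler no longer forwards the raw request body; it pulls "engine" and "model_id" out of the JSON and passes plain strings down to InferenceService::UnloadModel. A minimal jsoncpp sketch of the payload the endpoint now expects; the values are placeholders:

#include <json/json.h>

#include <iostream>

int main() {
  // Request body as server::UnloadModel now parses it: only "engine" and
  // "model_id" are read.
  Json::Value body;
  body["engine"] = "llama-cpp";    // placeholder engine name
  body["model_id"] = "tinyllama";  // placeholder model id
  std::cout << body.toStyledString() << std::endl;
  return 0;
}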

engine/services/download_service.cc
Lines changed: 0 additions & 9 deletions

@@ -1,6 +1,5 @@
 #include "download_service.h"
 #include <curl/curl.h>
-#include <httplib.h>
 #include <stdio.h>
 #include <filesystem>
 #include <mutex>
@@ -12,14 +11,6 @@
 #include "utils/logging_utils.h"
 #include "utils/result.hpp"

-#ifdef _WIN32
-#define ftell64(f) _ftelli64(f)
-#define fseek64(f, o, w) _fseeki64(f, o, w)
-#else
-#define ftell64(f) ftello(f)
-#define fseek64(f, o, w) fseeko(f, o, w)
-#endif
-
 namespace {
 size_t WriteCallback(char* ptr, size_t size, size_t nmemb, void* userdata) {
   size_t written = fwrite(ptr, size, nmemb, (FILE*)userdata);

engine/services/download_service.h
Lines changed: 0 additions & 1 deletion

@@ -2,7 +2,6 @@

 #include <curl/curl.h>
 #include <eventpp/eventqueue.h>
-#include <filesystem>
 #include <functional>
 #include <optional>
 #include <queue>

engine/services/engine_service.cc
Lines changed: 11 additions & 0 deletions

@@ -557,6 +557,17 @@ EngineService::SetDefaultEngineVariant(const std::string& engine,
                      " is not installed yet!");
   }

+  if (IsEngineLoaded(ne)) {
+    CTL_INF("Engine " << ne << " is already loaded, unloading it");
+    auto unload_res = UnloadEngine(ne);
+    if (unload_res.has_error()) {
+      CTL_INF("Failed to unload engine: " << unload_res.error());
+      return cpp::fail(unload_res.error());
+    } else {
+      CTL_INF("Engine " << ne << " unloaded successfully");
+    }
+  }
+
   auto normalized_version = string_utils::RemoveSubstring(version, "v");

   auto config = file_manager_utils::GetCortexConfig();
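
The added guard keeps SetDefaultEngineVariant from switching the default variant while the old engine binary is still resident: it unloads the loaded engine first and propagates the error if the unload fails. A small sketch of the same check-and-propagate pattern with cpp::result, using the repo's result.hpp; the helper names and failure message are illustrative:

#include <iostream>
#include <string>

#include "utils/result.hpp"  // cpp::result / cpp::fail, as used in this repo

// Illustrative stand-in with the same result shape as UnloadEngine.
cpp::result<bool, std::string> Unload(bool ok) {
  if (!ok) return cpp::fail(std::string("engine busy"));
  return true;
}

cpp::result<bool, std::string> SwitchVariant(bool engine_ok) {
  // Same pattern as the diff: fail fast on unload errors, else continue.
  auto unload_res = Unload(engine_ok);
  if (unload_res.has_error()) {
    return cpp::fail(unload_res.error());
  }
  return true;  // proceed to update the default variant
}

int main() {
  auto r = SwitchVariant(false);
  if (r.has_error()) std::cout << r.error() << std::endl;  // prints "engine busy"
  return 0;
}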

engine/services/inference_service.cc
Lines changed: 8 additions & 11 deletions

@@ -91,18 +91,11 @@ InferResult InferenceService::LoadModel(
   return std::make_pair(stt, r);
 }

-InferResult InferenceService::UnloadModel(
-    std::shared_ptr<Json::Value> json_body) {
-  std::string engine_type;
-  if (!HasFieldInReq(json_body, "engine")) {
-    engine_type = kLlamaRepo;
-  } else {
-    engine_type = (*(json_body)).get("engine", kLlamaRepo).asString();
-  }
-
+InferResult InferenceService::UnloadModel(const std::string& engine_name,
+                                          const std::string& model_id) {
   Json::Value r;
   Json::Value stt;
-  auto engine_result = engine_service_->GetLoadedEngine(engine_type);
+  auto engine_result = engine_service_->GetLoadedEngine(engine_name);
   if (engine_result.has_error()) {
     Json::Value res;
     res["message"] = "Engine is not loaded yet";
@@ -112,9 +105,13 @@ InferResult InferenceService::UnloadModel(
     return std::make_pair(stt, res);
   }

+  Json::Value json_body;
+  json_body["engine"] = engine_name;
+  json_body["model"] = model_id;
+
   LOG_TRACE << "Start unload model";
   auto engine = std::get<EngineI*>(engine_result.value());
-  engine->UnloadModel(json_body,
+  engine->UnloadModel(std::make_shared<Json::Value>(json_body),
                       [&r, &stt](Json::Value status, Json::Value res) {
                         stt = status;
                         r = res;

engine/services/inference_service.h
Lines changed: 2 additions & 1 deletion

@@ -43,7 +43,8 @@ class InferenceService {

   InferResult LoadModel(std::shared_ptr<Json::Value> json_body);

-  InferResult UnloadModel(std::shared_ptr<Json::Value> json_body);
+  InferResult UnloadModel(const std::string& engine,
+                          const std::string& model_id);

   InferResult GetModelStatus(std::shared_ptr<Json::Value> json_body);
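
Callers now pass two strings instead of a JSON body; the service rebuilds the engine-facing JSON internally (see inference_service.cc above). A self-contained sketch of the calling convention, assuming InferResult is the (status, body) pair of Json::Value that the std::get<0>/std::get<1> usage elsewhere in this commit implies:

#include <json/json.h>

#include <iostream>
#include <string>
#include <utility>

// Stand-in for the InferResult convention used across this commit.
using InferResult = std::pair<Json::Value, Json::Value>;

// Illustrative stub with the new UnloadModel shape; the real method lives on
// InferenceService and dispatches to the loaded engine.
InferResult UnloadModelStub(const std::string& engine,
                            const std::string& model_id) {
  Json::Value status, body;
  status["status_code"] = 200;
  body["message"] = "Model " + model_id + " unloaded from " + engine;
  return std::make_pair(status, body);
}

int main() {
  auto ir = UnloadModelStub("llama-cpp", "tinyllama");  // placeholder names
  std::cout << std::get<0>(ir)["status_code"].asInt() << ": "
            << std::get<1>(ir)["message"].asString() << std::endl;
  return 0;
}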

engine/services/model_service.cc
Lines changed: 24 additions & 32 deletions

@@ -78,7 +78,6 @@ cpp::result<DownloadTask, std::string> GetDownloadTask(
       .host = kHuggingFaceHost,
       .pathParams = {"api", "models", "cortexso", modelId, "tree", branch}};

-  httplib::Client cli(url.GetProtocolAndHost());
   auto result = curl_utils::SimpleGetJson(url.ToFullPath());
   if (result.has_error()) {
     return cpp::fail("Model " + modelId + " not found");
@@ -546,6 +545,14 @@ cpp::result<void, std::string> ModelService::DeleteModel(
   cortex::db::Models modellist_handler;
   config::YamlHandler yaml_handler;

+  auto result = StopModel(model_handle);
+  if (result.has_error()) {
+    CTL_INF("Failed to stop model " << model_handle
+                                    << ", error: " << result.error());
+  } else {
+    CTL_INF("Model " << model_handle << " stopped successfully");
+  }
+
   try {
     auto model_entry = modellist_handler.GetModelInfo(model_handle);
     if (model_entry.has_error()) {
@@ -590,7 +597,7 @@ cpp::result<void, std::string> ModelService::DeleteModel(
 }

 cpp::result<bool, std::string> ModelService::StartModel(
-    const std::string& host, int port, const std::string& model_handle,
+    const std::string& model_handle,
     const StartParameterOverride& params_override) {
   namespace fs = std::filesystem;
   namespace fmu = file_manager_utils;
@@ -627,7 +634,6 @@ cpp::result<bool, std::string> ModelService::StartModel(
     } else {
       bypass_stop_check_set_.insert(model_handle);
     }
-    httplib::Client cli(host + ":" + std::to_string(port));

     json_data["model"] = model_handle;
     if (auto& cpt = params_override.custom_prompt_template;
@@ -674,7 +680,7 @@ cpp::result<bool, std::string> ModelService::StartModel(
 }

 cpp::result<bool, std::string> ModelService::StopModel(
-    const std::string& host, int port, const std::string& model_handle) {
+    const std::string& model_handle) {
   namespace fs = std::filesystem;
   namespace fmu = file_manager_utils;
   cortex::db::Models modellist_handler;
@@ -683,7 +689,7 @@ cpp::result<bool, std::string> ModelService::StopModel(
   try {
     auto bypass_check = (bypass_stop_check_set_.find(model_handle) !=
                          bypass_stop_check_set_.end());
-    Json::Value json_data;
+    std::string engine_name = "";
     if (!bypass_check) {
       auto model_entry = modellist_handler.GetModelInfo(model_handle);
       if (model_entry.has_error()) {
@@ -695,18 +701,13 @@ cpp::result<bool, std::string> ModelService::StopModel(
               fs::path(model_entry.value().path_to_model_yaml))
               .string());
       auto mc = yaml_handler.GetModelConfig();
-      json_data["engine"] = mc.engine;
+      engine_name = mc.engine;
     }
-
-    httplib::Client cli(host + ":" + std::to_string(port));
-    json_data["model"] = model_handle;
     if (bypass_check) {
-      json_data["engine"] = kLlamaEngine;
+      engine_name = kLlamaEngine;
     }
-    CTL_INF(json_data.toStyledString());
     assert(inference_svc_);
-    auto ir =
-        inference_svc_->UnloadModel(std::make_shared<Json::Value>(json_data));
+    auto ir = inference_svc_->UnloadModel(engine_name, model_handle);
     auto status = std::get<0>(ir)["status_code"].asInt();
     auto data = std::get<1>(ir);
     if (status == httplib::StatusCode::OK_200) {
@@ -725,7 +726,7 @@ cpp::result<bool, std::string> ModelService::StopModel(
 }

 cpp::result<bool, std::string> ModelService::GetModelStatus(
-    const std::string& host, int port, const std::string& model_handle) {
+    const std::string& model_handle) {
   namespace fs = std::filesystem;
   namespace fmu = file_manager_utils;
   cortex::db::Models modellist_handler;
@@ -743,29 +744,20 @@ cpp::result<bool, std::string> ModelService::GetModelStatus(
               .string());
     auto mc = yaml_handler.GetModelConfig();

-    httplib::Client cli(host + ":" + std::to_string(port));
-
     Json::Value root;
     root["model"] = model_handle;
     root["engine"] = mc.engine;

-    auto data_str = json_helper::DumpJsonString(root);
-
-    auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
-                        data_str.data(), data_str.size(), "application/json");
-    if (res) {
-      if (res->status == httplib::StatusCode::OK_200) {
-        return true;
-      } else {
-        CTL_INF("Model failed to get model status with status code: "
-                << res->status);
-        return cpp::fail("Model failed to get model status with status code: " +
-                         std::to_string(res->status));
-      }
+    auto ir =
+        inference_svc_->GetModelStatus(std::make_shared<Json::Value>(root));
+    auto status = std::get<0>(ir)["status_code"].asInt();
+    auto data = std::get<1>(ir);
+    if (status == httplib::StatusCode::OK_200) {
+      return true;
     } else {
-      auto err = res.error();
-      CTL_WRN("HTTP error: " << httplib::to_string(err));
-      return cpp::fail("HTTP error: " + httplib::to_string(err));
+      CTL_ERR("Model failed to get model status with status code: " << status);
+      return cpp::fail("Model failed to get model status: " +
+                       data["message"].asString());
     }
   } catch (const std::exception& e) {
     return cpp::fail("Fail to get model status with ID '" + model_handle +
