This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 48b09fe: clean up
1 parent: b4cf3cb

File tree: 11 files changed (+62, -76 lines)

engine/cli/commands/model_del_cmd.cc
Lines changed: 1 addition & 1 deletion

@@ -15,7 +15,7 @@ void ModelDelCmd::Exec(const std::string& host, int port,
       return;
     }
   }
-
+
   // Call API to delete model
   httplib::Client cli(host + ":" + std::to_string(port));
   auto res = cli.Delete("/v1/models/" + model_handle);
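
For orientation: the CLI talks to the local API server over plain HTTP. A minimal standalone sketch of the delete call shown above, using cpp-httplib as the file does; the host, port, and model handle are illustrative placeholders, not values from this commit:

#include <httplib.h>

#include <iostream>
#include <string>

int main() {
  // Illustrative values; the real CLI receives host and port from its config.
  std::string host = "127.0.0.1";
  int port = 39281;
  std::string model_handle = "tinyllama";

  // Same call shape as model_del_cmd.cc above: DELETE /v1/models/<handle>.
  httplib::Client cli(host + ":" + std::to_string(port));
  auto res = cli.Delete("/v1/models/" + model_handle);
  if (res && res->status == 200) {
    std::cout << "Model deleted" << std::endl;
  } else {
    std::cout << "Delete failed" << std::endl;
  }
  return 0;
}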

engine/cli/commands/model_stop_cmd.cc
Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 #include "model_stop_cmd.h"
-#include "utils/logging_utils.h"
 #include "httplib.h"
+#include "utils/logging_utils.h"

 namespace commands {

engine/controllers/models.cc
Lines changed: 7 additions & 14 deletions

@@ -348,8 +348,7 @@ void Models::ImportModel(
         std::filesystem::path(model_yaml_path).parent_path() /
         std::filesystem::path(modelPath).filename();
     std::filesystem::copy_file(
-        modelPath, file_path,
-        std::filesystem::copy_options::update_existing);
+        modelPath, file_path, std::filesystem::copy_options::update_existing);
     model_config.files.push_back(file_path.string());
     auto size = std::filesystem::file_size(file_path);
     model_config.size = size;
@@ -409,7 +408,6 @@ void Models::StartModel(
     std::function<void(const HttpResponsePtr&)>&& callback) {
   if (!http_util::HasFieldInReq(req, callback, "model"))
     return;
-  auto config = file_manager_utils::GetCortexConfig();
   auto model_handle = (*(req->getJsonObject())).get("model", "").asString();
   StartParameterOverride params_override;
   if (auto& o = (*(req->getJsonObject()))["prompt_template"]; !o.isNull()) {
@@ -480,9 +478,7 @@ void Models::StartModel(
     return;
   }

-  auto result = model_service_->StartModel(config.apiServerHost,
-                                           std::stoi(config.apiServerPort),
-                                           model_handle, params_override);
+  auto result = model_service_->StartModel(model_handle, params_override);
   if (result.has_error()) {
     Json::Value ret;
     ret["message"] = result.error();
@@ -500,12 +496,12 @@ void Models::StartModel(

 void Models::StopModel(const HttpRequestPtr& req,
                        std::function<void(const HttpResponsePtr&)>&& callback) {
-  if (!http_util::HasFieldInReq(req, callback, "model"))
+  if (!http_util::HasFieldInReq(req, callback, "model")) {
     return;
-  auto config = file_manager_utils::GetCortexConfig();
+  }
+
   auto model_handle = (*(req->getJsonObject())).get("model", "").asString();
-  auto result = model_service_->StopModel(
-      config.apiServerHost, std::stoi(config.apiServerPort), model_handle);
+  auto result = model_service_->StopModel(model_handle);
   if (result.has_error()) {
     Json::Value ret;
     ret["message"] = result.error();
@@ -525,10 +521,7 @@ void Models::GetModelStatus(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback,
     const std::string& model_id) {
-  auto config = file_manager_utils::GetCortexConfig();
-
-  auto result = model_service_->GetModelStatus(
-      config.apiServerHost, std::stoi(config.apiServerPort), model_id);
+  auto result = model_service_->GetModelStatus(model_id);
   if (result.has_error()) {
     Json::Value ret;
     ret["message"] = result.error();

engine/controllers/server.cc
Lines changed: 4 additions & 1 deletion

@@ -67,7 +67,10 @@ void server::Embedding(const HttpRequestPtr& req,
 void server::UnloadModel(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback) {
-  auto ir = inference_svc_->UnloadModel(req->getJsonObject());
+  auto engine = (*req->getJsonObject())["engine"].asString();
+  auto model = (*req->getJsonObject())["model_id"].asString();
+  CTL_INF("Unloading model: " + model + ", engine: " + engine);
+  auto ir = inference_svc_->UnloadModel(engine, model);
   auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(ir));
   resp->setStatusCode(
       static_cast<HttpStatusCode>(std::get<0>(ir)["status_code"].asInt()));
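
The handler no longer forwards the raw request body; it pulls "engine" and "model_id" out of the JSON and passes plain strings down to InferenceService::UnloadModel. A minimal jsoncpp sketch of the payload the endpoint now expects; the values are placeholders:

#include <json/json.h>

#include <iostream>

int main() {
  // Request body as server::UnloadModel now parses it: only "engine" and
  // "model_id" are read.
  Json::Value body;
  body["engine"] = "llama-cpp";    // placeholder engine name
  body["model_id"] = "tinyllama";  // placeholder model id
  std::cout << body.toStyledString() << std::endl;
  return 0;
}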

engine/services/download_service.cc
Lines changed: 0 additions & 9 deletions

@@ -1,6 +1,5 @@
 #include "download_service.h"
 #include <curl/curl.h>
-#include <httplib.h>
 #include <stdio.h>
 #include <filesystem>
 #include <mutex>
@@ -12,14 +11,6 @@
 #include "utils/logging_utils.h"
 #include "utils/result.hpp"

-#ifdef _WIN32
-#define ftell64(f) _ftelli64(f)
-#define fseek64(f, o, w) _fseeki64(f, o, w)
-#else
-#define ftell64(f) ftello(f)
-#define fseek64(f, o, w) fseeko(f, o, w)
-#endif
-
 namespace {
 size_t WriteCallback(char* ptr, size_t size, size_t nmemb, void* userdata) {
   size_t written = fwrite(ptr, size, nmemb, (FILE*)userdata);

engine/services/download_service.h
Lines changed: 0 additions & 1 deletion

@@ -2,7 +2,6 @@

 #include <curl/curl.h>
 #include <eventpp/eventqueue.h>
-#include <filesystem>
 #include <functional>
 #include <optional>
 #include <queue>

engine/services/engine_service.cc
Lines changed: 11 additions & 0 deletions

@@ -557,6 +557,17 @@ EngineService::SetDefaultEngineVariant(const std::string& engine,
                      " is not installed yet!");
   }

+  if (IsEngineLoaded(ne)) {
+    CTL_INF("Engine " << ne << " is already loaded, unloading it");
+    auto unload_res = UnloadEngine(ne);
+    if (unload_res.has_error()) {
+      CTL_INF("Failed to unload engine: " << unload_res.error());
+      return cpp::fail(unload_res.error());
+    } else {
+      CTL_INF("Engine " << ne << " unloaded successfully");
+    }
+  }
+
   auto normalized_version = string_utils::RemoveSubstring(version, "v");

   auto config = file_manager_utils::GetCortexConfig();
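
The added guard keeps SetDefaultEngineVariant from switching the default variant while the old engine binary is still resident: it unloads the loaded engine first and propagates the error if the unload fails. A small sketch of the same check-and-propagate pattern with cpp::result, using the repo's result.hpp; the helper names and failure message are illustrative:

#include <iostream>
#include <string>

#include "utils/result.hpp"  // cpp::result / cpp::fail, as used in this repo

// Illustrative stand-in with the same result shape as UnloadEngine.
cpp::result<bool, std::string> Unload(bool ok) {
  if (!ok) return cpp::fail(std::string("engine busy"));
  return true;
}

cpp::result<bool, std::string> SwitchVariant(bool engine_ok) {
  // Same pattern as the diff: fail fast on unload errors, else continue.
  auto unload_res = Unload(engine_ok);
  if (unload_res.has_error()) {
    return cpp::fail(unload_res.error());
  }
  return true;  // proceed to update the default variant
}

int main() {
  auto r = SwitchVariant(false);
  if (r.has_error()) std::cout << r.error() << std::endl;  // prints "engine busy"
  return 0;
}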

engine/services/inference_service.cc
Lines changed: 8 additions & 11 deletions

@@ -91,18 +91,11 @@ InferResult InferenceService::LoadModel(
   return std::make_pair(stt, r);
 }

-InferResult InferenceService::UnloadModel(
-    std::shared_ptr<Json::Value> json_body) {
-  std::string engine_type;
-  if (!HasFieldInReq(json_body, "engine")) {
-    engine_type = kLlamaRepo;
-  } else {
-    engine_type = (*(json_body)).get("engine", kLlamaRepo).asString();
-  }
-
+InferResult InferenceService::UnloadModel(const std::string& engine_name,
+                                          const std::string& model_id) {
   Json::Value r;
   Json::Value stt;
-  auto engine_result = engine_service_->GetLoadedEngine(engine_type);
+  auto engine_result = engine_service_->GetLoadedEngine(engine_name);
   if (engine_result.has_error()) {
     Json::Value res;
     res["message"] = "Engine is not loaded yet";
@@ -112,9 +105,13 @@ InferResult InferenceService::UnloadModel(
     return std::make_pair(stt, res);
   }

+  Json::Value json_body;
+  json_body["engine"] = engine_name;
+  json_body["model"] = model_id;
+
   LOG_TRACE << "Start unload model";
   auto engine = std::get<EngineI*>(engine_result.value());
-  engine->UnloadModel(json_body,
+  engine->UnloadModel(std::make_shared<Json::Value>(json_body),
                       [&r, &stt](Json::Value status, Json::Value res) {
                         stt = status;
                         r = res;

engine/services/inference_service.h
Lines changed: 2 additions & 1 deletion

@@ -43,7 +43,8 @@ class InferenceService {

   InferResult LoadModel(std::shared_ptr<Json::Value> json_body);

-  InferResult UnloadModel(std::shared_ptr<Json::Value> json_body);
+  InferResult UnloadModel(const std::string& engine,
+                          const std::string& model_id);

   InferResult GetModelStatus(std::shared_ptr<Json::Value> json_body);
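
Callers now pass two strings instead of a JSON body; the service rebuilds the engine-facing JSON internally (see inference_service.cc above). A self-contained sketch of the calling convention, assuming InferResult is the (status, body) pair of Json::Value that the std::get<0>/std::get<1> usage elsewhere in this commit implies:

#include <json/json.h>

#include <iostream>
#include <string>
#include <utility>

// Stand-in for the InferResult convention used across this commit.
using InferResult = std::pair<Json::Value, Json::Value>;

// Illustrative stub with the new UnloadModel shape; the real method lives on
// InferenceService and dispatches to the loaded engine.
InferResult UnloadModelStub(const std::string& engine,
                            const std::string& model_id) {
  Json::Value status, body;
  status["status_code"] = 200;
  body["message"] = "Model " + model_id + " unloaded from " + engine;
  return std::make_pair(status, body);
}

int main() {
  auto ir = UnloadModelStub("llama-cpp", "tinyllama");  // placeholder names
  std::cout << std::get<0>(ir)["status_code"].asInt() << ": "
            << std::get<1>(ir)["message"].asString() << std::endl;
  return 0;
}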

engine/services/model_service.cc
Lines changed: 24 additions & 32 deletions

@@ -78,7 +78,6 @@ cpp::result<DownloadTask, std::string> GetDownloadTask(
       .host = kHuggingFaceHost,
       .pathParams = {"api", "models", "cortexso", modelId, "tree", branch}};

-  httplib::Client cli(url.GetProtocolAndHost());
   auto result = curl_utils::SimpleGetJson(url.ToFullPath());
   if (result.has_error()) {
     return cpp::fail("Model " + modelId + " not found");
@@ -546,6 +545,14 @@ cpp::result<void, std::string> ModelService::DeleteModel(
   cortex::db::Models modellist_handler;
   config::YamlHandler yaml_handler;

+  auto result = StopModel(model_handle);
+  if (result.has_error()) {
+    CTL_INF("Failed to stop model " << model_handle
+                                    << ", error: " << result.error());
+  } else {
+    CTL_INF("Model " << model_handle << " stopped successfully");
+  }
+
   try {
     auto model_entry = modellist_handler.GetModelInfo(model_handle);
     if (model_entry.has_error()) {
@@ -590,7 +597,7 @@ cpp::result<void, std::string> ModelService::DeleteModel(
 }

 cpp::result<bool, std::string> ModelService::StartModel(
-    const std::string& host, int port, const std::string& model_handle,
+    const std::string& model_handle,
     const StartParameterOverride& params_override) {
   namespace fs = std::filesystem;
   namespace fmu = file_manager_utils;
@@ -627,7 +634,6 @@ cpp::result<bool, std::string> ModelService::StartModel(
     } else {
       bypass_stop_check_set_.insert(model_handle);
     }
-    httplib::Client cli(host + ":" + std::to_string(port));

     json_data["model"] = model_handle;
     if (auto& cpt = params_override.custom_prompt_template;
@@ -674,7 +680,7 @@ cpp::result<bool, std::string> ModelService::StartModel(
 }

 cpp::result<bool, std::string> ModelService::StopModel(
-    const std::string& host, int port, const std::string& model_handle) {
+    const std::string& model_handle) {
   namespace fs = std::filesystem;
   namespace fmu = file_manager_utils;
   cortex::db::Models modellist_handler;
@@ -683,7 +689,7 @@ cpp::result<bool, std::string> ModelService::StopModel(
   try {
     auto bypass_check = (bypass_stop_check_set_.find(model_handle) !=
                          bypass_stop_check_set_.end());
-    Json::Value json_data;
+    std::string engine_name = "";
     if (!bypass_check) {
       auto model_entry = modellist_handler.GetModelInfo(model_handle);
       if (model_entry.has_error()) {
@@ -695,18 +701,13 @@ cpp::result<bool, std::string> ModelService::StopModel(
               fs::path(model_entry.value().path_to_model_yaml))
               .string());
       auto mc = yaml_handler.GetModelConfig();
-      json_data["engine"] = mc.engine;
+      engine_name = mc.engine;
     }
-
-    httplib::Client cli(host + ":" + std::to_string(port));
-    json_data["model"] = model_handle;
     if (bypass_check) {
-      json_data["engine"] = kLlamaEngine;
+      engine_name = kLlamaEngine;
     }
-    CTL_INF(json_data.toStyledString());
     assert(inference_svc_);
-    auto ir =
-        inference_svc_->UnloadModel(std::make_shared<Json::Value>(json_data));
+    auto ir = inference_svc_->UnloadModel(engine_name, model_handle);
     auto status = std::get<0>(ir)["status_code"].asInt();
     auto data = std::get<1>(ir);
     if (status == httplib::StatusCode::OK_200) {
@@ -725,7 +726,7 @@ cpp::result<bool, std::string> ModelService::StopModel(
 }

 cpp::result<bool, std::string> ModelService::GetModelStatus(
-    const std::string& host, int port, const std::string& model_handle) {
+    const std::string& model_handle) {
   namespace fs = std::filesystem;
   namespace fmu = file_manager_utils;
   cortex::db::Models modellist_handler;
@@ -743,29 +744,20 @@ cpp::result<bool, std::string> ModelService::GetModelStatus(
               .string());
     auto mc = yaml_handler.GetModelConfig();

-    httplib::Client cli(host + ":" + std::to_string(port));
-
     Json::Value root;
     root["model"] = model_handle;
     root["engine"] = mc.engine;

-    auto data_str = json_helper::DumpJsonString(root);
-
-    auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
-                        data_str.data(), data_str.size(), "application/json");
-    if (res) {
-      if (res->status == httplib::StatusCode::OK_200) {
-        return true;
-      } else {
-        CTL_INF("Model failed to get model status with status code: "
-                << res->status);
-        return cpp::fail("Model failed to get model status with status code: " +
-                         std::to_string(res->status));
-      }
+    auto ir =
+        inference_svc_->GetModelStatus(std::make_shared<Json::Value>(root));
+    auto status = std::get<0>(ir)["status_code"].asInt();
+    auto data = std::get<1>(ir);
+    if (status == httplib::StatusCode::OK_200) {
+      return true;
     } else {
-      auto err = res.error();
-      CTL_WRN("HTTP error: " << httplib::to_string(err));
-      return cpp::fail("HTTP error: " + httplib::to_string(err));
+      CTL_ERR("Model failed to get model status with status code: " << status);
+      return cpp::fail("Model failed to get model status: " +
+                       data["message"].asString());
     }
   } catch (const std::exception& e) {
     return cpp::fail("Fail to get model status with ID '" + model_handle +
