This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 7f52abe

Author: Gabrielle Ong
Merge branch 'dev' into feat/cli-docs-models-engines
2 parents: a5c0172 + 55bbe0d

File tree: 8 files changed (+69, -7 lines)


docs/docs/installation/mac.mdx

Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@ Before installation, make sure that you have met the required [dependencies](#de
 - Stable: https://github.com/janhq/cortex.cpp/releases
 - Beta: https://github.com/janhq/cortex.cpp/releases
 - Nightly: https://github.com/janhq/cortex.cpp/releases
-2. Ensure that Cortex.cpp is sucessfulyy installed:
+2. Ensure that Cortex.cpp is sucessfully installed:
 ```bash
 # Stable
 cortex

docs/docs/overview.mdx

Lines changed: 1 addition & 1 deletion

@@ -51,7 +51,7 @@ Cortex.cpp allows users to pull models from multiple Model Hubs, offering flexib
 | Model /Engine | llama.cpp | Command |
 | -------------- | --------------------- | ----------------------------- |
 | phi-3.5 || cortex run phi3.5 |
-| llama3.2 || cortex run llama3.1 |
+| llama3.2 || cortex run llama3.2 |
 | llama3.1 || cortex run llama3.1 |
 | codestral || cortex run codestral |
 | gemma2 || cortex run gemma2 |

docs/static/openapi/cortex.json

Lines changed: 12 additions & 1 deletion

@@ -642,7 +642,8 @@
   "example": {
     "model": "model-id",
     "modelPath": "/path/to/gguf",
-    "name": "model display name"
+    "name": "model display name",
+    "option": "symlink"
   }
  }
 }

@@ -3187,6 +3188,11 @@
 "name": {
   "type": "string",
   "description": "The display name of the model."
+},
+"option": {
+  "type": "string",
+  "description": "Import options such as symlink or copy.",
+  "enum": ["symlink", "copy"]
 }
 },
 "required": ["model", "modelPath"]

@@ -3434,6 +3440,11 @@
 "description": "To enable mmap, default is true",
 "example": true
 },
+"size": {
+  "type": "number",
+  "description": "The model file size in bytes",
+  "example": 1073741824
+},
 "engine": {
   "type": "string",
   "description": "The engine to use.",

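For reference, the new option field can be exercised against a running server with a request like the minimal sketch below. The server address and both paths are assumptions borrowed from the e2e test added in this commit, not fixed values.

```python
# Minimal sketch of the updated import request, assuming a local Cortex.cpp
# server on port 3928 (the address used by the e2e tests). The GGUF path is a
# placeholder; point it at a real file.
import requests

body = {
    "model": "testing-model",            # id to register the model under
    "modelPath": "/path/to/local/gguf",  # absolute path to an existing GGUF file
    "name": "model display name",        # optional display name
    "option": "copy",                    # new field: "symlink" (default) or "copy"
}

response = requests.post("http://localhost:3928/models/import", json=body)
print(response.status_code, response.text)
```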
engine/config/model_config.h

Lines changed: 5 additions & 0 deletions

@@ -58,6 +58,7 @@ struct ModelConfig {
   bool ignore_eos = false;
   int n_probs = 0;
   int min_keep = 0;
+  uint64_t size = 0;
   std::string grammar;

   void FromJson(const Json::Value& json) {

@@ -70,6 +71,8 @@ struct ModelConfig {
     // model = json["model"].asString();
     if (json.isMember("version"))
       version = json["version"].asString();
+    if (json.isMember("size"))
+      size = json["size"].asUInt64();

     if (json.isMember("stop") && json["stop"].isArray()) {
       stop.clear();

@@ -176,6 +179,7 @@ struct ModelConfig {
     obj["name"] = name;
     obj["model"] = model;
     obj["version"] = version;
+    obj["size"] = size;

     Json::Value stop_array(Json::arrayValue);
     for (const auto& s : stop) {

@@ -269,6 +273,7 @@ struct ModelConfig {
     oss << format_utils::print_comment("END REQUIRED");
     oss << format_utils::print_comment("BEGIN OPTIONAL");

+    oss << format_utils::print_float("size", size);
     oss << format_utils::print_bool("stream", stream);
     oss << format_utils::print_float("top_p", top_p);
     oss << format_utils::print_float("temperature", temperature);

engine/config/yaml_config.cc

Lines changed: 5 additions & 0 deletions

@@ -75,6 +75,8 @@ void YamlHandler::ModelConfigFromYaml() {
     tmp.model = yaml_node_["model"].as<std::string>();
     if (yaml_node_["version"])
       tmp.version = yaml_node_["version"].as<std::string>();
+    if (yaml_node_["size"])
+      tmp.size = yaml_node_["size"].as<uint64_t>();
     if (yaml_node_["engine"])
       tmp.engine = yaml_node_["engine"].as<std::string>();
     if (yaml_node_["prompt_template"]) {

@@ -266,6 +268,8 @@ void YamlHandler::UpdateModelConfig(ModelConfig new_model_config) {
     if (!model_config_.grammar.empty())
       yaml_node_["grammar"] = model_config_.grammar;

+    yaml_node_["size"] = model_config_.size;
+
     yaml_node_["created"] = std::time(nullptr);
   } catch (const std::exception& e) {
     std::cerr << "Error when update model config : " << e.what() << std::endl;

@@ -318,6 +322,7 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
   outFile << "# END REQUIRED\n";
   outFile << "\n";
   outFile << "# BEGIN OPTIONAL\n";
+  outFile << format_utils::writeKeyValue("size", yaml_node_["size"]);
   outFile << format_utils::writeKeyValue("stream", yaml_node_["stream"],
                                          "Default true?");
  outFile << format_utils::writeKeyValue("top_p", yaml_node_["top_p"],
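
Together with the model_config.h change above, this persists the size value into the model's YAML config (model.yml) alongside the other optional keys. One quick way to confirm the key is written is to read the generated file back with PyYAML, as sketched below; the file path is a placeholder, since the exact location depends on your Cortex data directory.

```python
# Minimal sketch: check that a model's YAML config now carries a "size" key.
# The path is a placeholder for the model.yml of a model created after this change.
import yaml  # PyYAML

with open("/path/to/model.yml") as f:
    config = yaml.safe_load(f)

# Models downloaded after this change store the summed download size in bytes;
# imported or older models may simply report 0.
print("size (bytes):", config.get("size", 0))
```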

engine/controllers/models.cc

Lines changed: 15 additions & 1 deletion

@@ -1,5 +1,6 @@
 #include "database/models.h"
 #include <drogon/HttpTypes.h>
+#include <filesystem>
 #include <optional>
 #include "config/gguf_parser.h"
 #include "config/yaml_config.h"

@@ -320,6 +321,7 @@ void Models::ImportModel(
   auto modelHandle = (*(req->getJsonObject())).get("model", "").asString();
   auto modelPath = (*(req->getJsonObject())).get("modelPath", "").asString();
   auto modelName = (*(req->getJsonObject())).get("name", "").asString();
+  auto option = (*(req->getJsonObject())).get("option", "symlink").asString();
   config::GGUFHandler gguf_handler;
   config::YamlHandler yaml_handler;
   cortex::db::Models modellist_utils_obj;

@@ -339,7 +341,19 @@
       std::filesystem::path(model_yaml_path).parent_path());
   gguf_handler.Parse(modelPath);
   config::ModelConfig model_config = gguf_handler.GetModelConfig();
-  model_config.files.push_back(modelPath);
+  // There are 2 options: symlink and copy
+  if (option == "copy") {
+    // Copy GGUF file to the destination path
+    std::filesystem::path file_path =
+        std::filesystem::path(model_yaml_path).parent_path() /
+        std::filesystem::path(modelPath).filename();
+    std::filesystem::copy_file(
+        modelPath, file_path,
+        std::filesystem::copy_options::update_existing);
+    model_config.files.push_back(file_path.string());
+  } else {
+    model_config.files.push_back(modelPath);
+  }
   model_config.model = modelHandle;
   model_config.name = modelName.empty() ? model_config.name : modelName;
   yaml_handler.UpdateModelConfig(model_config);
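
This is where the new option takes effect: with "copy" the GGUF is duplicated into the model's folder (next to the generated YAML) and files[0] records that copy, while the default "symlink" path simply records the original file location. A small client-side sketch of the difference follows, reusing the local server address from the e2e tests (an assumption).

```python
# Minimal sketch: compare where files[0] points for each import option.
# Server address and GGUF path are assumptions taken from the e2e test.
import requests

BASE = "http://localhost:3928"
GGUF = "/path/to/local/gguf"

def import_and_inspect(model_id: str, option: str) -> None:
    body = {"model": model_id, "modelPath": GGUF, "option": option}
    assert requests.post(f"{BASE}/models/import", json=body).status_code == 200
    files = requests.get(f"{BASE}/models/{model_id}").json()["files"]
    print(option, "->", files[0])

import_and_inspect("demo-symlink", "symlink")  # expected: the original GGUF path
import_and_inspect("demo-copy", "copy")        # expected: a path inside the model folder
```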

engine/e2e-test/test_api_model_import.py

Lines changed: 15 additions & 0 deletions

@@ -29,6 +29,21 @@ def test_model_import_with_name_should_be_success(self):
         response = requests.post("http://localhost:3928/models/import", json=body_json)
         assert response.status_code == 200

+    @pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.")
+    def test_model_import_with_name_should_be_success(self):
+        body_json = {'model': 'testing-model',
+                     'modelPath': '/path/to/local/gguf',
+                     'name': 'test_model',
+                     'option': 'copy'}
+        response = requests.post("http://localhost:3928/models/import", json=body_json)
+        assert response.status_code == 200
+        # Test imported path
+        response = requests.get("http://localhost:3928/models/testing-model")
+        assert response.status_code == 200
+        # Since this is a dynamic test - require actual file path
+        # it's not safe to assert with the gguf file name
+        assert response.json()['files'][0] != '/path/to/local/gguf'
+
     def test_model_import_with_invalid_path_should_fail(self):
         body_json = {'model': 'tinyllama:gguf',
                      'modelPath': '/invalid/path/to/gguf'}

engine/services/model_service.cc

Lines changed: 15 additions & 3 deletions

@@ -18,7 +18,8 @@
 namespace {
 void ParseGguf(const DownloadItem& ggufDownloadItem,
                std::optional<std::string> author,
-               std::optional<std::string> name) {
+               std::optional<std::string> name,
+               std::optional<std::uint64_t> size) {
   namespace fs = std::filesystem;
   namespace fmu = file_manager_utils;
   config::GGUFHandler gguf_handler;

@@ -35,6 +36,7 @@ void ParseGguf(const DownloadItem& ggufDownloadItem,
   model_config.model = ggufDownloadItem.id;
   model_config.name =
       name.has_value() ? name.value() : gguf_handler.GetModelConfig().name;
+  model_config.size = size.value_or(0);
   yaml_handler.UpdateModelConfig(model_config);

   auto yaml_path{ggufDownloadItem.localPath};

@@ -284,8 +286,13 @@ cpp::result<DownloadTask, std::string> ModelService::HandleDownloadUrlAsync(
       }}}};

   auto on_finished = [author, temp_name](const DownloadTask& finishedTask) {
+    // Sum downloadedBytes from all items
+    uint64_t model_size = 0;
+    for (const auto& item : finishedTask.items) {
+      model_size = model_size + item.bytes.value_or(0);
+    }
     auto gguf_download_item = finishedTask.items[0];
-    ParseGguf(gguf_download_item, author, temp_name);
+    ParseGguf(gguf_download_item, author, temp_name, model_size);
   };

   downloadTask.id = unique_model_id;

@@ -349,8 +356,13 @@ cpp::result<std::string, std::string> ModelService::HandleUrl(
       }}}};

   auto on_finished = [author](const DownloadTask& finishedTask) {
+    // Sum downloadedBytes from all items
+    uint64_t model_size = 0;
+    for (const auto& item : finishedTask.items) {
+      model_size = model_size + item.bytes.value_or(0);
+    }
     auto gguf_download_item = finishedTask.items[0];
-    ParseGguf(gguf_download_item, author, std::nullopt);
+    ParseGguf(gguf_download_item, author, std::nullopt, model_size);
   };

   auto result = download_service_->AddDownloadTask(downloadTask, on_finished);
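
With the callbacks now summing item.bytes across every downloaded item and passing the total into ParseGguf, a model pulled after this change should report a non-zero size. Assuming the GET /models/{id} response exposes the serialized config (the e2e test already reads its files field the same way), a quick check could look like the sketch below; the model id and server address are assumptions.

```python
# Minimal sketch: read the reported size of a model downloaded after this change.
# "my-downloaded-model" and the local server address are assumptions.
import requests

model = requests.get("http://localhost:3928/models/my-downloaded-model").json()

# size should equal the summed downloaded bytes; entries created before this
# change may report 0 or omit the key entirely.
print("size (bytes):", model.get("size", 0))
```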
