This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Merged
Changes from 1 commit
3 changes: 2 additions & 1 deletion engine/cli/main.cc
@@ -8,6 +8,7 @@
 #include "utils/file_manager_utils.h"
 #include "utils/logging_utils.h"
 #include "utils/system_info_utils.h"
+#include "utils/widechar_conv.h"

 #if defined(__APPLE__) && defined(__MACH__)
 #include <libgen.h>  // for dirname()
@@ -46,7 +47,7 @@ void SetupLogger(trantor::FileLogger& async_logger, bool verbose) {

   std::filesystem::create_directories(
 #if defined(_WIN32)
-      std::filesystem::u8path(config.logFolderPath) /
+      std::filesystem::path(cortex::wc::Utf8ToWstring(config.logFolderPath)) /
 #else
       std::filesystem::path(config.logFolderPath) /
 #endif
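Note: std::filesystem::u8path is deprecated in C++20, and on Windows constructing the path from a wide string is the reliable way to preserve non-ASCII log-folder paths. The same substitution appears in engine/main.cc below. A minimal sketch of what a helper like cortex::wc::Utf8ToWstring presumably does (the real implementation lives in utils/widechar_conv.h; this stand-in assumes the Win32 MultiByteToWideChar API):

#include <string>
#include <windows.h>

// Hypothetical stand-in for cortex::wc::Utf8ToWstring.
std::wstring Utf8ToWstring(const std::string& utf8) {
  if (utf8.empty()) return std::wstring();
  // First call computes the required length in wide characters.
  int len = MultiByteToWideChar(CP_UTF8, 0, utf8.data(),
                                static_cast<int>(utf8.size()), nullptr, 0);
  std::wstring out(static_cast<size_t>(len), L'\0');
  MultiByteToWideChar(CP_UTF8, 0, utf8.data(), static_cast<int>(utf8.size()),
                      &out[0], len);
  return out;
}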
3 changes: 2 additions & 1 deletion engine/common/message.h
@@ -107,7 +107,8 @@ struct Message : JsonSerializable {
         std::move(root.get("object", "thread.message").asString());
     message.created_at = root["created_at"].asUInt();
     if (message.created_at == 0 && root["created"].asUInt64() != 0) {
-      message.created_at = root["created"].asUInt64() / 1000;
+      message.created_at =
+          static_cast<uint32_t>(root["created"].asUInt64() / 1000);
     }
     message.thread_id = std::move(root["thread_id"].asString());
     message.status = StatusFromString(std::move(root["status"].asString()));
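The cast makes the millisecond-to-second narrowing explicit: "created" arrives as a 64-bit millisecond timestamp while created_at is stored in a 32-bit field, so the bare division drew a narrowing-conversion warning. Illustrative sketch, with field widths assumed from the surrounding code:

#include <cstdint>

// "created" is Unix milliseconds; created_at holds Unix seconds.
uint32_t ToCreatedAt(uint64_t created_ms) {
  return static_cast<uint32_t>(created_ms / 1000);  // intended narrowing
}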
6 changes: 3 additions & 3 deletions engine/config/model_config.h
@@ -35,8 +35,7 @@ struct RemoteModelConfig {

     // Load basic string fields
     model = json.get("model", model).asString();
-    header_template =
-        json.get("header_template", header_template).asString();
+    header_template = json.get("header_template", header_template).asString();
     engine = json.get("engine", engine).asString();
     version = json.get("version", version).asString();
     created =
@@ -405,7 +404,8 @@ struct ModelConfig {
     oss << format_utils::print_comment("END REQUIRED");
     oss << format_utils::print_comment("BEGIN OPTIONAL");

-    oss << format_utils::print_float("size", size);
+    oss << format_utils::print_kv("size", std::to_string(size),
+                                  format_utils::MAGENTA);
     oss << format_utils::print_bool("stream", stream);
     oss << format_utils::print_float("top_p", top_p);
     oss << format_utils::print_float("temperature", temperature);
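Printing size through print_float would push a large byte count through float formatting, which loses precision at that magnitude; routing it through std::to_string keeps the exact integer. A small illustration (the size value is hypothetical):

#include <cstdint>
#include <iostream>
#include <string>

int main() {
  uint64_t size = 4368439584;                     // hypothetical model size in bytes
  std::cout << static_cast<float>(size) << "\n";  // 4.36844e+09 -- precision lost
  std::cout << std::to_string(size) << "\n";      // 4368439584 -- exact
}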
2 changes: 1 addition & 1 deletion engine/main.cc
@@ -105,7 +105,7 @@ void RunServer(std::optional<std::string> host, std::optional<int> port,
   // Create logs/ folder and setup log to file
   std::filesystem::create_directories(
 #if defined(_WIN32)
-      std::filesystem::u8path(config.logFolderPath) /
+      std::filesystem::path(cortex::wc::Utf8ToWstring(config.logFolderPath)) /
 #else
       std::filesystem::path(config.logFolderPath) /
 #endif
8 changes: 4 additions & 4 deletions engine/services/model_service.cc
@@ -315,7 +315,7 @@ cpp::result<DownloadTask, std::string> ModelService::HandleDownloadUrlAsync(

   try {
     std::filesystem::create_directories(local_path.parent_path());
-  } catch (const std::filesystem::filesystem_error& e) {
+  } catch (const std::filesystem::filesystem_error&) {
     // if file exist, remove it
     std::filesystem::remove(local_path.parent_path());
     std::filesystem::create_directories(local_path.parent_path());
@@ -380,7 +380,7 @@ ModelService::EstimateModel(const std::string& model_handle,
   auto mc = yaml_handler.GetModelConfig();
   assert(hw_service_);
   auto hw_info = hw_service_->GetHardwareInfo();
-  auto free_vram_MiB = 0u;
+  int64_t free_vram_MiB = 0;
   for (const auto& gpu : hw_info.gpus) {
     free_vram_MiB += gpu.free_vram;
   }
@@ -444,7 +444,7 @@ cpp::result<std::string, std::string> ModelService::HandleUrl(

   try {
     std::filesystem::create_directories(local_path.parent_path());
-  } catch (const std::filesystem::filesystem_error& e) {
+  } catch (const std::filesystem::filesystem_error&) {
     // if file exist, remove it
     std::filesystem::remove(local_path.parent_path());
     std::filesystem::create_directories(local_path.parent_path());
@@ -1326,7 +1326,7 @@ ModelService::MayFallbackToCpu(const std::string& model_path, int ngl,
   }
   // If in GPU acceleration mode:
  // We use all visible GPUs, so only need to sum all free vram
-  auto free_vram_MiB = 0u;
+  int64_t free_vram_MiB = 0;
   for (const auto& gpu : hw_info.gpus) {
     free_vram_MiB += gpu.free_vram;
   }
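The accumulator change matters because auto free_vram_MiB = 0u deduces a 32-bit unsigned int: summing per-GPU free VRAM (which the hardware structs presumably report as a wider signed type) can then truncate or wrap, and later comparisons mix signedness. A sketch of the corrected pattern:

#include <cstdint>
#include <vector>

// Illustrative only; gpu.free_vram's real type comes from the hardware info structs.
int64_t SumFreeVramMiB(const std::vector<int64_t>& per_gpu_free_MiB) {
  int64_t total = 0;  // wide signed accumulator: no wrap, no mixed-sign compares
  for (auto v : per_gpu_free_MiB) {
    total += v;
  }
  return total;
}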
3 changes: 2 additions & 1 deletion engine/utils/command_executor.h
@@ -37,7 +37,8 @@ class CommandExecutor {
     std::array<char, 128> buffer;
     std::string result;

-    while (fgets(buffer.data(), buffer.size(), m_pipe.get()) != nullptr) {
+    while (fgets(buffer.data(), static_cast<int>(buffer.size()),
+                 m_pipe.get()) != nullptr) {
      result += buffer.data();
     }

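fgets takes its count parameter as int while std::array::size() returns size_t, so the implicit conversion drew a warning; the cast is safe for a 128-byte buffer. Self-contained version of the loop for reference:

#include <array>
#include <cstdio>
#include <string>

std::string ReadAll(FILE* pipe) {
  std::array<char, 128> buffer;
  std::string result;
  // fgets's count parameter is int, hence the explicit cast from size_t.
  while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe) != nullptr) {
    result += buffer.data();
  }
  return result;
}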
2 changes: 2 additions & 0 deletions engine/utils/cortex_utils.h
@@ -1,4 +1,6 @@
 #pragma once
+
+
 #include <drogon/HttpClient.h>
 #include <drogon/HttpResponse.h>
 #include <sys/stat.h>
2 changes: 1 addition & 1 deletion engine/utils/format_utils.h
@@ -67,7 +67,7 @@ inline std::string WriteKeyValue(const std::string& key,
         strValue.pop_back();
       }
       out_file << strValue;
-    } catch (const std::exception& e) {
+    } catch (const std::exception&) {
       out_file << value;  // If not a float, write as is
     }
   } else {
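This and the matching catch-site edits in model_service.cc and huggingface_utils.h drop the unused exception name: an anonymous catch still selects the handler by type but avoids unreferenced-variable warnings (e.g. MSVC C4101). Minimal illustration:

#include <exception>

void RunGuarded(void (*fn)()) {
  try {
    fn();
  } catch (const std::exception&) {  // type still matters; the object is never read
    // fall back to a default path
  }
}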
2 changes: 1 addition & 1 deletion engine/utils/hardware/cpu_info.h
@@ -187,7 +187,7 @@ struct CpuInfo {
       return CPU{};
     auto cpu = res[0];
     cortex::cpuid::CpuInfo inst;
-    float usage = GetCPUUsage();
+    auto usage = static_cast<float>(GetCPUUsage());
     return CPU{.cores = cpu.numPhysicalCores(),
                .arch = std::string(GetArch()),
                .model = cpu.modelName(),
22 changes: 11 additions & 11 deletions engine/utils/hardware/gguf/ggml.h
@@ -49,21 +49,21 @@ inline float GetQuantBit(GGMLType gt) {
   switch (gt) {
     case GGML_TYPE_I32:
     case GGML_TYPE_F32:
-      return 32.0;
+      return 32.0f;
     case GGML_TYPE_I16:
     case GGML_TYPE_BF16:
     case GGML_TYPE_F16:
-      return 16.0;
+      return 16.0f;
     case GGML_TYPE_IQ2_S:
     case GGML_TYPE_IQ2_XXS:
     case GGML_TYPE_IQ2_XS:
-      return 2.31;
+      return 2.31f;
     case GGML_TYPE_Q2_K:
-      return 2.5625;
+      return 2.5625f;
     case GGML_TYPE_IQ3_XXS:
     case GGML_TYPE_IQ3_S:
     case GGML_TYPE_Q3_K:
-      return 3.4375;
+      return 3.4375f;
     case GGML_TYPE_Q4_0_4_4:
     case GGML_TYPE_Q4_0_4_8:
     case GGML_TYPE_Q4_0_8_8:
@@ -72,25 +72,25 @@ inline float GetQuantBit(GGMLType gt) {
     case GGML_TYPE_Q4_0:
     case GGML_TYPE_Q4_1:
     case GGML_TYPE_Q4_K:
-      return 4.5;
+      return 4.5f;
     case GGML_TYPE_Q5_0:
     case GGML_TYPE_Q5_1:
     case GGML_TYPE_Q5_K:
-      return 5.5;
+      return 5.5f;
     case GGML_TYPE_Q6_K:
-      return 6.5625;
+      return 6.5625f;
     case GGML_TYPE_I8:
     case GGML_TYPE_Q8_0:
     case GGML_TYPE_Q8_1:
     case GGML_TYPE_Q8_K:
-      return 8.0;
+      return 8.0f;

     case GGML_TYPE_I64:
     case GGML_TYPE_F64:
-      return 64.0;
+      return 64.0f;

     default:
-      return 8.0;
+      return 8.0f;
   }
 }
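The f suffixes keep the literals as float, matching the function's float return type and silencing double-to-float truncation warnings (MSVC C4305); the fractional values are average bits per weight for the block-quantized k-quant and i-quant formats. A sketch of how such a value is presumably consumed (helper name is illustrative):

#include <cstdint>

// Estimate a tensor's storage from element count and average bits per weight.
inline uint64_t TensorBytes(uint64_t n_elements, float quant_bits) {
  return static_cast<uint64_t>(n_elements * quant_bits / 8.0f);
}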
37 changes: 19 additions & 18 deletions engine/utils/hardware/gguf/gguf_file_estimate.h
@@ -6,7 +6,7 @@

 namespace hardware {
 inline uint64_t BytesToMiB(uint64_t b) {
-  return (double)b / 1024 / 1024;
+  return static_cast<uint64_t>((double)b / 1024 / 1024);
 };
 struct RunConfig {
   int ngl;
@@ -91,8 +91,8 @@ inline std::optional<Estimation> EstimateLLaMACppRun(
   // std::cout << n_vocab << std::endl;

   // token_embeddings_size = n_vocab * embedding_length * 2 * quant_bit_in/16 bytes
-  int32_t quant_bit_in = 0;
-  int32_t quant_bit_out = 0;
+  float quant_bit_in = 0;
+  float quant_bit_out = 0;

   for (auto const& ti : (*gf).tensor_infos) {
     if (ti->name == "output.weight") {
@@ -109,16 +109,17 @@ inline std::optional<Estimation> EstimateLLaMACppRun(
   // std::cout << "n_vocab: " << n_vocab << std::endl;
   // std::cout << "file_size: " << file_size << std::endl;
   // Model weight
-  int64_t token_embeddings_size =
-      n_vocab * embedding_length * 2 * quant_bit_in / 16;
-  int64_t output_layer_size =
-      n_vocab * embedding_length * 2 * quant_bit_out / 16;
+  auto token_embeddings_size =
+      static_cast<int64_t>(n_vocab * embedding_length * 2 * quant_bit_in / 16);
+  auto output_layer_size =
+      static_cast<int64_t>(n_vocab * embedding_length * 2 * quant_bit_out / 16);
   // RAM = token_embeddings_size + ((total_ngl-ngl) >=1 ? output_layer_size + (total_ngl - ngl - 1 ) / (total_ngl-1) * (total_file_size - token_embeddings_size - output_layer_size) : 0 ) (bytes)
   int64_t offload = 0;
   if (total_ngl >= rc.ngl + 1) {
-    offload = output_layer_size +
-              (double)(total_ngl - rc.ngl - 1) / (total_ngl - 1) *
-                  (file_size - token_embeddings_size - output_layer_size);
+    offload = static_cast<int64_t>(
+        output_layer_size +
+        (double)(total_ngl - rc.ngl - 1) / (total_ngl - 1) *
+            (file_size - token_embeddings_size - output_layer_size));
   }

   int64_t ram_usage = token_embeddings_size + offload;
@@ -133,18 +133,18 @@ inline std::optional<Estimation> EstimateLLaMACppRun(
   // KV cache
   // kv_cache_size = ctx_len/8192 * hidden_dim/4096 * quant_bit/16 * num_block/33 * 1 (GB)
   auto hidden_dim = embedding_length;
-  int kv_quant_bit =
+  auto kv_quant_bit =
       GetQuantBit(rc.kv_cache_type);  // f16, 8 bits for q8_0, 4.5 bits for q4_0
-  int64_t kv_cache_size = (double)(1024 * 1024 * 1024) * rc.ctx_len / 8192 *
-                          hidden_dim / 4096 * kv_quant_bit / 16 * num_block /
-                          33;  //(bytes)
+  auto kv_cache_size = static_cast<int64_t>(
+      (double)(1024 * 1024 * 1024) * rc.ctx_len / 8192 * hidden_dim / 4096 *
+      kv_quant_bit / 16 * num_block / 33);  //(bytes)

   // std::cout << "kv_cache_size: " << BytesToMiB(kv_cache_size) << std::endl;

   // VRAM = (min(n_batch, n_ubatch))/ 512 * 266 (MiB)
-  int64_t preprocessing_buffer_size =
+  auto preprocessing_buffer_size = static_cast<int64_t>(
       (double)std::min(rc.n_batch, rc.n_ubatch) / 512 * 266 * 1024 * 1024 *
-      n_vocab / 128256 /*llama3 n_vocab*/;  //(bytes)
+      n_vocab / 128256 /*llama3 n_vocab*/);  //(bytes)
   if (total_ngl != rc.ngl) {
     preprocessing_buffer_size += output_layer_size;
   }
@@ -173,8 +174,8 @@ inline std::optional<Estimation> EstimateLLaMACppRun(
   if (rc.free_vram_MiB > res.gpu_mode.vram_MiB) {
     res.gpu_mode.recommend_ngl = total_ngl;
   } else {
-    res.gpu_mode.recommend_ngl =
-        (double)rc.free_vram_MiB / res.gpu_mode.vram_MiB * rc.ngl;
+    res.gpu_mode.recommend_ngl = static_cast<int>(
+        (double)rc.free_vram_MiB / res.gpu_mode.vram_MiB * rc.ngl);
   }
 #if defined(__APPLE__) && defined(__MACH__)
   res.cpu_mode.ram_MiB = res.gpu_mode.vram_MiB + res.gpu_mode.ram_MiB;
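These casts make each float/double heuristic's landing in an integer byte count explicit instead of implicit. For intuition, the KV-cache formula in the comment is calibrated so that ctx_len 8192, hidden_dim 4096, an f16 cache (16 bits), and 33 blocks yield exactly 1 GiB; a worked check (values assumed for illustration):

#include <cstdint>
#include <iostream>

int main() {
  double ctx_len = 8192, hidden_dim = 4096, kv_quant_bit = 16, num_block = 33;
  auto kv_cache_size = static_cast<int64_t>(
      (double)(1024 * 1024 * 1024) * ctx_len / 8192 * hidden_dim / 4096 *
      kv_quant_bit / 16 * num_block / 33);
  std::cout << kv_cache_size / (1024 * 1024) << " MiB\n";  // prints 1024
}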
16 changes: 10 additions & 6 deletions engine/utils/hardware/gpu/vulkan/vulkan_gpu.h
@@ -433,8 +433,8 @@ class VulkanGpu {
     for (uint32_t i = 0; i < memory_properties.memoryHeapCount; ++i) {
       if (memory_properties.memoryHeaps[i].flags &
           VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
-        gpu_avail_MiB +=
-            memory_properties.memoryHeaps[i].size / (1024ull * 1024ull);
+        gpu_avail_MiB += static_cast<int>(
+            memory_properties.memoryHeaps[i].size / (1024ull * 1024ull));
       }
     }

@@ -449,8 +449,10 @@ class VulkanGpu {
       used_vram_MiB = gpus_usages[device_properties.deviceName];

 #endif
-      int free_vram_MiB =
-          total_vram_MiB > used_vram_MiB ? total_vram_MiB - used_vram_MiB : 0;
+      auto free_vram_MiB =
+          total_vram_MiB > used_vram_MiB
+              ? static_cast<int>(total_vram_MiB - used_vram_MiB)
+              : 0;
       if (device_properties.vendorID == kNvidiaVendor ||
           device_properties.vendorID == kAmdVendor) {
         gpus.emplace_back(cortex::hw::GPU{
@@ -507,8 +509,10 @@ class VulkanGpu {
       total_vram_MiB = gpus_[i].free_vram;
       used_vram_MiB = gpus_usages[gpus_[i].name];
 #endif
-      int free_vram_MiB =
-          total_vram_MiB > used_vram_MiB ? total_vram_MiB - used_vram_MiB : 0;
+      auto free_vram_MiB =
+          total_vram_MiB > used_vram_MiB
+              ? static_cast<int>(total_vram_MiB - used_vram_MiB)
+              : 0;
       gpus_[i].free_vram = free_vram_MiB;
     }

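Beyond satisfying the int-conversion warnings, the ternary guards the unsigned subtraction: if reported usage ever exceeded the heap total, total - used on unsigned operands would wrap to a huge positive value instead of going negative. Illustrative helper:

#include <cstdint>

// Clamp free VRAM at zero so unsigned subtraction can't wrap.
int FreeVramMiB(uint64_t total_MiB, uint64_t used_MiB) {
  return total_MiB > used_MiB ? static_cast<int>(total_MiB - used_MiB) : 0;
}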
4 changes: 2 additions & 2 deletions engine/utils/huggingface_utils.h
@@ -308,7 +308,7 @@ inline std::optional<std::string> GetDefaultBranch(
       return default_branch.as<std::string>();
     }
     return std::nullopt;
-  } catch (const std::exception& e) {
+  } catch (const std::exception&) {
     return std::nullopt;
   }
 }
@@ -328,7 +328,7 @@ inline std::optional<std::string> GetModelAuthorCortexsoHub(
       return author.as<std::string>();
     }
     return std::nullopt;
-  } catch (const std::exception& e) {
+  } catch (const std::exception&) {
     return std::nullopt;
   }
 }
3 changes: 2 additions & 1 deletion engine/utils/url_parser.h
@@ -153,7 +153,8 @@ inline std::string FromUrl(const Url& url) {
     } catch (const std::bad_variant_access& e) {
       // Handle the case where the variant does not match any of the expected types
       // This should not happen if the map was created correctly
-      throw std::runtime_error("Invalid variant type in queries map");
+      throw std::runtime_error(
+          std::string("Invalid variant type in queries map: ") + e.what());
     }
   }

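Appending e.what() preserves the root cause when the wrapped error surfaces far from the throw site. Minimal sketch of the pattern:

#include <stdexcept>
#include <string>
#include <variant>

std::string AsString(const std::variant<int, std::string>& v) {
  try {
    return std::get<std::string>(v);
  } catch (const std::bad_variant_access& e) {
    // Re-throw with the original diagnostic attached.
    throw std::runtime_error(
        std::string("Invalid variant type in queries map: ") + e.what());
  }
}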