Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 67e3554

Browse files
feat: model compatibility API (#1715)
* feat: model estimation * fix: cleanup and improve * chore: cleanup * chore: API docs * chore: CLI docs * fix: correct calculation * fix: handle macOS * chore: typo --------- Co-authored-by: vansangpfiev <sang@jan.ai>
1 parent 9c95577 commit 67e3554

File tree

13 files changed

+1226
-104
lines changed

13 files changed

+1226
-104
lines changed

docs/docs/cli/models/index.mdx

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,11 @@ For example, it returns the following:
120120

121121
| Option | Description | Required | Default value | Example |
122122
|---------------------------|----------------------------------------------------|----------|---------------|----------------------|
123-
| `-h`, `--help` | Display help for command. | No | - | `-h` |
124-
<!-- | `-f`, `--format <format>` | Specify output format for the models list. | No | `json` | `-f json` | -->
123+
| `-h`, `--help` | Display help for command. | No | - | `-h` |
124+
| `-e`, `--engine` | Display the engine of each model. | No | - | `--engine` |
125+
| `-v`, `--version` | Display the version of each model. | No | - | `--version` |
126+
| `--cpu_mode` | Display the estimated RAM (MiB) needed to run each model in CPU mode. | No | - | `--cpu_mode` |
127+
| `--gpu_mode` | Display the GPU mode estimate for each model (ngl, context length, RAM, VRAM, recommended ngl). | No | - | `--gpu_mode` |
125128

126129
## `cortex models start`
127130
:::info

docs/static/openapi/cortex.json

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3940,6 +3940,55 @@
39403940
},
39413941
"required": ["description", "name", "productName", "status"]
39423942
},
3943+
"CpuModeDto": {
3944+
"type": "object",
3945+
"properties": {
3946+
"ram": {
3947+
"type": "number",
3948+
"example": 1024
3949+
}
3950+
}
3951+
},
3952+
"GpuModeDto": {
3953+
"type": "object",
3954+
"properties": {
3955+
"ram": {
3956+
"type": "number",
3957+
"example": 1024
3958+
},
3959+
"vram": {
3960+
"type": "number",
3961+
"example": 1024
3962+
},
3963+
"ngl": {
3964+
"type": "number",
3965+
"example": 30
3966+
},
3967+
"context_length": {
3968+
"type": "number",
3969+
"example": 4096
3970+
},
3971+
"recommend_ngl": {
3972+
"type": "number",
3973+
"example": 33
3974+
}
3975+
}
3976+
},
3977+
"RecommendDto": {
3978+
"type": "object",
3979+
"properties": {
3980+
"cpu_mode": {
3981+
"type": "object",
3982+
"$ref": "#/components/schemas/CpuModeDto"
3983+
},
3984+
"gpu_mode": {
3985+
"type": "array",
3986+
"items": {
3987+
"$ref": "#/components/schemas/GPUDto"
3988+
}
3989+
}
3990+
}
3991+
},
39433992
"ModelDto": {
39443993
"type": "object",
39453994
"properties": {
@@ -4064,6 +4113,10 @@
40644113
"type": "string",
40654114
"description": "The engine to use.",
40664115
"example": "llamacpp"
4116+
},
4117+
"recommendation": {
4118+
"type": "object",
4119+
"$ref": "#/components/schemas/RecommendDto"
40674120
}
40684121
},
40694122
"required": ["id"]

engine/cli/command_line_parser.cc

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -245,14 +245,19 @@ void CommandLineParser::SetupModelCommands() {
245245
"Display engine");
246246
list_models_cmd->add_flag("-v,--version", cml_data_.display_version,
247247
"Display version");
248+
list_models_cmd->add_flag("--cpu_mode", cml_data_.display_cpu_mode,
249+
"Display cpu mode");
250+
list_models_cmd->add_flag("--gpu_mode", cml_data_.display_gpu_mode,
251+
"Display gpu mode");
248252
list_models_cmd->group(kSubcommands);
249253
list_models_cmd->callback([this]() {
250254
if (std::exchange(executed_, true))
251255
return;
252-
commands::ModelListCmd().Exec(cml_data_.config.apiServerHost,
253-
std::stoi(cml_data_.config.apiServerPort),
254-
cml_data_.filter, cml_data_.display_engine,
255-
cml_data_.display_version);
256+
commands::ModelListCmd().Exec(
257+
cml_data_.config.apiServerHost,
258+
std::stoi(cml_data_.config.apiServerPort), cml_data_.filter,
259+
cml_data_.display_engine, cml_data_.display_version,
260+
cml_data_.display_cpu_mode, cml_data_.display_gpu_mode);
256261
});
257262

258263
auto get_models_cmd =

engine/cli/command_line_parser.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ class CommandLineParser {
6464
// for model list
6565
bool display_engine = false;
6666
bool display_version = false;
67+
bool display_cpu_mode = false;
68+
bool display_gpu_mode = false;
6769
std::string filter = "";
6870
std::string log_level = "INFO";
6971

engine/cli/commands/model_list_cmd.cc

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ using Row_t =
2020

2121
void ModelListCmd::Exec(const std::string& host, int port,
2222
const std::string& filter, bool display_engine,
23-
bool display_version) {
23+
bool display_version, bool display_cpu_mode,
24+
bool display_gpu_mode) {
2425
// Start server if server is not started yet
2526
if (!commands::IsServerAlive(host, port)) {
2627
CLI_LOG("Starting server ...");
@@ -39,6 +40,12 @@ void ModelListCmd::Exec(const std::string& host, int port,
3940
column_headers.push_back("Version");
4041
}
4142

43+
if (display_cpu_mode) {
44+
column_headers.push_back("CPU Mode");
45+
}
46+
if (display_gpu_mode) {
47+
column_headers.push_back("GPU Mode");
48+
}
4249
Row_t header{column_headers.begin(), column_headers.end()};
4350
table.add_row(header);
4451
table.format().font_color(Color::green);
@@ -77,6 +84,28 @@ void ModelListCmd::Exec(const std::string& host, int port,
7784
row.push_back(v["version"].asString());
7885
}
7986

87+
if (auto& r = v["recommendation"]; !r.isNull()) {
88+
if (display_cpu_mode) {
89+
if (!r["cpu_mode"].isNull()) {
90+
row.push_back("RAM: " + r["cpu_mode"]["ram"].asString() + " MiB");
91+
}
92+
}
93+
94+
if (display_gpu_mode) {
95+
if (!r["gpu_mode"].isNull()) {
96+
std::string s;
97+
s += "ngl: " + r["gpu_mode"][0]["ngl"].asString() + " - ";
98+
s += "context: " + r["gpu_mode"][0]["context_length"].asString() +
99+
" - ";
100+
s += "RAM: " + r["gpu_mode"][0]["ram"].asString() + " MiB - ";
101+
s += "VRAM: " + r["gpu_mode"][0]["vram"].asString() + " MiB - ";
102+
s += "recommended ngl: " +
103+
r["gpu_mode"][0]["recommend_ngl"].asString();
104+
row.push_back(s);
105+
}
106+
}
107+
}
108+
80109
table.add_row({row.begin(), row.end()});
81110
}
82111
}

engine/cli/commands/model_list_cmd.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ namespace commands {
77
class ModelListCmd {
88
public:
99
void Exec(const std::string& host, int port, const std::string& filter,
10-
bool display_engine = false, bool display_version = false);
10+
bool display_engine = false, bool display_version = false,
11+
bool display_cpu_mode = false, bool display_gpu_mode = false);
1112
};
1213
} // namespace commands

engine/controllers/models.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,10 @@ void Models::ListModel(
171171
Json::Value obj = model_config.ToJson();
172172
obj["id"] = model_entry.model;
173173
obj["model"] = model_entry.model;
174+
auto es = model_service_->GetEstimation(model_entry.model);
175+
if (es.has_value()) {
176+
obj["recommendation"] = hardware::ToJson(es.value());
177+
}
174178
data.append(std::move(obj));
175179
yaml_handler.Reset();
176180
} catch (const std::exception& e) {

engine/main.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "utils/event_processor.h"
2121
#include "utils/file_logger.h"
2222
#include "utils/file_manager_utils.h"
23+
#include "utils/hardware/gguf/gguf_file_estimate.h"
2324
#include "utils/logging_utils.h"
2425
#include "utils/system_info_utils.h"
2526
#include "utils/widechar_conv.h"

0 commit comments

Comments
 (0)