This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 2b74824

fix: support ctx_len for model start cli (#1766)
* fix: support ctx_len for model start cli
* chore: docs
* fix: guard max ctx_len
1 parent: 4bf5e75

8 files changed, +49 -17 lines

docs/docs/cli/models/index.mdx

Lines changed: 4 additions & 3 deletions
@@ -159,9 +159,10 @@ This command uses a `model_id` from the model that you have downloaded or availa

 | Option | Description | Required | Default value | Example |
 |---------------------------|---------------------------------------------------------------------------|----------|----------------------------------------------|------------------------|
-| `model_id` | The identifier of the model you want to start. | Yes | `Prompt to select from the available models` | `mistral` |
-| `--gpus` | List of GPUs to use. | No | - | `[0,1]` |
-| `-h`, `--help` | Display help information for the command. | No | - | `-h` |
+| `model_id` | The identifier of the model you want to start. | Yes | `Prompt to select from the available models` | `mistral` |
+| `--gpus` | List of GPUs to use. | No | - | `[0,1]` |
+| `--ctx_len` | Maximum context length for inference. | No | `min(8192, max_model_context_length)` | `1024` |
+| `-h`, `--help` | Display help information for the command. | No | - | `-h` |

 ## `cortex models stop`
 :::info
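Taken together with the defaults above, a quick usage check: `cortex models start mistral --ctx_len 1024` starts the model with a 1024-token context window, while omitting `--ctx_len` falls back to `min(8192, max_model_context_length)`.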

docs/docs/cli/models/start.md

Lines changed: 1 addition & 0 deletions
@@ -33,6 +33,7 @@ cortex models start [model_id]:[engine] [options]
 |---------------------------|----------------------------------------------------------|----------|----------------------------------------------|-------------------|
 | `model_id` | The identifier of the model you want to start. | No | `Prompt to select from the available models` | `mistral` |
 | `--gpus` | List of GPUs to use. | No | - | `[0,1]` |
+| `--ctx_len` | Maximum context length for inference. | No | `min(8192, max_model_context_length)` | `1024` |
 | `-h`, `--help` | Display help information for the command. | No | - | `-h` |


docs/docs/cli/run.mdx

Lines changed: 3 additions & 2 deletions
@@ -36,7 +36,8 @@ You can use the `--verbose` flag to display more detailed output of the internal

 | Option | Description | Required | Default value | Example |
 |-----------------------------|-----------------------------------------------------------------------------|----------|----------------------------------------------|------------------------|
-| `model_id` | The identifier of the model you want to chat with. | Yes | - | `mistral` |
-| `--gpus` | List of GPUs to use. | No | - | `[0,1]` |
+| `model_id` | The identifier of the model you want to chat with. | Yes | - | `mistral` |
+| `--gpus` | List of GPUs to use. | No | - | `[0,1]` |
+| `--ctx_len` | Maximum context length for inference. | No | `min(8192, max_model_context_length)` | `1024` |
 | `-h`, `--help` | Display help information for the command. | No | - | `-h` |
 <!-- | `-t`, `--thread <thread_id>` | Specify the Thread ID. Defaults to creating a new thread if none specified. | No | - | `-t jan_1717650808` | | `-c` | -->
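The same flag applies to interactive chat, e.g. `cortex run mistral --ctx_len 1024`.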

engine/cli/command_line_parser.cc

Lines changed: 11 additions & 7 deletions
@@ -163,16 +163,18 @@ void CommandLineParser::SetupCommonCommands() {
   run_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
                  " run [options] [model_id]");
   run_cmd->add_option("model_id", cml_data_.model_id, "");
-  run_cmd->add_option("--gpus", hw_activate_opts_["gpus"],
+  run_cmd->add_option("--gpus", run_settings_["gpus"],
                       "List of GPU to activate, for example [0, 1]");
+  run_cmd->add_option("--ctx_len", run_settings_["ctx_len"],
+                      "Maximum context length for inference");
   run_cmd->add_flag("-d,--detach", cml_data_.run_detach, "Detached mode");
   run_cmd->callback([this, run_cmd] {
     if (std::exchange(executed_, true))
       return;
     commands::RunCmd rc(cml_data_.config.apiServerHost,
                         std::stoi(cml_data_.config.apiServerPort),
                         cml_data_.model_id, download_service_);
-    rc.Exec(cml_data_.run_detach, hw_activate_opts_);
+    rc.Exec(cml_data_.run_detach, run_settings_);
   });
 }

@@ -203,8 +205,10 @@ void CommandLineParser::SetupModelCommands() {
   model_start_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
                          " models start [model_id]");
   model_start_cmd->add_option("model_id", cml_data_.model_id, "");
-  model_start_cmd->add_option("--gpus", hw_activate_opts_["gpus"],
+  model_start_cmd->add_option("--gpus", run_settings_["gpus"],
                               "List of GPU to activate, for example [0, 1]");
+  model_start_cmd->add_option("--ctx_len", run_settings_["ctx_len"],
+                              "Maximum context length for inference");
   model_start_cmd->group(kSubcommands);
   model_start_cmd->callback([this, model_start_cmd]() {
     if (std::exchange(executed_, true))
@@ -216,7 +220,7 @@ void CommandLineParser::SetupModelCommands() {
     };
     commands::ModelStartCmd().Exec(cml_data_.config.apiServerHost,
                                    std::stoi(cml_data_.config.apiServerPort),
-                                   cml_data_.model_id, hw_activate_opts_);
+                                   cml_data_.model_id, run_settings_);
   });

   auto stop_model_cmd =
@@ -562,7 +566,7 @@ void CommandLineParser::SetupHardwareCommands() {
   hw_activate_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
                          " hardware activate --gpus [list_gpu]");
   hw_activate_cmd->group(kSubcommands);
-  hw_activate_cmd->add_option("--gpus", hw_activate_opts_["gpus"],
+  hw_activate_cmd->add_option("--gpus", run_settings_["gpus"],
                               "List of GPU to activate, for example [0, 1]");
   hw_activate_cmd->callback([this, hw_activate_cmd]() {
     if (std::exchange(executed_, true))
@@ -572,14 +576,14 @@
       return;
     }

-    if (hw_activate_opts_["gpus"].empty()) {
+    if (run_settings_["gpus"].empty()) {
       CLI_LOG("[list_gpu] is required\n");
       CLI_LOG(hw_activate_cmd->help());
       return;
     }
     commands::HardwareActivateCmd().Exec(
         cml_data_.config.apiServerHost,
-        std::stoi(cml_data_.config.apiServerPort), hw_activate_opts_);
+        std::stoi(cml_data_.config.apiServerPort), run_settings_);
   });
 }
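The parser change is mechanical: the old `hw_activate_opts_` map is renamed to `run_settings_`, and one more string option is registered against it. For orientation, a minimal standalone sketch of this pattern, assuming a CLI11-style API (which the `add_option`/`callback` calls above resemble); the subcommand layout here is illustrative, not cortex's actual wiring:

```cpp
#include <CLI/CLI.hpp>  // assumed parser library (CLI11-style API)
#include <string>
#include <unordered_map>

int main(int argc, char** argv) {
  CLI::App app{"cortex"};
  // A single string map carries every run-time setting (--gpus, --ctx_len);
  // each command later decides per-key how to interpret the raw strings.
  std::unordered_map<std::string, std::string> run_settings;

  auto* start_cmd = app.add_subcommand("start", "Start a model");
  start_cmd->add_option("--gpus", run_settings["gpus"],
                        "List of GPU to activate, for example [0, 1]");
  start_cmd->add_option("--ctx_len", run_settings["ctx_len"],
                        "Maximum context length for inference");
  start_cmd->callback([&run_settings] {
    // An empty mapped value means the flag was never passed; downstream
    // code (ModelStartCmd::Exec in this commit) skips empty values.
  });

  CLI11_PARSE(app, argc, argv);
  return 0;
}
```

Note that binding `run_settings["ctx_len"]` at registration time eagerly inserts the key with an empty value, which is why the command side checks `!v.empty()` rather than key presence.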

engine/cli/command_line_parser.h

Lines changed: 1 addition & 1 deletion
@@ -79,5 +79,5 @@ class CommandLineParser {
   std::unordered_map<std::string, std::string> config_update_opts_;
   bool executed_ = false;
   commands::HarwareOptions hw_opts_;
-  std::unordered_map<std::string, std::string> hw_activate_opts_;
+  std::unordered_map<std::string, std::string> run_settings_;
 };

engine/cli/commands/model_start_cmd.cc

Lines changed: 17 additions & 2 deletions
@@ -30,8 +30,8 @@ bool ModelStartCmd::Exec(

   //
   bool should_activate_hw = false;
-  for (auto const& [_, v] : options) {
-    if (!v.empty()) {
+  for (auto const& [k, v] : options) {
+    if (k == "gpus" && !v.empty()) {
       should_activate_hw = true;
       break;
     }
@@ -57,6 +57,9 @@ bool ModelStartCmd::Exec(

   Json::Value json_data;
   json_data["model"] = model_id.value();
+  for (auto const& [k, v] : options) {
+    UpdateConfig(json_data, k, v);
+  }
   auto data_str = json_data.toStyledString();
   auto res = curl_utils::SimplePostJson(url.ToFullPath(), data_str);
   if (res.has_error()) {
@@ -75,4 +78,16 @@ bool ModelStartCmd::Exec(
   }
   return true;
 }
+
+bool ModelStartCmd::UpdateConfig(Json::Value& data, const std::string& key,
+                                 const std::string& value) {
+  if (key == "ctx_len" && !value.empty()) {
+    try {
+      data["ctx_len"] = std::stoi(value);
+    } catch (const std::exception& e) {
+      CLI_LOG("Failed to parse numeric value for " << key << ": " << e.what());
+    }
+  }
+  return true;
+}
 };  // namespace commands
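The `try`/`catch` around `std::stoi` is the client-side half of the "guard max ctx_len" fix: a non-numeric `--ctx_len` is logged and ignored instead of crashing the CLI or sending garbage to the server. A self-contained sketch of that behavior (`SetCtxLen` is a hypothetical stand-in for `UpdateConfig`, with `CLI_LOG` replaced by `std::cerr`):

```cpp
#include <iostream>
#include <string>

#include <json/json.h>  // jsoncpp, as used by the engine

// Parse a raw CLI string into the request body; bad input is reported
// and skipped, leaving any existing "ctx_len" value untouched.
void SetCtxLen(Json::Value& data, const std::string& value) {
  if (value.empty()) return;  // flag not passed: let the server pick a default
  try {
    data["ctx_len"] = std::stoi(value);
  } catch (const std::exception& e) {
    std::cerr << "Failed to parse numeric value for ctx_len: " << e.what()
              << '\n';
  }
}

int main() {
  Json::Value body;
  body["model"] = "mistral";
  SetCtxLen(body, "1024");  // sets body["ctx_len"] = 1024
  SetCtxLen(body, "abc");   // std::stoi throws; logged, 1024 left in place
  std::cout << body.toStyledString();
  return 0;
}
```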

engine/cli/commands/model_start_cmd.h

Lines changed: 4 additions & 0 deletions
@@ -2,6 +2,7 @@

 #include <string>
 #include <unordered_map>
+#include "json/json.h"

 namespace commands {

@@ -10,5 +11,8 @@ class ModelStartCmd {
   bool Exec(const std::string& host, int port, const std::string& model_handle,
             const std::unordered_map<std::string, std::string>& options,
             bool print_success_log = true);
+ private:
+  bool UpdateConfig(Json::Value& data, const std::string& key,
+                    const std::string& value);
 };
 } // namespace commands

engine/services/model_service.cc

Lines changed: 8 additions & 2 deletions
@@ -702,6 +702,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
   config::YamlHandler yaml_handler;

   try {
+    constexpr const int kDefautlContextLength = 8192;
+    int max_model_context_length = kDefautlContextLength;
     Json::Value json_data;
     // Currently we don't support download vision models, so we need to bypass check
     if (!params_override.bypass_model_check()) {
@@ -732,6 +734,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
       json_data["system_prompt"] = mc.system_template;
       json_data["user_prompt"] = mc.user_template;
       json_data["ai_prompt"] = mc.ai_template;
+      json_data["ctx_len"] = std::min(kDefautlContextLength, mc.ctx_len);
+      max_model_context_length = mc.ctx_len;
     } else {
       bypass_stop_check_set_.insert(model_handle);
     }
@@ -753,12 +757,14 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
     ASSIGN_IF_PRESENT(json_data, params_override, cache_enabled);
     ASSIGN_IF_PRESENT(json_data, params_override, ngl);
     ASSIGN_IF_PRESENT(json_data, params_override, n_parallel);
-    ASSIGN_IF_PRESENT(json_data, params_override, ctx_len);
     ASSIGN_IF_PRESENT(json_data, params_override, cache_type);
     ASSIGN_IF_PRESENT(json_data, params_override, mmproj);
     ASSIGN_IF_PRESENT(json_data, params_override, model_path);
 #undef ASSIGN_IF_PRESENT
-
+    if (params_override.ctx_len) {
+      json_data["ctx_len"] =
+          std::min(params_override.ctx_len.value(), max_model_context_length);
+    }
     CTL_INF(json_data.toStyledString());
     auto may_fallback_res = MayFallbackToCpu(json_data["model_path"].asString(),
                                              json_data["ngl"].asInt(),
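Server-side, the guard reduces to one rule: default to `min(8192, model max)`, and clamp any user override so it can never exceed the model's own maximum context length. A sketch of that rule in isolation (the function name and bare `std::optional` parameter are simplifications of the `params_override` fields above):

```cpp
#include <algorithm>
#include <optional>

constexpr int kDefaultContextLength = 8192;  // spelled kDefautlContextLength upstream

// Pick the ctx_len actually sent to the inference engine.
int EffectiveCtxLen(int max_model_context_length,
                    std::optional<int> ctx_len_override) {
  if (ctx_len_override.has_value()) {
    // Honor the user's --ctx_len, but never exceed the model's limit.
    return std::min(*ctx_len_override, max_model_context_length);
  }
  // No override: conservative default, still capped by the model's limit.
  return std::min(kDefaultContextLength, max_model_context_length);
}

// EffectiveCtxLen(32768, std::nullopt) == 8192  (default, large model)
// EffectiveCtxLen(32768, 1024)        == 1024  (override respected)
// EffectiveCtxLen(4096, 100000)       == 4096  (override clamped)
```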
