This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit a2a6550

fix: bypass model check for mmproj model (#1547)

1 parent 9fe0c6f
3 files changed: +63, -46 lines

engine/controllers/models.cc

Lines changed: 4 additions & 2 deletions
@@ -385,15 +385,17 @@ void Models::StartModel(
   }
 
   auto model_entry = model_service_->GetDownloadedModel(model_handle);
-  if (!model_entry.has_value()) {
+  if (!model_entry.has_value() && !params_override.bypass_model_check()) {
     Json::Value ret;
     ret["message"] = "Cannot find model: " + model_handle;
     auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
     resp->setStatusCode(drogon::k400BadRequest);
     callback(resp);
     return;
   }
-  auto engine_name = model_entry.value().engine;
+  std::string engine_name = params_override.bypass_model_check()
+                                ? kLlamaEngine
+                                : model_entry.value().engine;
   auto engine_entry = engine_service_->GetEngineInfo(engine_name);
   if (engine_entry.has_error()) {
     Json::Value ret;
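
The controller change means a missing model entry is no longer fatal once the override carries an mmproj path; the engine name simply falls back to the llama.cpp engine. Below is a minimal, self-contained sketch of just that selection logic; the value of kLlamaEngine is an assumption here, and the real constant is defined elsewhere in the repository.

// Standalone sketch of the engine-selection fallback, not the real controller.
#include <iostream>
#include <optional>
#include <string>

constexpr auto kLlamaEngine = "cortex.llamacpp";  // assumed value

int main() {
  std::optional<std::string> entry_engine;  // lookup found no downloaded model
  bool bypass_model_check = true;           // an mmproj path was supplied

  // Mirrors the diff: value() is only reached when the check was not
  // bypassed, so the empty optional is never dereferenced.
  std::string engine_name =
      bypass_model_check ? kLlamaEngine : entry_engine.value();
  std::cout << engine_name << "\n";  // prints "cortex.llamacpp"
}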

engine/services/model_service.cc

Lines changed: 49 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#include "utils/logging_utils.h"
1616
#include "utils/result.hpp"
1717
#include "utils/string_utils.h"
18-
#include "utils/json_helper.h"
1918

2019
namespace {
2120
void ParseGguf(const DownloadItem& ggufDownloadItem,
@@ -577,39 +576,44 @@ cpp::result<bool, std::string> ModelService::StartModel(
   config::YamlHandler yaml_handler;
 
   try {
-    auto model_entry = modellist_handler.GetModelInfo(model_handle);
-    if (model_entry.has_error()) {
-      CTL_WRN("Error: " + model_entry.error());
-      return cpp::fail(model_entry.error());
-    }
-    yaml_handler.ModelConfigFromFile(
-        fmu::ToAbsoluteCortexDataPath(
-            fs::path(model_entry.value().path_to_model_yaml))
-            .string());
-    auto mc = yaml_handler.GetModelConfig();
-
-    httplib::Client cli(host + ":" + std::to_string(port));
+    Json::Value json_data;
+    // Currently we don't support downloading vision models, so we bypass the check
+    if (!params_override.bypass_model_check()) {
+      auto model_entry = modellist_handler.GetModelInfo(model_handle);
+      if (model_entry.has_error()) {
+        CTL_WRN("Error: " + model_entry.error());
+        return cpp::fail(model_entry.error());
+      }
+      yaml_handler.ModelConfigFromFile(
+          fmu::ToAbsoluteCortexDataPath(
+              fs::path(model_entry.value().path_to_model_yaml))
+              .string());
+      auto mc = yaml_handler.GetModelConfig();
 
-    Json::Value json_data = mc.ToJson();
-    if (mc.files.size() > 0) {
-      // TODO(sang) support multiple files
-      json_data["model_path"] =
-          fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string();
+      json_data = mc.ToJson();
+      if (mc.files.size() > 0) {
+        // TODO(sang) support multiple files
+        json_data["model_path"] =
+            fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string();
+      } else {
+        LOG_WARN << "model_path is empty";
+        return false;
+      }
+      json_data["system_prompt"] = mc.system_template;
+      json_data["user_prompt"] = mc.user_template;
+      json_data["ai_prompt"] = mc.ai_template;
     } else {
-      LOG_WARN << "model_path is empty";
-      return false;
+      bypass_stop_check_set_.insert(model_handle);
     }
+    httplib::Client cli(host + ":" + std::to_string(port));
+
     json_data["model"] = model_handle;
     if (auto& cpt = params_override.custom_prompt_template;
         !cpt.value_or("").empty()) {
       auto parse_prompt_result = string_utils::ParsePrompt(cpt.value());
       json_data["system_prompt"] = parse_prompt_result.system_prompt;
       json_data["user_prompt"] = parse_prompt_result.user_prompt;
       json_data["ai_prompt"] = parse_prompt_result.ai_prompt;
-    } else {
-      json_data["system_prompt"] = mc.system_template;
-      json_data["user_prompt"] = mc.user_template;
-      json_data["ai_prompt"] = mc.ai_template;
     }
 
 #define ASSIGN_IF_PRESENT(json_obj, param_override, param_name) \
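
In the bypass branch, json_data is never seeded from a ModelConfig: the load request carries only the model handle plus whatever explicit overrides follow. A small jsoncpp sketch of the two payload shapes; the handle, path, and template strings here are made up for illustration.

// Sketch of the two payload shapes built by StartModel; a simplification,
// not the real service code.
#include <iostream>
#include <json/json.h>  // jsoncpp, the JSON library the diffed code uses

Json::Value BuildLoadPayload(bool bypass_model_check) {
  Json::Value json_data;
  if (!bypass_model_check) {
    // Normal path: fields come from the model's YAML config.
    json_data["model_path"] = "/data/models/tinyllama/model.gguf";
    json_data["system_prompt"] = "<|system|>";
    json_data["user_prompt"] = "<|user|>";
    json_data["ai_prompt"] = "<|assistant|>";
  }
  json_data["model"] = "my-vision-model";  // set on both paths
  return json_data;
}

int main() {
  std::cout << BuildLoadPayload(true).toStyledString();   // {"model": ...} only
  std::cout << BuildLoadPayload(false).toStyledString();  // full config payload
}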
@@ -655,29 +659,38 @@ cpp::result<bool, std::string> ModelService::StopModel(
   config::YamlHandler yaml_handler;
 
   try {
-    auto model_entry = modellist_handler.GetModelInfo(model_handle);
-    if (model_entry.has_error()) {
-      CTL_WRN("Error: " + model_entry.error());
-      return cpp::fail(model_entry.error());
+    auto bypass_check = (bypass_stop_check_set_.find(model_handle) !=
+                         bypass_stop_check_set_.end());
+    Json::Value json_data;
+    if (!bypass_check) {
+      auto model_entry = modellist_handler.GetModelInfo(model_handle);
+      if (model_entry.has_error()) {
+        CTL_WRN("Error: " + model_entry.error());
+        return cpp::fail(model_entry.error());
+      }
+      yaml_handler.ModelConfigFromFile(
+          fmu::ToAbsoluteCortexDataPath(
+              fs::path(model_entry.value().path_to_model_yaml))
+              .string());
+      auto mc = yaml_handler.GetModelConfig();
+      json_data["engine"] = mc.engine;
     }
-    yaml_handler.ModelConfigFromFile(
-        fmu::ToAbsoluteCortexDataPath(
-            fs::path(model_entry.value().path_to_model_yaml))
-            .string());
-    auto mc = yaml_handler.GetModelConfig();
 
     httplib::Client cli(host + ":" + std::to_string(port));
-
-    Json::Value json_data;
     json_data["model"] = model_handle;
-    json_data["engine"] = mc.engine;
+    if (bypass_check) {
+      json_data["engine"] = kLlamaEngine;
+    }
     CTL_INF(json_data.toStyledString());
     assert(inference_svc_);
     auto ir =
         inference_svc_->UnloadModel(std::make_shared<Json::Value>(json_data));
     auto status = std::get<0>(ir)["status_code"].asInt();
     auto data = std::get<1>(ir);
     if (status == httplib::StatusCode::OK_200) {
+      if (bypass_check) {
+        bypass_stop_check_set_.erase(model_handle);
+      }
       return true;
     } else {
       CTL_ERR("Model failed to stop with status code: " << status);
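
StopModel learns that a model was started without a registry entry from bypass_stop_check_set_, which StartModel populated. A standard-library-only sketch of that bookkeeping, with a hypothetical handle name:

// Lifecycle of the bypass set: insert on start, consult and erase on stop.
#include <iostream>
#include <string>
#include <unordered_set>

int main() {
  std::unordered_set<std::string> bypass_stop_check_set_;

  // StartModel path: mmproj supplied, no model.yml to consult, so remember
  // the handle for the matching StopModel call.
  bypass_stop_check_set_.insert("llava-vision");  // hypothetical handle

  // StopModel path: a remembered handle skips the registry lookup and the
  // unload request names the llama engine directly.
  bool bypass_check = bypass_stop_check_set_.count("llava-vision") > 0;
  std::cout << std::boolalpha << bypass_check << "\n";  // true

  // Only after a successful unload (HTTP 200) is the handle forgotten.
  if (bypass_check) bypass_stop_check_set_.erase("llava-vision");
}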

engine/services/model_service.h

Lines changed: 10 additions & 8 deletions
@@ -8,14 +8,15 @@
 #include "services/inference_service.h"
 
 struct StartParameterOverride {
   std::optional<bool> cache_enabled;
   std::optional<int> ngl;
   std::optional<int> n_parallel;
   std::optional<int> ctx_len;
   std::optional<std::string> custom_prompt_template;
   std::optional<std::string> cache_type;
   std::optional<std::string> mmproj;
   std::optional<std::string> model_path;
+  bool bypass_model_check() const { return mmproj.has_value(); }
 };
 class ModelService {
  public:
@@ -86,4 +87,5 @@ class ModelService {
 
   std::shared_ptr<DownloadService> download_service_;
   std::shared_ptr<services::InferenceService> inference_svc_;
+  std::unordered_set<std::string> bypass_stop_check_set_;
 };
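
The predicate keys entirely off mmproj: any start request that supplies a multimodal projector path takes the bypass route, regardless of the other overrides. A quick assertion-style check against a trimmed copy of the struct:

// Trimmed copy of StartParameterOverride, checking the predicate's behavior.
#include <cassert>
#include <optional>
#include <string>

struct StartParameterOverride {
  std::optional<std::string> mmproj;
  std::optional<std::string> model_path;
  bool bypass_model_check() const { return mmproj.has_value(); }
};

int main() {
  StartParameterOverride plain;
  plain.model_path = "/models/plain.gguf";  // hypothetical path
  assert(!plain.bypass_model_check());      // ordinary model: checks run

  StartParameterOverride vision;
  vision.mmproj = "/models/mmproj.gguf";    // projector supplied
  assert(vision.bypass_model_check());      // registry check is skipped
}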
