Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 3be991e

Browse files
committed
update
1 parent 2d8c32e commit 3be991e

File tree

5 files changed

+15
-59
lines changed

5 files changed

+15
-59
lines changed

engine/cli/commands/cortex_upd_cmd.cc

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ std::optional<std::string> CheckNewUpdate(
150150
}
151151

152152
if (CORTEX_VARIANT == file_manager_utils::kBetaVariant) {
153-
for (auto& d : data) {
153+
for (const auto& d : data) {
154154
if (auto tag = d["tag_name"].asString();
155155
tag.find(kBetaComp) != std::string::npos) {
156156
return tag;
@@ -380,7 +380,7 @@ bool CortexUpdCmd::GetBeta(const std::string& v) {
380380
auto json_res = json_helper::ParseJsonString(res->body);
381381

382382
Json::Value json_data;
383-
for (auto& jr : json_res) {
383+
for (const auto& jr : json_res) {
384384
// Get the latest beta or match version
385385
if (auto tag = jr["tag_name"].asString();
386386
(v.empty() && tag.find(kBetaComp) != std::string::npos) ||
@@ -429,13 +429,12 @@ bool CortexUpdCmd::GetBeta(const std::string& v) {
429429

430430
assert(!!downloaded_exe_path);
431431
return InstallNewVersion(dst, downloaded_exe_path.value());
432-
;
433432
}
434433

435434
std::optional<std::string> CortexUpdCmd::HandleGithubRelease(
436435
const Json::Value& assets, const std::string& os_arch) {
437436
std::string matched_variant = "";
438-
for (auto& asset : assets) {
437+
for (const auto& asset : assets) {
439438
auto asset_name = asset["name"].asString();
440439
if (asset_name.find(kCortexBinary) != std::string::npos &&
441440
asset_name.find(os_arch) != std::string::npos &&
@@ -451,7 +450,7 @@ std::optional<std::string> CortexUpdCmd::HandleGithubRelease(
451450
}
452451
CTL_INF("Matched variant: " << matched_variant);
453452

454-
for (auto& asset : assets) {
453+
for (const auto& asset : assets) {
455454
auto asset_name = asset["name"].asString();
456455
if (asset_name == matched_variant) {
457456
auto download_url = asset["browser_download_url"].asString();

engine/config/gguf_parser.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ void GGUFHandler::OpenFile(const std::string& file_path) {
8585
throw std::runtime_error("Failed to get file size");
8686
}
8787
int file_descriptor = open(file_path.c_str(), O_RDONLY);
88-
;
8988
// Memory-map the file
9089
data_ = static_cast<uint8_t*>(
9190
mmap(nullptr, file_size_, PROT_READ, MAP_PRIVATE, file_descriptor, 0));

engine/controllers/server.cc

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -92,15 +92,6 @@ void server::GetModels(const HttpRequestPtr& req,
9292
LOG_TRACE << "Done get models";
9393
}
9494

95-
void server::GetEngines(
96-
const HttpRequestPtr& req,
97-
std::function<void(const HttpResponsePtr&)>&& callback) {
98-
// TODO: namh
99-
// auto ir = inference_svc_->GetEngines(req->getJsonObject());
100-
// auto resp = cortex_utils::CreateCortexHttpJsonResponse(ir);
101-
// callback(resp);
102-
}
103-
10495
void server::FineTuning(
10596
const HttpRequestPtr& req,
10697
std::function<void(const HttpResponsePtr&)>&& callback) {
@@ -122,17 +113,6 @@ void server::LoadModel(const HttpRequestPtr& req,
122113
LOG_TRACE << "Done load model";
123114
}
124115

125-
void server::UnloadEngine(
126-
const HttpRequestPtr& req,
127-
std::function<void(const HttpResponsePtr&)>&& callback) {
128-
// namh implement this
129-
// auto ir = engine_service_->UnloadEngine(req->getJsonObject());
130-
// auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(ir));
131-
// resp->setStatusCode(
132-
// static_cast<HttpStatusCode>(std::get<0>(ir)["status_code"].asInt()));
133-
// callback(resp);
134-
}
135-
136116
void server::ProcessStreamRes(std::function<void(const HttpResponsePtr&)> cb,
137117
std::shared_ptr<services::SyncQueue> q) {
138118
auto err_or_done = std::make_shared<std::atomic_bool>(false);

engine/controllers/server.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -43,24 +43,14 @@ class server : public drogon::HttpController<server, false>,
4343
METHOD_ADD(server::UnloadModel, "unloadmodel", Post);
4444
METHOD_ADD(server::ModelStatus, "modelstatus", Post);
4545
METHOD_ADD(server::GetModels, "models", Get);
46-
METHOD_ADD(server::GetEngines, "engines", Get);
4746

4847
// cortex.python API
4948
METHOD_ADD(server::FineTuning, "finetuning", Post);
5049

5150
// Openai compatible path
5251
ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Post);
53-
// ADD_METHOD_TO(server::GetModels, "/v1/models", Get);
5452
ADD_METHOD_TO(server::FineTuning, "/v1/fine_tuning/job", Post);
55-
56-
// ADD_METHOD_TO(server::handlePrelight, "/v1/chat/completions", Options);
57-
// NOTE: prelight will be added back when browser support is properly planned
58-
5953
ADD_METHOD_TO(server::Embedding, "/v1/embeddings", Post);
60-
// ADD_METHOD_TO(server::handlePrelight, "/v1/embeddings", Options);
61-
62-
// PATH_ADD("/llama/chat_completion", Post);
63-
METHOD_ADD(server::UnloadEngine, "unloadengine", Post);
6454

6555
METHOD_LIST_END
6656
void ChatCompletion(
@@ -81,14 +71,9 @@ class server : public drogon::HttpController<server, false>,
8171
void GetModels(
8272
const HttpRequestPtr& req,
8373
std::function<void(const HttpResponsePtr&)>&& callback) override;
84-
void GetEngines(
85-
const HttpRequestPtr& req,
86-
std::function<void(const HttpResponsePtr&)>&& callback) override;
8774
void FineTuning(
8875
const HttpRequestPtr& req,
8976
std::function<void(const HttpResponsePtr&)>&& callback) override;
90-
void UnloadEngine(const HttpRequestPtr& req,
91-
std::function<void(const HttpResponsePtr&)>&& callback);
9277

9378
private:
9479
void ProcessStreamRes(std::function<void(const HttpResponsePtr&)> cb,

engine/services/inference_service.cc

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,9 @@
11
#include "inference_service.h"
2+
#include <drogon/HttpTypes.h>
23
#include "utils/engine_constants.h"
34
#include "utils/function_calling/common.h"
45

56
namespace services {
6-
7-
namespace {
8-
constexpr const int k200OK = 200;
9-
constexpr const int k400BadRequest = 400;
10-
constexpr const int k409Conflict = 409;
11-
constexpr const int k500InternalServerError = 500;
12-
} // namespace
13-
147
cpp::result<void, InferResult> InferenceService::HandleChatCompletion(
158
std::shared_ptr<SyncQueue> q, std::shared_ptr<Json::Value> json_body) {
169
std::string engine_type;
@@ -26,7 +19,7 @@ cpp::result<void, InferResult> InferenceService::HandleChatCompletion(
2619
Json::Value res;
2720
res["message"] = "Engine is not loaded yet";
2821
Json::Value stt;
29-
stt["status_code"] = k409Conflict;
22+
stt["status_code"] = drogon::k400BadRequest;
3023
LOG_WARN << "Engine is not loaded yet";
3124
return cpp::fail(std::make_pair(stt, res));
3225
}
@@ -54,9 +47,9 @@ cpp::result<void, InferResult> InferenceService::HandleEmbedding(
5447
auto engine_result = engine_service_->GetLoadedEngine(engine_type);
5548
if (engine_result.has_error()) {
5649
Json::Value res;
57-
res["message"] = "Engine is not loaded yet";
5850
Json::Value stt;
59-
stt["status_code"] = k409Conflict;
51+
res["message"] = "Engine is not loaded yet";
52+
stt["status_code"] = drogon::k400BadRequest;
6053
LOG_WARN << "Engine is not loaded yet";
6154
return cpp::fail(std::make_pair(stt, res));
6255
}
@@ -84,7 +77,7 @@ InferResult InferenceService::LoadModel(
8477

8578
r["message"] = "Could not load engine " + engine_type + ": " +
8679
load_engine_result.error();
87-
stt["status_code"] = k500InternalServerError;
80+
stt["status_code"] = drogon::k500InternalServerError;
8881
return std::make_pair(stt, r);
8982
}
9083

@@ -114,7 +107,7 @@ InferResult InferenceService::UnloadModel(
114107
Json::Value res;
115108
res["message"] = "Engine is not loaded yet";
116109
Json::Value stt;
117-
stt["status_code"] = k409Conflict;
110+
stt["status_code"] = drogon::k400BadRequest;
118111
LOG_WARN << "Engine is not loaded yet";
119112
return std::make_pair(stt, res);
120113
}
@@ -145,7 +138,7 @@ InferResult InferenceService::GetModelStatus(
145138
Json::Value res;
146139
res["message"] = "Engine is not loaded yet";
147140
Json::Value stt;
148-
stt["status_code"] = k409Conflict;
141+
stt["status_code"] = drogon::k400BadRequest;
149142
LOG_WARN << "Engine is not loaded yet";
150143
return std::make_pair(stt, res);
151144
}
@@ -168,7 +161,7 @@ InferResult InferenceService::GetModels(
168161
auto loaded_engines = engine_service_->GetLoadedEngines();
169162
if (loaded_engines.empty()) {
170163
r["message"] = "No engine is loaded yet";
171-
stt["status_code"] = k400BadRequest;
164+
stt["status_code"] = drogon::k400BadRequest;
172165
return std::make_pair(stt, r);
173166
}
174167

@@ -189,7 +182,7 @@ InferResult InferenceService::GetModels(
189182
Json::Value root;
190183
root["data"] = resp_data;
191184
root["object"] = "list";
192-
stt["status_code"] = k200OK;
185+
stt["status_code"] = drogon::k200OK;
193186
return std::make_pair(stt, root);
194187
}
195188

@@ -215,7 +208,7 @@ InferResult InferenceService::FineTuning(
215208
//
216209
// Json::Value res;
217210
// r["message"] = "Could not load engine " + ne;
218-
// stt["status_code"] = k500InternalServerError;
211+
// stt["status_code"] = drogon::k500InternalServerError;
219212
// return std::make_pair(stt, r);
220213
// }
221214
//
@@ -236,7 +229,7 @@ InferResult InferenceService::FineTuning(
236229
// } else {
237230
// LOG_WARN << "Method is not supported yet";
238231
r["message"] = "Method is not supported yet";
239-
stt["status_code"] = k500InternalServerError;
232+
stt["status_code"] = drogon::k500InternalServerError;
240233
// return std::make_pair(stt, r);
241234
// }
242235
// LOG_TRACE << "Done fine-tuning";

0 commit comments

Comments
 (0)