Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 65547d5

Browse files
Merge branch 'dev' of github.com:janhq/cortex.cpp into dev
2 parents f648f63 + f354af6 commit 65547d5

24 files changed

+220
-92
lines changed

.github/workflows/cortex-cpp-quality-gate.yml

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -189,3 +189,41 @@ jobs:
189189
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
190190
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
191191
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
192+
193+
build-docker-and-test:
194+
runs-on: ubuntu-latest
195+
steps:
196+
- name: Getting the repo
197+
uses: actions/checkout@v3
198+
with:
199+
submodules: 'recursive'
200+
201+
- name: Set up QEMU
202+
uses: docker/setup-qemu-action@v3
203+
204+
- name: Set up Docker Buildx
205+
uses: docker/setup-buildx-action@v3
206+
207+
- name: Run Docker
208+
run: |
209+
docker build -t menloltd/cortex:test -f docker/Dockerfile .
210+
docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
211+
212+
- name: use python
213+
uses: actions/setup-python@v5
214+
with:
215+
python-version: "3.10"
216+
217+
- name: Run e2e tests
218+
run: |
219+
cd engine
220+
python -m pip install --upgrade pip
221+
python -m pip install -r e2e-test/requirements.txt
222+
pytest e2e-test/test_api_docker.py
223+
224+
- name: Run Docker
225+
continue-on-error: true
226+
if: always()
227+
run: |
228+
docker stop cortex
229+
docker rm cortex

docker/entrypoint.sh

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,13 @@
22

33
# Install cortex.llamacpp engine
44

5+
echo "apiServerHost: 0.0.0.0" > /root/.cortexrc
6+
echo "enableCors: true" >> /root/.cortexrc
7+
58
cortex engines install llama-cpp -s /opt/cortex.llamacpp
6-
cortex -v
79

810
# Start the cortex server
911

10-
sed -i 's/apiServerHost: 127.0.0.1/apiServerHost: 0.0.0.0/' /root/.cortexrc
11-
1212
cortex start
1313

1414
# Keep the container running by tailing the log files

docs/docs/capabilities/models/sources/index.mdx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -272,7 +272,7 @@ curl --request POST \
272272
Clients can abort a downloading task using the task ID. Below is a sample `curl` command to abort a download task:
273273

274274
```sh
275-
curl --location --request DELETE 'http://127.0.0.1:3928/models/pull' \
275+
curl --location --request DELETE 'http://127.0.0.1:39281/v1/models/pull' \
276276
--header 'Content-Type: application/json' \
277277
--data '{
278278
"taskId": "tinyllama:1b-gguf-q2-k"

docs/static/openapi/cortex.json

Lines changed: 4 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -3595,31 +3595,19 @@
35953595
"type": "string",
35963596
"description": "The identifier or URL of the model to use. It can be a model ID on Cortexso (https://huggingface.co/cortexso) or a HuggingFace URL pointing to the model file. For example: 'gpt2' or 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf'",
35973597
"examples": [
3598-
{
3599-
"value": "tinyllama:gguf"
3600-
},
3601-
{
3602-
"value": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf"
3603-
}
3598+
"tinyllama:gguf",
3599+
"https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf"
36043600
]
36053601
},
36063602
"id": {
36073603
"type": "string",
36083604
"description": "The id which will be used to register the model.",
3609-
"examples": [
3610-
{
3611-
"value": "my-custom-model-id"
3612-
}
3613-
]
3605+
"examples": "my-custom-model-id"
36143606
},
36153607
"name": {
36163608
"type": "string",
36173609
"description": "The name which will be used to overwrite the model name.",
3618-
"examples": [
3619-
{
3620-
"value": "my-custom-model-name"
3621-
}
3622-
]
3610+
"examples": "my-custom-model-name"
36233611
}
36243612
}
36253613
},

engine/cli/utils/download_progress.cc

Lines changed: 11 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -50,13 +50,12 @@ bool DownloadProgress::Handle(const DownloadType& event_type) {
5050
}
5151
}
5252
#endif
53-
std::unordered_map<std::string, uint64_t> totals;
5453
status_ = DownloadStatus::DownloadStarted;
5554
std::unique_ptr<indicators::DynamicProgress<indicators::ProgressBar>> bars;
5655

5756
std::vector<std::unique_ptr<indicators::ProgressBar>> items;
5857
indicators::show_console_cursor(false);
59-
auto handle_message = [this, &bars, &items, &totals,
58+
auto handle_message = [this, &bars, &items,
6059
event_type](const std::string& message) {
6160
CTL_INF(message);
6261

@@ -98,27 +97,24 @@ bool DownloadProgress::Handle(const DownloadType& event_type) {
9897
}
9998
for (int i = 0; i < ev.download_task_.items.size(); i++) {
10099
auto& it = ev.download_task_.items[i];
101-
uint64_t downloaded = it.downloadedBytes.value_or(0);
102-
if (totals.find(it.id) == totals.end()) {
103-
totals[it.id] = it.bytes.value_or(std::numeric_limits<uint64_t>::max());
104-
CTL_INF("Updated " << it.id << " - total: " << totals[it.id]);
105-
}
106-
107-
if (ev.type_ == DownloadStatus::DownloadStarted ||
108-
ev.type_ == DownloadStatus::DownloadUpdated) {
100+
if (ev.type_ == DownloadStatus::DownloadUpdated) {
101+
uint64_t downloaded = it.downloadedBytes.value_or(0u);
102+
uint64_t total =
103+
it.bytes.value_or(std::numeric_limits<uint64_t>::max());
109104
(*bars)[i].set_option(indicators::option::PrefixText{
110105
pad_string(Repo2Engine(it.id)) +
111-
std::to_string(
112-
int(static_cast<double>(downloaded) / totals[it.id] * 100)) +
106+
std::to_string(int(static_cast<double>(downloaded) / total * 100)) +
113107
'%'});
114108
(*bars)[i].set_progress(
115-
int(static_cast<double>(downloaded) / totals[it.id] * 100));
109+
int(static_cast<double>(downloaded) / total * 100));
116110
(*bars)[i].set_option(indicators::option::PostfixText{
117111
format_utils::BytesToHumanReadable(downloaded) + "/" +
118-
format_utils::BytesToHumanReadable(totals[it.id])});
112+
format_utils::BytesToHumanReadable(total)});
119113
} else if (ev.type_ == DownloadStatus::DownloadSuccess) {
114+
uint64_t total =
115+
it.bytes.value_or(std::numeric_limits<uint64_t>::max());
120116
(*bars)[i].set_progress(100);
121-
auto total_str = format_utils::BytesToHumanReadable(totals[it.id]);
117+
auto total_str = format_utils::BytesToHumanReadable(total);
122118
(*bars)[i].set_option(
123119
indicators::option::PostfixText{total_str + "/" + total_str});
124120
(*bars)[i].set_option(indicators::option::PrefixText{

engine/controllers/configs.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,10 +11,10 @@ class Configs : public drogon::HttpController<Configs, false> {
1111
METHOD_LIST_BEGIN
1212

1313
METHOD_ADD(Configs::GetConfigurations, "", Get);
14-
METHOD_ADD(Configs::UpdateConfigurations, "", Patch);
14+
METHOD_ADD(Configs::UpdateConfigurations, "", Options, Patch);
1515

1616
ADD_METHOD_TO(Configs::GetConfigurations, "/v1/configs", Get);
17-
ADD_METHOD_TO(Configs::UpdateConfigurations, "/v1/configs", Patch);
17+
ADD_METHOD_TO(Configs::UpdateConfigurations, "/v1/configs", Options, Patch);
1818

1919
METHOD_LIST_END
2020

engine/controllers/engines.h

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -13,16 +13,17 @@ class Engines : public drogon::HttpController<Engines, false> {
1313
METHOD_LIST_BEGIN
1414

1515
METHOD_ADD(Engines::GetInstalledEngineVariants, "/{1}", Get);
16-
METHOD_ADD(Engines::InstallEngine, "/{1}?version={2}&variant={3}", Post);
16+
METHOD_ADD(Engines::InstallEngine, "/{1}?version={2}&variant={3}", Options,
17+
Post);
1718
METHOD_ADD(Engines::UninstallEngine, "/{1}?version={2}&variant={3}", Options,
1819
Delete);
1920
METHOD_ADD(Engines::SetDefaultEngineVariant,
20-
"/{1}/default?version={2}&variant={3}", Post);
21+
"/{1}/default?version={2}&variant={3}", Options, Post);
2122
METHOD_ADD(Engines::GetDefaultEngineVariant, "/{1}/default", Get);
2223

23-
METHOD_ADD(Engines::LoadEngine, "/{1}/load", Post);
24+
METHOD_ADD(Engines::LoadEngine, "/{1}/load", Options, Post);
2425
METHOD_ADD(Engines::UnloadEngine, "/{1}/load", Options, Delete);
25-
METHOD_ADD(Engines::UpdateEngine, "/{1}/update", Post);
26+
METHOD_ADD(Engines::UpdateEngine, "/{1}/update", Options, Post);
2627
METHOD_ADD(Engines::ListEngine, "", Get);
2728

2829
METHOD_ADD(Engines::GetEngineVersions, "/{1}/versions", Get);
@@ -31,17 +32,18 @@ class Engines : public drogon::HttpController<Engines, false> {
3132

3233
ADD_METHOD_TO(Engines::GetInstalledEngineVariants, "/v1/engines/{1}", Get);
3334
ADD_METHOD_TO(Engines::InstallEngine,
34-
"/v1/engines/{1}?version={2}&variant={3}", Post);
35+
"/v1/engines/{1}?version={2}&variant={3}", Options, Post);
3536
ADD_METHOD_TO(Engines::UninstallEngine,
3637
"/v1/engines/{1}?version={2}&variant={3}", Options, Delete);
3738
ADD_METHOD_TO(Engines::SetDefaultEngineVariant,
38-
"/v1/engines/{1}/default?version={2}&variant={3}", Post);
39+
"/v1/engines/{1}/default?version={2}&variant={3}", Options,
40+
Post);
3941
ADD_METHOD_TO(Engines::GetDefaultEngineVariant, "/v1/engines/{1}/default",
4042
Get);
4143

42-
ADD_METHOD_TO(Engines::LoadEngine, "/v1/engines/{1}/load", Post);
43-
ADD_METHOD_TO(Engines::UnloadEngine, "/v1/engines/{1}/load", Post);
44-
ADD_METHOD_TO(Engines::UpdateEngine, "/v1/engines/{1}/update", Post);
44+
ADD_METHOD_TO(Engines::LoadEngine, "/v1/engines/{1}/load", Options, Post);
45+
ADD_METHOD_TO(Engines::UnloadEngine, "/v1/engines/{1}/load", Options, Post);
46+
ADD_METHOD_TO(Engines::UpdateEngine, "/v1/engines/{1}/update", Options, Post);
4547
ADD_METHOD_TO(Engines::GetEngineVersions, "/v1/engines/{1}/versions", Get);
4648
ADD_METHOD_TO(Engines::GetEngineVariants, "/v1/engines/{1}/versions/{2}",
4749
Get);

engine/controllers/models.h

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -10,27 +10,27 @@ using namespace drogon;
1010
class Models : public drogon::HttpController<Models, false> {
1111
public:
1212
METHOD_LIST_BEGIN
13-
METHOD_ADD(Models::PullModel, "/pull", Post);
14-
METHOD_ADD(Models::GetModelPullInfo, "/pull/info", Post);
13+
METHOD_ADD(Models::PullModel, "/pull", Options, Post);
14+
METHOD_ADD(Models::GetModelPullInfo, "/pull/info", Options, Post);
1515
METHOD_ADD(Models::AbortPullModel, "/pull", Options, Delete);
1616
METHOD_ADD(Models::ListModel, "", Get);
1717
METHOD_ADD(Models::GetModel, "/{1}", Get);
18-
METHOD_ADD(Models::UpdateModel, "/{1}", Patch);
19-
METHOD_ADD(Models::ImportModel, "/import", Post);
18+
METHOD_ADD(Models::UpdateModel, "/{1}", Options, Patch);
19+
METHOD_ADD(Models::ImportModel, "/import", Options, Post);
2020
METHOD_ADD(Models::DeleteModel, "/{1}", Options, Delete);
21-
METHOD_ADD(Models::StartModel, "/start", Post);
22-
METHOD_ADD(Models::StopModel, "/stop", Post);
21+
METHOD_ADD(Models::StartModel, "/start", Options, Post);
22+
METHOD_ADD(Models::StopModel, "/stop", Options, Post);
2323
METHOD_ADD(Models::GetModelStatus, "/status/{1}", Get);
2424

25-
ADD_METHOD_TO(Models::PullModel, "/v1/models/pull", Post);
25+
ADD_METHOD_TO(Models::PullModel, "/v1/models/pull", Options, Post);
2626
ADD_METHOD_TO(Models::AbortPullModel, "/v1/models/pull", Options, Delete);
2727
ADD_METHOD_TO(Models::ListModel, "/v1/models", Get);
2828
ADD_METHOD_TO(Models::GetModel, "/v1/models/{1}", Get);
29-
ADD_METHOD_TO(Models::UpdateModel, "/v1/models/{1}", Patch);
30-
ADD_METHOD_TO(Models::ImportModel, "/v1/models/import", Post);
29+
ADD_METHOD_TO(Models::UpdateModel, "/v1/models/{1}", Options, Patch);
30+
ADD_METHOD_TO(Models::ImportModel, "/v1/models/import", Options, Post);
3131
ADD_METHOD_TO(Models::DeleteModel, "/v1/models/{1}", Options, Delete);
32-
ADD_METHOD_TO(Models::StartModel, "/v1/models/start", Post);
33-
ADD_METHOD_TO(Models::StopModel, "/v1/models/stop", Post);
32+
ADD_METHOD_TO(Models::StartModel, "/v1/models/start", Options, Post);
33+
ADD_METHOD_TO(Models::StopModel, "/v1/models/stop", Options, Post);
3434
ADD_METHOD_TO(Models::GetModelStatus, "/v1/models/status/{1}", Get);
3535
METHOD_LIST_END
3636

engine/controllers/server.h

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -37,20 +37,20 @@ class server : public drogon::HttpController<server, false>,
3737
~server();
3838
METHOD_LIST_BEGIN
3939
// list path definitions here;
40-
METHOD_ADD(server::ChatCompletion, "chat_completion", Post);
41-
METHOD_ADD(server::Embedding, "embedding", Post);
42-
METHOD_ADD(server::LoadModel, "loadmodel", Post);
43-
METHOD_ADD(server::UnloadModel, "unloadmodel", Post);
44-
METHOD_ADD(server::ModelStatus, "modelstatus", Post);
40+
METHOD_ADD(server::ChatCompletion, "chat_completion", Options, Post);
41+
METHOD_ADD(server::Embedding, "embedding", Options, Post);
42+
METHOD_ADD(server::LoadModel, "loadmodel", Options, Post);
43+
METHOD_ADD(server::UnloadModel, "unloadmodel", Options, Post);
44+
METHOD_ADD(server::ModelStatus, "modelstatus", Options, Post);
4545
METHOD_ADD(server::GetModels, "models", Get);
4646

4747
// cortex.python API
48-
METHOD_ADD(server::FineTuning, "finetuning", Post);
48+
METHOD_ADD(server::FineTuning, "finetuning", Options, Post);
4949

5050
// Openai compatible path
51-
ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Post);
52-
ADD_METHOD_TO(server::FineTuning, "/v1/fine_tuning/job", Post);
53-
ADD_METHOD_TO(server::Embedding, "/v1/embeddings", Post);
51+
ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Options, Post);
52+
ADD_METHOD_TO(server::FineTuning, "/v1/fine_tuning/job", Options, Post);
53+
ADD_METHOD_TO(server::Embedding, "/v1/embeddings", Options, Post);
5454

5555
METHOD_LIST_END
5656
void ChatCompletion(

engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -100,22 +100,22 @@ async def test_models_on_cortexso_hub(self, model_url):
100100
json_body = {
101101
"model": model_url
102102
}
103-
response = requests.post("http://localhost:3928/models/pull", json=json_body)
103+
response = requests.post("http://localhost:3928/v1/models/pull", json=json_body)
104104
assert response.status_code == 200, f"Failed to pull model: {model_url}"
105105

106106
await wait_for_websocket_download_success_event(timeout=None)
107107

108108
# Check if the model was pulled successfully
109109
get_model_response = requests.get(
110-
f"http://127.0.0.1:3928/models/{model_url}"
110+
f"http://127.0.0.1:3928/v1/models/{model_url}"
111111
)
112112
assert get_model_response.status_code == 200, f"Failed to fetch model: {model_url}"
113113
assert (
114114
get_model_response.json()["model"] == model_url
115115
), f"Unexpected model name for: {model_url}"
116116

117117
# Check if the model is available in the list of models
118-
response = requests.get("http://localhost:3928/models")
118+
response = requests.get("http://localhost:3928/v1/models")
119119
assert response.status_code == 200
120120
models = [i["id"] for i in response.json()["data"]]
121121
assert model_url in models, f"Model not found in list: {model_url}"
@@ -129,7 +129,7 @@ async def test_models_on_cortexso_hub(self, model_url):
129129
assert exit_code == 0, f"Install engine failed with error: {error}"
130130

131131
# Start the model
132-
response = requests.post("http://localhost:3928/models/start", json=json_body)
132+
response = requests.post("http://localhost:3928/v1/models/start", json=json_body)
133133
assert response.status_code == 200, f"status_code: {response.status_code}"
134134

135135
# Send an inference request
@@ -155,7 +155,7 @@ async def test_models_on_cortexso_hub(self, model_url):
155155
assert response.status_code == 200, f"status_code: {response.status_code} response: {response.json()}"
156156

157157
# Stop the model
158-
response = requests.post("http://localhost:3928/models/stop", json=json_body)
158+
response = requests.post("http://localhost:3928/v1/models/stop", json=json_body)
159159
assert response.status_code == 200, f"status_code: {response.status_code}"
160160

161161
# Uninstall Engine

0 commit comments

Comments
 (0)