Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 48dcae3

Browse files
authored
feat: change unload model and model status to POST (#558)
1 parent 88c7421 commit 48dcae3

File tree

4 files changed

+9
-9
lines changed

4 files changed

+9
-9
lines changed

.github/scripts/e2e-test-llama-linux-and-mac.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,10 @@ response2=$(
8383
)
8484

8585
# unload model
86-
response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/unloadModel" \
86+
response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request POST -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/unloadModel" \
8787
--header 'Content-Type: application/json' \
8888
--data '{
89-
"llama_model_path": "/tmp/testllm"
89+
"model": "testllm"
9090
}')
9191

9292
# load embedding model

.github/scripts/e2e-test-llama-windows.bat

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,9 @@ if not exist "%MODEL_EMBEDDING_PATH%" (
6262
rem Define JSON strings for curl data
6363
call set "MODEL_LLM_PATH_STRING=%%MODEL_LLM_PATH:\=\\%%"
6464
call set "MODEL_EMBEDDING_PATH_STRING=%%MODEL_EMBEDDING_PATH:\=\\%%"
65-
set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
66-
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":false,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
67-
set "curl_data3={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
65+
set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\", \"model_alias\":\"gpt-3.5-turbo\"}"
66+
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
67+
set "curl_data3={\"model\":\"gpt-3.5-turbo\"}"
6868
set "curl_data4={\"llama_model_path\":\"%MODEL_EMBEDDING_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}"
6969
set "curl_data5={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}"
7070

@@ -82,7 +82,7 @@ curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --
8282
--header "Content-Type: application/json" ^
8383
--data "%curl_data2%" > %TEMP%\response2.log 2>&1
8484

85-
curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1
85+
curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1
8686

8787
curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1
8888

cortex-cpp/controllers/server.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ class server : public drogon::HttpController<server>,
4444
METHOD_ADD(server::ChatCompletion, "chat_completion", Post);
4545
METHOD_ADD(server::Embedding, "embedding", Post);
4646
METHOD_ADD(server::LoadModel, "loadmodel", Post);
47-
METHOD_ADD(server::UnloadModel, "unloadmodel", Get);
48-
METHOD_ADD(server::ModelStatus, "modelstatus", Get);
47+
METHOD_ADD(server::UnloadModel, "unloadmodel", Post);
48+
METHOD_ADD(server::ModelStatus, "modelstatus", Post);
4949

5050
// Openai compatible path
5151
ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Post);

cortex-cpp/engines/cortex.llamacpp/engine.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# cortex.llamacpp release version
2-
set(VERSION 0.1.1)
2+
set(VERSION 0.1.2)
33
set(ENGINE_VERSION v${VERSION})
44

55
# MESSAGE("ENGINE_VERSION=" ${ENGINE_VERSION})

0 commit comments

Comments (0)