Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 48dcae3

Browse files
authored
feat: change unload model and model status to POST (#558)
1 parent 88c7421 commit 48dcae3

File tree

4 files changed

+9
-9
lines changed

4 files changed

+9
-9
lines changed

.github/scripts/e2e-test-llama-linux-and-mac.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,10 @@ response2=$(
8383
)
8484

8585
# unload model
86-
response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/unloadModel" \
86+
response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request POST -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/unloadModel" \
8787
--header 'Content-Type: application/json' \
8888
--data '{
89-
"llama_model_path": "/tmp/testllm"
89+
"model": "testllm"
9090
}')
9191

9292
# load embedding model

.github/scripts/e2e-test-llama-windows.bat

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,9 @@ if not exist "%MODEL_EMBEDDING_PATH%" (
6262
rem Define JSON strings for curl data
6363
call set "MODEL_LLM_PATH_STRING=%%MODEL_LLM_PATH:\=\\%%"
6464
call set "MODEL_EMBEDDING_PATH_STRING=%%MODEL_EMBEDDING_PATH:\=\\%%"
65-
set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
66-
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":false,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
67-
set "curl_data3={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
65+
set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\", \"model_alias\":\"gpt-3.5-turbo\"}"
66+
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
67+
set "curl_data3={\"model\":\"gpt-3.5-turbo\"}"
6868
set "curl_data4={\"llama_model_path\":\"%MODEL_EMBEDDING_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}"
6969
set "curl_data5={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}"
7070

@@ -82,7 +82,7 @@ curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --
8282
--header "Content-Type: application/json" ^
8383
--data "%curl_data2%" > %TEMP%\response2.log 2>&1
8484

85-
curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1
85+
curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1
8686

8787
curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1
8888

cortex-cpp/controllers/server.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ class server : public drogon::HttpController<server>,
4444
METHOD_ADD(server::ChatCompletion, "chat_completion", Post);
4545
METHOD_ADD(server::Embedding, "embedding", Post);
4646
METHOD_ADD(server::LoadModel, "loadmodel", Post);
47-
METHOD_ADD(server::UnloadModel, "unloadmodel", Get);
48-
METHOD_ADD(server::ModelStatus, "modelstatus", Get);
47+
METHOD_ADD(server::UnloadModel, "unloadmodel", Post);
48+
METHOD_ADD(server::ModelStatus, "modelstatus", Post);
4949

5050
// Openai compatible path
5151
ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Post);

cortex-cpp/engines/cortex.llamacpp/engine.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# cortex.llamacpp release version
2-
set(VERSION 0.1.1)
2+
set(VERSION 0.1.2)
33
set(ENGINE_VERSION v${VERSION})
44

55
# MESSAGE("ENGINE_VERSION=" ${ENGINE_VERSION})

0 commit comments

Comments (0)