@@ -1312,42 +1312,44 @@ namespace inferences {
 class llamaCPP : public drogon::HttpController<llamaCPP> {
 public:
   llamaCPP() {
-    // gpt_params params;
-    // auto conf = drogon::app().getCustomConfig();
-    // params.model = conf["llama_model_path"].asString();
-    // params.n_gpu_layers = conf["ngl"].asInt();
-    // params.n_ctx = conf["ctx_len"].asInt();
-    // params.embedding = conf["embedding"].asBool();
-    // #ifdef GGML_USE_CUBLAS
-    // LOG_INFO << "Setting up GGML CUBLAS PARAMS";
-    // params.mul_mat_q = false;
-    // #endif // GGML_USE_CUBLAS
-    // if (params.model_alias == "unknown") {
-    // params.model_alias = params.model;
-    // }
-    //
-    // llama_backend_init(params.numa);
-    //
-    // LOG_INFO_LLAMA("build info",
-    // {{"build", BUILD_NUMBER}, {"commit", BUILD_COMMIT}});
-    // LOG_INFO_LLAMA("system info",
-    // {
-    // {"n_threads", params.n_threads},
-    // {"total_threads", std::thread::hardware_concurrency()},
-    // {"system_info", llama_print_system_info()},
-    // });
-    //
-    // // load the model
-    // if (!llama.loadModel(params)) {
-    // LOG_ERROR << "Error loading the model will exit the program";
-    // std::terminate();
-    // }
+    // gpt_params params;
+    // auto conf = drogon::app().getCustomConfig();
+    // params.model = conf["llama_model_path"].asString();
+    // params.n_gpu_layers = conf["ngl"].asInt();
+    // params.n_ctx = conf["ctx_len"].asInt();
+    // params.embedding = conf["embedding"].asBool();
+    // #ifdef GGML_USE_CUBLAS
+    // LOG_INFO << "Setting up GGML CUBLAS PARAMS";
+    // params.mul_mat_q = false;
+    // #endif // GGML_USE_CUBLAS
+    // if (params.model_alias == "unknown") {
+    // params.model_alias = params.model;
+    // }
+    //
+    // llama_backend_init(params.numa);
+    //
+    // LOG_INFO_LLAMA("build info",
+    // {{"build", BUILD_NUMBER}, {"commit", BUILD_COMMIT}});
+    // LOG_INFO_LLAMA("system info",
+    // {
+    // {"n_threads", params.n_threads},
+    // {"total_threads",
+    // std::thread::hardware_concurrency()},
+    // {"system_info", llama_print_system_info()},
+    // });
+    //
+    // // load the model
+    // if (!llama.loadModel(params)) {
+    // LOG_ERROR << "Error loading the model will exit the program";
+    // std::terminate();
+    // }
+    // deprecate this if find no usecase
   }
   METHOD_LIST_BEGIN
   // list path definitions here;
-  METHOD_ADD(llamaCPP::chatCompletion, "chat_completion",Post);
-  METHOD_ADD(llamaCPP::embedding,"embedding",Post);
-  METHOD_ADD(llamaCPP::loadModel,"loadmodel",Post);
+  METHOD_ADD(llamaCPP::chatCompletion, "chat_completion", Post);
+  METHOD_ADD(llamaCPP::embedding, "embedding", Post);
+  METHOD_ADD(llamaCPP::loadModel, "loadmodel", Post);
   // PATH_ADD("/llama/chat_completion", Post);
   METHOD_LIST_END
   void chatCompletion(const HttpRequestPtr &req,
@@ -1357,7 +1359,6 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   void loadModel(const HttpRequestPtr &req,
                  std::function<void(const HttpResponsePtr &)> &&callback);

-
 private:
   llama_server_context llama;
   bool model_loaded = false;
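
Note (not part of the commit): with the constructor body commented out above, model initialization is left to the loadmodel route registered between METHOD_LIST_BEGIN and METHOD_LIST_END. A minimal client-side sketch follows; the port (3928), the /inferences/llamacpp route prefix, and the JSON body keys (llama_model_path, ngl, ctx_len, embedding) are assumptions inferred from the commented-out getCustomConfig() keys, not confirmed by this diff.

// Sketch only: POST a load-model request to the controller using Drogon's
// HttpClient. Route, port, and JSON keys below are assumptions (see note above).
#include <drogon/drogon.h>
#include <json/json.h>
#include <iostream>

int main() {
  // Assumed server address; adjust to wherever the controller is hosted.
  auto client = drogon::HttpClient::newHttpClient("http://localhost:3928");

  Json::Value body;
  body["llama_model_path"] = "/models/model.gguf";  // hypothetical path
  body["ngl"] = 32;                                  // GPU layers to offload
  body["ctx_len"] = 2048;                            // context length
  body["embedding"] = false;

  auto req = drogon::HttpRequest::newHttpJsonRequest(body);
  req->setMethod(drogon::Post);
  req->setPath("/inferences/llamacpp/loadmodel");  // assumed controller route

  client->sendRequest(req, [](drogon::ReqResult result,
                              const drogon::HttpResponsePtr &resp) {
    if (result == drogon::ReqResult::Ok && resp) {
      std::cout << resp->getBody() << std::endl;
    } else {
      std::cerr << "loadmodel request failed" << std::endl;
    }
  });

  drogon::app().run();  // the async client needs the event loop running
  return 0;
}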