@@ -153,7 +153,7 @@ void llamaCPP::chatCompletion(
     const HttpRequestPtr &req,
     std::function<void(const HttpResponsePtr &)> &&callback) {
 
-  if (!model_loaded) {
+  if (!llama.model_loaded_external) {
     Json::Value jsonResp;
     jsonResp["message"] =
         "Model has not been loaded, please load model into nitro";
@@ -391,7 +391,7 @@ void llamaCPP::unloadModel(
     std::function<void(const HttpResponsePtr &)> &&callback) {
   Json::Value jsonResp;
   jsonResp["message"] = "No model loaded";
-  if (model_loaded) {
+  if (llama.model_loaded_external) {
     stopBackgroundTask();
 
     llama_free(llama.ctx);
@@ -408,7 +408,7 @@ void llamaCPP::modelStatus(
     const HttpRequestPtr &req,
     std::function<void(const HttpResponsePtr &)> &&callback) {
   Json::Value jsonResp;
-  bool is_model_loaded = this->model_loaded;
+  bool is_model_loaded = llama.model_loaded_external;
   if (is_model_loaded) {
     jsonResp["model_loaded"] = is_model_loaded;
     jsonResp["model_data"] = llama.get_model_props().dump();
@@ -456,7 +456,7 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
       log_enable();
       std::string llama_log_folder = jsonBody["llama_log_folder"].asString();
       log_set_target(llama_log_folder + "llama.log");
-    } // Set folder for llama log
+    } // Set folder for llama log
   }
 #ifdef GGML_USE_CUBLAS
   LOG_INFO << "Setting up GGML CUBLAS PARAMS";
@@ -483,7 +483,9 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
     return false; // Indicate failure
   }
   llama.initialize();
-  model_loaded = true;
+
+  llama.model_loaded_external = true;
+
   LOG_INFO << "Started background task here!";
   backgroundThread = std::thread(&llamaCPP::backgroundTask, this);
   warmupModel();
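Note: the ordering in the hunk above matters: the loaded flag is raised before the worker thread is spawned, so the while-condition in backgroundTask() is already true on its first check; the reverse order could let the loop exit immediately. A standalone sketch of that start sequence, using a plain std::atomic flag as a stand-in for the server-context member (names here are illustrative, not the project's):

  #include <atomic>
  #include <chrono>
  #include <thread>

  std::atomic<bool> running{false};  // stand-in for llama.model_loaded_external

  void backgroundLoop() {
    while (running) {
      // stand-in for llama.update_slots(): one unit of work per iteration
      std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
  }

  int main() {
    running = true;                      // 1. publish "loaded" first
    std::thread worker(backgroundLoop);  // 2. then start the loop thread
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
    running = false;                     // 3. clear the flag to stop the loop
    worker.join();
    return 0;
  }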
@@ -494,7 +496,7 @@ void llamaCPP::loadModel(
     const HttpRequestPtr &req,
     std::function<void(const HttpResponsePtr &)> &&callback) {
 
-  if (model_loaded) {
+  if (llama.model_loaded_external) {
     LOG_INFO << "model loaded";
     Json::Value jsonResp;
     jsonResp["message"] = "Model already loaded";
@@ -522,7 +524,7 @@ void llamaCPP::loadModel(
 }
 
 void llamaCPP::backgroundTask() {
-  while (model_loaded) {
+  while (llama.model_loaded_external) {
     // model_loaded =
     llama.update_slots();
   }
@@ -533,8 +535,9 @@ void llamaCPP::backgroundTask() {
 }
 
 void llamaCPP::stopBackgroundTask() {
-  if (model_loaded) {
-    model_loaded = false;
+  if (llama.model_loaded_external) {
+    llama.model_loaded_external = false;
+    llama.condition_tasks.notify_one();
     LOG_INFO << "changed to false";
     if (backgroundThread.joinable()) {
       backgroundThread.join();
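Note: clearing the flag by itself may not stop the worker; if the slot-processing loop is blocked waiting on the server context's condition variable for new tasks, it never re-checks the flag and the join() above would hang. The added llama.condition_tasks.notify_one() wakes the worker so it can observe the cleared flag and exit. A self-contained sketch of that shutdown pattern with generic names (the mutex, condition variable, and flags below are illustrative stand-ins, not the actual server-context members):

  #include <condition_variable>
  #include <mutex>
  #include <thread>

  std::mutex mtx;
  std::condition_variable cv;  // stand-in for llama.condition_tasks
  bool running = true;         // stand-in for llama.model_loaded_external
  bool has_work = false;

  void worker() {
    for (;;) {
      std::unique_lock<std::mutex> lock(mtx);
      // Without a notify after shutdown, this wait could block forever.
      cv.wait(lock, [] { return has_work || !running; });
      if (!running)
        break;
      has_work = false;  // ... handle one queued task ...
    }
  }

  int main() {
    std::thread t(worker);
    {
      std::lock_guard<std::mutex> lock(mtx);
      running = false;   // 1. clear the flag under the lock
    }
    cv.notify_one();     // 2. wake the worker so it re-checks the predicate
    t.join();            // 3. join now returns instead of hanging
    return 0;
  }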