This repository was archived by the owner on Jul 4, 2025. It is now read-only.
File tree (expand/collapse): 2 files changed, +16 −5 lines
2 files changed, +16 −5 lines

@@ -456,7 +456,7 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
456456 log_enable ();
457457 std::string llama_log_folder = jsonBody[" llama_log_folder" ].asString ();
458458 log_set_target (llama_log_folder + " llama.log" );
459- } // Set folder for llama log
459+ } // Set folder for llama log
460460 }
461461#ifdef GGML_USE_CUBLAS
462462 LOG_INFO << " Setting up GGML CUBLAS PARAMS" ;
@@ -483,7 +483,10 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
483483 return false ; // Indicate failure
484484 }
485485 llama.initialize ();
486+
486487 model_loaded = true ;
488+ llama.model_loaded_external = true ;
489+
487490 LOG_INFO << " Started background task here!" ;
488491 backgroundThread = std::thread (&llamaCPP::backgroundTask, this );
489492 warmupModel ();
@@ -535,6 +538,8 @@ void llamaCPP::backgroundTask() {
535538void llamaCPP::stopBackgroundTask () {
536539 if (model_loaded) {
537540 model_loaded = false ;
541+ llama.condition_tasks .notify_one ();
542+ llama.model_loaded_external = false ;
538543 LOG_INFO << " changed to false" ;
539544 if (backgroundThread.joinable ()) {
540545 backgroundThread.join ();
@@ -503,6 +503,9 @@ struct llama_server_context {
503503 int32_t id_gen;
504504 int32_t n_ctx; // total context for all clients / slots
505505
506+ // Internal
507+ std::atomic<bool > model_loaded_external = false ;
508+
506509 // system prompt
507510 bool system_need_update = false ;
508511
@@ -1538,10 +1541,13 @@ struct llama_server_context {
15381541 " cache\n " );
15391542 kv_cache_clear ();
15401543 }
1541- std::this_thread::sleep_for (std::chrono::milliseconds (5 ));
1542- // TODO: Need to implement queueing using CV for better performance
1543- // std::unique_lock<std::mutex> lock(mutex_tasks);
1544- // condition_tasks.wait(lock, [&] { return !queue_tasks.empty(); });
1544+ // std::this_thread::sleep_for(std::chrono::milliseconds(5));
1545+ // TODO: Need to implement queueing using CV for better performance
1546+ std::unique_lock<std::mutex> lock (mutex_tasks);
1547+ condition_tasks.wait (lock, [&] {
1548+ return (!queue_tasks.empty () && model_loaded_external) ||
1549+ (!model_loaded_external);
1550+ });
15451551 }
15461552
15471553 for (llama_client_slot &slot : slots) {
You can’t perform that action at this time.
0 commit comments