This repository was archived by the owner on Jul 4, 2025. It is now read-only.
File tree (expand/collapse): 2 files changed, +16 −5 lines
2 files changed, +16 −5 lines

@@ -456,7 +456,7 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
456456 log_enable ();
457457 std::string llama_log_folder = jsonBody[" llama_log_folder" ].asString ();
458458 log_set_target (llama_log_folder + " llama.log" );
459- } // Set folder for llama log
459+ } // Set folder for llama log
460460 }
461461#ifdef GGML_USE_CUBLAS
462462 LOG_INFO << " Setting up GGML CUBLAS PARAMS" ;
@@ -483,7 +483,10 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
483483 return false ; // Indicate failure
484484 }
485485 llama.initialize ();
486+
486487 model_loaded = true ;
488+ llama.model_loaded_external = true ;
489+
487490 LOG_INFO << " Started background task here!" ;
488491 backgroundThread = std::thread (&llamaCPP::backgroundTask, this );
489492 warmupModel ();
@@ -535,6 +538,8 @@ void llamaCPP::backgroundTask() {
535538void llamaCPP::stopBackgroundTask () {
536539 if (model_loaded) {
537540 model_loaded = false ;
541+ llama.condition_tasks .notify_one ();
542+ llama.model_loaded_external = false ;
538543 LOG_INFO << " changed to false" ;
539544 if (backgroundThread.joinable ()) {
540545 backgroundThread.join ();
@@ -503,6 +503,9 @@ struct llama_server_context {
503503 int32_t id_gen;
504504 int32_t n_ctx; // total context for all clients / slots
505505
506+ // Internal
507+ std::atomic<bool > model_loaded_external = false ;
508+
506509 // system prompt
507510 bool system_need_update = false ;
508511
@@ -1538,10 +1541,13 @@ struct llama_server_context {
15381541 " cache\n " );
15391542 kv_cache_clear ();
15401543 }
1541- std::this_thread::sleep_for (std::chrono::milliseconds (5 ));
1542- // TODO: Need to implement queueing using CV for better performance
1543- // std::unique_lock<std::mutex> lock(mutex_tasks);
1544- // condition_tasks.wait(lock, [&] { return !queue_tasks.empty(); });
1544+ // std::this_thread::sleep_for(std::chrono::milliseconds(5));
1545+ // TODO: Need to implement queueing using CV for better performance
1546+ std::unique_lock<std::mutex> lock (mutex_tasks);
1547+ condition_tasks.wait (lock, [&] {
1548+ return (!queue_tasks.empty () && model_loaded_external) ||
1549+ (!model_loaded_external);
1550+ });
15451551 }
15461552
15471553 for (llama_client_slot &slot : slots) {
You can’t perform that action at this time.
0 commit comments