
Commit 9734cbd

bug: fix busy waiting with proper CV
1 parent 50fa2e1 commit 9734cbd
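In short: the background task loop previously woke every 5 ms via std::this_thread::sleep_for to poll for new tasks, burning CPU while idle. This commit replaces the poll with a condition-variable wait on mutex_tasks, adds an atomic model_loaded_external flag so the wait predicate can also release the thread on shutdown, and has stopBackgroundTask() notify condition_tasks so the background thread can be joined promptly.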

2 files changed: +16 −5 lines changed

controllers/llamaCPP.cc

Lines changed: 6 additions & 1 deletion
@@ -456,7 +456,7 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
       log_enable();
       std::string llama_log_folder = jsonBody["llama_log_folder"].asString();
       log_set_target(llama_log_folder + "llama.log");
-      } // Set folder for llama log
+    } // Set folder for llama log
   }
 #ifdef GGML_USE_CUBLAS
   LOG_INFO << "Setting up GGML CUBLAS PARAMS";
@@ -483,7 +483,10 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
     return false; // Indicate failure
   }
   llama.initialize();
+
   model_loaded = true;
+  llama.model_loaded_external = true;
+
   LOG_INFO << "Started background task here!";
   backgroundThread = std::thread(&llamaCPP::backgroundTask, this);
   warmupModel();
@@ -535,6 +538,8 @@ void llamaCPP::backgroundTask() {
 void llamaCPP::stopBackgroundTask() {
   if (model_loaded) {
     model_loaded = false;
+    llama.condition_tasks.notify_one();
+    llama.model_loaded_external = false;
     LOG_INFO << "changed to false";
     if (backgroundThread.joinable()) {
       backgroundThread.join();
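
The stop path above is the usual flip-the-flag-then-notify idiom: stopBackgroundTask() clears the flags, pokes condition_tasks, and only then joins the thread. A minimal, self-contained sketch of that idiom (m, cv, running, and worker are illustrative names, not the project's members):

#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

std::mutex m;
std::condition_variable cv;
bool running = true; // predicate state, guarded by m

void worker() {
  std::unique_lock<std::mutex> lock(m);
  // Blocks without consuming CPU; the predicate re-check makes
  // spurious wakeups harmless.
  cv.wait(lock, [] { return !running; });
  std::cout << "worker: told to stop, exiting\n";
}

int main() {
  std::thread t(worker);
  {
    std::lock_guard<std::mutex> lock(m);
    running = false; // change the predicate under the lock...
  }
  cv.notify_one();   // ...then wake the waiter so it re-checks and returns
  t.join();          // joins promptly instead of waiting out a poll interval
}

With the condition variable, join() returns as soon as the notification lands; with the old 5 ms polling loop, shutdown could wait out the remainder of a tick before the thread noticed the flag.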

controllers/llamaCPP.h

Lines changed: 10 additions & 4 deletions
@@ -503,6 +503,9 @@ struct llama_server_context {
   int32_t id_gen;
   int32_t n_ctx; // total context for all clients / slots

+  // Internal
+  std::atomic<bool> model_loaded_external = false;
+
   // system prompt
   bool system_need_update = false;

@@ -1538,10 +1541,13 @@ struct llama_server_context {
           "cache\n");
       kv_cache_clear();
     }
-    std::this_thread::sleep_for(std::chrono::milliseconds(5));
-    // TODO: Need to implement queueing using CV for better performance
-    // std::unique_lock<std::mutex> lock(mutex_tasks);
-    // condition_tasks.wait(lock, [&] { return !queue_tasks.empty(); });
+    // std::this_thread::sleep_for(std::chrono::milliseconds(5));
+    // TODO: Need to implement queueing using CV for better performance
+    std::unique_lock<std::mutex> lock(mutex_tasks);
+    condition_tasks.wait(lock, [&] {
+      return (!queue_tasks.empty() && model_loaded_external) ||
+             (!model_loaded_external);
+    });
   }

   for (llama_client_slot &slot : slots) {
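
The two-clause predicate is the heart of the fix: the same condition variable wakes the loop when a task arrives while the model is loaded, and also when the model is unloaded, so an idle server no longer spins and a shutting-down server is not left blocked on an empty queue. A minimal sketch of a consumer loop built around that predicate (the int queue, consumer_loop, and the demo sleep in main are illustrative stand-ins, not the project's task machinery):

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>

std::mutex mutex_tasks;
std::condition_variable condition_tasks;
std::queue<int> queue_tasks; // stand-in for the real task type
std::atomic<bool> model_loaded_external{true};

void consumer_loop() {
  for (;;) {
    std::unique_lock<std::mutex> lock(mutex_tasks);
    condition_tasks.wait(lock, [&] {
      // Clause 1: there is work and the model is loaded.
      // Clause 2: the model was unloaded, so wake even with an empty
      // queue and let the loop exit instead of sleeping forever.
      return (!queue_tasks.empty() && model_loaded_external) ||
             (!model_loaded_external);
    });
    if (!model_loaded_external) return; // woken for shutdown, not for work
    int task = queue_tasks.front();
    queue_tasks.pop();
    lock.unlock(); // never hold the queue lock while doing the work
    std::cout << "processing task " << task << "\n";
  }
}

int main() {
  std::thread t(consumer_loop);
  {
    std::lock_guard<std::mutex> lock(mutex_tasks);
    queue_tasks.push(42); // producer: enqueue under the lock...
  }
  condition_tasks.notify_one(); // ...then wake the consumer
  std::this_thread::sleep_for(std::chrono::milliseconds(50)); // demo pacing only
  {
    std::lock_guard<std::mutex> lock(mutex_tasks);
    model_loaded_external = false; // flip under the lock to avoid a lost wakeup
  }
  condition_tasks.notify_one();
  t.join();
}

One deliberate detail in the sketch: model_loaded_external is flipped while holding mutex_tasks. Even for an atomic flag, updating the predicate state under the waiter's mutex is the conventional way to rule out a lost wakeup between the predicate check and the block.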
