This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 8b71a85

chore: reformat the code
1 parent 0b7dbea commit 8b71a85
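
The diff below is formatting-only: a space is added after if, the InferenceStatus enum is collapsed onto one line, and long constructor and lambda calls are re-wrapped to the column limit. The result is consistent with a clang-format pass over the two controllers. A minimal sketch of how such a reformat is typically applied, assuming clang-format is installed and picks up any project style file (the exact command used for this commit is not recorded):

    clang-format -i controllers/llamaCPP.cc controllers/llamaCPP.h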

2 files changed: +40 -40 lines changed


controllers/llamaCPP.cc

Lines changed: 37 additions & 38 deletions
@@ -10,11 +10,7 @@ using json = nlohmann::json;
 /**
  * The state of the inference task
  */
-enum InferenceStatus {
-  PENDING,
-  RUNNING,
-  FINISHED
-};
+enum InferenceStatus { PENDING, RUNNING, FINISHED };
 
 /**
  * There is a need to save state of current ongoing inference status of a
@@ -141,7 +137,9 @@ std::string create_return_json(const std::string &id, const std::string &model,
   return Json::writeString(writer, root);
 }
 
-llamaCPP::llamaCPP(): queue(new trantor::ConcurrentTaskQueue(llama.params.n_parallel, "llamaCPP")) {
+llamaCPP::llamaCPP()
+    : queue(new trantor::ConcurrentTaskQueue(llama.params.n_parallel,
+                                             "llamaCPP")) {
   // Some default values for now below
   log_disable(); // Disable the log to file feature, reduce bloat for
                  // target
@@ -172,7 +170,7 @@ void llamaCPP::inference(
 
   const auto &jsonBody = req->getJsonObject();
   // Check if model is loaded
-  if(checkModelLoaded(callback)) {
+  if (checkModelLoaded(callback)) {
     // Model is loaded
     // Do Inference
     inferenceImpl(jsonBody, callback);
@@ -329,8 +327,7 @@ void llamaCPP::inferenceImpl(
     auto state = create_inference_state(this);
     auto chunked_content_provider =
         [state, data](char *pBuffer, std::size_t nBuffSize) -> std::size_t {
-
-      if(state->inferenceStatus == PENDING) {
+      if (state->inferenceStatus == PENDING) {
         state->inferenceStatus = RUNNING;
       } else if (state->inferenceStatus == FINISHED) {
         return 0;
@@ -341,7 +338,7 @@ void llamaCPP::inferenceImpl(
         state->inferenceStatus = FINISHED;
         return 0;
       }
-
+
       task_result result = state->instance->llama.next_result(state->task_id);
       if (!result.error) {
         const std::string to_send = result.result_json["content"];
@@ -367,10 +364,10 @@ void llamaCPP::inferenceImpl(
           LOG_INFO << "reached result stop";
           state->inferenceStatus = FINISHED;
         }
-
+
         // Make sure nBufferSize is not zero
         // Otherwise it stop streaming
-        if(!nRead) {
+        if (!nRead) {
           state->inferenceStatus = FINISHED;
         }
 
@@ -380,31 +377,33 @@ void llamaCPP::inferenceImpl(
       return 0;
     };
     // Queued task
-    state->instance->queue->runTaskInQueue([callback, state, data,
-                                            chunked_content_provider]() {
-      state->task_id =
-          state->instance->llama.request_completion(data, false, false, -1);
-
-      // Start streaming response
-      auto resp = nitro_utils::nitroStreamResponse(chunked_content_provider,
-                                                   "chat_completions.txt");
-      callback(resp);
-
-      int retries = 0;
-
-      // Since this is an async task, we will wait for the task to be completed
-      while (state->inferenceStatus != FINISHED && retries < 10) {
-        // Should wait chunked_content_provider lambda to be called within 3s
-        if(state->inferenceStatus == PENDING) {
-          retries += 1;
-        }
-        if(state->inferenceStatus != RUNNING)
-          LOG_INFO << "Wait for task to be released:" << state->task_id;
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-      }
-      // Request completed, release it
-      state->instance->llama.request_cancel(state->task_id);
-    });
+    state->instance->queue->runTaskInQueue(
+        [callback, state, data, chunked_content_provider]() {
+          state->task_id =
+              state->instance->llama.request_completion(data, false, false, -1);
+
+          // Start streaming response
+          auto resp = nitro_utils::nitroStreamResponse(chunked_content_provider,
+                                                       "chat_completions.txt");
+          callback(resp);
+
+          int retries = 0;
+
+          // Since this is an async task, we will wait for the task to be
+          // completed
+          while (state->inferenceStatus != FINISHED && retries < 10) {
+            // Should wait chunked_content_provider lambda to be called within
+            // 3s
+            if (state->inferenceStatus == PENDING) {
+              retries += 1;
+            }
+            if (state->inferenceStatus != RUNNING)
+              LOG_INFO << "Wait for task to be released:" << state->task_id;
+            std::this_thread::sleep_for(std::chrono::milliseconds(100));
+          }
+          // Request completed, release it
+          state->instance->llama.request_cancel(state->task_id);
+        });
   } else {
     Json::Value respData;
     auto resp = nitro_utils::nitroHttpResponse();
@@ -434,7 +433,7 @@ void llamaCPP::embedding(
     const HttpRequestPtr &req,
     std::function<void(const HttpResponsePtr &)> &&callback) {
   // Check if model is loaded
-  if(checkModelLoaded(callback)) {
+  if (checkModelLoaded(callback)) {
     // Model is loaded
     const auto &jsonBody = req->getJsonObject();
     // Run embedding

controllers/llamaCPP.h

Lines changed: 3 additions & 2 deletions
@@ -2526,10 +2526,11 @@ class llamaCPP : public drogon::HttpController<llamaCPP>, public ChatProvider {
 
   // Openai compatible path
   ADD_METHOD_TO(llamaCPP::inference, "/v1/chat/completions", Post);
-  // ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/chat/completions", Options); NOTE: prelight will be added back when browser support is properly planned
+  // ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/chat/completions", Options);
+  // NOTE: prelight will be added back when browser support is properly planned
 
   ADD_METHOD_TO(llamaCPP::embedding, "/v1/embeddings", Post);
-  //ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/embeddings", Options);
+  // ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/embeddings", Options);
 
   // PATH_ADD("/llama/chat_completion", Post);
   METHOD_LIST_END
