
Commit a56f33d

Merge pull request #111 from janhq/82-feat-add-custom-user-assistant-prompt-option-as-a-server-option-for-nitro
82 feat add custom user assistant prompt option as a server option for nitro
2 parents 1f1564c + 2f141f4 commit a56f33d

File tree: 2 files changed (+28, −15 lines)


controllers/llamaCPP.cc

Lines changed: 25 additions & 15 deletions
@@ -77,7 +77,8 @@ void llamaCPP::chatCompletion(
 
   const auto &jsonBody = req->getJsonObject();
   std::string formatted_output =
-      "Below is a conversation between an AI system named ASSISTANT and USER\n";
+      "Below is a conversation between an AI system named " + ai_prompt +
+      " and " + user_prompt + "\n";
 
   json data;
   json stopWords;
@@ -94,9 +95,19 @@ void llamaCPP::chatCompletion(
 
   const Json::Value &messages = (*jsonBody)["messages"];
   for (const auto &message : messages) {
-    std::string role = message["role"].asString();
+    std::string input_role = message["role"].asString();
+    std::string role;
+    if (input_role == "user") {
+      role = user_prompt;
+    } else if (input_role == "assistant") {
+      role = ai_prompt;
+    } else if (input_role == "system") {
+      role = system_prompt;
+    } else {
+      role = input_role;
+    }
     std::string content = message["content"].asString();
-    formatted_output += role + ": " + content + "\n";
+    formatted_output += role + content + "\n";
   }
   formatted_output += "assistant:";
 
@@ -105,8 +116,7 @@ void llamaCPP::chatCompletion(
       stopWords.push_back(stop_word.asString());
     }
     // specify default stop words
-    stopWords.push_back("user:");
-    stopWords.push_back("### USER:");
+    stopWords.push_back(user_prompt);
     data["stop"] = stopWords;
   }
 
@@ -202,19 +212,19 @@ void llamaCPP::loadModel(
   LOG_INFO << "Drogon thread is:" << drogon_thread;
   if (jsonBody) {
     params.model = (*jsonBody)["llama_model_path"].asString();
-    params.n_gpu_layers = (*jsonBody)["ngl"].asInt();
-    params.n_ctx = (*jsonBody)["ctx_len"].asInt();
-    params.embedding = (*jsonBody)["embedding"].asBool();
+    params.n_gpu_layers = (*jsonBody).get("ngl", 100).asInt();
+    params.n_ctx = (*jsonBody).get("ctx_len", 2048).asInt();
+    params.embedding = (*jsonBody).get("embedding", true).asBool();
     // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
-    if ((*jsonBody).isMember("n_parallel")) {
-      params.n_parallel = (*jsonBody)["n_parallel"].asInt();
-    } else {
-      params.n_parallel = drogon_thread;
-    }
+
+    params.n_parallel = (*jsonBody).get("n_parallel", drogon_thread).asInt();
 
     params.cont_batching = (*jsonBody)["cont_batching"].asBool();
-    // params.n_threads = (*jsonBody)["n_threads"].asInt();
-    // params.n_threads_batch = params.n_threads;
+
+    this->user_prompt = (*jsonBody).get("user_prompt", "USER: ").asString();
+    this->ai_prompt = (*jsonBody).get("ai_prompt", "ASSISTANT: ").asString();
+    this->system_prompt =
+        (*jsonBody).get("system_prompt", "ASSISTANT's RULE: ").asString();
   }
 #ifdef GGML_USE_CUBLAS
   LOG_INFO << "Setting up GGML CUBLAS PARAMS";
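A minimal, self-contained sketch (not part of the commit) of how the new options behave: it repeats the jsoncpp lookups added to loadModel() and the prompt assembly in chatCompletion(). The request values, model path, and message contents below are hypothetical placeholders.

// Sketch only: mirrors the option lookups and prompt assembly from this
// commit; every request value below is a placeholder.
#include <json/json.h>

#include <iostream>
#include <string>

int main() {
  Json::Value jsonBody;                                 // hypothetical /loadmodel body
  jsonBody["llama_model_path"] = "/path/to/model.gguf"; // placeholder path
  jsonBody["user_prompt"] = "### USER: ";               // optional override
  jsonBody["ai_prompt"] = "### ASSISTANT: ";            // optional override

  // Missing keys fall back to the defaults introduced by this commit.
  std::string user_prompt = jsonBody.get("user_prompt", "USER: ").asString();
  std::string ai_prompt = jsonBody.get("ai_prompt", "ASSISTANT: ").asString();
  std::string system_prompt =
      jsonBody.get("system_prompt", "ASSISTANT's RULE: ").asString();

  // chatCompletion() now concatenates the role prefix directly with the
  // message content (no ": " separator) and uses user_prompt as the
  // default stop word.
  std::string formatted_output =
      "Below is a conversation between an AI system named " + ai_prompt +
      " and " + user_prompt + "\n";
  formatted_output += system_prompt + "Answer concisely.\n"; // system message
  formatted_output += user_prompt + "Hello!\n";              // user message
  formatted_output += "assistant:";

  std::cout << formatted_output << std::endl;
  return 0;
}

Running it prints the assembled prompt, which makes it easy to check how a custom user_prompt/ai_prompt pair changes both the conversation preamble and the default stop word.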

controllers/llamaCPP.h

Lines changed: 3 additions & 0 deletions
@@ -2142,5 +2142,8 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   size_t sent_count = 0;
   size_t sent_token_probs_index = 0;
   std::thread backgroundThread;
+  std::string user_prompt;
+  std::string ai_prompt;
+  std::string system_prompt;
 };
 }; // namespace inferences
