@@ -77,7 +77,8 @@ void llamaCPP::chatCompletion(
 
   const auto &jsonBody = req->getJsonObject();
   std::string formatted_output =
-      "Below is a conversation between an AI system named ASSISTANT and USER\n";
+      "Below is a conversation between an AI system named " + ai_prompt +
+      " and " + user_prompt + "\n";
 
   json data;
   json stopWords;
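In isolation, the new preamble is plain string concatenation over the two configurable markers. A minimal sketch, assuming the defaults that the loadModel hunk further down assigns (both markers are overridable per load-model request):

```cpp
#include <iostream>
#include <string>

int main() {
  // Assumed defaults, taken from the loadModel() hunk below.
  std::string ai_prompt = "ASSISTANT: ";
  std::string user_prompt = "USER: ";

  std::string formatted_output =
      "Below is a conversation between an AI system named " + ai_prompt +
      " and " + user_prompt + "\n";

  // Both default markers end in a space, so the preamble comes out as
  // "Below is a conversation between an AI system named ASSISTANT:  and USER: "
  std::cout << formatted_output;
}
```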
@@ -94,9 +95,19 @@ void llamaCPP::chatCompletion(
 
   const Json::Value &messages = (*jsonBody)["messages"];
   for (const auto &message : messages) {
-    std::string role = message["role"].asString();
+    std::string input_role = message["role"].asString();
+    std::string role;
+    if (input_role == "user") {
+      role = user_prompt;
+    } else if (input_role == "assistant") {
+      role = ai_prompt;
+    } else if (input_role == "system") {
+      role = system_prompt;
+    } else {
+      role = input_role;
+    }
     std::string content = message["content"].asString();
-    formatted_output += role + ": " + content + "\n";
+    formatted_output += role + content + "\n";
   }
   formatted_output += "assistant:";
 
@@ -105,8 +116,7 @@
       stopWords.push_back(stop_word.asString());
     }
     // specify default stop words
-    stopWords.push_back("user:");
-    stopWords.push_back("### USER:");
+    stopWords.push_back(user_prompt);
    data["stop"] = stopWords;
  }
 
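Taken together, the three chatCompletion hunks translate each incoming OpenAI-style role ("user"/"assistant"/"system") into its configured marker, prepend the marker to the message content, and reuse the user marker as the default stop word. A self-contained sketch of that flow with jsoncpp (the Json::Value type Drogon uses), assuming the default markers and a made-up two-message payload:

```cpp
#include <json/json.h>

#include <iostream>
#include <string>

int main() {
  // Assumed defaults, matching the loadModel() hunk below.
  const std::string user_prompt = "USER: ";
  const std::string ai_prompt = "ASSISTANT: ";
  const std::string system_prompt = "ASSISTANT's RULE: ";

  // Hypothetical request payload, for illustration only.
  Json::Value messages(Json::arrayValue);
  Json::Value system_msg;
  system_msg["role"] = "system";
  system_msg["content"] = "Answer briefly.";
  messages.append(system_msg);
  Json::Value user_msg;
  user_msg["role"] = "user";
  user_msg["content"] = "Hello!";
  messages.append(user_msg);

  std::string formatted_output =
      "Below is a conversation between an AI system named " + ai_prompt +
      " and " + user_prompt + "\n";

  // Same role translation as the patched loop; unknown roles pass through.
  for (const auto &message : messages) {
    std::string input_role = message["role"].asString();
    std::string role;
    if (input_role == "user") {
      role = user_prompt;
    } else if (input_role == "assistant") {
      role = ai_prompt;
    } else if (input_role == "system") {
      role = system_prompt;
    } else {
      role = input_role;
    }
    formatted_output += role + message["content"].asString() + "\n";
  }

  std::cout << formatted_output;
  // Last two lines printed:
  // ASSISTANT's RULE: Answer briefly.
  // USER: Hello!
}
```

Reusing user_prompt as the stop word works because every user turn in the formatted transcript begins with that exact marker, so generation halts as soon as the model starts writing the next user turn; the single configured value replaces both of the old hard-coded variants ("user:" and "### USER:").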
@@ -202,19 +212,19 @@ void llamaCPP::loadModel(
   LOG_INFO << "Drogon thread is:" << drogon_thread;
   if (jsonBody) {
     params.model = (*jsonBody)["llama_model_path"].asString();
-    params.n_gpu_layers = (*jsonBody)["ngl"].asInt();
-    params.n_ctx = (*jsonBody)["ctx_len"].asInt();
-    params.embedding = (*jsonBody)["embedding"].asBool();
+    params.n_gpu_layers = (*jsonBody).get("ngl", 100).asInt();
+    params.n_ctx = (*jsonBody).get("ctx_len", 2048).asInt();
+    params.embedding = (*jsonBody).get("embedding", true).asBool();
     // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
-    if ((*jsonBody).isMember("n_parallel")) {
-      params.n_parallel = (*jsonBody)["n_parallel"].asInt();
-    } else {
-      params.n_parallel = drogon_thread;
-    }
+
+    params.n_parallel = (*jsonBody).get("n_parallel", drogon_thread).asInt();
 
     params.cont_batching = (*jsonBody)["cont_batching"].asBool();
-    // params.n_threads = (*jsonBody)["n_threads"].asInt();
-    // params.n_threads_batch = params.n_threads;
+
+    this->user_prompt = (*jsonBody).get("user_prompt", "USER: ").asString();
+    this->ai_prompt = (*jsonBody).get("ai_prompt", "ASSISTANT: ").asString();
+    this->system_prompt =
+        (*jsonBody).get("system_prompt", "ASSISTANT's RULE: ").asString();
   }
 #ifdef GGML_USE_CUBLAS
   LOG_INFO << "Setting up GGML CUBLAS PARAMS";
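Switching from operator[] to Json::Value::get(key, default) means a load-model request no longer has to spell out every field: with jsoncpp, a missing key read through operator[] yields a null value that silently converts to 0/false/"", whereas get() substitutes the stated default. A sketch of the defaulted reads against a minimal hypothetical body (the model path is invented):

```cpp
#include <json/json.h>

#include <iostream>
#include <string>

int main() {
  // Hypothetical load-model request body; the path is made up.
  Json::Value body;
  body["llama_model_path"] = "/tmp/model.gguf";
  body["ngl"] = 50;  // explicitly set, so the default of 100 is ignored

  // Mirrors the defaulted reads in loadModel():
  int n_gpu_layers = body.get("ngl", 100).asInt();        // 50 (from body)
  int n_ctx = body.get("ctx_len", 2048).asInt();          // 2048 (default)
  bool embedding = body.get("embedding", true).asBool();  // true (default)
  std::string user_prompt =
      body.get("user_prompt", "USER: ").asString();       // "USER: " (default)

  std::cout << n_gpu_layers << " " << n_ctx << " " << std::boolalpha
            << embedding << " [" << user_prompt << "]\n";
  // Prints: 50 2048 true [USER: ]
}
```

In this hunk, llama_model_path and cont_batching are still read with operator[], so they remain effectively required fields of the request body.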