This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 58fa46c

replace with llama state
1 parent: a7f307a

File tree

controllers/llamaCPP.cc
controllers/llamaCPP.h

2 files changed: +1 -5 lines changed


controllers/llamaCPP.cc

Lines changed: 1 addition & 3 deletions
@@ -203,7 +203,7 @@ void llamaCPP::chatCompletion(
   data["presence_penalty"] = (*jsonBody).get("presence_penalty", 0).asFloat();
   const Json::Value &messages = (*jsonBody)["messages"];

-  if (!multi_modal) {
+  if (!llama.multimodal) {

   for (const auto &message : messages) {
     std::string input_role = message["role"].asString();
@@ -407,7 +407,6 @@ void llamaCPP::unloadModel(
   llama_free_model(llama.model);
   llama.ctx = nullptr;
   llama.model = nullptr;
-  multi_modal = false;
   jsonResp["message"] = "Model unloaded successfully";
 }
 auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
@@ -442,7 +441,6 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
   if (!jsonBody["mmproj"].isNull()) {
     LOG_INFO << "MMPROJ FILE detected, multi-model enabled!";
     params.mmproj = jsonBody["mmproj"].asString();
-    multi_modal = true;
   }
   params.model = jsonBody["llama_model_path"].asString();
   params.n_gpu_layers = jsonBody.get("ngl", 100).asInt();

controllers/llamaCPP.h

Lines changed: 0 additions & 2 deletions
@@ -732,7 +732,6 @@ struct llama_server_context {
   if (images_data != data.end() && images_data->is_array()) {
     for (const auto &img : *images_data) {
       std::string data_b64 = img["data"].get<std::string>();
-      LOG_INFO << data_b64;
       slot_image img_sl;
       img_sl.id =
           img.count("id") != 0 ? img["id"].get<int>() : slot->images.size();
@@ -1913,6 +1912,5 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   bool caching_enabled;
   std::atomic<int> no_of_chats = 0;
   int clean_cache_threshold;
-  bool multi_modal = false;
 };
 }; // namespace inferences
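
Taken together, the commit drops the controller-level multi_modal member and reads the flag from the llama server context state instead ("replace with llama state"). Below is a minimal, hedged C++ sketch of that intent; the stub structs, the main driver, and every name other than multimodal, mmproj, model, and the request fields shown in the diff are assumptions for illustration, not the actual nitro sources.

#include <iostream>
#include <string>

// Stand-ins for the real types; only the members exercised by this commit
// (mmproj, model path, multimodal) mirror the diff, the rest is assumed.
struct gpt_params_stub {
  std::string mmproj;  // path to an mmproj/CLIP file, empty for text-only models
  std::string model;
};

struct llama_server_context_stub {
  bool multimodal = false;  // single source of truth after this commit
};

int main() {
  gpt_params_stub params;
  llama_server_context_stub llama;

  // loadModelImpl(): when the request carries an "mmproj" path, the server
  // context itself records that it is multimodal, so the controller no longer
  // needs its own multi_modal member (the assignment this commit deletes).
  params.mmproj = "/models/mmproj.gguf";  // pretend jsonBody["mmproj"] was set
  llama.multimodal = !params.mmproj.empty();

  // chatCompletion(): the text-only prompt path now branches on llama.multimodal.
  if (!llama.multimodal) {
    std::cout << "format text-only chat prompt\n";
  } else {
    std::cout << "format image-aware chat prompt\n";
  }

  // unloadModel(): freeing the model resets state owned by the context, so the
  // explicit `multi_modal = false;` reset in the controller can also go away.
  return 0;
}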

0 commit comments
