Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 7f7fd03

Browse files
committed
Merge branch 'main' of https://github.com/janhq/nitro
2 parents: edc4447 + bc7d498 — commit 7f7fd03

File tree

5 files changed

+40
-12
lines changed

5 files changed

+40
-12
lines changed

CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,16 @@ if(LLAMA_CUBLAS)
3333
endif()
3434
endif()
3535

36+
if (LLAMA_CUBLAS)
37+
cmake_minimum_required(VERSION 3.17)
38+
39+
find_package(CUDAToolkit)
40+
if (CUDAToolkit_FOUND)
41+
message(STATUS "cuBLAS found")
42+
add_compile_definitions(GGML_USE_CUBLAS)
43+
endif()
44+
endif()
45+
3646
add_subdirectory(llama.cpp)
3747
add_executable(${PROJECT_NAME} main.cc)
3848

README_temp.md

Lines changed: 22 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -12,17 +12,28 @@ This will create a build_deps folder, just ignore it
1212

1313
### Step 2:
1414

15-
Build the app from source
16-
17-
```zsh
18-
mkdir build && cd build
19-
cmake ..
20-
21-
# MacOS
22-
make -j $(sysctl -n hw.physicalcpu)
23-
# Linux
24-
make -j $(%NUMBER_OF_PROCESSORS%)
25-
```
15+
Generate build file
16+
- On MacOS with Apple silicon:
17+
18+
```zsh
19+
mkdir build && cd build
20+
cmake ..
21+
```
22+
23+
- On MacOS with Intel processors:
24+
```zsh
25+
mkdir build && cd build
26+
cmake -DLLAMA_METAL=OFF ..
27+
```
28+
29+
Build the app
30+
- On MacOS and Linux
31+
```
32+
# MacOS
33+
make -j $(sysctl -n hw.physicalcpu)
34+
# Linux
35+
make -j $(%NUMBER_OF_PROCESSORS%)
36+
```
2637

2738
### Step 3:
2839

controllers/llamaCPP.cc

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44
#include <chrono>
55
#include <cstring>
66
#include <thread>
7+
#include <regex>
8+
79
std::string create_return_json(const std::string &id, const std::string &model,
810
const std::string &content,
911
Json::Value finish_reason = Json::Value()) {

controllers/llamaCPP.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
#include "llama.h"
88
#include "build-info.h"
99
#include "grammar-parser.h"
10+
#include <regex>
1011

1112
#ifndef NDEBUG
1213
// crash the server in debug mode, otherwise send an http 500 error
@@ -1385,6 +1386,10 @@ class llamaCPP : public drogon::HttpSimpleController<llamaCPP> {
13851386
params.model = conf["llama_model_path"].asString();
13861387
params.n_gpu_layers = conf["ngl"].asInt();
13871388
params.n_ctx = conf["ctx_len"].asInt();
1389+
#ifdef GGML_USE_CUBLAS
1390+
LOG_INFO << "Setting up GGML CUBLAS PARAMS";
1391+
params.mul_mat_q = false;
1392+
#endif // GGML_USE_CUBLAS
13881393
if (params.model_alias == "unknown") {
13891394
params.model_alias = params.model;
13901395
}

controllers/nitro_utils.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ inline void nitro_logo(){
5050
std::cout << resetColor << c;
5151
colorIndex = 0;
5252
} else {
53-
std::cout << rainbowColors[colorIndex % 6] << c;
53+
std::cout << rainbowColors[colorIndex % 2] << c;
5454
colorIndex++;
5555
}
5656
}

0 commit comments

Comments (0)