This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 68cfdad

Merge pull request #411 from janhq/402-feat-grammar-needs-to-be-called-from-loadtime-not-chat-completion

feat: move load grammar file to load time

2 parents: a4bf3cf + 1fcc0d3

File tree: 2 files changed (+16, -10 lines)
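Summary of the change: previously, chatCompletion re-read the grammar file from disk on every request; after this commit, loadModelImpl reads it once and caches it in the new grammar_file_content member. A minimal sketch of how a caller might now build the load-model request body with jsoncpp (the field names "llama_model_path", "ngl", "ctx_len", "mlock", and "grammar_file" come from the diff; the helper name and path values are hypothetical):

#include <json/json.h>  // jsoncpp, the JSON library the controller already uses

// Hypothetical helper: builds a load-model request body. After this change,
// "grammar_file" belongs here, not in each chat-completion body.
Json::Value makeLoadModelBody() {
  Json::Value body;
  body["llama_model_path"] = "/models/model.gguf";  // placeholder path
  body["ngl"] = 100;                                // default per the diff
  body["ctx_len"] = 2048;                           // default per the diff
  body["mlock"] = false;
  body["grammar_file"] = "/grammars/json.gbnf";     // now read once at load time
  return body;
}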

controllers/llamaCPP.cc (15 additions, 10 deletions)

@@ -187,16 +187,8 @@ void llamaCPP::chatCompletion(
   data["presence_penalty"] = (*jsonBody).get("presence_penalty", 0).asFloat();
   const Json::Value &messages = (*jsonBody)["messages"];
 
-  if (!(*jsonBody)["grammar_file"].isNull()) {
-    std::string grammar_file = (*jsonBody)["grammar_file"].asString();
-    std::ifstream file(grammar_file);
-    if (!file) {
-      LOG_ERROR << "Grammar file not found";
-    } else {
-      std::stringstream grammarBuf;
-      grammarBuf << file.rdbuf();
-      data["grammar"] = grammarBuf.str();
-    }
+  if (!grammar_file_content.empty()) {
+    data["grammar"] = grammar_file_content;
   };
 
   if (!llama.multimodal) {
@@ -514,6 +506,19 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
   if (!jsonBody["mlock"].isNull()) {
     params.use_mlock = jsonBody["mlock"].asBool();
   }
+
+  if (!jsonBody["grammar_file"].isNull()) {
+    std::string grammar_file = jsonBody["grammar_file"].asString();
+    std::ifstream file(grammar_file);
+    if (!file) {
+      LOG_ERROR << "Grammar file not found";
+    } else {
+      std::stringstream grammarBuf;
+      grammarBuf << file.rdbuf();
+      grammar_file_content = grammarBuf.str();
+    }
+  };
+
   params.model = jsonBody["llama_model_path"].asString();
   params.n_gpu_layers = jsonBody.get("ngl", 100).asInt();
   params.n_ctx = jsonBody.get("ctx_len", 2048).asInt();
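The load-time hunk above slurps the whole grammar file into a string via std::ifstream and rdbuf(). A self-contained sketch of that read pattern, under the assumption of a local placeholder file name:

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

int main() {
  // Open the grammar file once; "json.gbnf" is a placeholder name.
  std::ifstream file("json.gbnf");
  if (!file) {
    std::cerr << "Grammar file not found\n";
    return 1;
  }
  // Stream the entire file buffer into a stringstream, then take the string.
  std::stringstream grammarBuf;
  grammarBuf << file.rdbuf();
  std::string grammar_file_content = grammarBuf.str();
  // In the controller, this cached string is reused on every chat completion.
  std::cout << grammar_file_content;
  return 0;
}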

controllers/llamaCPP.h (1 addition, 0 deletions)

@@ -2576,5 +2576,6 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   int clean_cache_threshold;
   std::atomic<bool> single_queue_is_busy; // This value only used under the
                                           // condition n_parallel is 1
+  std::string grammar_file_content;
 };
 }; // namespace inferences
