From 831a3750bb5486638c9209b0f144db8d92355802 Mon Sep 17 00:00:00 2001
From: Damian Kalinowski
Date: Thu, 12 Mar 2026 12:44:29 +0100
Subject: [PATCH 1/3] save

---
 .../continuous_batching/llm_executor.hpp          |  4 ++--
 .../continuous_batching/servable_initializer.cpp  | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/llm/language_model/continuous_batching/llm_executor.hpp b/src/llm/language_model/continuous_batching/llm_executor.hpp
index ec7c7d517b..1e162c7645 100644
--- a/src/llm/language_model/continuous_batching/llm_executor.hpp
+++ b/src/llm/language_model/continuous_batching/llm_executor.hpp
@@ -101,8 +101,8 @@ struct LLMExecutor {
 #pragma GCC diagnostic ignored "-Wunused-but-set-variable"
     void printMetrics() {
         ov::genai::PipelineMetrics metrics = pipe->get_metrics();
-        SPDLOG_LOGGER_INFO(llm_executor_logger, "All requests: {}; Scheduled requests: {}; Cache {};",
-            metrics.requests, metrics.scheduled_requests, formatCacheInfo(metrics.cache_usage, metrics.kv_cache_size_in_bytes, this->isDynamicKVCache));
+        SPDLOG_LOGGER_INFO(llm_executor_logger, "All requests: {}; Scheduled requests: {};",
+            metrics.requests, metrics.scheduled_requests);
     }
 };
 #pragma GCC diagnostic pop
diff --git a/src/llm/language_model/continuous_batching/servable_initializer.cpp b/src/llm/language_model/continuous_batching/servable_initializer.cpp
index 27f4f51aee..a9130edcb6 100644
--- a/src/llm/language_model/continuous_batching/servable_initializer.cpp
+++ b/src/llm/language_model/continuous_batching/servable_initializer.cpp
@@ -210,6 +210,22 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptrschedulerConfig, properties->device, properties->pluginConfig, properties->tokenizerPluginConfig);
         properties->tokenizer = properties->pipeline->get_tokenizer();
+
+        // Override chat template from chat_template.jinja file if present in model directory
+        std::filesystem::path chatTemplateJinjaPath = std::filesystem::path(parsedModelsPath) / "chat_template.jinja";
+        if (std::filesystem::exists(chatTemplateJinjaPath)) {
+            std::ifstream chatTemplateFile(chatTemplateJinjaPath);
+            if (chatTemplateFile.is_open()) {
+                std::string chatTemplateContent((std::istreambuf_iterator<char>(chatTemplateFile)),
+                    std::istreambuf_iterator<char>());
+                if (!chatTemplateContent.empty()) {
+                    properties->tokenizer.set_chat_template(chatTemplateContent);
+                    SPDLOG_LOGGER_ERROR(llm_calculator_logger, "PPPPPPPPPPPP Loaded custom chat template from: {}", chatTemplateJinjaPath.string());
+                }
+            } else {
+                SPDLOG_LOGGER_WARN(llm_calculator_logger, "Failed to open chat template file: {}", chatTemplateJinjaPath.string());
+            }
+        }
     } catch (const std::exception& e) {
         SPDLOG_ERROR("Error during llm node initialization for models_path: {} exception: {}", parsedModelsPath, e.what());
         return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
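The override block added above is repeated verbatim in the two legacy initializers touched by the next patch. Below is a minimal C++17 sketch of how that file-loading logic could be factored into a shared helper; the name loadChatTemplateOverride, the free-function form, and the use of std::optional are assumptions made for illustration and are not part of this patch.

    #include <filesystem>
    #include <fstream>
    #include <iterator>
    #include <optional>
    #include <string>

    // Reads chat_template.jinja from the given model directory and returns its contents,
    // or std::nullopt when the file is absent, cannot be opened, or is empty --
    // the same checks the patch performs inline in each initializer.
    static std::optional<std::string> loadChatTemplateOverride(const std::filesystem::path& modelsPath) {
        const std::filesystem::path chatTemplatePath = modelsPath / "chat_template.jinja";
        if (!std::filesystem::exists(chatTemplatePath)) {
            return std::nullopt;
        }
        std::ifstream chatTemplateFile(chatTemplatePath);
        if (!chatTemplateFile.is_open()) {
            return std::nullopt;  // the patch logs a warning at this point instead
        }
        std::string content((std::istreambuf_iterator<char>(chatTemplateFile)),
                            std::istreambuf_iterator<char>());
        if (content.empty()) {
            return std::nullopt;
        }
        return content;
    }

Each initializer would then reduce to something like: if (auto tpl = loadChatTemplateOverride(parsedModelsPath)) properties->tokenizer.set_chat_template(*tpl); plus whatever logging that call site wants.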
From 15dae46ae87568cf97cbcf1aa3c13ca8950d906d Mon Sep 17 00:00:00 2001
From: Damian Kalinowski
Date: Thu, 12 Mar 2026 13:52:06 +0100
Subject: [PATCH 2/3] save

---
 .../legacy/servable_initializer.cpp | 17 +++++++++++++++++
 .../legacy/servable_initializer.cpp | 17 +++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/src/llm/language_model/legacy/servable_initializer.cpp b/src/llm/language_model/legacy/servable_initializer.cpp
index 4ee7d4820a..856a2b8902 100644
--- a/src/llm/language_model/legacy/servable_initializer.cpp
+++ b/src/llm/language_model/legacy/servable_initializer.cpp
@@ -13,6 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //*****************************************************************************
+#include
 #include
 #include
 #include
@@ -100,6 +101,22 @@ Status LegacyServableInitializer::initialize(std::shared_ptr& ser
     try {
         properties->pipeline = std::make_shared(parsedModelsPath, properties->device, properties->pluginConfig);
         properties->tokenizer = properties->pipeline->get_tokenizer();
+
+        // Override chat template from chat_template.jinja file if present in model directory
+        std::filesystem::path chatTemplateJinjaPath = std::filesystem::path(parsedModelsPath) / "chat_template.jinja";
+        if (std::filesystem::exists(chatTemplateJinjaPath)) {
+            std::ifstream chatTemplateFile(chatTemplateJinjaPath);
+            if (chatTemplateFile.is_open()) {
+                std::string chatTemplateContent((std::istreambuf_iterator<char>(chatTemplateFile)),
+                    std::istreambuf_iterator<char>());
+                if (!chatTemplateContent.empty()) {
+                    properties->tokenizer.set_chat_template(chatTemplateContent);
+                    SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Loaded custom chat template from: {}", chatTemplateJinjaPath.string());
+                }
+            } else {
+                SPDLOG_LOGGER_WARN(llm_calculator_logger, "Failed to open chat template file: {}", chatTemplateJinjaPath.string());
+            }
+        }
     } catch (const std::exception& e) {
         SPDLOG_ERROR("Error during llm node initialization for models_path: {} exception: {}", parsedModelsPath, e.what());
         return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
diff --git a/src/llm/visual_language_model/legacy/servable_initializer.cpp b/src/llm/visual_language_model/legacy/servable_initializer.cpp
index ec8bfd327a..6ed34cc7d0 100644
--- a/src/llm/visual_language_model/legacy/servable_initializer.cpp
+++ b/src/llm/visual_language_model/legacy/servable_initializer.cpp
@@ -13,6 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //*****************************************************************************
+#include
 #include
 #include
 #include
@@ -84,6 +85,22 @@ Status VisualLanguageModelLegacyServableInitializer::initialize(std::shared_ptr<
     try {
         properties->pipeline = std::make_shared(parsedModelsPath, properties->device, properties->pluginConfig);
         properties->tokenizer = properties->pipeline->get_tokenizer();
+
+        // Override chat template from chat_template.jinja file if present in model directory
+        std::filesystem::path chatTemplateJinjaPath = std::filesystem::path(parsedModelsPath) / "chat_template.jinja";
+        if (std::filesystem::exists(chatTemplateJinjaPath)) {
+            std::ifstream chatTemplateFile(chatTemplateJinjaPath);
+            if (chatTemplateFile.is_open()) {
+                std::string chatTemplateContent((std::istreambuf_iterator<char>(chatTemplateFile)),
+                    std::istreambuf_iterator<char>());
+                if (!chatTemplateContent.empty()) {
+                    properties->tokenizer.set_chat_template(chatTemplateContent);
+                    SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Loaded custom chat template from: {}", chatTemplateJinjaPath.string());
+                }
+            } else {
+                SPDLOG_LOGGER_WARN(llm_calculator_logger, "Failed to open chat template file: {}", chatTemplateJinjaPath.string());
+            }
+        }
     } catch (const std::exception& e) {
         SPDLOG_ERROR("Error during llm node initialization for models_path: {} exception: {}", parsedModelsPath, e.what());
         return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
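With the same block now present in three initializers, a small self-contained check of the file-reading behaviour can be useful. The sketch below exercises only the standard-library part of the logic through the hypothetical loadChatTemplateOverride helper from the earlier sketch (that function must be in scope); the temporary directory and the placeholder template text are assumptions for the demo, not anything this patch creates.

    #include <cassert>
    #include <filesystem>
    #include <fstream>
    #include <iostream>
    // Assumes loadChatTemplateOverride() from the earlier sketch is declared above.

    int main() {
        const std::filesystem::path dir = std::filesystem::temp_directory_path() / "chat_template_demo";
        std::filesystem::create_directories(dir);

        // Without chat_template.jinja the helper reports no override.
        assert(!loadChatTemplateOverride(dir).has_value());

        // Drop a trivial placeholder template next to the (imaginary) model files and read it back.
        {
            std::ofstream out(dir / "chat_template.jinja");
            out << "{{ messages }}";
        }
        const auto tpl = loadChatTemplateOverride(dir);
        assert(tpl.has_value() && *tpl == "{{ messages }}");
        std::cout << "override loaded: " << *tpl << std::endl;

        std::filesystem::remove_all(dir);
        return 0;
    }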
From 1a7437ea90ef475247665618992e5e70e82ab8a5 Mon Sep 17 00:00:00 2001
From: Damian Kalinowski
Date: Thu, 12 Mar 2026 14:20:29 +0100
Subject: [PATCH 3/3] save

---
 src/llm/language_model/continuous_batching/llm_executor.hpp | 4 ++--
 .../continuous_batching/servable_initializer.cpp            | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/llm/language_model/continuous_batching/llm_executor.hpp b/src/llm/language_model/continuous_batching/llm_executor.hpp
index 1e162c7645..ec7c7d517b 100644
--- a/src/llm/language_model/continuous_batching/llm_executor.hpp
+++ b/src/llm/language_model/continuous_batching/llm_executor.hpp
@@ -101,8 +101,8 @@ struct LLMExecutor {
 #pragma GCC diagnostic ignored "-Wunused-but-set-variable"
     void printMetrics() {
         ov::genai::PipelineMetrics metrics = pipe->get_metrics();
-        SPDLOG_LOGGER_INFO(llm_executor_logger, "All requests: {}; Scheduled requests: {};",
-            metrics.requests, metrics.scheduled_requests);
+        SPDLOG_LOGGER_INFO(llm_executor_logger, "All requests: {}; Scheduled requests: {}; Cache {};",
+            metrics.requests, metrics.scheduled_requests, formatCacheInfo(metrics.cache_usage, metrics.kv_cache_size_in_bytes, this->isDynamicKVCache));
     }
 };
 #pragma GCC diagnostic pop
diff --git a/src/llm/language_model/continuous_batching/servable_initializer.cpp b/src/llm/language_model/continuous_batching/servable_initializer.cpp
index a9130edcb6..c510b2b91c 100644
--- a/src/llm/language_model/continuous_batching/servable_initializer.cpp
+++ b/src/llm/language_model/continuous_batching/servable_initializer.cpp
@@ -220,7 +220,6 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr());
                 if (!chatTemplateContent.empty()) {
                     properties->tokenizer.set_chat_template(chatTemplateContent);
-                    SPDLOG_LOGGER_ERROR(llm_calculator_logger, "PPPPPPPPPPPP Loaded custom chat template from: {}", chatTemplateJinjaPath.string());
                 }
             } else {
                 SPDLOG_LOGGER_WARN(llm_calculator_logger, "Failed to open chat template file: {}", chatTemplateJinjaPath.string());