Update tokenizer: add <tool_response>/<think> tokens; rework Ovis Image prompt and pass max_length to tokenize

leejet · leejet · commit 3002c55d2d7c · 2025-12-07T02:44:41.000+08:00
diff --git a/conditioner.hpp b/conditioner.hpp
@@ -1728,6 +1728,7 @@ struct LLMEmbedder : public Conditioner {
         std::vector<std::pair<int, ggml_tensor*>> image_embeds;
         std::pair<int, int> prompt_attn_range;
         int prompt_template_encode_start_idx = 34;
+        int max_length = 0;
         std::set<int> out_layers;
         if (llm->enable_vision && conditioner_params.ref_images.size() > 0) {
             LOG_INFO("QwenImageEditPlusPipeline");
@@ -1827,11 +1828,12 @@ struct LLMEmbedder : public Conditioner {
             prompt += "[/INST]";
         } else if (version == VERSION_OVIS_IMAGE) {
             prompt_template_encode_start_idx = 28;
+            max_length = prompt_template_encode_start_idx + 256;
 
-            prompt = "<|im_start|>system\nDescribe the image by detailing the color, quantity, text, shape, size, texture, spatial relationships of the objects and background: <|im_end|>\n<|im_start|>user\n";
+            prompt = "<|im_start|>user\nDescribe the image by detailing the color, quantity, text, shape, size, texture, spatial relationships of the objects and background:";
 
             prompt_attn_range.first = static_cast<int>(prompt.size());
-            prompt += conditioner_params.text;
+            prompt += " " + conditioner_params.text;
             prompt_attn_range.second = static_cast<int>(prompt.size());
 
             prompt += "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n";
@@ -1847,7 +1849,7 @@ struct LLMEmbedder : public Conditioner {
             prompt += "<|im_end|>\n<|im_start|>assistant\n";
         }
 
-        auto tokens_and_weights = tokenize(prompt, prompt_attn_range, 0, false);
+        auto tokens_and_weights = tokenize(prompt, prompt_attn_range, max_length, max_length > 0);
         auto& tokens            = std::get<0>(tokens_and_weights);
         auto& weights           = std::get<1>(tokens_and_weights);
 
@@ -1883,8 +1885,6 @@ struct LLMEmbedder : public Conditioner {
         int64_t min_length = 0;
         if (sd_version_is_flux2(version)) {
             min_length = 512;
-        } else if (version == VERSION_OVIS_IMAGE) {
-            min_length = 256;
         }
 
         int64_t zero_pad_len = 0;
@@ -1908,6 +1908,8 @@ struct LLMEmbedder : public Conditioner {
             ggml_ext_tensor_set_f32(new_hidden_states, value, i0, i1, i2, i3);
         });
 
+        // print_ggml_tensor(new_hidden_states);
+
         int64_t t1 = ggml_time_ms();
         LOG_DEBUG("computing condition graph completed, taking %" PRId64 " ms", t1 - t0);
         return {new_hidden_states, nullptr, nullptr};
diff --git a/llm.hpp b/llm.hpp
@@ -356,6 +356,10 @@ namespace LLM {
                 "<|fim_pad|>",
                 "<|repo_name|>",
                 "<|file_sep|>",
+                "<tool_response>",
+                "</tool_response>",
+                "<think>",
+                "</think>",
             };
 
             if (merges_utf8_str.size() > 0) {