@@ -1728,6 +1728,7 @@ struct LLMEmbedder : public Conditioner {
17281728 std::vector<std::pair<int , ggml_tensor*>> image_embeds;
17291729 std::pair<int , int > prompt_attn_range;
17301730 int prompt_template_encode_start_idx = 34 ;
1731+ int max_length = 0 ;
17311732 std::set<int > out_layers;
17321733 if (llm->enable_vision && conditioner_params.ref_images .size () > 0 ) {
17331734 LOG_INFO (" QwenImageEditPlusPipeline" );
@@ -1827,11 +1828,12 @@ struct LLMEmbedder : public Conditioner {
18271828 prompt += " [/INST]" ;
18281829 } else if (version == VERSION_OVIS_IMAGE) {
18291830 prompt_template_encode_start_idx = 28 ;
1831+ max_length = prompt_template_encode_start_idx + 256 ;
18301832
1831- prompt = " <|im_start|>system \n Describe the image by detailing the color, quantity, text, shape, size, texture, spatial relationships of the objects and background: <|im_end|> \n <|im_start|>user \n " ;
1833+ prompt = " <|im_start|>user \n Describe the image by detailing the color, quantity, text, shape, size, texture, spatial relationships of the objects and background:" ;
18321834
18331835 prompt_attn_range.first = static_cast <int >(prompt.size ());
1834- prompt += conditioner_params.text ;
1836+ prompt += " " + conditioner_params.text ;
18351837 prompt_attn_range.second = static_cast <int >(prompt.size ());
18361838
18371839 prompt += " <|im_end|>\n <|im_start|>assistant\n <think>\n\n </think>\n\n " ;
@@ -1847,7 +1849,7 @@ struct LLMEmbedder : public Conditioner {
18471849 prompt += " <|im_end|>\n <|im_start|>assistant\n " ;
18481850 }
18491851
1850- auto tokens_and_weights = tokenize (prompt, prompt_attn_range, 0 , false );
1852+ auto tokens_and_weights = tokenize (prompt, prompt_attn_range, max_length, max_length > 0 );
18511853 auto & tokens = std::get<0 >(tokens_and_weights);
18521854 auto & weights = std::get<1 >(tokens_and_weights);
18531855
@@ -1883,8 +1885,6 @@ struct LLMEmbedder : public Conditioner {
18831885 int64_t min_length = 0 ;
18841886 if (sd_version_is_flux2 (version)) {
18851887 min_length = 512 ;
1886- } else if (version == VERSION_OVIS_IMAGE) {
1887- min_length = 256 ;
18881888 }
18891889
18901890 int64_t zero_pad_len = 0 ;
@@ -1908,6 +1908,8 @@ struct LLMEmbedder : public Conditioner {
19081908 ggml_ext_tensor_set_f32 (new_hidden_states, value, i0, i1, i2, i3);
19091909 });
19101910
1911+ // print_ggml_tensor(new_hidden_states);
1912+
19111913 int64_t t1 = ggml_time_ms ();
19121914 LOG_DEBUG (" computing condition graph completed, taking %" PRId64 " ms" , t1 - t0);
19131915 return {new_hidden_states, nullptr , nullptr };
0 commit comments