@@ -85,7 +85,6 @@ class Glm4ModelImpl : public LlmModelImplBase<Glm4DecoderLayer> {
     } else {
       h = embed_tokens_(tokens, 0);
     }
-
     auto target_cos_sin = atb_pos_emb_(cos_sin_, positions, 0);
     auto target_cos_sin_chunks = target_cos_sin.chunk(/*chunks=*/2, /*dim=*/-1);
     auto cos_pos = target_cos_sin_chunks[0].contiguous();
@@ -98,7 +97,7 @@ class Glm4ModelImpl : public LlmModelImplBase<Glm4DecoderLayer> {
     for (int dim_idx = 1; dim_idx <= 2; ++dim_idx) {
       int64_t offset = dim_idx;
       int64_t section_len = mrope_section_[dim_idx];
-      int64_t length = section_len * 3;
+      int64_t length = section_len * 2;
       auto idx_first_half = torch::arange(offset, length, 3, torch::kLong);
       auto idx_second_half = torch::arange(offset, length, 3, torch::kLong);
       auto idx_tensor =
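This hunk tightens the `torch::arange` bound used to build the interleaved m-RoPE index pattern from `section_len * 3` to `section_len * 2`. A minimal standalone sketch of what each bound selects, assuming hypothetical section lengths `{8, 12, 12}` (the real values come from `mrope_section_`, which is set from the model config):

```cpp
#include <torch/torch.h>
#include <iostream>

int main() {
  // Hypothetical m-RoPE section lengths for the t/h/w axes; the real
  // values live in mrope_section_ and are read from the model config.
  std::vector<int64_t> mrope_section = {8, 12, 12};

  for (int dim_idx = 1; dim_idx <= 2; ++dim_idx) {
    int64_t offset = dim_idx;  // h indices start at 1, w indices at 2
    int64_t section_len = mrope_section[dim_idx];
    // Old bound (section_len * 3) vs. the fixed bound (section_len * 2),
    // both stepping by 3 across the interleaved layout.
    auto idx_old = torch::arange(offset, section_len * 3, 3, torch::kLong);
    auto idx_new = torch::arange(offset, section_len * 2, 3, torch::kLong);
    std::cout << "dim " << dim_idx << ": old selects " << idx_old.numel()
              << " indices, new selects " << idx_new.numel() << "\n";
  }
  return 0;
}
```

For `section_len = 12` and `dim_idx = 1`, the old bound selects 12 indices (1, 4, …, 34) while the new one selects 8 (1, 4, …, 22), so the fix shrinks how far the strided gather reaches into the cos/sin tables.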
@@ -114,7 +113,8 @@ class Glm4ModelImpl : public LlmModelImplBase<Glm4DecoderLayer> {
       sin_pos = apply(sin_pos.reshape(
           {positions.sizes().front(), -1, sin_pos.sizes().back()}));
     }
-
+    cos_pos = cos_pos.reshape({-1, cos_pos.sizes().back() / 2, 2});
+    sin_pos = sin_pos.reshape({-1, sin_pos.sizes().back() / 2, 2});
     torch::Tensor attn_mask;
     if (FLAGS_enable_chunked_prefill) {
       int max_kv_seq = input_params.kv_max_seq_len;
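The last hunk adds a reshape of the cos/sin tables to `(-1, head_dim / 2, 2)` before attention, grouping the last dimension into pairs of adjacent rotary components. A short sketch of the shape change, assuming a hypothetical head dimension of 64 and 4 positions:

```cpp
#include <torch/torch.h>
#include <iostream>

int main() {
  // Hypothetical cos table: 4 positions x head_dim = 64.
  auto cos_pos = torch::randn({4, 64});
  // Same reshape as the patch: group the last dimension into
  // (head_dim / 2) pairs of adjacent components.
  cos_pos = cos_pos.reshape({-1, cos_pos.sizes().back() / 2, 2});
  std::cout << cos_pos.sizes() << std::endl;  // prints [4, 32, 2]
  return 0;
}
```

`reshape` only reinterprets the layout, so no values change; the patch presumably feeds a downstream consumer that expects the paired `(…, head_dim / 2, 2)` layout rather than a flat last dimension.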