
Commit dc7633e

feat: support glm4v position embedding.
1 parent: d5dd564

File tree

4 files changed: +8 additions, -7 deletions

vcpkg.json
xllm/models/llm/glm4.h
xllm/models/llm/llm_model_base.h
xllm/models/vlm/glm4v.h

vcpkg.json

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@
       "name": "opencv4",
       "version>=": "4.7.0",
       "default-features": false,
-      "features": ["ffmpeg", "jpeg", "png"]
+      "features": ["ffmpeg", "jpeg", "png","tiff","webp","openexr","quirc"]
     },
     {
       "name": "yaml-cpp",

xllm/models/llm/glm4.h

Lines changed: 3 additions & 3 deletions
@@ -85,7 +85,6 @@ class Glm4ModelImpl : public LlmModelImplBase<Glm4DecoderLayer> {
     } else {
       h = embed_tokens_(tokens, 0);
     }
-
     auto target_cos_sin = atb_pos_emb_(cos_sin_, positions, 0);
     auto target_cos_sin_chunks = target_cos_sin.chunk(/*chunks=*/2, /*dim=*/-1);
     auto cos_pos = target_cos_sin_chunks[0].contiguous();
@@ -98,7 +97,7 @@ class Glm4ModelImpl : public LlmModelImplBase<Glm4DecoderLayer> {
     for (int dim_idx = 1; dim_idx <= 2; ++dim_idx) {
       int64_t offset = dim_idx;
       int64_t section_len = mrope_section_[dim_idx];
-      int64_t length = section_len * 3;
+      int64_t length = section_len * 2;
       auto idx_first_half = torch::arange(offset, length, 3, torch::kLong);
       auto idx_second_half = torch::arange(offset, length, 3, torch::kLong);
       auto idx_tensor =
@@ -114,7 +113,8 @@ class Glm4ModelImpl : public LlmModelImplBase<Glm4DecoderLayer> {
       sin_pos = apply(sin_pos.reshape(
           {positions.sizes().front(), -1, sin_pos.sizes().back()}));
     }
-
+    cos_pos = cos_pos.reshape({-1, cos_pos.sizes().back() / 2, 2});
+    sin_pos = sin_pos.reshape({-1, sin_pos.sizes().back() / 2, 2});
     torch::Tensor attn_mask;
     if (FLAGS_enable_chunked_prefill) {
       int max_kv_seq = input_params.kv_max_seq_len;
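
For readers skimming the hunks above: the stride-3 arange walks an interleaved channel layout in which the three m-RoPE sections (commonly temporal/height/width) occupy every third rotary channel, and the new trailing reshape groups adjacent cos/sin values into pairs for an interleaved rotary application. Below is a minimal standalone sketch of those two tensor manipulations only; the sizes are illustrative (the real mrope_section_ values come from the model config), not the model's actual dimensions.

  // Sketch only: interleaved m-RoPE channel selection and cos/sin pairing,
  // mirroring the tensor ops in glm4.h. All sizes here are illustrative.
  #include <torch/torch.h>
  #include <vector>

  int main() {
    std::vector<int64_t> mrope_section = {8, 4, 4};  // hypothetical t/h/w split

    for (int dim_idx = 1; dim_idx <= 2; ++dim_idx) {
      int64_t offset = dim_idx;                     // this section's slot in
      int64_t length = mrope_section[dim_idx] * 2;  // the interleave pattern
      auto idx = torch::arange(offset, length, /*step=*/3, torch::kLong);
      // e.g. dim_idx = 1, section_len = 4  ->  idx = {1, 4, 7}:
      // every third channel, starting at this section's offset.
    }

    // Pairing reshape: [..., d] -> [..., d/2, 2], so indices [i][0]/[i][1]
    // address the two halves of one rotary pair.
    auto cos_pos = torch::ones({5, 16});
    cos_pos = cos_pos.reshape({-1, cos_pos.sizes().back() / 2, 2});
    return 0;
  }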

xllm/models/llm/llm_model_base.h

Lines changed: 1 addition & 0 deletions
@@ -68,6 +68,7 @@ torch::Tensor compute_rotary_embedding(int64_t dim,
     emb = torch::cat({freqs, freqs}, -1);
   } else {
     emb = torch::stack({freqs, freqs}, -1);
+    emb = emb.reshape({seq_len, dim});
   }
   auto rope_cos = torch::cos(emb);
   auto rope_sin = torch::sin(emb);
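
Why this one-line addition matters: torch::cat({freqs, freqs}, -1) produces the half-and-half layout (f0..f(d/2-1) repeated side by side), while torch::stack({freqs, freqs}, -1) produces shape {seq_len, dim/2, 2}; the new reshape flattens that back to {seq_len, dim} as the interleaved layout f0, f0, f1, f1, ... A toy comparison of the two branches, as a sketch assuming freqs has shape {seq_len, dim/2}:

  // Sketch: concatenated vs. interleaved rotary-frequency layouts.
  #include <torch/torch.h>

  int main() {
    int64_t seq_len = 2, dim = 4;
    auto freqs = torch::arange(seq_len * dim / 2, torch::kFloat)
                     .reshape({seq_len, dim / 2});   // {2, 2}

    // cat branch: per position [f0, f1, f0, f1] (halves side by side).
    auto cat_emb = torch::cat({freqs, freqs}, -1);   // {2, 4}

    // stack branch: {2, 2, 2}; the reshape interleaves -> [f0, f0, f1, f1].
    auto emb = torch::stack({freqs, freqs}, -1).reshape({seq_len, dim});

    auto rope_cos = torch::cos(emb);  // cos/sin tables inherit the layout
    auto rope_sin = torch::sin(emb);
    return 0;
  }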

xllm/models/vlm/glm4v.h

Lines changed: 3 additions & 3 deletions
@@ -912,9 +912,6 @@ REGISTER_MODEL_ARGS(glm4v, [&] {
   // LOAD_ARG_OR(pad_token_id, "text_config.pad_token_id", 151329);
   LOAD_ARG_OR(
       eos_token_id_vec, "text_config.eos_token_id", std::vector<int>{151329});
-  LOAD_ARG_OR_FUNC(head_dim, "text_config.head_dim", [&] {
-    return args->hidden_size() / args->n_heads();
-  });
   LOAD_ARG_OR(attention_bias, "text_config.attention_bias", true);
   LOAD_ARG_OR(attention_dropout, "text_config.attention_dropout", 0.0f);
   LOAD_ARG_OR(first_k_dense_replace, "text_config.first_k_dense_replace", 1);
@@ -925,6 +922,9 @@
   LOAD_ARG_OR(
       max_position_embeddings, "text_config.max_position_embeddings", 131072);
   LOAD_ARG_OR(n_heads, "text_config.num_attention_heads", 96);
+  LOAD_ARG_OR_FUNC(head_dim, "text_config.head_dim", [&] {
+    return args->hidden_size() / args->n_heads();
+  });
   LOAD_ARG_OR(num_experts_per_tok, "text_config.num_experts_per_tok", 8);
   LOAD_ARG_OR(n_layers, "text_config.num_hidden_layers", 46);
   LOAD_ARG_OR(n_kv_heads, "text_config.num_key_value_heads", 8);
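
This glm4v.h change is an ordering fix rather than new behavior: the head_dim fallback lambda divides hidden_size() by n_heads(), so it must run after num_attention_heads has been loaded. A sketch of the hazard follows; LOAD_ARG_OR_FUNC is xllm-internal, so load_or here is a modeled stand-in that assumes the fallback lambda is invoked immediately when the config key is absent.

  // Sketch of the load-order hazard; load_or is a stand-in, not the real macro.
  #include <functional>
  #include <iostream>

  struct Args {
    int64_t hidden_size = 6144;
    int64_t n_heads = 0;  // not yet populated from text_config
  };

  int64_t load_or(bool key_present, int64_t key_value,
                  const std::function<int64_t()>& fallback) {
    return key_present ? key_value : fallback();  // fallback runs at load time
  }

  int main() {
    Args args;
    // Before the fix: deriving head_dim at this point would divide by a
    // default-initialized n_heads (0), producing garbage.
    args.n_heads = 96;  // the commit loads num_attention_heads first...
    int64_t head_dim =
        load_or(false, 0, [&] { return args.hidden_size / args.n_heads; });
    std::cout << head_dim << "\n";  // ...so the fallback yields 6144 / 96 = 64
    return 0;
  }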
