From 3e67742d1c5815f196ce06f15860955708e5d4c3 Mon Sep 17 00:00:00 2001
From: Xuejun
Date: Sun, 12 Apr 2026 22:18:36 -0700
Subject: [PATCH] Fix llm param compute error for normal softmax (not the softmax in attention)

---
 ggml/src/ggml-openvino/ggml-decoder.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
index 9d2cf60cf60..d75915cd00d 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -286,7 +286,7 @@ std::pair GgmlOvDecoder::compute_llm_params(ggml_cgr
     for (int i = 0; i < cgraph->n_nodes; i++) {
         auto * node = cgraph->nodes[i];
         std::string name = std::string(node->name);
-        if (node->op == GGML_OP_FLASH_ATTN_EXT || node->op == GGML_OP_SOFT_MAX) {
+        if (node->op == GGML_OP_FLASH_ATTN_EXT || (node->op == GGML_OP_SOFT_MAX && node->src[1] != nullptr)) {
             compute_params.input_len = node->src[0]->ne[1];
             auto * q_perm = node->src[0];