From 2b8c869742500fb2fa3b1160878ce8b860b7f461 Mon Sep 17 00:00:00 2001 From: Chenxi Qian Date: Thu, 11 Dec 2025 18:02:33 +0800 Subject: [PATCH] npu_moe_gating_top_k support renorm=1 --- vllm_ascend/ops/fused_moe/experts_selector.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/vllm_ascend/ops/fused_moe/experts_selector.py b/vllm_ascend/ops/fused_moe/experts_selector.py index 05ec0e38491..1e9892d9e19 100644 --- a/vllm_ascend/ops/fused_moe/experts_selector.py +++ b/vllm_ascend/ops/fused_moe/experts_selector.py @@ -220,15 +220,12 @@ def _select_experts_with_fusion_ops( k_group=topk_group, group_count=num_expert_group, group_select_mode=1, # 0: the maximum in the group; 1: topk2.sum(fix) - renorm=0, # 0: softmax->topk(fix); 1: topk->softmax + renorm=1, # 0: softmax->topk; 1: topk->softmax(fix) norm_type=norm_type, # 0: softmax; 1: sigmoid # out_flag=False, # todo new api; should the third output be output # y2_flag=False, # old api; should the third output be output routed_scaling_factor=1, eps=float(1e-20)) - if scoring_func == "softmax": - topk_weights = _renormalize_topk_weights(topk_weights, renormalize) - return topk_weights, topk_ids