Skip to content

Commit 48e8f8d

Browse files
committed
fix
Signed-off-by: wangli <wangli858794774@gmail.com>
1 parent d7556d5 commit 48e8f8d

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2932,12 +2932,14 @@ def _dummy_run(
2932 2932           assert len(num_scheduled_tokens_list) == num_reqs
2933 2933           num_scheduled_tokens = np.array(num_scheduled_tokens_list,
2934 2934                                           dtype=np.int32)
     2935 +         num_sampled_tokens = np.ones(num_reqs, dtype=np.int32)
2935 2936
2936 2937           if not self.in_profile_run and self.dynamic_eplb:
2937 2938               self.eplb_updator.forward_before()
2938 2939
2939 2940           with self.maybe_dummy_run_with_lora(self.lora_config,
2940      -                                             num_scheduled_tokens):
     2941 +                                             num_scheduled_tokens,
     2942 +                                             num_sampled_tokens):
2941 2943               if self.is_multimodal_model:
2942 2944                   input_ids = None
2943 2945                   inputs_embeds = self.inputs_embeds.gpu[:num_tokens]

0 commit comments

Comments
 (0)