fix cp config: use `scheduler_config.enable_chunked_prefill` instead of `chunked_prefill_enabled`

wangxiyuan · wangxiyuan · commit 262180526d7c · 2025-12-02T08:48:00.000+08:00
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
diff --git a/vllm_ascend/attention/mla_v1.py b/vllm_ascend/attention/mla_v1.py
@@ -226,7 +226,7 @@ def __init__(self,
         self.block_size = vllm_config.cache_config.block_size
         self.max_blocks = (vllm_config.model_config.max_model_len +
                            self.block_size - 1) // self.block_size
-        self.chunked_prefill_enabled = scheduler_config.chunked_prefill_enabled
+        self.chunked_prefill_enabled = scheduler_config.enable_chunked_prefill
 
         self.speculative_config = vllm_config.speculative_config
         self.decode_threshold = 1
diff --git a/vllm_ascend/core/recompute_scheduler.py b/vllm_ascend/core/recompute_scheduler.py
@@ -456,7 +456,7 @@ def schedule(self) -> RecomputeSchedulerOutput:
 
                     # chunked prefill has to be enabled explicitly to allow
                     # pooling requests to be chunked
-                    if not self.scheduler_config.chunked_prefill_enabled and \
+                    if not self.scheduler_config.enable_chunked_prefill and \
                             num_new_tokens > token_budget:
                         self.waiting.pop_request()
                         skipped_waiting_requests.prepend_request(request)
diff --git a/vllm_ascend/core/scheduler.py b/vllm_ascend/core/scheduler.py
@@ -70,7 +70,7 @@ def __init__(
         self._initialize_common()
 
     def schedule(self) -> SchedulerOutput:
-        if self.scheduler_config.chunked_prefill_enabled:
+        if self.scheduler_config.enable_chunked_prefill:
             return super().schedule()
         scheduled_new_reqs: list[Request] = []
         scheduled_resumed_reqs: list[Request] = []
@@ -534,7 +534,7 @@ def _check_watermark_for_prefill(self,
         return True
 
     def _get_prompt_limit(self, request: Request) -> int:
-        if (self.scheduler_config.chunked_prefill_enabled
+        if (self.scheduler_config.enable_chunked_prefill
                 and not self.scheduler_config.is_multi_step):
             prompt_limit = self.vllm_config.model_config.max_model_len
         else:
diff --git a/vllm_ascend/core/scheduler_dynamic_batch.py b/vllm_ascend/core/scheduler_dynamic_batch.py
@@ -404,7 +404,7 @@ def schedule(self) -> SchedulerOutput:
 
                     # chunked prefill has to be enabled explicitly to allow
                     # pooling requests to be chunked
-                    if not self.scheduler_config.chunked_prefill_enabled and \
+                    if not self.scheduler_config.enable_chunked_prefill and \
                         num_new_tokens > token_budget:
                         self.waiting.pop_request()
                         skipped_waiting_requests.prepend_request(request)
diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py
@@ -332,7 +332,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
             vllm_config.scheduler_config.scheduler_cls = (
                 "vllm_ascend.core.scheduler_dynamic_batch.SchedulerDynamicBatch"
             )
-            vllm_config.scheduler_config.chunked_prefill_enabled = True
+            vllm_config.scheduler_config.enable_chunked_prefill = True
             vllm_config.scheduler_config.SLO_limits_for_dynamic_batch = ascend_config.SLO_limits_for_dynamic_batch
 
         if vllm_config.kv_transfer_config is not None and \
diff --git a/vllm_ascend/torchair/torchair_sfa.py b/vllm_ascend/torchair/torchair_sfa.py
@@ -171,7 +171,7 @@ def __init__(self,
         self.block_size = vllm_config.cache_config.block_size
         self.max_blocks = (vllm_config.model_config.max_model_len +
                            self.block_size - 1) // self.block_size
-        self.chunked_prefill_enabled = scheduler_config.chunked_prefill_enabled
+        self.chunked_prefill_enabled = scheduler_config.enable_chunked_prefill
         if self.chunked_prefill_enabled:
             self.chunked_prefill_workspace_size = min(
                 # Max sure there is enough for 8 full length request or at least
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
@@ -330,7 +330,7 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
         # Ascend-specific configurations
         self.ascend_config = get_ascend_config()
         if self.ascend_config.ascend_scheduler_config.enabled:
-            self.chunked_prefill_enabled = self.scheduler_config.chunked_prefill_enabled
+            self.chunked_prefill_enabled = self.scheduler_config.enable_chunked_prefill
         else:
             self.chunked_prefill_enabled = True
         self.weight_prefetch_method = WeightPrefetchMethod(

Original file line number	Diff line number	Diff line change
`@@ -332,7 +332,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:`
`332`	`332`	`vllm_config.scheduler_config.scheduler_cls = (`
`333`	`333`	`"vllm_ascend.core.scheduler_dynamic_batch.SchedulerDynamicBatch"`
`334`	`334`	`)`
`335`		`- vllm_config.scheduler_config.chunked_prefill_enabled = True`
	`335`	`+ vllm_config.scheduler_config.enable_chunked_prefill = True`
`336`	`336`	`vllm_config.scheduler_config.SLO_limits_for_dynamic_batch = ascend_config.SLO_limits_for_dynamic_batch`
`337`	`337`
`338`	`338`	`if vllm_config.kv_transfer_config is not None and \`