File tree (expand / collapse): 2 files changed, +5 -1 lines changed
model_executor/layers/fused_moe
File tree (expand / collapse): 2 files changed, +5 -1 lines changed
Original file line number | Diff line number | Diff line change
144144 VLLM_DP_MASTER_IP : str = ""
145145 VLLM_DP_MASTER_PORT : int = 0
146146 VLLM_MOE_DP_CHUNK_SIZE : int = 256
147+ VLLM_ENABLE_MOE_DP_CHUNK : bool = True
147148 VLLM_RANDOMIZE_DP_DUMMY_INPUTS : bool = False
148149 VLLM_RAY_DP_PACK_STRATEGY : Literal ["strict" , "fill" , "span" ] = "strict"
149150 VLLM_MARLIN_USE_ATOMIC_ADD : bool = False
@@ -1098,6 +1099,9 @@ def get_vllm_port() -> int | None:
10981099 # rank. All DP ranks process the activations in VLLM_MOE_DP_CHUNK_SIZE
10991100 # units.
11001101 "VLLM_MOE_DP_CHUNK_SIZE" : lambda : int (os .getenv ("VLLM_MOE_DP_CHUNK_SIZE" , "256" )),
1102+ "VLLM_ENABLE_MOE_DP_CHUNK" : lambda : bool (
1103+ int (os .getenv ("VLLM_ENABLE_MOE_DP_CHUNK" , "1" ))
1104+ ),
11011105 # Randomize inputs during dummy runs when using Data Parallel
11021106 "VLLM_RANDOMIZE_DP_DUMMY_INPUTS" : lambda : os .environ .get (
11031107 "VLLM_RANDOMIZE_DP_DUMMY_INPUTS" , "0"
Original file line number | Diff line number | Diff line change
@@ -749,7 +749,7 @@ def use_dp_chunking(self) -> bool:
749749 self .moe_parallel_config .use_pplx_kernels
750750 or self .moe_parallel_config .use_deepep_ll_kernels
751751 or (self .dp_size > 1 and self .use_flashinfer_cutlass_kernels )
752- )
752+ ) and envs . VLLM_ENABLE_MOE_DP_CHUNK
753753
754754 @property
755755 def is_internal_router (self ) -> bool :
You can’t perform that action at this time.
0 commit comments