Skip to content

Commit b55eb8c

Browse files
committed
[moe] Allow disabling dp chunk
Signed-off-by: Ming Yang <minos.future@gmail.com>
1 parent 4478d4c commit b55eb8c

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

vllm/envs.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,7 @@
144144
VLLM_DP_MASTER_IP: str = ""
145145
VLLM_DP_MASTER_PORT: int = 0
146146
VLLM_MOE_DP_CHUNK_SIZE: int = 256
147+
VLLM_ENABLE_MOE_DP_CHUNK: bool = True
147148
VLLM_RANDOMIZE_DP_DUMMY_INPUTS: bool = False
148149
VLLM_RAY_DP_PACK_STRATEGY: Literal["strict", "fill", "span"] = "strict"
149150
VLLM_MARLIN_USE_ATOMIC_ADD: bool = False
@@ -1098,6 +1099,9 @@ def get_vllm_port() -> int | None:
10981099
# rank. All DP ranks process the activations in VLLM_MOE_DP_CHUNK_SIZE
10991100
# units.
11001101
"VLLM_MOE_DP_CHUNK_SIZE": lambda: int(os.getenv("VLLM_MOE_DP_CHUNK_SIZE", "256")),
1102+
"VLLM_ENABLE_MOE_DP_CHUNK": lambda: bool(
1103+
int(os.getenv("VLLM_ENABLE_MOE_DP_CHUNK", "1"))
1104+
),
11011105
# Randomize inputs during dummy runs when using Data Parallel
11021106
"VLLM_RANDOMIZE_DP_DUMMY_INPUTS": lambda: os.environ.get(
11031107
"VLLM_RANDOMIZE_DP_DUMMY_INPUTS", "0"

vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -749,7 +749,7 @@ def use_dp_chunking(self) -> bool:
749749
self.moe_parallel_config.use_pplx_kernels
750750
or self.moe_parallel_config.use_deepep_ll_kernels
751751
or (self.dp_size > 1 and self.use_flashinfer_cutlass_kernels)
752-
)
752+
) and envs.VLLM_ENABLE_MOE_DP_CHUNK
753753

754754
@property
755755
def is_internal_router(self) -> bool:

0 commit comments

Comments
 (0)