-
Notifications
You must be signed in to change notification settings - Fork 193
[Klaud Cold] dsv4-fp4-mi355x-sglang-disagg: DeepSeek-V4-Pro SGLang disagg (8k1k conc=1 smoke test) #1708
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[Klaud Cold] dsv4-fp4-mi355x-sglang-disagg: DeepSeek-V4-Pro SGLang disagg (8k1k conc=1 smoke test) #1708
Changes from all commits
a0ad648
42c97e6
d4f6c89
a1015d0
9648a81
316dd21
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -735,6 +735,54 @@ install_transformers_glm5() { | |
| _SETUP_INSTALLED+=("transformers-glm5") | ||
| } | ||
|
|
||
# ---------------------------------------------------------------------------
# SGLang: DeepSeek-V4-Pro config.json model_type patch.
#
# Transformers in these images doesn't recognize the `deepseek_v4` model_type,
# so AutoConfig.from_pretrained crashes before SGLang can dispatch. The
# single-node DSv4 recipes patch the HF-cache config.json directly; for disagg
# the weights live on shared NFS at $MODEL_DIR/$MODEL_NAME, so patch that
# config.json instead. Set model_type -> deepseek_v3 (so AutoConfig succeeds)
# while keeping architectures=['DeepseekV4ForCausalLM'] so SGLang still
# dispatches to its native DSv4 model class.
#
# Idempotent (no-op once model_type is no longer deepseek_v4) and crash-safe
# under the concurrent multi-node start: writes a temp file in the same dir and
# os.replace() (atomic same-filesystem rename), so a reader never sees a
# half-written config. The temp file is given the permission bits of the
# original config.json before the rename, because mkstemp() creates files that
# only the creating user can read.
# Only runs for MODEL_NAME == DeepSeek-V4-Pro.
#
# Globals:
#   MODEL_NAME, MODEL_DIR  (read)
#   _SETUP_INSTALLED       (appended to, only when config.json was rewritten)
# Returns:
#   0 when the file was patched, was already patched, or the patch does not
#   apply (other model / missing config.json); otherwise the non-zero exit
#   status of the python3 helper.
# ---------------------------------------------------------------------------
patch_dsv4_config() {
    if [[ "${MODEL_NAME:-}" != "DeepSeek-V4-Pro" ]]; then
        return 0
    fi
    local cfg="${MODEL_DIR:-}/${MODEL_NAME}/config.json"
    if [[ ! -f "$cfg" ]]; then
        echo "[SETUP] WARN: DSv4 config.json not found at $cfg; skipping model_type patch"
        return 0
    fi

    # The helper exits 0 only when it rewrote the file and 3 when there was
    # nothing to do; any other status is a real failure.
    local rc=0
    python3 - "$cfg" <<'PYEOF' || rc=$?
import json, os, shutil, sys, tempfile

NOTHING_TO_DO = 3  # exit status understood by the calling shell function

cfg = sys.argv[1]
with open(cfg) as f:
    config = json.load(f)
if config.get("model_type") != "deepseek_v4":
    print(f"[SETUP] DSv4 config.json already patched (model_type={config.get('model_type')!r})")
    sys.exit(NOTHING_TO_DO)
config["model_type"] = "deepseek_v3"
fd, tmp = tempfile.mkstemp(dir=os.path.dirname(cfg), prefix=".config.json.", suffix=".tmp")
try:
    with os.fdopen(fd, "w") as f:
        json.dump(config, f, indent=2)
    # mkstemp() creates the file readable/writable by the creating user only;
    # carry over the original permission bits so the rename does not silently
    # tighten access to the shared config.json.
    try:
        shutil.copymode(cfg, tmp)
    except OSError as exc:
        print(f"[SETUP] WARN: could not copy permissions onto {tmp}: {exc}", file=sys.stderr)
    os.replace(tmp, cfg)
except BaseException:
    # Never leave a stray temp file next to the weights, even on interrupt.
    try:
        os.remove(tmp)
    except FileNotFoundError:
        pass
    raise
print(f"[SETUP] Patched {cfg}: model_type deepseek_v4 -> deepseek_v3")
PYEOF

    # Record the patch only when config.json was actually rewritten. Appending
    # unconditionally made the setup summary report a config patch on the
    # no-op and failure paths as well.
    case "$rc" in
        0)
            _SETUP_INSTALLED+=("dsv4-config-model-type")
            ;;
        3)
            ;;
        *)
            echo "[SETUP] WARN: DSv4 config.json model_type patch failed (exit $rc) for $cfg" >&2
            return "$rc"
            ;;
    esac
    return 0
}
|
|
||
| # ============================================================================= | ||
| # Run installers (engine-gated) | ||
| # ============================================================================= | ||
|
|
@@ -759,6 +807,7 @@ if [[ "$ENGINE" == "vllm-disagg" ]]; then | |
| else | ||
| patch_gluon_pa_mqa_logits_instr_shape | ||
| install_transformers_glm5 | ||
| patch_dsv4_config | ||
| fi | ||
|
|
||
| _SETUP_END=$(date +%s) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
#!/usr/bin/env bash

# DeepSeek-V4-Pro FP4 disaggregated prefill/decode on MI355X via SGLang + MoRI.
# Thin, model-agnostic launcher (same shape as the dsr1 / qwen3.5 / glm5
# sglang-disagg wrappers): all serving flags live in the DeepSeek-V4-Pro entry
# of amd_utils/models.yaml, DSv4-specific env + the config.json model_type patch
# live in amd_utils/env.sh + setup_deps.sh, and topology (P/D node counts, TP/EP)
# comes from amd-master.yaml.
#
# Required environment: the variables passed to check_env_vars below, plus
# GITHUB_WORKSPACE. MODEL_NAME and NODE_LIST are read if set.
# Output: whatever submit.sh prints on stdout (stored in JOB_ID) is echoed on
# success; exits 1 if the working directory is missing or submission fails.

source "$(dirname "$0")/../benchmark_lib.sh"

check_env_vars \
    CONC_LIST \
    ISL \
    OSL \
    IMAGE \
    SPEC_DECODING \
    MODEL_PATH \
    PREFILL_NUM_WORKERS \
    PREFILL_TP \
    PREFILL_EP \
    PREFILL_DP_ATTN \
    DECODE_NUM_WORKERS \
    DECODE_TP \
    DECODE_EP \
    DECODE_DP_ATTN \
    PREFILL_NODES \
    DECODE_NODES \
    RANDOM_RANGE_RATIO \
    FRAMEWORK

if [[ -n "${SLURM_JOB_ID:-}" ]]; then
    echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME"
fi

set -x

# Use upstreamed multi_node scripts (no external clone needed)
cd "$GITHUB_WORKSPACE/benchmarks/multi_node/amd_utils" || exit 1

# Set up SGL launch script-specific environment variables
export TIME_LIMIT="08:00:00"
export MODEL_PATH="$MODEL_PATH"
export MODEL_NAME="${MODEL_NAME:-}"
export CONTAINER_IMAGE="$IMAGE"

# Expert parallelism is enabled only when the EP degree is not 1.
if [[ "${PREFILL_EP:-1}" -eq 1 ]]; then
    export PREFILL_ENABLE_EP=false
else
    export PREFILL_ENABLE_EP=true
fi

# DP attention is enabled only on the exact string "true".
if [[ "$PREFILL_DP_ATTN" == "true" ]]; then
    export PREFILL_ENABLE_DP=true
else
    export PREFILL_ENABLE_DP=false
fi

if [[ "${DECODE_EP:-1}" -eq 1 ]]; then
    export DECODE_ENABLE_EP=false
else
    export DECODE_ENABLE_EP=true
fi

if [[ "$DECODE_DP_ATTN" == "true" ]]; then
    export DECODE_ENABLE_DP=true
else
    export DECODE_ENABLE_DP=false
fi

# Launch jobs based on ISL/OSL
# Replace ' ' in CONC_LIST with 'x' such that the concurrency list is represented
# by a list of numbers delimited by 'x'. This is because of how the underlying launch script
# expects the concurrencies.
#
# The arguments are collected in an array and expanded quoted so that no value
# is word-split or glob-expanded, and an unexpectedly empty value cannot shift
# the positional arguments that follow it.
submit_args=(
    "$PREFILL_NODES"
    "$PREFILL_NUM_WORKERS"
    "$DECODE_NODES"
    "$DECODE_NUM_WORKERS"
    "$ISL" "$OSL" "${CONC_LIST// /x}" inf
    "$PREFILL_ENABLE_EP" "$PREFILL_ENABLE_DP"
    "$DECODE_ENABLE_EP" "$DECODE_ENABLE_DP"
    "$PREFILL_TP" "$DECODE_TP"
    "$RANDOM_RANGE_RATIO"
)

# NODE_LIST is optional: pass it only when non-empty so no empty trailing
# argument is sent. Quoted because a bracketed host range (e.g. node[1-4]) is
# also a valid glob pattern and could otherwise be expanded against the cwd.
# NOTE(review): assumes submit.sh takes the node list as a single trailing
# argument -- confirm against submit.sh.
if [[ -n "${NODE_LIST:-}" ]]; then
    submit_args+=("$NODE_LIST")
fi

if ! JOB_ID=$(bash ./submit.sh "${submit_args[@]}"); then
    echo "Failed to submit job" >&2
    exit 1
fi

echo "$JOB_ID"


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing mainline deep_gemm JIT off
Medium Severity
The `DeepSeek-V4-Pro` block hardcodes `SGLANG_OPT_FP8_WO_A_GEMM` and `SGLANG_OPT_USE_TOPK_V2` for the mainline `…-20260610` image, but omits `SGLANG_ENABLE_JIT_DEEPGEMM=0` (and `SGLANG_TOPK_TRANSFORM_512_TORCH=1`) that the in-repo mainline DSv4 recipe sets when `deep_gemm` is absent. That image line has no `deep_gemm`, so startup can still hit JIT or top-k paths that expect it.
Reviewed by Cursor Bugbot for commit 316dd21. Configure here.