diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 70a79a273..fb0274784 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -427,7 +427,7 @@ qwen3.5-fp8-mi355x-sglang-disagg: - "DECODE_MTP_SIZE=0" qwen3.5-fp4-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260604 + image: lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612 model: amd/Qwen3.5-397B-A17B-MXFP4 model-prefix: qwen3.5 runner: mi355x @@ -469,7 +469,7 @@ qwen3.5-fp4-mi355x-atom: - { tp: 4, conc-start: 4, conc-end: 16 } qwen3.5-fp4-mi355x-sglang-mtp: - image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260604 + image: lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612 model: amd/Qwen3.5-397B-A17B-MXFP4 model-prefix: qwen3.5 runner: mi355x diff --git a/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x.sh b/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x.sh index 7d87eea27..b036e060e 100644 --- a/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x.sh +++ b/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x.sh @@ -19,6 +19,7 @@ if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi export SGLANG_USE_AITER=1 export SGLANG_USE_AITER_UNIFIED_ATTN=1 +export AITER_FLYDSL_FORCE=1 SERVER_LOG=/workspace/server.log MEM_FRAC_STATIC=${MEM_FRAC_STATIC:-0.8} diff --git a/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x_mtp.sh b/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x_mtp.sh index 317147668..8081b824e 100755 --- a/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x_mtp.sh +++ b/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x_mtp.sh @@ -19,6 +19,7 @@ hf download "$MODEL" export SGLANG_USE_AITER=1 export SGLANG_USE_AITER_UNIFIED_ATTN=1 +export AITER_FLYDSL_FORCE=1 SERVER_LOG=/workspace/server.log MEM_FRAC_STATIC=${MEM_FRAC_STATIC:-0.8} diff --git a/perf-changelog.yaml b/perf-changelog.yaml index c0642188b..4ba690d2e 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3600,3 +3600,11 @@ - "MI355x DSR1-FP4: Include TP4 configurations for 8k1k" - "Expand the TP sweep (included TP=4) for 8k/1k configuration for conc=4 to 64" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1692 + +- config-keys: + - qwen3.5-fp4-mi355x-sglang + - qwen3.5-fp4-mi355x-sglang-mtp + description: + - "Bump image from lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260604 to lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612" + - "Enable AITER_FLYDSL_FORCE=1 in both non-MTP and MTP benchmark scripts" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1716