From 08427ba6483cf82e0e597f3004f57859b47f4a43 Mon Sep 17 00:00:00 2001 From: "jacky.cheng" Date: Fri, 12 Jun 2026 08:54:41 +0000 Subject: [PATCH 1/4] [AMD] qwen3.5-mxfp4-mi355x: bump image to v0.5.13 and enable AITER_FLYDSL_FORCE --- .github/configs/amd-master.yaml | 4 ++-- benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x.sh | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 70a79a273..fb0274784 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -427,7 +427,7 @@ qwen3.5-fp8-mi355x-sglang-disagg: - "DECODE_MTP_SIZE=0" qwen3.5-fp4-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260604 + image: lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612 model: amd/Qwen3.5-397B-A17B-MXFP4 model-prefix: qwen3.5 runner: mi355x @@ -469,7 +469,7 @@ qwen3.5-fp4-mi355x-atom: - { tp: 4, conc-start: 4, conc-end: 16 } qwen3.5-fp4-mi355x-sglang-mtp: - image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260604 + image: lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612 model: amd/Qwen3.5-397B-A17B-MXFP4 model-prefix: qwen3.5 runner: mi355x diff --git a/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x.sh b/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x.sh index 7d87eea27..b036e060e 100644 --- a/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x.sh +++ b/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x.sh @@ -19,6 +19,7 @@ if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi export SGLANG_USE_AITER=1 export SGLANG_USE_AITER_UNIFIED_ATTN=1 +export AITER_FLYDSL_FORCE=1 SERVER_LOG=/workspace/server.log MEM_FRAC_STATIC=${MEM_FRAC_STATIC:-0.8} From b126701aacac0a8ee68ca66d3345366a2ae2d580 Mon Sep 17 00:00:00 2001 From: "jacky.cheng" Date: Fri, 12 Jun 2026 09:11:27 +0000 Subject: [PATCH 2/4] [AMD] qwen3.5-mxfp4-mi355x-mtp: add AITER_FLYDSL_FORCE=1 to MTP benchmark script --- benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x_mtp.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x_mtp.sh b/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x_mtp.sh index 317147668..8081b824e 100755 --- a/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x_mtp.sh +++ b/benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x_mtp.sh @@ -19,6 +19,7 @@ hf download "$MODEL" export SGLANG_USE_AITER=1 export SGLANG_USE_AITER_UNIFIED_ATTN=1 +export AITER_FLYDSL_FORCE=1 SERVER_LOG=/workspace/server.log MEM_FRAC_STATIC=${MEM_FRAC_STATIC:-0.8} From 56da30748e14c4d9a56612929b42dadb52f332e6 Mon Sep 17 00:00:00 2001 From: "jacky.cheng" Date: Fri, 12 Jun 2026 09:23:35 +0000 Subject: [PATCH 3/4] [AMD] perf-changelog: add qwen3.5-mxfp4-mi355x v0.5.13 image bump entry --- perf-changelog.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index c0642188b..ac7c28ce9 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3600,3 +3600,11 @@ - "MI355x DSR1-FP4: Include TP4 configurations for 8k1k" - "Expand the TP sweep (included TP=4) for 8k/1k configuration for conc=4 to 64" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1692 + +- config-keys: + - qwen3.5-fp4-mi355x-sglang + - qwen3.5-fp4-mi355x-sglang-mtp + description: + - "Bump image from lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260604 to lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612" + - "Enable AITER_FLYDSL_FORCE=1 in both non-MTP and MTP benchmark scripts" + pr-link: From 9d78e2fa673741356e6dd43f1aa994775adcc9b4 Mon Sep 17 00:00:00 2001 From: "jacky.cheng" Date: Fri, 12 Jun 2026 09:25:54 +0000 Subject: [PATCH 4/4] [Fix] perf-changelog: add missing pr-link for qwen3.5-mxfp4-mi355x entry --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index ac7c28ce9..4ba690d2e 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3607,4 +3607,4 @@ description: - "Bump image from lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260604 to lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612" - "Enable AITER_FLYDSL_FORCE=1 in both non-MTP and MTP benchmark scripts" - pr-link: + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1716