Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/configs/amd-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ qwen3.5-fp8-mi355x-sglang-disagg:
- "DECODE_MTP_SIZE=0"

qwen3.5-fp4-mi355x-sglang:
image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260604
image: lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612
model: amd/Qwen3.5-397B-A17B-MXFP4
model-prefix: qwen3.5
runner: mi355x
Expand Down Expand Up @@ -469,7 +469,7 @@ qwen3.5-fp4-mi355x-atom:
- { tp: 4, conc-start: 4, conc-end: 16 }

qwen3.5-fp4-mi355x-sglang-mtp:
image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260604
image: lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612
Comment thread
yichiche marked this conversation as resolved.
model: amd/Qwen3.5-397B-A17B-MXFP4
model-prefix: qwen3.5
runner: mi355x
Expand Down
1 change: 1 addition & 0 deletions benchmarks/single_node/fixed_seq_len/qwen3.5_fp4_mi355x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi

export SGLANG_USE_AITER=1
export SGLANG_USE_AITER_UNIFIED_ATTN=1
export AITER_FLYDSL_FORCE=1

SERVER_LOG=/workspace/server.log
MEM_FRAC_STATIC=${MEM_FRAC_STATIC:-0.8}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ hf download "$MODEL"

export SGLANG_USE_AITER=1
export SGLANG_USE_AITER_UNIFIED_ATTN=1
export AITER_FLYDSL_FORCE=1

SERVER_LOG=/workspace/server.log
MEM_FRAC_STATIC=${MEM_FRAC_STATIC:-0.8}
Expand Down
8 changes: 8 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3600,3 +3600,11 @@
- "MI355x DSR1-FP4: Include TP4 configurations for 8k1k"
- "Expand the TP sweep (included TP=4) for 8k/1k configuration for conc=4 to 64"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1692

- config-keys:
- qwen3.5-fp4-mi355x-sglang
- qwen3.5-fp4-mi355x-sglang-mtp
description:
- "Bump image from lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260604 to lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612"
- "Enable AITER_FLYDSL_FORCE=1 in both non-MTP and MTP benchmark scripts"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1716