Skip to content
145 changes: 139 additions & 6 deletions .github/configs/amd-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,22 @@ qwen3.5-fp4-mi355x-sglang:
- { tp: 2, conc-start: 4, conc-end: 256 }
- { tp: 4, conc-start: 4, conc-end: 16 }

# target
qwen3.5-fp4-mi355x-sglang-agentic-hicache:
  # Agentic-coding sweep for the MXFP4 Qwen3.5 checkpoint under sglang on the mi355x runner.
  # Both search-space rows share the same tp/ep and concurrency list and differ only in
  # `offloading` (none vs. hicache), so results can be compared row against row.
  image: lmsysorg/sglang:v0.5.12-rocm720-mi35x
  model: amd/Qwen3.5-397B-A17B-MXFP4
  model-prefix: qwen3.5
  runner: mi355x
  precision: fp4
  framework: sglang
  multinode: false
  scenarios:
    agentic-coding:
      - duration: 1800
        search-space:
          # Baseline row: offloading set to none.
          - tp: 2
            ep: 1
            offloading: none
            conc-list: [8, 16, 32, 40, 48, 56, 72]
          # Same grid with hicache offloading.
          - tp: 2
            ep: 1
            offloading: hicache
            conc-list: [8, 16, 32, 40, 48, 56, 72]

qwen3.5-fp4-mi355x-atom:
image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
model: amd/Qwen3.5-397B-A17B-MXFP4
Expand Down Expand Up @@ -872,6 +888,22 @@ minimaxm2.5-fp4-mi355x-atom:
- { tp: 4, conc-start: 4, conc-end: 128 }
- { tp: 8, conc-start: 4, conc-end: 16 }

# target
minimaxm2.5-fp4-mi355x-vllm-agentic-lmcache:
  # Agentic-coding sweep for the MXFP4 MiniMax-M2.5 checkpoint under vllm on the mi355x runner.
  # The two rows use an identical tp/ep and concurrency list; only `offloading` changes
  # (none vs. lmcache).
  image: vllm/vllm-openai-rocm:v0.22.0
  model: amd/MiniMax-M2.5-MXFP4
  model-prefix: minimaxm2.5
  runner: mi355x
  precision: fp4
  framework: vllm
  multinode: false
  scenarios:
    agentic-coding:
      - duration: 1800
        search-space:
          # Baseline row: offloading set to none.
          - tp: 1
            ep: 1
            offloading: none
            conc-list: [1, 2, 4, 8, 16, 32, 40, 48]
          # Same grid with lmcache offloading.
          - tp: 1
            ep: 1
            offloading: lmcache
            conc-list: [1, 2, 4, 8, 16, 32, 40, 48]

minimaxm2.5-fp4-mi355x-vllm:
image: vllm/vllm-openai-rocm:v0.22.0
model: amd/MiniMax-M2.5-MXFP4
Expand Down Expand Up @@ -2142,6 +2174,23 @@ dsv4-fp4-mi355x-atom-mtp:
search-space:
- { tp: 8, ep: 1, conc-start: 1, conc-end: 1024, spec-decoding: mtp }

# target
dsv4-fp4-mi355x-atom-agentic-lmcache:
  # Agentic-coding entry for DeepSeek-V4-Pro under the atom framework on the mi355x runner.
  # NOTE(review): despite the "-lmcache" suffix, the only active row below runs with
  # `offloading: none` at a single concurrency (52); the wider none/lmcache sweeps are
  # commented out. Looks like a narrowed-down debug run — confirm before relying on this
  # entry for lmcache numbers.
  image: rocm/atom-dev:nightly_202606101557
  model: deepseek-ai/DeepSeek-V4-Pro
  model-prefix: dsv4
  runner: mi355x
  precision: fp4
  framework: atom
  multinode: false
  scenarios:
    agentic-coding:
      - duration: 1800
        search-space:
          # Active row: single-point run without offloading.
          - tp: 8
            ep: 1
            offloading: none
            conc-list: [52]
          # Disabled sweeps, kept for reference:
          # - { tp: 8, ep: 1, offloading: none, conc-list: [44, 48, 52, 56, 60] }
          # - { tp: 8, ep: 1, offloading: lmcache, conc-list: [44, 48, 52, 56, 60] }

qwen3.5-bf16-mi325x-sglang-mtp:
image: lmsysorg/sglang:v0.5.12-rocm720-mi30x
model: Qwen/Qwen3.5-397B-A17B
Expand Down Expand Up @@ -2494,6 +2543,23 @@ glm5.1-fp4-mi355x-sglang-agentic:
# sglang manages KV eviction; mi355x glm5.1 caps at tp=4 conc=16 in fixed-seq, so cap conservatively
- { tp: 4, offloading: none, conc-list: [1, 2, 4, 8, 16, 32] }

# target
glm5.1-fp4-mi355x-sglang-agentic-hicache:
  # Agentic-coding sweep for the MXFP4 GLM-5.1 checkpoint under sglang on the mi355x runner.
  # Both rows share tp/ep and the concurrency list; only `offloading` differs (none vs. hicache).
  image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260529
  model: amd/GLM-5.1-MXFP4
  model-prefix: glm5.1
  runner: mi355x
  precision: fp4
  framework: sglang
  multinode: false
  scenarios:
    agentic-coding:
      - duration: 1800
        search-space:
          # sglang manages KV eviction. Reference point from the fixed-seq runs:
          # mi355x glm5.1 caps at tp=4 conc=16.
          # NOTE(review): the rows below use tp=2 and sweep up to conc=48, so they are not
          # capped at that reference point — confirm the upper bound is intentional.
          - tp: 2
            ep: 1
            offloading: none
            conc-list: [4, 8, 16, 32, 40, 48]
          - tp: 2
            ep: 1
            offloading: hicache
            conc-list: [4, 8, 16, 32, 40, 48]

kimik2.5-fp4-mi355x-vllm-agentic:
image: vllm/vllm-openai-rocm:v0.22.0
model: amd/Kimi-K2.5-MXFP4
Expand All @@ -2518,8 +2584,40 @@ kimik2.5-fp4-mi355x-vllm-agentic:
- { tp: 4, offloading: none, conc-list: [16, 24, 32, 40] }
- { tp: 4, offloading: cpu, conc-list: [16, 24, 32, 40] }

# target
kimik2.5-fp4-mi355x-vllm-agentic-lmcache:
  # Agentic-coding sweep for the MXFP4 Kimi-K2.5 checkpoint under vllm on the mi355x runner.
  # The two rows are identical except for `offloading` (none vs. lmcache).
  image: vllm/vllm-openai-rocm:v0.22.0
  model: amd/Kimi-K2.5-MXFP4
  model-prefix: kimik2.5
  runner: mi355x
  precision: fp4
  framework: vllm
  multinode: false
  scenarios:
    agentic-coding:
      - duration: 1800
        search-space:
          # Baseline row: offloading set to none.
          - tp: 4
            ep: 1
            offloading: none
            conc-list: [1, 2, 4, 8, 16, 32, 40, 48, 56, 64]
          # Same grid with lmcache offloading.
          - tp: 4
            ep: 1
            offloading: lmcache
            conc-list: [1, 2, 4, 8, 16, 32, 40, 48, 56, 64]

# target
kimik2.5-fp4-mi355x-vllm-agentic-lmcache-060226DRAM1500GB:
  # lmcache-only variant of the Kimi-K2.5 MXFP4 agentic-coding sweep (vllm, mi355x runner).
  # NOTE(review): the "060226DRAM1500GB" suffix presumably encodes a run date and a host
  # DRAM budget; nothing in this entry sets either — confirm where that is configured.
  image: vllm/vllm-openai-rocm:v0.22.0
  model: amd/Kimi-K2.5-MXFP4
  model-prefix: kimik2.5
  runner: mi355x
  precision: fp4
  framework: vllm
  multinode: false
  scenarios:
    agentic-coding:
      - duration: 1800
        search-space:
          # Baseline (offloading: none) row is disabled for this variant, kept for reference:
          # - { tp: 4, ep: 1, offloading: none, conc-list: [1, 2, 4, 8, 16, 32, 40, 48, 56, 64] }
          - tp: 4
            ep: 1
            offloading: lmcache
            conc-list: [1, 2, 4, 8, 16, 32, 40, 48, 56, 64]

minimaxm2.5-fp8-mi355x-vllm-agentic:
image: vllm/vllm-openai-rocm:v0.22.1
image: vllm/vllm-openai-rocm:v0.22.0
model: MiniMaxAI/MiniMax-M2.5
model-prefix: minimaxm2.5
runner: mi355x
Expand All @@ -2536,8 +2634,27 @@ minimaxm2.5-fp8-mi355x-vllm-agentic:
- { tp: 4, ep: 4, offloading: none, conc-list: [1, 2, 4, 8, 16, 32, 48, 56, 64, 72, 96] }
- { tp: 4, ep: 4, offloading: cpu, conc-list: [48, 56, 64, 72, 96] }

# target
minimaxm2.5-fp8-mi355x-vllm-agentic-lmcache:
  # Agentic-coding sweep for the fp8 MiniMax-M2.5 checkpoint under vllm on the mi355x runner.
  # Both rows share tp/ep and the concurrency list; only `offloading` differs (none vs. lmcache).
  image: vllm/vllm-openai-rocm:v0.22.0
  model: MiniMaxAI/MiniMax-M2.5
  model-prefix: minimaxm2.5
  runner: mi355x
  precision: fp8
  framework: vllm
  multinode: false
  scenarios:
    agentic-coding:
      # Reference figures for MI355X at tp=4 ep=4: compute ceiling ~60 (empirical),
      # KV cliff ~91 (analytical), i.e. compute saturates first at that parallelism.
      # NOTE(review): this entry sweeps tp=2 ep=1 and uses lmcache rather than cpu offload,
      # so the tp=4 ep=4 figures and the earlier remark about AMD using the native
      # OffloadingConnector (NOT SimpleCPUOffloadConnector) may not apply here — confirm.
      - duration: 1800
        search-space:
          - tp: 2
            ep: 1
            offloading: none
            conc-list: [1, 2, 4, 8, 16, 32, 40, 48]
          - tp: 2
            ep: 1
            offloading: lmcache
            conc-list: [1, 2, 4, 8, 16, 32, 40, 48]

minimaxm2.5-fp8-mi300x-vllm-agentic:
image: vllm/vllm-openai-rocm:v0.22.1
image: vllm/vllm-openai-rocm:v0.22.0
model: MiniMaxAI/MiniMax-M2.5
model-prefix: minimaxm2.5
runner: mi300x
Expand All @@ -2555,7 +2672,7 @@ minimaxm2.5-fp8-mi300x-vllm-agentic:
- { tp: 4, offloading: cpu, conc-list: [16, 20, 24, 28, 32] }

minimaxm2.5-fp8-mi325x-vllm-agentic:
image: vllm/vllm-openai-rocm:v0.22.1
image: vllm/vllm-openai-rocm:v0.22.0
model: MiniMaxAI/MiniMax-M2.5
model-prefix: minimaxm2.5
runner: mi325x
Expand All @@ -2573,8 +2690,9 @@ minimaxm2.5-fp8-mi325x-vllm-agentic:
- { tp: 4, offloading: none, conc-list: [1, 2, 4, 8, 16, 20, 24, 28, 32, 40, 48] }
- { tp: 4, offloading: cpu, conc-list: [16, 20, 24, 28, 32] }

# target
qwen3.5-fp8-mi355x-sglang-agentic-hicache:
image: lmsysorg/sglang-rocm:v0.5.12-rocm720-mi35x-20260521
image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260531
model: Qwen/Qwen3.5-397B-A17B-FP8
model-prefix: qwen3.5
runner: mi355x
Expand All @@ -2585,8 +2703,8 @@ qwen3.5-fp8-mi355x-sglang-agentic-hicache:
agentic-coding:
- duration: 1800
search-space:
- { tp: 8, ep: 1, offloading: none, conc-list: [1, 2, 4, 8, 16, 32] }
- { tp: 8, ep: 1, offloading: hicache, conc-list: [16, 32, 48, 64] }
- { tp: 4, ep: 1, offloading: none, conc-list: [8, 16, 32, 40, 48, 56, 72] }
- { tp: 4, ep: 1, offloading: hicache, conc-list: [8, 16, 32, 40, 48, 56, 72] }

dsv4-fp4-mi355x-vllm-agentic:
image: vllm/vllm-openai-rocm:v0.22.0
Expand Down Expand Up @@ -2618,3 +2736,18 @@ dsv4-fp4-mi355x-sglang-agentic:
search-space:
- { tp: 8, offloading: none, conc-list: [16, 32, 64] }
- { tp: 8, dp-attn: true, offloading: none, conc-list: [64, 128, 256] }

dsv4-fp4-mi355x-sglang-agentic-hicache:
  # Agentic-coding comparison for DeepSeek-V4-Pro under sglang on the mi355x runner:
  # a single concurrency point (52) at tp=8, run once with `offloading: none` and once
  # with `offloading: hicache`. No `ep` key is set on either row.
  image: rocm/sgl-dev:rocm720-mi35x-0363e6c-20260509-DSv4
  model: deepseek-ai/DeepSeek-V4-Pro
  model-prefix: dsv4
  runner: mi355x
  precision: fp4
  framework: sglang
  multinode: false
  scenarios:
    agentic-coding:
      - duration: 1800
        search-space:
          # Baseline row: offloading set to none.
          - tp: 8
            offloading: none
            conc-list: [52]
          # Same point with hicache offloading.
          - tp: 8
            offloading: hicache
            conc-list: [52]
Loading
Loading