SemiAnalysisAI · seungrokj · Apr 14, 2026 · Apr 14, 2026
@@ -38,8 +38,8 @@ dsr1-fp4-mi355x-atom:
     - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 }
 
 dsr1-fp4-mi355x-atom-mtp:
-  image: rocm/atom:rocm7.2.0-ubuntu24.04-pytorch2.9-atom0.1.1
-  model: amd/DeepSeek-R1-0528-MXFP4
+  image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2.post
+  model: amd/DeepSeek-R1-0528-MXFP4-MTP-MoEFP4
   model-prefix: dsr1
   runner: mi355x
   precision: fp4
@@ -51,11 +51,11 @@ dsr1-fp4-mi355x-atom-mtp:
     osl: 1024
     search-space:
     - { tp: 4, conc-start: 4, conc-end: 256, spec-decoding: mtp }
-    - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+    - { tp: 8, conc-start: 4, conc-end: 128, spec-decoding: mtp }
   - isl: 8192
     osl: 1024
     search-space:
-    #- { tp: 4, conc-start: 32, conc-end: 256, spec-decoding: mtp }
+    - { tp: 4, conc-start: 4, conc-end: 256, spec-decoding: mtp }
     - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
 
 dsr1-fp8-mi300x-sglang:

diff --git a/benchmarks/single_node/dsr1_fp4_mi355x_atom_mtp.sh b/benchmarks/single_node/dsr1_fp4_mi355x_atom_mtp.sh
@@ -55,6 +55,7 @@ python3 -m atom.entrypoints.openai_server \
     -tp $TP \
     --kv_cache_dtype fp8 $CALCULATED_MAX_MODEL_LEN $EP \
     --method mtp \
+    --num-speculative-tokens 3 \
     > $SERVER_LOG 2>&1 &
 
 SERVER_PID=$!

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -1330,3 +1330,10 @@
     - "Remove sed hack for TensorRT bug (fixed upstream in v1.3.0rc5)"
     - "Remove enable_block_reuse: false from kv_cache_config (default true is now recommended)"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/854
+
+- config-keys:
+    - dsr1-fp4-mi355x-atom-mtp
+  description:
+    - "Update DeepSeek R1 MI355X FP4 ATOM-MTP config to run MTP with 3 speculative tokens"
+    - "Image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2.post"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1028