diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index 70a79a273..bb49bb251 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -2137,7 +2137,7 @@ dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp:
 
 
 dsv4-fp4-mi355x-sglang:
-  image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260610
+  image: lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612
   model: deepseek-ai/DeepSeek-V4-Pro
   model-prefix: dsv4
   runner: mi355x
diff --git a/benchmarks/single_node/fixed_seq_len/dsv4_fp4_mi355x_sglang.sh b/benchmarks/single_node/fixed_seq_len/dsv4_fp4_mi355x_sglang.sh
index 4aeebc683..ade8d96fe 100755
--- a/benchmarks/single_node/fixed_seq_len/dsv4_fp4_mi355x_sglang.sh
+++ b/benchmarks/single_node/fixed_seq_len/dsv4_fp4_mi355x_sglang.sh
@@ -60,7 +60,9 @@ start_gpu_monitor
 PARALLEL_ARGS=(
     --tensor-parallel-size "$TP"
 )
+CHUNKED_PREFILL_SIZE=8192
 if [ "${DP_ATTENTION}" = "true" ]; then
+    CHUNKED_PREFILL_SIZE=$((8192 * TP))
     PARALLEL_ARGS+=(
         --dp "$TP"
         --enable-dp-attention
@@ -85,7 +87,7 @@ sglang serve \
     --swa-full-tokens-ratio 0.15 \
     --page-size 256 \
     --context-length $MAX_MODEL_LEN \
-    --chunked-prefill-size 8192 \
+    --chunked-prefill-size $CHUNKED_PREFILL_SIZE \
     --disable-shared-experts-fusion \
     --tool-call-parser deepseekv4 \
     --reasoning-parser deepseek-v4 \
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 3ebd09db2..2af0baaaa 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3600,6 +3600,14 @@
     - "MI355x DSR1-FP4: Include TP4 configurations for 8k1k"
     - "Expand the TP sweep (included TP=4) for 8k/1k configuration for conc=4 to 64"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1692
+
+- config-keys:
+    - dsv4-fp4-mi355x-sglang
+  description:
+    - "Bump image to lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612."
+    - "Fix the intermediate_pad setting in the MoE computation (sglang PR#27858). This avoids the overhead of computing unnecessary padding."
+    - "Correct the chunked-prefill size under the tp8/dp8 config: --chunked-prefill-size is now 8192 * TP when DP attention is enabled (8192 otherwise)."
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1715
   
 - config-keys:
     - dsv4-fp4-gb200-dynamo-sglang