Configure the pre-mapped buffer for TPU

juncgu-google · juncgu-google · commit a3ff52bec4b6 · 2025-12-06T04:50:30.000Z
Signed-off-by: Juncheng Gu <jcgu@google.com>
diff --git a/examples/offload/gke/benchmarks/deploy-cpu-offload.yaml b/examples/offload/gke/benchmarks/deploy-cpu-offload.yaml
@@ -34,6 +34,12 @@ spec:
           value: "4096"
         - name: TPU_OFFLOAD_NUM_STAGING_BLOCKS
           value: "256"
+        # Configure the pre-mapped CPU buffer for TPUs; see
+        # https://docs.cloud.google.com/tpu/docs/performance-guide#tpu_model_performance
+        - name: TPU_PREMAPPED_BUFFER_SIZE
+          value: "68719476736"  # 64 GiB (2^36 bytes)
+        - name: TPU_PREMAPPED_BUFFER_TRANSFER_THRESHOLD_BYTES
+          value: "68719476736"  # 64 GiB (2^36 bytes)
         ports:
         - containerPort: 8000
         resources: