tweaks

juncgu-google · juncgu-google · commit 616ac130d3e5 · 2025-12-06T04:50:30.000Z
Signed-off-by: Juncheng Gu <jcgu@google.com>
diff --git a/tpu_inference/platforms/tpu_platform.py b/tpu_inference/platforms/tpu_platform.py
@@ -217,6 +217,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
             "Forcing --disable_chunked_mm_input.")
             scheduler_config.disable_chunked_mm_input = True
 
+        # Late initialization to avoid circular import
         from tpu_inference.models.jax.utils.quantization.quantization_utils import \
             update_vllm_config_for_qwix_quantization
 
diff --git a/tpu_inference/worker/tpu_worker.py b/tpu_inference/worker/tpu_worker.py
@@ -299,8 +299,6 @@ def determine_available_memory(self) -> int:
                 kv_cache_specs = self.model_runner.get_kv_cache_spec()
                 num_layers = len(kv_cache_specs)
                 vllm_page_size_bytes = get_uniform_page_size(kv_cache_specs)
-                # rpa_page_size_bytes = get_rpa_page_size_bytes(self.model_runner.mesh,
-                #                                             kv_cache_specs)
                 stage_buffer_size_bytes = staging_buffer_pages * num_layers * vllm_page_size_bytes
 
                 total_hbm_avail = total_hbm_avail - stage_buffer_size_bytes