Skip to content

Commit 616ac13

Browse files
committed
tweaks
Signed-off-by: Juncheng Gu <jcgu@google.com>
1 parent a24e4bb commit 616ac13

File tree

2 files changed

+1
-2
lines changed

2 files changed

+1
-2
lines changed

tpu_inference/platforms/tpu_platform.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -217,6 +217,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
217217
"Forcing --disable_chunked_mm_input.")
218218
scheduler_config.disable_chunked_mm_input = True
219219

220+
# Late initialization to avoid circular import
220221
from tpu_inference.models.jax.utils.quantization.quantization_utils import \
221222
update_vllm_config_for_qwix_quantization
222223

tpu_inference/worker/tpu_worker.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -299,8 +299,6 @@ def determine_available_memory(self) -> int:
299299
kv_cache_specs = self.model_runner.get_kv_cache_spec()
300300
num_layers = len(kv_cache_specs)
301301
vllm_page_size_bytes = get_uniform_page_size(kv_cache_specs)
302-
# rpa_page_size_bytes = get_rpa_page_size_bytes(self.model_runner.mesh,
303-
# kv_cache_specs)
304302
stage_buffer_size_bytes = staging_buffer_pages * num_layers * vllm_page_size_bytes
305303

306304
total_hbm_avail = total_hbm_avail - stage_buffer_size_bytes

0 commit comments

Comments
 (0)