Skip to content

Commit d80872a

Browse files
Clear XLA compilation cache before each disagg server launch (#1271)
1 parent 5644ffb commit d80872a

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

tests/e2e/test_local_disagg.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
from unittest.mock import patch
88

99
import pytest
10+
import vllm.envs as vllm_envs
1011
from vllm import LLM, EngineArgs, SamplingParams
1112

1213
from tpu_inference.core.core_tpu import DisaggEngineCore, DisaggEngineCoreProc
@@ -94,7 +95,7 @@ def test_disaggregated_serving(test_prompts, sampling_params):
9495
patch("vllm.v1.engine.core.EngineCoreProc", DisaggEngineCoreProc):
9596

9697
model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
97-
98+
os.system(f"rm -rf {vllm_envs.VLLM_XLA_CACHE_PATH}/*")
9899
engine_args = EngineArgs(
99100
model=model_name,
100101
max_model_len=2048,
@@ -194,7 +195,9 @@ def test_disaggregated_serving_correctness(test_prompts, sampling_params):
194195
is_disagg=False)
195196

196197
# Run disaggregated inference
198+
os.system(f"rm -rf {vllm_envs.VLLM_XLA_CACHE_PATH}/*")
197199
print("Running Disaggregated Inference...")
200+
198201
disagg_outputs = _run_inference(model_name=model_name,
199202
test_prompts=small_prompts,
200203
sampling_params=sampling_params,

0 commit comments

Comments
 (0)