Run benchmark.sh with both repos (vllm and tpu_inference) kept in /workspace

Ylang Tsou · Ylang Tsou · commit 3c1e18f417dd · 2025-12-05T17:15:33.000+08:00
Signed-off-by: Ylang Tsou <ylangt@google.com>
diff --git a/.buildkite/pipeline_test_pypi.yml b/.buildkite/pipeline_test_pypi.yml
@@ -4,7 +4,6 @@ steps:
   # -----------------------------------------------------------------
    - label: "Performance benchmarks for meta-llama/Llama-3.1-8B-Instruct"
      key: "meta-llama_Llama-3_1-8B-Instruct_Benchmark"
-     soft_fail: true
      agents:
       queue: tpu_v6e_queue
      env:
@@ -19,7 +18,7 @@ steps:
       MAX_NUM_BATCHED_TOKENS: 1024
      commands:
       - |
-        .buildkite/scripts/run_with_pypi.sh bash /usr/local/lib/python3.12/site-packages/tpu_inference/tests/e2e/benchmarking/benchmark.sh
+        .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/benchmark.sh
 
    - label: "Performance benchmarks for Qwen/Qwen3-4B"
      key: "Qwen_Qwen3-4B_Benchmark"
@@ -37,4 +36,4 @@ steps:
       MAX_NUM_BATCHED_TOKENS: 4096
      commands:
       - |
-       .buildkite/scripts/run_with_pypi.sh bash /usr/local/lib/python3.12/site-packages/tpu_inference/tests/e2e/benchmarking/benchmark.sh
+       .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/benchmark.sh
diff --git a/docker/Dockerfile.pypi b/docker/Dockerfile.pypi
@@ -17,26 +17,14 @@ COPY . .
 RUN export TPU_INFERENCE_VERSION=$(pip index versions tpu-inference --pre 2>/dev/null | grep -oE "[0-9]+\.[0-9]+\.[0-9]+\.dev[0-9]+" | head -n 1) && \
     echo -n "${TPU_INFERENCE_VERSION}" > /tmp/tpu_inference_version
 
-# Clone vLLM
+# Build vllm-tpu wheel
 WORKDIR /workspace
 RUN export VLLM_TPU_VERSION=$(cat /tmp/tpu_inference_version) && \
     bash tpu_inference/.buildkite/scripts/build_vllm_tpu.sh ${VLLM_TPU_VERSION} ${VLLM_TPU_VERSION}
 
-# Install vllm-tpu from whl
+# Install vllm-tpu wheel
 WORKDIR /workspace/vllm
 RUN pip install --no-cache-dir dist/*.whl
 
-# Install test dependencies
-RUN python3 -m pip install tests/vllm_test_utils
-RUN python3 -m pip install --no-cache-dir \
-    git+https://github.com/thuml/depyf.git \
-    pytest-asyncio \
-    git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d#egg=lm-eval[api] \
-    pytest-cov \
-    tblib
-
-# Remove repository
-WORKDIR /workspace
-RUN rm -rf /workspace/vllm /workspace/tpu_inference
 
 CMD ["/bin/bash"]