Skip to content

Commit 2a7abbb

Browse files
author
Ylang Tsou
committed
build with script
Signed-off-by: Ylang Tsou <ylangt@google.com>
1 parent 5637e1d commit 2a7abbb

File tree

4 files changed

+135
-271
lines changed

4 files changed

+135
-271
lines changed

.buildkite/pipeline_test_pypi.yml

Lines changed: 33 additions & 224 deletions
Original file line numberDiff line numberDiff line change
@@ -2,230 +2,39 @@ steps:
22
# -----------------------------------------------------------------
33
# TEST STEPS - Calling wrapper
44
# -----------------------------------------------------------------
5-
- label: "wheel E2E MLPerf tests for JAX models"
6-
key: test_0
5+
- label: "Performance benchmarks for meta-llama/Llama-3.1-8B-Instruct"
6+
key: "meta-llama_Llama-3_1-8B-Instruct_Benchmark"
77
soft_fail: true
88
agents:
9-
queue: tpu_v6e_queue
10-
commands:
11-
- .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/mlperf.sh
12-
13-
- label: "wheel E2E multi modality test"
14-
key: test_5
15-
soft_fail: true
16-
agents:
17-
queue: tpu_v6e_queue
18-
commands:
19-
- |
20-
.buildkite/scripts/run_with_pypi.sh \
21-
bash -c 'python3 -m pytest -s -v -x /workspace/tpu_inference/tests/e2e/test_multi_modal_inference.py && \
22-
bash /workspace/tpu_inference/tests/e2e/benchmarking/mm_bench.sh'
23-
24-
- label: "wheel E2E speculative decoding test"
25-
key: test_6
26-
soft_fail: true
27-
agents:
28-
queue: tpu_v6e_queue
29-
commands:
30-
- |
31-
.buildkite/scripts/run_with_pypi.sh \
32-
bash -c 'python3 -m pytest -s -v -x /workspace/tpu_inference/tests/e2e/test_speculative_decoding.py'
33-
34-
- label: "wheel JAX unit tests"
35-
key: test_7
36-
soft_fail: true
37-
agents:
38-
queue: tpu_v6e_queue
39-
commands:
40-
- |
41-
.buildkite/scripts/run_with_pypi.sh \
42-
python3 -m pytest -s -v -x /workspace/tpu_inference/tests/ \
43-
--ignore=/workspace/tpu_inference/tests/kernels \
44-
--ignore=/workspace/tpu_inference/tests/lora \
45-
--ignore=/workspace/tpu_inference/tests/e2e \
46-
--ignore=/workspace/tpu_inference/tpu_inference/mock \
47-
--cov-config=/workspace/tpu_inference/.coveragerc --cov tpu_inference --cov-report term-missing --cov-fail-under=69
48-
49-
- label: "wheel JAX unit tests - kernels"
50-
key: test_8
51-
soft_fail: true
52-
agents:
53-
queue: tpu_v6e_queue
54-
commands:
55-
- |
56-
.buildkite/scripts/run_with_pypi.sh \
57-
python3 -m pytest -s -v -x /workspace/tpu_inference/tests/kernels \
58-
--ignore=/workspace/tpu_inference/tests/kernels/ragged_paged_attention_kernel_v2_test.py \
59-
--ignore=/workspace/tpu_inference/tests/kernels/ragged_kv_cache_update_v2_test.py \
60-
--ignore=/workspace/tpu_inference/tests/kernels/collectives
61-
62-
- label: "wheel JAX unit tests - collective kernels"
63-
key: test_9
64-
soft_fail: true
65-
agents:
66-
queue: tpu_v6e_8_queue
67-
commands:
68-
- |
69-
.buildkite/scripts/run_with_pypi.sh \
70-
python3 -m pytest -s -v -x /workspace/tpu_inference/tests/kernels/collectives
71-
72-
- label: "wheel lora tests for JAX + vLLM models"
73-
key: test_10
74-
soft_fail: true
75-
agents:
76-
queue: tpu_v6e_queue
77-
commands:
78-
- |
79-
.buildkite/scripts/run_with_pypi.sh \
80-
bash -c 'MODEL_IMPL_TYPE=vllm TPU_BACKEND_TYPE=jax python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_lora.py'
81-
82-
- label: "wheel E2E MLperf tests for DeepSeek-R1 (no accuracy, 12-decoder layers only)"
83-
key: test_12
84-
soft_fail: true
9+
queue: tpu_v6e_queue
8510
env:
86-
NEW_MODEL_DESIGN: "True"
87-
USE_V6E8_QUEUE: "True"
88-
SKIP_ACCURACY_TESTS: "True"
89-
VLLM_MLA_DISABLE: "1"
90-
JAX_RANDOM_WEIGHTS: "True"
91-
agents:
92-
queue: tpu_v6e_8_queue
93-
commands:
94-
- .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/mlperf.sh -m deepseek-ai/DeepSeek-R1-0528
95-
96-
97-
98-
- label: "wheel TPU Test 0: test_perf.py"
99-
key: tpu_test_0
100-
soft_fail: true
101-
agents:
102-
queue: tpu_v6e_queue
103-
commands:
104-
- .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_perf.py
105-
106-
- label: "wheel TPU Test 1: test_compilation.py"
107-
key: tpu_test_1
108-
soft_fail: true
109-
agents:
110-
queue: tpu_v6e_queue
111-
commands:
112-
- .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_compilation.py
113-
114-
- label: "wheel TPU Test 2: test_basic.py"
115-
key: tpu_test_2
116-
soft_fail: true
117-
agents:
118-
queue: tpu_v6e_queue
119-
commands:
120-
- .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py
121-
122-
- label: "wheel TPU Test 3: test_accuracy.py (v1)"
123-
key: tpu_test_3
124-
soft_fail: true
125-
agents:
126-
queue: tpu_v6e_queue
127-
commands:
128-
- .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine
129-
130-
- label: "wheel TPU Test 4: test_quantization_accuracy.py"
131-
key: tpu_test_4
132-
soft_fail: true
133-
agents:
134-
queue: tpu_v6e_queue
135-
commands:
136-
- .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py
137-
138-
- label: "wheel TPU Test 5: examples/offline_inference/tpu.py"
139-
key: tpu_test_5
140-
soft_fail: true
141-
agents:
142-
queue: tpu_v6e_queue
143-
commands:
144-
- .buildkite/scripts/run_with_pypi.sh python3 /workspace/vllm/examples/offline_inference/tpu.py
145-
146-
- label: "wheel TPU Test 6: test_tpu_model_runner.py"
147-
key: tpu_test_6
148-
soft_fail: true
149-
agents:
150-
queue: tpu_v6e_queue
151-
commands:
152-
- .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/worker/test_tpu_model_runner.py
153-
154-
- label: "wheel TPU Test 7: test_sampler.py"
155-
key: tpu_test_7
156-
soft_fail: true
157-
agents:
158-
queue: tpu_v6e_queue
159-
commands:
160-
- .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py
161-
162-
- label: "wheel TPU Test 8: test_topk_topp_sampler.py"
163-
key: tpu_test_8
164-
soft_fail: true
165-
agents:
166-
queue: tpu_v6e_queue
167-
commands:
168-
- .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py
169-
170-
- label: "wheel TPU Test 9: test_multimodal.py"
171-
key: tpu_test_9
172-
soft_fail: true
173-
agents:
174-
queue: tpu_v6e_queue
175-
commands:
176-
- .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py
177-
178-
- label: "wheel TPU Test 10: test_pallas.py"
179-
key: tpu_test_10
180-
soft_fail: true
181-
agents:
182-
queue: tpu_v6e_queue
183-
commands:
184-
- .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py
185-
186-
- label: "wheel TPU Test 11: test_struct_output_generate.py"
187-
key: tpu_test_11
188-
soft_fail: true
189-
agents:
190-
queue: tpu_v6e_queue
191-
commands:
192-
- .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py
193-
194-
- label: "wheel TPU Test 12: test_moe_pallas.py"
195-
key: tpu_test_12
196-
soft_fail: true
197-
agents:
198-
queue: tpu_v6e_queue
199-
commands:
200-
- .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py
201-
202-
- label: "wheel TPU Test 13: ragged_paged_attention_test.py"
203-
key: tpu_test_13
204-
soft_fail: true
205-
agents:
206-
queue: tpu_v6e_queue
207-
commands:
208-
- .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/tpu_inference/tests/ragged_paged_attention_test.py
209-
210-
# -----------------------------------------------------------------
211-
# NOTIFICATION STEP
212-
# -----------------------------------------------------------------
213-
- label: "wheel TPU V1 Test Notification"
214-
depends_on:
215-
- tpu_test_0
216-
- tpu_test_1
217-
- tpu_test_2
218-
- tpu_test_3
219-
- tpu_test_4
220-
- tpu_test_5
221-
- tpu_test_6
222-
- tpu_test_7
223-
- tpu_test_8
224-
- tpu_test_9
225-
- tpu_test_10
226-
- tpu_test_11
227-
- tpu_test_12
228-
- tpu_test_13
229-
agents:
230-
queue: tpu_v6e_queue
231-
commands: "bash .buildkite/scripts/check_results.sh 'TPU V1 Tests Failed' tpu_test_0 tpu_test_1 tpu_test_2 tpu_test_3 tpu_test_4 tpu_test_5 tpu_test_6 tpu_test_7 tpu_test_8 tpu_test_9 tpu_test_10 tpu_test_11 tpu_test_12 tpu_test_13"
11+
TEST_MODEL: meta-llama/Llama-3.1-8B-Instruct
12+
TENSOR_PARALLEL_SIZE: 1
13+
MINIMUM_THROUGHPUT_THRESHOLD: 10.77
14+
INPUT_LEN: 1800
15+
OUTPUT_LEN: 128
16+
PREFIX_LEN: 0
17+
MAX_MODEL_LEN: 2048
18+
MAX_NUM_SEQS: 256
19+
MAX_NUM_BATCHED_TOKENS: 1024
20+
commands:
21+
- |
22+
/usr/local/lib/python3.12/site-packages/.buildkite/scripts/run_with_pypi.sh bash /usr/local/lib/python3.12/site-packages/tpu_inference/tests/e2e/benchmarking/benchmark.sh
23+
24+
- label: "Performance benchmarks for Qwen/Qwen3-4B"
25+
key: "Qwen_Qwen3-4B_Benchmark"
26+
agents:
27+
queue: tpu_v6e_queue
28+
env:
29+
TEST_MODEL: Qwen/Qwen3-4B
30+
TENSOR_PARALLEL_SIZE: 1
31+
MINIMUM_THROUGHPUT_THRESHOLD: 11.00
32+
INPUT_LEN: 1800
33+
OUTPUT_LEN: 128
34+
PREFIX_LEN: 0
35+
MAX_MODEL_LEN: 2048
36+
MAX_NUM_SEQS: 94
37+
MAX_NUM_BATCHED_TOKENS: 4096
38+
commands:
39+
- |
40+
/usr/local/lib/python3.12/site-packages/.buildkite/scripts/run_with_pypi.sh bash /usr/local/lib/python3.12/site-packages/tpu_inference/tests/e2e/benchmarking/benchmark.sh
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#!/bin/bash
#
# Build a vLLM-TPU wheel pinned against a specific tpu-inference release.
#
# Usage:
#   build_vllm_tpu.sh <tpu-inference-version> <vllm-tpu-version> [vllm-branch-or-tag]
#
# Clones vLLM, pins tpu-inference in requirements/tpu.txt, runs the upstream
# TPU build script, and restores requirements/tpu.txt afterwards (even when
# the build fails, via an EXIT trap). The wheel lands in vllm/dist.

set -euo pipefail

die() { printf 'ERROR: %s\n' "$*" >&2; exit 1; }

# --- Argument Validation (before first use: 'set -u' is active) ---
if [ "$#" -lt 2 ]; then
  echo "Usage: $0 <tpu-inference-version> <vllm-tpu-version> [vllm-branch-or-tag]" >&2
  echo "  [vllm-branch-or-tag] is optional, defaults to 'main'." >&2
  exit 1
fi

# --- Script Configuration ---
TPU_INFERENCE_VERSION=$1
VLLM_TPU_VERSION=$2
VLLM_BRANCH=${3:-main}
readonly VLLM_REPO="https://github.com/vllm-project/vllm.git"
readonly REPO_DIR="vllm"

echo "--- Starting vLLM-TPU wheel build ---"
echo "TPU Inference Version: ${TPU_INFERENCE_VERSION}"
echo "vLLM-TPU Version: ${VLLM_TPU_VERSION}"
echo "vLLM Branch/Tag: ${VLLM_BRANCH}"

# --- Step 1: Clone vLLM repository ---
if [ -d "$REPO_DIR" ]; then
  echo "Repository '$REPO_DIR' already exists. Skipping clone."
else
  echo "Cloning vLLM repository..."
  git clone "$VLLM_REPO"
fi
cd "$REPO_DIR" || die "Cannot cd into '$REPO_DIR'."

# --- Step 1.5: Checkout the specified vLLM branch/tag ---
echo "Checking out vLLM branch/tag: ${VLLM_BRANCH}..."
git checkout "$VLLM_BRANCH" \
  || die "Failed to checkout branch/tag '${VLLM_BRANCH}'. Please check the branch/tag name."
echo "Successfully checked out ${VLLM_BRANCH}."
# A tag checkout leaves a detached HEAD with no upstream, so pull best-effort.
git pull || echo "Warning: Failed to pull updates (may be on a tag)."

# --- Step 2: Update tpu-inference version in requirements ---
readonly REQUIRED_LINE="tpu-inference==${TPU_INFERENCE_VERSION}"
readonly REQUIREMENTS_FILE="requirements/tpu.txt"
readonly BACKUP_FILE="${REQUIREMENTS_FILE}.bak"

# Restore the requirements file on ANY exit — success, build failure, or
# signal — so a failed build never leaves a locally modified checkout behind.
cleanup() {
  if [ -f "$BACKUP_FILE" ]; then
    echo "Reverting $REQUIREMENTS_FILE from backup."
    rm -f -- "$REQUIREMENTS_FILE"
    mv -- "$BACKUP_FILE" "$REQUIREMENTS_FILE"
    echo "Cleanup complete."
  else
    echo "Warning: Backup file $BACKUP_FILE not found. Skipping revert."
  fi
}
trap cleanup EXIT

echo "Updating tpu-inference version in $REQUIREMENTS_FILE..."

# Fail early with a clear message instead of letting grep abort cryptically.
[ -f "$REQUIREMENTS_FILE" ] || die "$REQUIREMENTS_FILE not found. Is this a vLLM checkout?"

# Ensure the file ends with a newline so an appended line is not glued onto
# the last existing line. $(tail -c 1 ...) is empty iff the last byte is '\n'.
if [ "$(tail -c 1 "$REQUIREMENTS_FILE")" != "" ]; then
  printf '\n' >> "$REQUIREMENTS_FILE"
  echo "(Action: Added missing newline to the end of $REQUIREMENTS_FILE for safety.)"
fi

if grep -q "^tpu-inference==" "$REQUIREMENTS_FILE"; then
  # Replace the existing pin; sed -i.bak creates the backup cleanup reverts
  # from. '|' delimiter keeps the substitution safe if the version ever
  # contains '/' (e.g. a local version label).
  echo "(Action: Existing version found. Replacing.)"
  sed -i.bak "s|^tpu-inference==.*|$REQUIRED_LINE|" "$REQUIREMENTS_FILE"
else
  # No pin present: back up the ORIGINAL content first, then append.
  # (A bare 'touch' here would make the later revert truncate the file.)
  echo "(Action: Line not found. Appending new dependency.)"
  cp -- "$REQUIREMENTS_FILE" "$BACKUP_FILE"
  echo "$REQUIRED_LINE" >> "$REQUIREMENTS_FILE"
fi

# --- Step 3: Execute the vLLM TPU build script ---
echo "Ensuring 'build' package is installed..."
pip install build
echo "Executing the vLLM TPU build script..."
bash tools/vllm-tpu/build.sh "$VLLM_TPU_VERSION"

echo "--- Build complete! ---"
echo "The wheel file can be found in the 'vllm/dist' directory."

# --- Step 4: Cleanup and revert run automatically via the EXIT trap ---
echo "Script finished."

docker/Dockerfile.pypi

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,25 @@ RUN export TPU_INFERENCE_VERSION=$(pip index versions tpu-inference --pre 2>/dev
1818
echo -n "${TPU_INFERENCE_VERSION}" > /tmp/tpu_inference_version
1919

2020
# Clone vLLM
21+
WORKDIR /workspace
22+
RUN export VLLM_TPU_VERSION=$(cat /tmp/tpu_inference_version) && \
23+
bash tpu_inference/.buildkite/scripts/build_vllm_tpu.sh ${VLLM_TPU_VERSION} ${VLLM_TPU_VERSION}
24+
25+
# Install vllm-tpu from whl
2126
WORKDIR /workspace/vllm
22-
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
23-
RUN git clone $VLLM_REPO /workspace/vllm
24-
RUN pip install build
25-
RUN export TPU_INFERENCE_VERSION=$(cat /tmp/tpu_inference_version) && \
26-
sed -i "s/^tpu-inference==.*/tpu-inference==${TPU_INFERENCE_VERSION}/" requirements/tpu.txt && \
27-
bash tools/vllm-tpu/build.sh ${TPU_INFERENCE_VERSION}
28-
29-
# Install vllm-tpu from wheel
3027
RUN pip install --no-cache-dir dist/*.whl
3128

3229
# Install test dependencies
33-
RUN python3 -m pip install -e tests/vllm_test_utils
30+
RUN python3 -m pip install tests/vllm_test_utils
3431
RUN python3 -m pip install --no-cache-dir \
3532
git+https://github.com/thuml/depyf.git \
3633
pytest-asyncio \
3734
git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d#egg=lm-eval[api] \
3835
pytest-cov \
3936
tblib
4037

41-
# Set environment variable to use site-packages
42-
ENV FORCE_USE_SITE_PACKAGES=1
38+
# Remove repository
39+
WORKDIR /workspace
40+
RUN rm -rf /workspace/vllm /workspace/tpu_inference
4341

4442
CMD ["/bin/bash"]

0 commit comments

Comments
 (0)