@@ -2,230 +2,39 @@ steps:
22 # -----------------------------------------------------------------
33 # TEST STEPS - Calling wrapper
44 # -----------------------------------------------------------------
5- - label : " wheel E2E MLPerf tests for JAX models "
6- key : test_0
5+ - label : "Performance benchmarks for meta-llama/Llama-3.1-8B-Instruct"  # human-readable step title
6+ key : "meta-llama_Llama-3_1-8B-Instruct_Benchmark"  # step identifier; no surrounding whitespace (NOTE(review): Buildkite is expected to reject keys containing spaces - confirm)
77 soft_fail : true
88 agents :
9- queue : tpu_v6e_queue
10- commands :
11- - .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/mlperf.sh
12-
13- - label : " wheel E2E multi modality test"
14- key : test_5
15- soft_fail : true
16- agents :
17- queue : tpu_v6e_queue
18- commands :
19- - |
20- .buildkite/scripts/run_with_pypi.sh \
21- bash -c 'python3 -m pytest -s -v -x /workspace/tpu_inference/tests/e2e/test_multi_modal_inference.py && \
22- bash /workspace/tpu_inference/tests/e2e/benchmarking/mm_bench.sh'
23-
24- - label : " wheel E2E speculative decoding test"
25- key : test_6
26- soft_fail : true
27- agents :
28- queue : tpu_v6e_queue
29- commands :
30- - |
31- .buildkite/scripts/run_with_pypi.sh \
32- bash -c 'python3 -m pytest -s -v -x /workspace/tpu_inference/tests/e2e/test_speculative_decoding.py'
33-
34- - label : " wheel JAX unit tests"
35- key : test_7
36- soft_fail : true
37- agents :
38- queue : tpu_v6e_queue
39- commands :
40- - |
41- .buildkite/scripts/run_with_pypi.sh \
42- python3 -m pytest -s -v -x /workspace/tpu_inference/tests/ \
43- --ignore=/workspace/tpu_inference/tests/kernels \
44- --ignore=/workspace/tpu_inference/tests/lora \
45- --ignore=/workspace/tpu_inference/tests/e2e \
46- --ignore=/workspace/tpu_inference/tpu_inference/mock \
47- --cov-config=/workspace/tpu_inference/.coveragerc --cov tpu_inference --cov-report term-missing --cov-fail-under=69
48-
49- - label : " wheel JAX unit tests - kernels"
50- key : test_8
51- soft_fail : true
52- agents :
53- queue : tpu_v6e_queue
54- commands :
55- - |
56- .buildkite/scripts/run_with_pypi.sh \
57- python3 -m pytest -s -v -x /workspace/tpu_inference/tests/kernels \
58- --ignore=/workspace/tpu_inference/tests/kernels/ragged_paged_attention_kernel_v2_test.py \
59- --ignore=/workspace/tpu_inference/tests/kernels/ragged_kv_cache_update_v2_test.py \
60- --ignore=/workspace/tpu_inference/tests/kernels/collectives
61-
62- - label : " wheel JAX unit tests - collective kernels"
63- key : test_9
64- soft_fail : true
65- agents :
66- queue : tpu_v6e_8_queue
67- commands :
68- - |
69- .buildkite/scripts/run_with_pypi.sh \
70- python3 -m pytest -s -v -x /workspace/tpu_inference/tests/kernels/collectives
71-
72- - label : " wheel lora tests for JAX + vLLM models"
73- key : test_10
74- soft_fail : true
75- agents :
76- queue : tpu_v6e_queue
77- commands :
78- - |
79- .buildkite/scripts/run_with_pypi.sh \
80- bash -c 'MODEL_IMPL_TYPE=vllm TPU_BACKEND_TYPE=jax python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_lora.py'
81-
82- - label : " wheel E2E MLperf tests for DeepSeek-R1 (no accuracy, 12-decoder layers only)"
83- key : test_12
84- soft_fail : true
9+ queue : tpu_v6e_queue
8510 env :
86- NEW_MODEL_DESIGN : " True"
87- USE_V6E8_QUEUE : " True"
88- SKIP_ACCURACY_TESTS : " True"
89- VLLM_MLA_DISABLE : " 1"
90- JAX_RANDOM_WEIGHTS : " True"
91- agents :
92- queue : tpu_v6e_8_queue
93- commands :
94- - .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/mlperf.sh -m deepseek-ai/DeepSeek-R1-0528
95-
96-
97-
98- - label : " wheel TPU Test 0: test_perf.py"
99- key : tpu_test_0
100- soft_fail : true
101- agents :
102- queue : tpu_v6e_queue
103- commands :
104- - .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_perf.py
105-
106- - label : " wheel TPU Test 1: test_compilation.py"
107- key : tpu_test_1
108- soft_fail : true
109- agents :
110- queue : tpu_v6e_queue
111- commands :
112- - .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_compilation.py
113-
114- - label : " wheel TPU Test 2: test_basic.py"
115- key : tpu_test_2
116- soft_fail : true
117- agents :
118- queue : tpu_v6e_queue
119- commands :
120- - .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py
121-
122- - label : " wheel TPU Test 3: test_accuracy.py (v1)"
123- key : tpu_test_3
124- soft_fail : true
125- agents :
126- queue : tpu_v6e_queue
127- commands :
128- - .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine
129-
130- - label : " wheel TPU Test 4: test_quantization_accuracy.py"
131- key : tpu_test_4
132- soft_fail : true
133- agents :
134- queue : tpu_v6e_queue
135- commands :
136- - .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py
137-
138- - label : " wheel TPU Test 5: examples/offline_inference/tpu.py"
139- key : tpu_test_5
140- soft_fail : true
141- agents :
142- queue : tpu_v6e_queue
143- commands :
144- - .buildkite/scripts/run_with_pypi.sh python3 /workspace/vllm/examples/offline_inference/tpu.py
145-
146- - label : " wheel TPU Test 6: test_tpu_model_runner.py"
147- key : tpu_test_6
148- soft_fail : true
149- agents :
150- queue : tpu_v6e_queue
151- commands :
152- - .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/worker/test_tpu_model_runner.py
153-
154- - label : " wheel TPU Test 7: test_sampler.py"
155- key : tpu_test_7
156- soft_fail : true
157- agents :
158- queue : tpu_v6e_queue
159- commands :
160- - .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py
161-
162- - label : " wheel TPU Test 8: test_topk_topp_sampler.py"
163- key : tpu_test_8
164- soft_fail : true
165- agents :
166- queue : tpu_v6e_queue
167- commands :
168- - .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py
169-
170- - label : " wheel TPU Test 9: test_multimodal.py"
171- key : tpu_test_9
172- soft_fail : true
173- agents :
174- queue : tpu_v6e_queue
175- commands :
176- - .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py
177-
178- - label : " wheel TPU Test 10: test_pallas.py"
179- key : tpu_test_10
180- soft_fail : true
181- agents :
182- queue : tpu_v6e_queue
183- commands :
184- - .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py
185-
186- - label : " wheel TPU Test 11: test_struct_output_generate.py"
187- key : tpu_test_11
188- soft_fail : true
189- agents :
190- queue : tpu_v6e_queue
191- commands :
192- - .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py
193-
194- - label : " wheel TPU Test 12: test_moe_pallas.py"
195- key : tpu_test_12
196- soft_fail : true
197- agents :
198- queue : tpu_v6e_queue
199- commands :
200- - .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py
201-
202- - label : " wheel TPU Test 13: ragged_paged_attention_test.py"
203- key : tpu_test_13
204- soft_fail : true
205- agents :
206- queue : tpu_v6e_queue
207- commands :
208- - .buildkite/scripts/run_with_pypi.sh python3 -m pytest -s -v /workspace/tpu_inference/tests/ragged_paged_attention_test.py
209-
210- # -----------------------------------------------------------------
211- # NOTIFICATION STEP
212- # -----------------------------------------------------------------
213- - label : " wheel TPU V1 Test Notification"
214- depends_on :
215- - tpu_test_0
216- - tpu_test_1
217- - tpu_test_2
218- - tpu_test_3
219- - tpu_test_4
220- - tpu_test_5
221- - tpu_test_6
222- - tpu_test_7
223- - tpu_test_8
224- - tpu_test_9
225- - tpu_test_10
226- - tpu_test_11
227- - tpu_test_12
228- - tpu_test_13
229- agents :
230- queue : tpu_v6e_queue
231- commands : " bash .buildkite/scripts/check_results.sh 'TPU V1 Tests Failed' tpu_test_0 tpu_test_1 tpu_test_2 tpu_test_3 tpu_test_4 tpu_test_5 tpu_test_6 tpu_test_7 tpu_test_8 tpu_test_9 tpu_test_10 tpu_test_11 tpu_test_12 tpu_test_13"
11+ TEST_MODEL : meta-llama/Llama-3.1-8B-Instruct  # env entries below are presumably read by benchmark.sh - verify against the script
12+ TENSOR_PARALLEL_SIZE : "1"  # numeric-looking env values are quoted so they stay the exact strings written
13+ MINIMUM_THROUGHPUT_THRESHOLD : "10.77"
14+ INPUT_LEN : "1800"
15+ OUTPUT_LEN : "128"
16+ PREFIX_LEN : "0"
17+ MAX_MODEL_LEN : "2048"
18+ MAX_NUM_SEQS : "256"
19+ MAX_NUM_BATCHED_TOKENS : "1024"
20+ commands :
21+ - |
22+ /usr/local/lib/python3.12/site-packages/.buildkite/scripts/run_with_pypi.sh bash /usr/local/lib/python3.12/site-packages/tpu_inference/tests/e2e/benchmarking/benchmark.sh
23+
24+ - label : "Performance benchmarks for Qwen/Qwen3-4B"  # human-readable step title
25+ key : "Qwen_Qwen3-4B_Benchmark"  # step identifier; no surrounding whitespace (NOTE(review): Buildkite is expected to reject keys containing spaces - confirm)
26+ agents :
27+ queue : tpu_v6e_queue
28+ env :
29+ TEST_MODEL : Qwen/Qwen3-4B  # env entries below are presumably read by benchmark.sh - verify against the script
30+ TENSOR_PARALLEL_SIZE : "1"  # numeric-looking env values are quoted so they stay the exact strings written
31+ MINIMUM_THROUGHPUT_THRESHOLD : "11.00"  # unquoted 11.00 loads as a float and may lose its trailing zeros
32+ INPUT_LEN : "1800"
33+ OUTPUT_LEN : "128"
34+ PREFIX_LEN : "0"
35+ MAX_MODEL_LEN : "2048"
36+ MAX_NUM_SEQS : "94"
37+ MAX_NUM_BATCHED_TOKENS : "4096"
38+ commands :
39+ - |
40+ /usr/local/lib/python3.12/site-packages/.buildkite/scripts/run_with_pypi.sh bash /usr/local/lib/python3.12/site-packages/tpu_inference/tests/e2e/benchmarking/benchmark.sh
0 commit comments