@@ -415,6 +415,9 @@ def test_merged_column_parallel_linear(model, bias, mesh, fuse_matmuls,
 @pytest.mark.parametrize("topk", [2])
 def test_fused_moe(use_ep, mesh, num_tokens, intermediate_size, hidden_size,
                    num_experts, topk):
+    if 'TPU7x' in jax.devices()[0].device_kind:
+        pytest.skip("Skipping test on TPU TPU7x.")
+
     torch.manual_seed(42)
     dtype = torch.bfloat16
 
@@ -494,6 +497,9 @@ def test_fused_moe(use_ep, mesh, num_tokens, intermediate_size, hidden_size,
 @pytest.mark.parametrize("topk", [2])
 def test_fused_moe_bias(mesh, num_tokens, intermediate_size, hidden_size,
                         num_experts, topk):
+    if 'TPU7x' in jax.devices()[0].device_kind:
+        pytest.skip("Skipping test on TPU TPU7x.")
+
     torch.manual_seed(42)
     dtype = torch.bfloat16
 
@@ -560,6 +566,9 @@ def test_fused_moe_bias(mesh, num_tokens, intermediate_size, hidden_size,
 @pytest.mark.parametrize("activation", ["silu", "swigluoai"])
 def test_fused_moe_activation(mesh, num_tokens, intermediate_size, hidden_size,
                               num_experts, topk, activation):
+    if 'TPU7x' in jax.devices()[0].device_kind:
+        pytest.skip("Skipping test on TPU TPU7x.")
+
     torch.manual_seed(42)
     dtype = torch.bfloat16
 
@@ -619,6 +628,8 @@ def test_fused_moe_activation(mesh, num_tokens, intermediate_size, hidden_size,
 @pytest.mark.parametrize("has_bias", [False, True])
 def test_fused_moe_use_kernel(mesh, num_tokens, intermediate_size, hidden_size,
                               num_experts, topk, has_bias):
+    if 'TPU7x' in jax.devices()[0].device_kind:
+        pytest.skip("Skipping test on TPU TPU7x.")
 
     if jax.local_device_count() < 8:
         pytest.skip("Test requires at least 8 devices")
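Note: the same TPU7x guard is repeated verbatim in all four tests. A minimal sketch (not part of this diff) of how it could be factored into a shared helper, assuming the tests live in the same module; the helper name skip_if_tpu7x is hypothetical:

import jax
import pytest


def skip_if_tpu7x():
    # Hypothetical helper: skip the calling test when the first local device
    # is a TPU7x, mirroring the guard added in the hunks above.
    if 'TPU7x' in jax.devices()[0].device_kind:
        pytest.skip("Skipping test on TPU TPU7x.")

Each test would then call skip_if_tpu7x() as its first statement instead of repeating the check inline.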