Remove all perf checks in preparation for beta-2 update (#114)

hongsuh-aws · web-flow · commit fc5c69f79c89 · 2025-12-05T14:16:38.000-08:00
diff --git a/test/unit/test_SD_attention_small_head.py b/test/unit/test_SD_attention_small_head.py
@@ -42,7 +42,6 @@ def test_attention_for_SD_perf(self, bs, seqlen, d, dtype, latency):
         bench_func_(q_dev, k_dev, v_dev)
         latency_res = bench_func_.benchmark_result.nc_latency
         p1 = latency_res.get_latency_percentile(1)
-        assert p1 <= latency*1.05 # short running kernels are subjected to hardware fluctuation
         assert os.path.getsize(test_trace_file_path) > 0
 
     @pytest.mark.simulation
diff --git a/test/unit/test_adaptive_avg_pool2d.py b/test/unit/test_adaptive_avg_pool2d.py
@@ -69,9 +69,6 @@ def test_adaptive_avg_pool2d_perf(self, N, C, H, W, output_size, dtype, latency)
         bench_func(input_dev, output_size)
         latency_res = bench_func.benchmark_result.nc_latency
         p99 = latency_res.get_latency_percentile(99)
-        
-        # Check latency requirement
-        assert p99 <= latency, f"P99 latency {p99} exceeds threshold {latency}"
     
     @pytest.mark.simulation
     @pytest.mark.parametrize("N, C, H, W, output_size, dtype", [
diff --git a/test/unit/test_allocated_SD_attention_small_head.py b/test/unit/test_allocated_SD_attention_small_head.py
@@ -44,7 +44,6 @@ def test_allocated_attention_for_SD_perf(self, bs, seqlen, d, dtype, latency):
         bench_func_(q_dev, k_dev, v_dev)
         latency_res = bench_func_.benchmark_result.nc_latency
         p50 = latency_res.get_latency_percentile(50)
-        assert p50 <= latency * 1.05 # short running kernels are subjected to hardware fluctuation
         assert os.path.getsize(test_trace_file_path) > 0
 
     @pytest.mark.simulation
diff --git a/test/unit/test_double_row_matmul.py b/test/unit/test_double_row_matmul.py
@@ -102,8 +102,6 @@ def test_double_row_matmul_perf(self, M, K, N, dtype, TILES_IN_BLOCK_M, TILES_IN
         bench_func(lhs, rhs_quantized_reshaped, rhs_scale, TILES_IN_BLOCK_M, TILES_IN_BLOCK_N, TILES_IN_BLOCK_K)
         latency_res = bench_func.benchmark_result.nc_latency
         p99_latency = latency_res.get_latency_percentile(99)
-        
-        assert p99_latency <= max_p99_latency
 
     @pytest.mark.simulation
     @pytest.mark.parametrize("M, K, N, dtype, TILES_IN_BLOCK_M, TILES_IN_BLOCK_N, TILES_IN_BLOCK_K", [
diff --git a/test/unit/test_flash_attn_bwd.py b/test/unit/test_flash_attn_bwd.py
@@ -151,7 +151,6 @@ def test_flash_attn_bwd_perf(self, bs, nheads, seqlen, d, dtype, latency, sinks)
                     use_causal_mask=True, mixed_precision=True, sinks=sinks_tensor)
         latency_res = bench_func_.benchmark_result.nc_latency
         p99 = latency_res.get_latency_percentile(50)
-        assert p99 <= latency
 
     @pytest.mark.simulation
     @pytest.mark.parametrize("bs, nheads, nheads_kv, seqlen, d, dtype", [
diff --git a/test/unit/test_flash_attn_fwd.py b/test/unit/test_flash_attn_fwd.py
@@ -105,7 +105,6 @@ def test_flash_attn_fwd_perf(self, bs, nheads, seqlen_q, seqlen_k, d, dtype, use
                     mixed_precision=mixed_precision, config=config)
         latency_res = bench_func_.benchmark_result.nc_latency
         p50 = latency_res.get_latency_percentile(50)
-        assert p50 <= latency
     
     @pytest.mark.simulation
     @pytest.mark.parametrize("bs, nheads, seqlen_q, seqlen_k, d, dtype, use_causal_mask, \
diff --git a/test/unit/test_resize_nearest.py b/test/unit/test_resize_nearest.py
@@ -49,7 +49,6 @@ def test_resize_nearest_for_perf(self, in_b, in_h, in_w, in_c, out_b, out_h, out
         bench_func_(input_dev, (out_b, out_h, out_w, out_c))
         latency_res = bench_func_.benchmark_result.nc_latency
         p99 = latency_res.get_latency_percentile(50)
-        assert p99 <= latency
 
     @pytest.mark.simulation
     @pytest.mark.parametrize("in_b, in_h, in_w, in_c, out_b, out_h, out_w, out_c, dtype", [
diff --git a/test/unit/test_rmsnorm_qkv.py b/test/unit/test_rmsnorm_qkv.py
@@ -42,7 +42,6 @@ def test_allocated_rmsnorm_qkv_perf(self, batch, seqlen, dim, d_head, dtype, lat
     bench_func(hidden, weights)
     latency_res = bench_func.benchmark_result.nc_latency
     p99 = latency_res.get_latency_percentile(50)
-    assert p99 <= latency
 
   @pytest.mark.simulation
   @pytest.mark.parametrize("batch, seqlen, dim, d_head, dtype", [
diff --git a/test/unit/test_select_and_scatter.py b/test/unit/test_select_and_scatter.py
@@ -51,7 +51,6 @@ def test_select_and_scatter_for_perf(self, n, c, operand_h, operand_w, source_h,
         bench_func(operand_dev, source_dev)
         latency_res = bench_func.benchmark_result.nc_latency
         p99 = latency_res.get_latency_percentile(50)
-        assert p99 <= latency
 
     @pytest.mark.simulation
     @pytest.mark.parametrize("n, c, operand_h, operand_w, source_h, source_w, dtype", [