Skip to content

Commit fc5c69f

Browse files
authored
Remove all perf checks in preparation for beta-2 update (#114)
1 parent 0898a77 commit fc5c69f

9 files changed

Lines changed: 0 additions & 12 deletions

test/unit/test_SD_attention_small_head.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,6 @@ def test_attention_for_SD_perf(self, bs, seqlen, d, dtype, latency):
4242
bench_func_(q_dev, k_dev, v_dev)
4343
latency_res = bench_func_.benchmark_result.nc_latency
4444
p1 = latency_res.get_latency_percentile(1)
45-
assert p1 <= latency*1.05 # short running kernels are subjected to hardware fluctuation
4645
assert os.path.getsize(test_trace_file_path) > 0
4746

4847
@pytest.mark.simulation

test/unit/test_adaptive_avg_pool2d.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -69,9 +69,6 @@ def test_adaptive_avg_pool2d_perf(self, N, C, H, W, output_size, dtype, latency)
6969
bench_func(input_dev, output_size)
7070
latency_res = bench_func.benchmark_result.nc_latency
7171
p99 = latency_res.get_latency_percentile(99)
72-
73-
# Check latency requirement
74-
assert p99 <= latency, f"P99 latency {p99} exceeds threshold {latency}"
7572

7673
@pytest.mark.simulation
7774
@pytest.mark.parametrize("N, C, H, W, output_size, dtype", [

test/unit/test_allocated_SD_attention_small_head.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,6 @@ def test_allocated_attention_for_SD_perf(self, bs, seqlen, d, dtype, latency):
4444
bench_func_(q_dev, k_dev, v_dev)
4545
latency_res = bench_func_.benchmark_result.nc_latency
4646
p50 = latency_res.get_latency_percentile(50)
47-
assert p50 <= latency * 1.05 # short running kernels are subjected to hardware fluctuation
4847
assert os.path.getsize(test_trace_file_path) > 0
4948

5049
@pytest.mark.simulation

test/unit/test_double_row_matmul.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -102,8 +102,6 @@ def test_double_row_matmul_perf(self, M, K, N, dtype, TILES_IN_BLOCK_M, TILES_IN
102102
bench_func(lhs, rhs_quantized_reshaped, rhs_scale, TILES_IN_BLOCK_M, TILES_IN_BLOCK_N, TILES_IN_BLOCK_K)
103103
latency_res = bench_func.benchmark_result.nc_latency
104104
p99_latency = latency_res.get_latency_percentile(99)
105-
106-
assert p99_latency <= max_p99_latency
107105

108106
@pytest.mark.simulation
109107
@pytest.mark.parametrize("M, K, N, dtype, TILES_IN_BLOCK_M, TILES_IN_BLOCK_N, TILES_IN_BLOCK_K", [

test/unit/test_flash_attn_bwd.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,6 @@ def test_flash_attn_bwd_perf(self, bs, nheads, seqlen, d, dtype, latency, sinks)
151151
use_causal_mask=True, mixed_precision=True, sinks=sinks_tensor)
152152
latency_res = bench_func_.benchmark_result.nc_latency
153153
p99 = latency_res.get_latency_percentile(50)
154-
assert p99 <= latency
155154

156155
@pytest.mark.simulation
157156
@pytest.mark.parametrize("bs, nheads, nheads_kv, seqlen, d, dtype", [

test/unit/test_flash_attn_fwd.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,6 @@ def test_flash_attn_fwd_perf(self, bs, nheads, seqlen_q, seqlen_k, d, dtype, use
105105
mixed_precision=mixed_precision, config=config)
106106
latency_res = bench_func_.benchmark_result.nc_latency
107107
p50 = latency_res.get_latency_percentile(50)
108-
assert p50 <= latency
109108

110109
@pytest.mark.simulation
111110
@pytest.mark.parametrize("bs, nheads, seqlen_q, seqlen_k, d, dtype, use_causal_mask, \

test/unit/test_resize_nearest.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,6 @@ def test_resize_nearest_for_perf(self, in_b, in_h, in_w, in_c, out_b, out_h, out
4949
bench_func_(input_dev, (out_b, out_h, out_w, out_c))
5050
latency_res = bench_func_.benchmark_result.nc_latency
5151
p99 = latency_res.get_latency_percentile(50)
52-
assert p99 <= latency
5352

5453
@pytest.mark.simulation
5554
@pytest.mark.parametrize("in_b, in_h, in_w, in_c, out_b, out_h, out_w, out_c, dtype", [

test/unit/test_rmsnorm_qkv.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,6 @@ def test_allocated_rmsnorm_qkv_perf(self, batch, seqlen, dim, d_head, dtype, lat
4242
bench_func(hidden, weights)
4343
latency_res = bench_func.benchmark_result.nc_latency
4444
p99 = latency_res.get_latency_percentile(50)
45-
assert p99 <= latency
4645

4746
@pytest.mark.simulation
4847
@pytest.mark.parametrize("batch, seqlen, dim, d_head, dtype", [

test/unit/test_select_and_scatter.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,6 @@ def test_select_and_scatter_for_perf(self, n, c, operand_h, operand_w, source_h,
5151
bench_func(operand_dev, source_dev)
5252
latency_res = bench_func.benchmark_result.nc_latency
5353
p99 = latency_res.get_latency_percentile(50)
54-
assert p99 <= latency
5554

5655
@pytest.mark.simulation
5756
@pytest.mark.parametrize("n, c, operand_h, operand_w, source_h, source_w, dtype", [

0 commit comments

Comments (0)