From e9ec9c15edc3656b5473a916f417f49187be323a Mon Sep 17 00:00:00 2001 From: cl Date: Wed, 3 Jun 2026 01:50:01 +0800 Subject: [PATCH] Add coalesce inline-view filter benchmarks --- arrow/benches/coalesce_kernels.rs | 98 +++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/arrow/benches/coalesce_kernels.rs b/arrow/benches/coalesce_kernels.rs index 0816d1a2e8bb..4affcc346e89 100644 --- a/arrow/benches/coalesce_kernels.rs +++ b/arrow/benches/coalesce_kernels.rs @@ -51,6 +51,13 @@ fn add_all_filter_benchmarks(c: &mut Criterion) { true, )])); + // Single BinaryViewArray + let single_binaryview_schema = SchemaRef::new(Schema::new(vec![Field::new( + "value", + DataType::BinaryView, + true, + )])); + // Mixed primitive, StringViewArray let mixed_utf8view_schema = SchemaRef::new(Schema::new(vec![ Field::new("int32_val", DataType::Int32, true), @@ -58,6 +65,13 @@ fn add_all_filter_benchmarks(c: &mut Criterion) { Field::new("utf8view_val", DataType::Utf8View, true), ])); + // Mixed primitive, BinaryViewArray + let mixed_binaryview_schema = SchemaRef::new(Schema::new(vec![ + Field::new("int32_val", DataType::Int32, true), + Field::new("float_val", DataType::Float64, true), + Field::new("binaryview_val", DataType::BinaryView, true), + ])); + // Mixed primitive, StringArray let mixed_utf8_schema = SchemaRef::new(Schema::new(vec![ Field::new("int32_val", DataType::Int32, true), @@ -106,6 +120,42 @@ fn add_all_filter_benchmarks(c: &mut Criterion) { } .build(); + FilterBenchmarkBuilder { + c, + name: "single_utf8view (max_string_len=8)", + batch_size, + num_output_batches: 50, + null_density, + selectivity, + max_string_len: 8, + schema: &single_schema, + } + .build(); + + FilterBenchmarkBuilder { + c, + name: "single_binaryview", + batch_size, + num_output_batches: 50, + null_density, + selectivity, + max_string_len: 30, + schema: &single_binaryview_schema, + } + .build(); + + FilterBenchmarkBuilder { + c, + name: "single_binaryview (max_string_len=8)", + batch_size, + num_output_batches: 50, + null_density, + selectivity, + max_string_len: 8, + schema: &single_binaryview_schema, + } + .build(); + // Model mostly short strings, but some longer ones FilterBenchmarkBuilder { c, @@ -119,6 +169,18 @@ fn add_all_filter_benchmarks(c: &mut Criterion) { } .build(); + FilterBenchmarkBuilder { + c, + name: "mixed_utf8view (max_string_len=8)", + batch_size, + num_output_batches: 20, + null_density, + selectivity, + max_string_len: 8, + schema: &mixed_utf8view_schema, + } + .build(); + // Model mostly longer strings FilterBenchmarkBuilder { c, @@ -132,6 +194,42 @@ fn add_all_filter_benchmarks(c: &mut Criterion) { } .build(); + FilterBenchmarkBuilder { + c, + name: "mixed_binaryview (max_string_len=20)", + batch_size, + num_output_batches: 20, + null_density, + selectivity, + max_string_len: 20, + schema: &mixed_binaryview_schema, + } + .build(); + + FilterBenchmarkBuilder { + c, + name: "mixed_binaryview (max_string_len=8)", + batch_size, + num_output_batches: 20, + null_density, + selectivity, + max_string_len: 8, + schema: &mixed_binaryview_schema, + } + .build(); + + FilterBenchmarkBuilder { + c, + name: "mixed_binaryview (max_string_len=128)", + batch_size, + num_output_batches: 20, + null_density, + selectivity, + max_string_len: 128, + schema: &mixed_binaryview_schema, + } + .build(); + FilterBenchmarkBuilder { c, name: "mixed_utf8",