diff --git a/datafusion/functions/benches/split_part.rs b/datafusion/functions/benches/split_part.rs index 7ef84a058920e..72ca6f66a00d4 100644 --- a/datafusion/functions/benches/split_part.rs +++ b/datafusion/functions/benches/split_part.rs @@ -19,7 +19,7 @@ use arrow::array::{ArrayRef, Int64Array, StringArray, StringViewArray}; use arrow::datatypes::{DataType, Field}; use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; use datafusion_common::config::ConfigOptions; -use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDF}; use datafusion_functions::string::split_part; use rand::distr::Alphanumeric; use rand::prelude::StdRng; @@ -29,14 +29,14 @@ use std::sync::Arc; const N_ROWS: usize = 8192; -/// Generate test data for split_part benchmarks -/// Creates strings with multiple parts separated by the delimiter +/// Creates strings with `num_parts` random alphanumeric segments of `part_len` +/// bytes each, joined by `delimiter`. fn gen_split_part_data( n_rows: usize, - num_parts: usize, // number of parts in each string (separated by delimiter) - part_len: usize, // length of each part - delimiter: &str, // the delimiter to use - use_string_view: bool, // false -> StringArray, true -> StringViewArray + num_parts: usize, + part_len: usize, + delimiter: &str, + use_string_view: bool, ) -> (ColumnarValue, ColumnarValue) { let mut rng = StdRng::seed_from_u64(42); @@ -73,303 +73,154 @@ fn gen_split_part_data( } } -fn gen_positions(n_rows: usize, position: i64) -> ColumnarValue { - let positions: Vec = vec![position; n_rows]; - ColumnarValue::Array(Arc::new(Int64Array::from(positions)) as ArrayRef) +#[expect(clippy::too_many_arguments)] +fn bench_split_part( + group: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>, + func: &ScalarUDF, + config_options: &Arc, + name: &str, + tag: &str, + strings: ColumnarValue, + delimiters: ColumnarValue, + position: i64, +) { + let positions: ColumnarValue = + ColumnarValue::Array(Arc::new(Int64Array::from(vec![position; N_ROWS]))); + let args = vec![strings, delimiters, positions]; + let arg_fields: Vec<_> = args + .iter() + .enumerate() + .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .collect(); + let return_type = match args[0].data_type() { + DataType::Utf8View => DataType::Utf8View, + _ => DataType::Utf8, + }; + let return_field = Field::new("f", return_type, true).into(); + + group.bench_function(BenchmarkId::new(name, tag), |b| { + b.iter(|| { + black_box( + func.invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(config_options), + }) + .expect("split_part should work"), + ) + }) + }); } fn criterion_benchmark(c: &mut Criterion) { let split_part_func = split_part(); let config_options = Arc::new(ConfigOptions::default()); - let mut group = c.benchmark_group("split_part"); - // Test different scenarios - // Scenario 1: Single-char delimiter, first position (should be fastest with optimization) - { - let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, ".", false); - let positions = gen_positions(N_ROWS, 1); - let args = vec![strings, delimiters, positions]; - let arg_fields: Vec<_> = args - .iter() - .enumerate() - .map(|(idx, arg)| { - Field::new(format!("arg_{idx}"), arg.data_type(), true).into() - }) - .collect(); - let return_field = Field::new("f", DataType::Utf8, true).into(); - - group.bench_function(BenchmarkId::new("single_char_delim", "pos_first"), |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }); - } - - // Scenario 2: Single-char delimiter, middle position + // Utf8, single-char delimiter, first position { let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, ".", false); - let positions = gen_positions(N_ROWS, 5); - let args = vec![strings, delimiters, positions]; - let arg_fields: Vec<_> = args - .iter() - .enumerate() - .map(|(idx, arg)| { - Field::new(format!("arg_{idx}"), arg.data_type(), true).into() - }) - .collect(); - let return_field = Field::new("f", DataType::Utf8, true).into(); - - group.bench_function(BenchmarkId::new("single_char_delim", "pos_middle"), |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }); + bench_split_part( + &mut group, + &split_part_func, + &config_options, + "utf8_single_char", + "pos_first", + strings, + delimiters, + 1, + ); } - // Scenario 3: Single-char delimiter, last position + // Utf8, single-char delimiter, middle position { let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, ".", false); - let positions = gen_positions(N_ROWS, 10); - let args = vec![strings, delimiters, positions]; - let arg_fields: Vec<_> = args - .iter() - .enumerate() - .map(|(idx, arg)| { - Field::new(format!("arg_{idx}"), arg.data_type(), true).into() - }) - .collect(); - let return_field = Field::new("f", DataType::Utf8, true).into(); - - group.bench_function(BenchmarkId::new("single_char_delim", "pos_last"), |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }); + bench_split_part( + &mut group, + &split_part_func, + &config_options, + "utf8_single_char", + "pos_middle", + strings, + delimiters, + 5, + ); } - // Scenario 4: Single-char delimiter, negative position (last element) + // Utf8, single-char delimiter, negative position { let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, ".", false); - let positions = gen_positions(N_ROWS, -1); - let args = vec![strings, delimiters, positions]; - let arg_fields: Vec<_> = args - .iter() - .enumerate() - .map(|(idx, arg)| { - Field::new(format!("arg_{idx}"), arg.data_type(), true).into() - }) - .collect(); - let return_field = Field::new("f", DataType::Utf8, true).into(); - - group.bench_function( - BenchmarkId::new("single_char_delim", "pos_negative"), - |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }, + bench_split_part( + &mut group, + &split_part_func, + &config_options, + "utf8_single_char", + "pos_negative", + strings, + delimiters, + -1, ); } - // Scenario 5: Multi-char delimiter, first position + // Utf8, multi-char delimiter, middle position { let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, "~@~", false); - let positions = gen_positions(N_ROWS, 1); - let args = vec![strings, delimiters, positions]; - let arg_fields: Vec<_> = args - .iter() - .enumerate() - .map(|(idx, arg)| { - Field::new(format!("arg_{idx}"), arg.data_type(), true).into() - }) - .collect(); - let return_field = Field::new("f", DataType::Utf8, true).into(); - - group.bench_function(BenchmarkId::new("multi_char_delim", "pos_first"), |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }); - } - - // Scenario 6: Multi-char delimiter, middle position - { - let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, "~@~", false); - let positions = gen_positions(N_ROWS, 5); - let args = vec![strings, delimiters, positions]; - let arg_fields: Vec<_> = args - .iter() - .enumerate() - .map(|(idx, arg)| { - Field::new(format!("arg_{idx}"), arg.data_type(), true).into() - }) - .collect(); - let return_field = Field::new("f", DataType::Utf8, true).into(); - - group.bench_function(BenchmarkId::new("multi_char_delim", "pos_middle"), |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }); + bench_split_part( + &mut group, + &split_part_func, + &config_options, + "utf8_multi_char", + "pos_middle", + strings, + delimiters, + 5, + ); } - // Scenario 7: StringViewArray, single-char delimiter, first position + // Utf8View, single-char delimiter, first position { let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, ".", true); - let positions = gen_positions(N_ROWS, 1); - let args = vec![strings, delimiters, positions]; - let arg_fields: Vec<_> = args - .iter() - .enumerate() - .map(|(idx, arg)| { - Field::new(format!("arg_{idx}"), arg.data_type(), true).into() - }) - .collect(); - let return_field = Field::new("f", DataType::Utf8, true).into(); - - group.bench_function( - BenchmarkId::new("string_view_single_char", "pos_first"), - |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }, + bench_split_part( + &mut group, + &split_part_func, + &config_options, + "utf8view_single_char", + "pos_first", + strings, + delimiters, + 1, ); } - // Scenario 8: Many parts (20), position near end - shows benefit of early termination + // Utf8, single-char delimiter, many long parts { - let (strings, delimiters) = gen_split_part_data(N_ROWS, 20, 8, ".", false); - let positions = gen_positions(N_ROWS, 2); - let args = vec![strings, delimiters, positions]; - let arg_fields: Vec<_> = args - .iter() - .enumerate() - .map(|(idx, arg)| { - Field::new(format!("arg_{idx}"), arg.data_type(), true).into() - }) - .collect(); - let return_field = Field::new("f", DataType::Utf8, true).into(); - - group.bench_function(BenchmarkId::new("many_parts_20", "pos_second"), |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }); + let (strings, delimiters) = gen_split_part_data(N_ROWS, 50, 16, ".", false); + bench_split_part( + &mut group, + &split_part_func, + &config_options, + "utf8_long_strings", + "pos_middle", + strings, + delimiters, + 25, + ); } - // Scenario 9: Long strings with many parts - worst case for old implementation + // Utf8View, single-char delimiter, middle position, long parts { - let (strings, delimiters) = gen_split_part_data(N_ROWS, 50, 16, "/", false); - let positions = gen_positions(N_ROWS, 1); - let args = vec![strings, delimiters, positions]; - let arg_fields: Vec<_> = args - .iter() - .enumerate() - .map(|(idx, arg)| { - Field::new(format!("arg_{idx}"), arg.data_type(), true).into() - }) - .collect(); - let return_field = Field::new("f", DataType::Utf8, true).into(); - - group.bench_function( - BenchmarkId::new("long_strings_50_parts", "pos_first"), - |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }, + let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 32, ".", true); + bench_split_part( + &mut group, + &split_part_func, + &config_options, + "utf8view_long_parts", + "pos_middle", + strings, + delimiters, + 5, ); } diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs index 0bd197818e4e2..d52c63c25e6c3 100644 --- a/datafusion/functions/src/string/split_part.rs +++ b/datafusion/functions/src/string/split_part.rs @@ -17,15 +17,14 @@ use crate::utils::utf8_to_str_type; use arrow::array::{ - ArrayRef, GenericStringArray, Int64Array, OffsetSizeTrait, StringArrayType, - StringViewArray, + ArrayRef, AsArray, GenericStringBuilder, Int64Array, StringArrayType, + StringLikeArrayBuilder, StringViewBuilder, }; -use arrow::array::{AsArray, GenericStringBuilder}; use arrow::datatypes::DataType; use datafusion_common::ScalarValue; use datafusion_common::cast::as_int64_array; use datafusion_common::types::{NativeType, logical_int64, logical_string}; -use datafusion_common::{DataFusionError, Result, exec_datafusion_err, exec_err}; +use datafusion_common::{Result, exec_datafusion_err, exec_err}; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, TypeSignatureClass, Volatility, }; @@ -97,7 +96,11 @@ impl ScalarUDFImpl for SplitPartFunc { } fn return_type(&self, arg_types: &[DataType]) -> Result { - utf8_to_str_type(&arg_types[0], "split_part") + if arg_types[0] == DataType::Utf8View { + Ok(DataType::Utf8View) + } else { + utf8_to_str_type(&arg_types[0], "split_part") + } } fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { @@ -123,71 +126,62 @@ impl ScalarUDFImpl for SplitPartFunc { // Unpack the ArrayRefs from the arguments let n_array = as_int64_array(&args[2])?; - let result = match (args[0].data_type(), args[1].data_type()) { - (DataType::Utf8View, DataType::Utf8View) => { - split_part_impl::<&StringViewArray, &StringViewArray, i32>( - &args[0].as_string_view(), - &args[1].as_string_view(), - n_array, - ) - } - (DataType::Utf8View, DataType::Utf8) => { - split_part_impl::<&StringViewArray, &GenericStringArray, i32>( - &args[0].as_string_view(), - &args[1].as_string::(), - n_array, - ) - } - (DataType::Utf8View, DataType::LargeUtf8) => { - split_part_impl::<&StringViewArray, &GenericStringArray, i32>( - &args[0].as_string_view(), - &args[1].as_string::(), - n_array, - ) - } - (DataType::Utf8, DataType::Utf8View) => { - split_part_impl::<&GenericStringArray, &StringViewArray, i32>( - &args[0].as_string::(), - &args[1].as_string_view(), - n_array, - ) - } - (DataType::LargeUtf8, DataType::Utf8View) => { - split_part_impl::<&GenericStringArray, &StringViewArray, i64>( - &args[0].as_string::(), - &args[1].as_string_view(), - n_array, - ) - } - (DataType::Utf8, DataType::Utf8) => { - split_part_impl::<&GenericStringArray, &GenericStringArray, i32>( - &args[0].as_string::(), - &args[1].as_string::(), - n_array, - ) - } - (DataType::LargeUtf8, DataType::LargeUtf8) => { - split_part_impl::<&GenericStringArray, &GenericStringArray, i64>( - &args[0].as_string::(), - &args[1].as_string::(), - n_array, - ) - } - (DataType::Utf8, DataType::LargeUtf8) => { - split_part_impl::<&GenericStringArray, &GenericStringArray, i32>( - &args[0].as_string::(), - &args[1].as_string::(), - n_array, + + // Dispatch on delimiter type for a given string array and builder. + macro_rules! split_part_for_delimiter_type { + ($str_arr:expr, $builder:expr) => { + match args[1].data_type() { + DataType::Utf8View => split_part_impl( + $str_arr, + &args[1].as_string_view(), + n_array, + $builder, + ), + DataType::Utf8 => split_part_impl( + $str_arr, + &args[1].as_string::(), + n_array, + $builder, + ), + DataType::LargeUtf8 => split_part_impl( + $str_arr, + &args[1].as_string::(), + n_array, + $builder, + ), + other => { + exec_err!("Unsupported delimiter type {other:?} for split_part") + } + } + }; + } + + let result = match args[0].data_type() { + DataType::Utf8View => split_part_for_delimiter_type!( + &args[0].as_string_view(), + StringViewBuilder::with_capacity(inferred_length) + ), + DataType::Utf8 => { + let str_arr = &args[0].as_string::(); + split_part_for_delimiter_type!( + str_arr, + GenericStringBuilder::::with_capacity( + inferred_length, + str_arr.value_data().len(), + ) ) } - (DataType::LargeUtf8, DataType::Utf8) => { - split_part_impl::<&GenericStringArray, &GenericStringArray, i64>( - &args[0].as_string::(), - &args[1].as_string::(), - n_array, + DataType::LargeUtf8 => { + let str_arr = &args[0].as_string::(); + split_part_for_delimiter_type!( + str_arr, + GenericStringBuilder::::with_capacity( + inferred_length, + str_arr.value_data().len(), + ) ) } - _ => exec_err!("Unsupported combination of argument types for split_part"), + other => exec_err!("Unsupported string type {other:?} for split_part"), }; if is_scalar { // If all inputs are scalar, keep the output as scalar @@ -203,71 +197,93 @@ impl ScalarUDFImpl for SplitPartFunc { } } -fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>( +/// Finds the nth split part of `string` by `delimiter`. +#[inline] +fn split_nth<'a>(string: &'a str, delimiter: &str, n: usize) -> Option<&'a str> { + if delimiter.len() == 1 { + // A single-byte UTF-8 string is always ASCII, so we can safely cast + // just the first byte to a character. `str::split(char)` internally + // uses memchr::memchr and is notably faster than `str::split(&str)`, + // even for a single character string. + string.split(delimiter.as_bytes()[0] as char).nth(n) + } else { + string.split(delimiter).nth(n) + } +} + +/// Like `split_nth` but splits from the right. +#[inline] +fn rsplit_nth<'a>(string: &'a str, delimiter: &str, n: usize) -> Option<&'a str> { + if delimiter.len() == 1 { + // A single-byte UTF-8 string is always ASCII, so we can safely cast + // just the first byte to a character. `str::rsplit(char)` internally + // uses memchr::memrchr and is notably faster than `str::rsplit(&str)`, + // even for a single character string. + string.rsplit(delimiter.as_bytes()[0] as char).nth(n) + } else { + string.rsplit(delimiter).nth(n) + } +} + +fn split_part_impl<'a, StringArrType, DelimiterArrType, B>( string_array: &StringArrType, delimiter_array: &DelimiterArrType, n_array: &Int64Array, + mut builder: B, ) -> Result where StringArrType: StringArrayType<'a>, DelimiterArrType: StringArrayType<'a>, - StringArrayLen: OffsetSizeTrait, + B: StringLikeArrayBuilder, { - let mut builder: GenericStringBuilder = GenericStringBuilder::new(); - - string_array + for ((string, delimiter), n) in string_array .iter() .zip(delimiter_array.iter()) .zip(n_array.iter()) - .try_for_each(|((string, delimiter), n)| -> Result<(), DataFusionError> { - match (string, delimiter, n) { - (Some(string), Some(delimiter), Some(n)) => { - let result = match n.cmp(&0) { - std::cmp::Ordering::Greater => { - // Positive index: use nth() to avoid collecting all parts - // This stops iteration as soon as we find the nth element - let idx: usize = (n - 1).try_into().map_err(|_| { - exec_datafusion_err!( - "split_part index {n} exceeds maximum supported value" - ) - })?; - - if delimiter.is_empty() { - // Match PostgreSQL split_part behavior for empty delimiter: - // treat the input as a single field ("ab" -> ["ab"]), - // rather than Rust's split("") result (["", "a", "b", ""]). - (n == 1).then_some(string) - } else { - string.split(delimiter).nth(idx) - } + { + match (string, delimiter, n) { + (Some(string), Some(delimiter), Some(n)) => { + let result = match n.cmp(&0) { + std::cmp::Ordering::Greater => { + let idx: usize = (n - 1).try_into().map_err(|_| { + exec_datafusion_err!( + "split_part index {n} exceeds maximum supported value" + ) + })?; + if delimiter.is_empty() { + // Match PostgreSQL's behavior: empty delimiter + // treats input as a single field, so only position + // 1 returns data. + (n == 1).then_some(string) + } else { + split_nth(string, delimiter, idx) } - std::cmp::Ordering::Less => { - // Negative index: use rsplit().nth() to efficiently get from the end - // rsplit iterates in reverse, so -1 means first from rsplit (index 0) - let idx: usize = (n.unsigned_abs() - 1).try_into().map_err(|_| { + } + std::cmp::Ordering::Less => { + let idx: usize = + (n.unsigned_abs() - 1).try_into().map_err(|_| { exec_datafusion_err!( "split_part index {n} exceeds minimum supported value" ) })?; - if delimiter.is_empty() { - // Match PostgreSQL split_part behavior for empty delimiter: - // treat the input as a single field ("ab" -> ["ab"]), - // rather than Rust's split("") result (["", "a", "b", ""]). - (n == -1).then_some(string) - } else { - string.rsplit(delimiter).nth(idx) - } + if delimiter.is_empty() { + // Match PostgreSQL's behavior: empty delimiter + // treats input as a single field, so only position + // -1 returns data. + (n == -1).then_some(string) + } else { + rsplit_nth(string, delimiter, idx) } - std::cmp::Ordering::Equal => { - return exec_err!("field position must not be zero"); - } - }; - builder.append_value(result.unwrap_or("")); - } - _ => builder.append_null(), + } + std::cmp::Ordering::Equal => { + return exec_err!("field position must not be zero"); + } + }; + builder.append_value(result.unwrap_or("")); } - Ok(()) - })?; + _ => builder.append_null(), + } + } Ok(Arc::new(builder.finish()) as ArrayRef) } diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index 5c7236f576247..086f37d6c3354 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -908,6 +908,52 @@ logical_plan 01)Projection: split_part(test.column1_utf8view, Utf8("f"), Int64(1)) AS c1, split_part(Utf8("testtesttest"), test.column1_utf8view, Int64(1)) AS c2 02)--TableScan: test projection=[column1_utf8view] +# SPLIT_PART with Utf8View +query T +SELECT split_part(arrow_cast('abc~@~def~@~ghi', 'Utf8View'), '~@~', 2); +---- +def + +query T +SELECT split_part(arrow_cast('abc~@~def~@~ghi', 'Utf8View'), '~@~', 20); +---- +(empty) + +query T +SELECT split_part(arrow_cast('abc~@~def~@~ghi', 'Utf8View'), '~@~', -1); +---- +ghi + +statement error DataFusion error: Execution error: field position must not be zero +SELECT split_part(arrow_cast('abc~@~def~@~ghi', 'Utf8View'), '~@~', 0); + +query T +SELECT split_part(arrow_cast('a,b', 'Utf8View'), '', 1); +---- +a,b + +query T +SELECT split_part(arrow_cast('a,b', 'Utf8View'), '', 2); +---- +(empty) + +query T +SELECT split_part(arrow_cast('a,b', 'Utf8View'), '', -1); +---- +a,b + +# Single-char delimiter +query T +SELECT split_part(arrow_cast('a.b.c', 'Utf8View'), '.', 2); +---- +b + +# Verify Utf8View input produces Utf8View output +query T +SELECT arrow_typeof(split_part(arrow_cast('a.b.c', 'Utf8View'), '.', 2)); +---- +Utf8View + ## Ensure no casts for STRPOS query TT EXPLAIN SELECT