From 1cc7189c6b536975ec34c33032b7b6f211c0f4c1 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 2 Jun 2026 16:30:00 -0400 Subject: [PATCH 1/2] test(arrow-select): add take_bytes coverage for sliced values and nullable offset overflow Adds two tests to `take_bytes`: - `test_take_bytes_sliced_values`: takes from a byte array that has been sliced so its value offsets no longer start at zero, covering both the no-null fast path and the nullable path (null indices and selected null values). - `test_take_bytes_offset_overflow_nullable`: verifies the `OffsetOverflowError` is also produced on the nullable code path, not only the no-null fast path. Both pass on the current implementation. Co-Authored-By: Claude Opus 4.8 (1M context) --- arrow-select/src/take.rs | 69 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index cbb65ac915dd..6b926466a153 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -1728,6 +1728,41 @@ mod tests { assert_eq!(result.as_ref(), &expected); } + /// Take from a *sliced* byte array, i.e. one whose value offsets do not + /// start at zero. This exercises copying byte data out of an array with a + /// non-zero base offset for both the no-null fast path and the nullable + /// path (null indices and selected null values). + #[test] + fn test_take_bytes_sliced_values() { + let values = StringArray::from(vec![ + Some("aaa"), + Some("bbb"), + None, + Some("ccccc"), + Some("dd"), + None, + Some("eeee"), + ]); + // Slice so the underlying value offsets no longer start at 0: + // sliced == [None, "ccccc", "dd", None, "eeee"] + let sliced = values.slice(2, 5); + + // Fast path: every output slot is valid (no null indices, no null + // values selected). + let indices = Int32Array::from(vec![1, 2, 4, 1]); + let result = take(&sliced, &indices, None).unwrap(); + let expected = + StringArray::from(vec![Some("ccccc"), Some("dd"), Some("eeee"), Some("ccccc")]); + assert_eq!(result.as_string::(), &expected); + + // Nullable path: a null index (position 1) and selected null values + // (sliced indices 0 and 3 are null). + let indices = Int32Array::from(vec![Some(1), None, Some(0), Some(4), Some(3)]); + let result = take(&sliced, &indices, None).unwrap(); + let expected = StringArray::from(vec![Some("ccccc"), None, None, Some("eeee"), None]); + assert_eq!(result.as_string::(), &expected); + } + fn _test_byte_view() where T: ByteViewType, @@ -2810,9 +2845,37 @@ mod tests { #[test] fn test_take_bytes_offset_overflow() { - let indices = Int32Array::from(vec![0; (i32::MAX >> 4) as usize]); - let text = ('a'..='z').collect::(); - let values = StringArray::from(vec![Some(text.clone())]); + // Select a single large value enough times that the cumulative offset + // exceeds i32::MAX. Using a large value keeps the number of indices + // (and therefore the runtime) small. + let value_len = 1_000_000; + let big = "a".repeat(value_len); + let values = StringArray::from(vec![Some(big.as_str())]); + + let n = i32::MAX as usize / value_len + 1; + let indices = Int32Array::from(vec![0; n]); + assert!(matches!( + take(&values, &indices, None), + Err(ArrowError::OffsetOverflowError(_)) + )); + } + + /// The offset-overflow error must also be produced on the nullable code + /// path (when the output contains nulls), not only on the no-null fast path. + #[test] + fn test_take_bytes_offset_overflow_nullable() { + // Same overflow trigger as `test_take_bytes_offset_overflow`, but with a + // null index so the output contains nulls and the nullable code path is + // exercised. A large value keeps the index count (and runtime) small. + let value_len = 1_000_000; + let big = "a".repeat(value_len); + let values = StringArray::from(vec![Some(big.as_str())]); + + let n = i32::MAX as usize / value_len + 1; + let validity = + NullBuffer::from_iter(std::iter::once(false).chain(std::iter::repeat_n(true, n))); + let indices = Int32Array::new(vec![0i32; n + 1].into(), Some(validity)); + assert!(matches!( take(&values, &indices, None), Err(ArrowError::OffsetOverflowError(_)) From 9737fa4f210430b6544ede8cf41d9377af62f435 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 2 Jun 2026 17:01:28 -0400 Subject: [PATCH 2/2] test(arrow-select): share fixture between take_bytes offset-overflow tests Extract the large-value / selection-count setup into `offset_overflow_fixture` so `test_take_bytes_offset_overflow` and `test_take_bytes_offset_overflow_nullable` no longer duplicate it. Co-Authored-By: Claude Opus 4.8 (1M context) --- arrow-select/src/take.rs | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index 6b926466a153..9b8db19622fa 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -2843,16 +2843,20 @@ mod tests { assert_eq!(array.len(), 3); } - #[test] - fn test_take_bytes_offset_overflow() { - // Select a single large value enough times that the cumulative offset - // exceeds i32::MAX. Using a large value keeps the number of indices - // (and therefore the runtime) small. + /// Fixture for the offset-overflow tests: a single large value plus the + /// number of times it must be selected so the cumulative offset exceeds + /// `i32::MAX`. Using a large value keeps the index count (and the test + /// runtime) small. + fn offset_overflow_fixture() -> (StringArray, usize) { let value_len = 1_000_000; - let big = "a".repeat(value_len); - let values = StringArray::from(vec![Some(big.as_str())]); - + let values = StringArray::from(vec![Some("a".repeat(value_len))]); let n = i32::MAX as usize / value_len + 1; + (values, n) + } + + #[test] + fn test_take_bytes_offset_overflow() { + let (values, n) = offset_overflow_fixture(); let indices = Int32Array::from(vec![0; n]); assert!(matches!( take(&values, &indices, None), @@ -2864,14 +2868,9 @@ mod tests { /// path (when the output contains nulls), not only on the no-null fast path. #[test] fn test_take_bytes_offset_overflow_nullable() { - // Same overflow trigger as `test_take_bytes_offset_overflow`, but with a - // null index so the output contains nulls and the nullable code path is - // exercised. A large value keeps the index count (and runtime) small. - let value_len = 1_000_000; - let big = "a".repeat(value_len); - let values = StringArray::from(vec![Some(big.as_str())]); - - let n = i32::MAX as usize / value_len + 1; + let (values, n) = offset_overflow_fixture(); + // A null index forces the output to contain nulls, exercising the + // nullable code path. let validity = NullBuffer::from_iter(std::iter::once(false).chain(std::iter::repeat_n(true, n))); let indices = Int32Array::new(vec![0i32; n + 1].into(), Some(validity));