Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions arrow-arith/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,6 @@ arrow-data = { workspace = true }
arrow-schema = { workspace = true }
chrono = { workspace = true }
num-traits = { version = "0.2.19", default-features = false, features = ["std"] }

[dev-dependencies]
rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
347 changes: 337 additions & 10 deletions arrow-arith/src/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

use arrow_array::*;
use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_quaternary_op_helper};
use arrow_buffer::{BooleanBuffer, NullBuffer, buffer_bin_and_not};
use arrow_buffer::{BooleanBuffer, NullBuffer};
use arrow_schema::ArrowError;

/// Logical 'and' boolean values with Kleene logic
Expand Down Expand Up @@ -252,7 +252,7 @@ where
/// assert_eq!(and_ab, BooleanArray::from(vec![Some(false), Some(true), None]));
/// ```
pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> {
// NOTE(review): this is a rendered diff without +/- markers. The first statement below
// appears to be the removed body and the second its replacement; only one of them belongs
// in the real file — confirm against the actual commit.
binary_boolean_kernel(left, right, |a, b| a & b)
left.binary(right, |a, b| a & b)
}

/// Performs `OR` operation on two arrays. If either left or right value is null then the
Expand All @@ -269,7 +269,7 @@ pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, Ar
/// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), Some(true), None]));
/// ```
pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> {
// NOTE(review): this is a rendered diff without +/- markers. The first statement below
// appears to be the removed body and the second its replacement; only one of them belongs
// in the real file — confirm against the actual commit.
binary_boolean_kernel(left, right, |a, b| a | b)
left.binary(right, |a, b| a | b)
}

/// Performs `AND_NOT` operation on two arrays. If either left or right value is null then the
Expand All @@ -287,10 +287,7 @@ pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, Arr
/// // It's equal to and(left, not(right))
/// assert_eq!(andn_ab, and(&a, &not(&b).unwrap()).unwrap());
pub fn and_not(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> {
// NOTE(review): this is a rendered diff without +/- markers. The `binary_boolean_kernel`
// call appears to be the removed body and the trailing `left.binary` call its replacement;
// only one of them belongs in the real file — confirm against the actual commit.
binary_boolean_kernel(left, right, |a, b| {
// NOTE(review): `a.inner()` is paired with `b.offset()` and `b.inner()` with `a.offset()`;
// verify this against the parameter order of `buffer_bin_and_not`.
let buffer = buffer_bin_and_not(a.inner(), b.offset(), b.inner(), a.offset(), a.len());
BooleanBuffer::new(buffer, left.offset(), left.len())
})
left.binary(right, |a, b| a & !b)
}

/// Performs unary `NOT` operation on an arrays. If value is null then the result is also
Expand All @@ -306,9 +303,7 @@ pub fn and_not(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray
/// assert_eq!(not_a, BooleanArray::from(vec![Some(true), Some(false), None]));
/// ```
pub fn not(left: &BooleanArray) -> Result<BooleanArray, ArrowError> {
// NOTE(review): this is a rendered diff without +/- markers. The three statements below
// (clone the nulls, negate the values, rebuild the array) appear to be the removed body and
// the final `left.unary` expression its replacement — confirm against the actual commit.
let nulls = left.nulls().cloned();
let values = !left.values();
Ok(BooleanArray::new(values, nulls))
Ok(left.unary(|a| !a))
}

/// Returns a non-null [BooleanArray] with whether each value of the array is null.
Expand Down Expand Up @@ -971,4 +966,336 @@ mod tests {
.into_iter()
.collect()
}

#[test]
fn test_boolean_kernels_with_nulls_and_offsets() {
    // Short aliases keep the truth tables below readable.
    const T: Option<bool> = Some(true);
    const F: Option<bool> = Some(false);
    const N: Option<bool> = None;

    // Operands mix true / false / null entries.
    let left = BooleanArray::from(vec![T, F, N, T, F, N, T]);
    let right = BooleanArray::from(vec![N, T, F, N, T, F, T]);

    // The two views start at different non-zero offsets (1 and 2), so a kernel that
    // confuses the operands' offsets produces a visibly wrong result.
    let left_sliced = left.slice(1, 5); // [F, N, T, F, N]
    let right_sliced = right.slice(2, 5); // [F, N, T, F, T]

    // and: null if either side is null, otherwise a & b
    assert_eq!(
        and(&left, &right).unwrap(),
        BooleanArray::from(vec![N, F, N, N, F, N, T]),
        "and (full)"
    );
    assert_eq!(
        and(&left_sliced, &right_sliced).unwrap(),
        BooleanArray::from(vec![F, N, T, F, N]),
        "and (sliced)"
    );

    // or: null if either side is null, otherwise a | b
    assert_eq!(
        or(&left, &right).unwrap(),
        BooleanArray::from(vec![N, T, N, N, T, N, T]),
        "or (full)"
    );
    assert_eq!(
        or(&left_sliced, &right_sliced).unwrap(),
        BooleanArray::from(vec![F, N, T, F, N]),
        "or (sliced)"
    );

    // and_not: null if either side is null, otherwise a & !b
    assert_eq!(
        and_not(&left, &right).unwrap(),
        BooleanArray::from(vec![N, F, N, N, F, N, F]),
        "and_not (full)"
    );
    assert_eq!(
        and_not(&left_sliced, &right_sliced).unwrap(),
        BooleanArray::from(vec![F, N, F, F, N]),
        "and_not (sliced)"
    );

    // and_kleene: true if both true, false if either false, null otherwise
    assert_eq!(
        and_kleene(&left, &right).unwrap(),
        BooleanArray::from(vec![N, F, F, N, F, F, T]),
        "and_kleene (full)"
    );
    assert_eq!(
        and_kleene(&left_sliced, &right_sliced).unwrap(),
        BooleanArray::from(vec![F, N, T, F, N]),
        "and_kleene (sliced)"
    );

    // or_kleene: false if both false, true if either true, null otherwise
    assert_eq!(
        or_kleene(&left, &right).unwrap(),
        BooleanArray::from(vec![T, T, N, T, T, N, T]),
        "or_kleene (full)"
    );
    assert_eq!(
        or_kleene(&left_sliced, &right_sliced).unwrap(),
        BooleanArray::from(vec![F, N, T, F, T]),
        "or_kleene (sliced)"
    );

    // not: null stays null, everything else is negated
    assert_eq!(
        not(&left).unwrap(),
        BooleanArray::from(vec![F, T, N, F, T, N, F]),
        "not (full)"
    );
    assert_eq!(
        not(&left_sliced).unwrap(),
        BooleanArray::from(vec![T, N, F, T, N]),
        "not (sliced)"
    );
}

#[test]
fn test_boolean_kernels_zero_length_and_all_null() {
    // Zero-length input: every kernel must succeed and yield an empty array.
    let empty = BooleanArray::from(Vec::<Option<bool>>::new());
    assert_eq!(and(&empty, &empty).unwrap().len(), 0);
    assert_eq!(or(&empty, &empty).unwrap().len(), 0);
    assert_eq!(and_not(&empty, &empty).unwrap().len(), 0);
    assert_eq!(not(&empty).unwrap().len(), 0);
    assert_eq!(and_kleene(&empty, &empty).unwrap().len(), 0);
    assert_eq!(or_kleene(&empty, &empty).unwrap().len(), 0);

    // All-null input: null combined with null stays null for every kernel,
    // including the Kleene variants.
    let all_null = BooleanArray::new_null(5);
    assert_eq!(and(&all_null, &all_null).unwrap(), all_null);
    assert_eq!(or(&all_null, &all_null).unwrap(), all_null);
    assert_eq!(and_not(&all_null, &all_null).unwrap(), all_null);
    assert_eq!(not(&all_null).unwrap(), all_null);
    assert_eq!(and_kleene(&all_null, &all_null).unwrap(), all_null);
    assert_eq!(or_kleene(&all_null, &all_null).unwrap(), all_null);

    // A single valid entry in the first slot.
    let only_first = BooleanArray::from(vec![Some(true), None, None, None, None]);
    assert_eq!(
        not(&only_first).unwrap(),
        BooleanArray::from(vec![Some(false), None, None, None, None])
    );

    // A single valid entry in the last slot.
    let only_last = BooleanArray::from(vec![None, None, None, None, Some(false)]);
    assert_eq!(
        not(&only_last).unwrap(),
        BooleanArray::from(vec![None, None, None, None, Some(true)])
    );
}

// Helper functions for reference implementations
/// Scalar reference for the `and` kernel: the result is null as soon as either
/// operand is null, otherwise it is the conjunction of the two values.
fn ref_and_sql(a: Option<bool>, b: Option<bool>) -> Option<bool> {
    Some(a? & b?)
}

/// Scalar reference for the `or` kernel: the result is null as soon as either
/// operand is null, otherwise it is the disjunction of the two values.
fn ref_or_sql(a: Option<bool>, b: Option<bool>) -> Option<bool> {
    a.zip(b).map(|(lhs, rhs)| lhs | rhs)
}

/// Scalar reference for Kleene `and`: a known `false` on either side decides the
/// result, two known `true`s give `true`, and every other combination is null.
fn ref_and_kleene(a: Option<bool>, b: Option<bool>) -> Option<bool> {
    match (a, b) {
        (Some(false), _) | (_, Some(false)) => Some(false),
        (Some(true), Some(true)) => Some(true),
        _ => None,
    }
}

/// Scalar reference for Kleene `or`: a known `true` on either side decides the
/// result, two known `false`s give `false`, and every other combination is null.
fn ref_or_kleene(a: Option<bool>, b: Option<bool>) -> Option<bool> {
    match (a, b) {
        (Some(true), _) | (_, Some(true)) => Some(true),
        (Some(false), Some(false)) => Some(false),
        _ => None,
    }
}

/// Scalar reference for the `not` kernel: null stays null, a known value is negated.
fn ref_not(a: Option<bool>) -> Option<bool> {
    Some(!a?)
}

#[test]
fn test_boolean_kernels_random_equivalence() {
    use rand::{Rng, SeedableRng};

    // Scalar model a binary kernel is compared against.
    type Reference = fn(Option<bool>, Option<bool>) -> Option<bool>;

    // (left offset, right offset) pairs applied to the operands before calling a kernel.
    // (0, 0) is the unsliced case; the unequal pairs catch kernels that mix up the
    // two operands' offsets.
    const OFFSETS: [(usize, usize); 4] = [(0, 0), (1, 1), (1, 2), (3, 0)];

    // Scalar reference for `and_not`: `a AND (NOT b)`, null if either side is null.
    fn ref_and_not(a: Option<bool>, b: Option<bool>) -> Option<bool> {
        ref_and_sql(a, ref_not(b))
    }

    // Runs `kernel` on every offset combination that fits the input and compares the
    // result element-wise with `reference`.
    fn check_binary<K>(
        name: &str,
        kernel: K,
        reference: Reference,
        left: &[Option<bool>],
        right: &[Option<bool>],
    ) where
        K: Fn(&BooleanArray, &BooleanArray) -> BooleanArray,
    {
        assert_eq!(left.len(), right.len(), "operands must have equal length");
        let left_array = BooleanArray::from(left.to_vec());
        let right_array = BooleanArray::from(right.to_vec());

        for (left_offset, right_offset) in OFFSETS {
            let skipped = left_offset.max(right_offset);
            if skipped >= left.len() {
                // Input too short for this offset pair.
                continue;
            }
            let n = left.len() - skipped;

            let actual: Vec<Option<bool>> = kernel(
                &left_array.slice(left_offset, n),
                &right_array.slice(right_offset, n),
            )
            .iter()
            .collect();
            let expected: Vec<Option<bool>> = left[left_offset..left_offset + n]
                .iter()
                .zip(&right[right_offset..right_offset + n])
                .map(|(a, b)| reference(*a, *b))
                .collect();

            assert_eq!(
                actual, expected,
                "{} mismatch (left offset {}, right offset {}, len {})",
                name, left_offset, right_offset, n
            );
        }
    }

    // Use a fixed seed for reproducible tests
    let mut rng = rand::rngs::StdRng::from_seed([48u8; 32]);

    for _ in 0..20 {
        // Random length in 1..=64
        let len: usize = rng.random_range(1..=64);

        // Each entry is null with probability 0.2, otherwise a fair coin flip.
        let mut left_vec: Vec<Option<bool>> = Vec::with_capacity(len);
        let mut right_vec: Vec<Option<bool>> = Vec::with_capacity(len);
        for _ in 0..len {
            for side in [&mut left_vec, &mut right_vec] {
                let entry = if rng.random_bool(0.2) {
                    None
                } else {
                    Some(rng.random_bool(0.5))
                };
                side.push(entry);
            }
        }

        check_binary("and", |l, r| and(l, r).unwrap(), ref_and_sql, &left_vec, &right_vec);
        check_binary("or", |l, r| or(l, r).unwrap(), ref_or_sql, &left_vec, &right_vec);
        check_binary(
            "and_not",
            |l, r| and_not(l, r).unwrap(),
            ref_and_not,
            &left_vec,
            &right_vec,
        );
        check_binary(
            "and_kleene",
            |l, r| and_kleene(l, r).unwrap(),
            ref_and_kleene,
            &left_vec,
            &right_vec,
        );
        check_binary(
            "or_kleene",
            |l, r| or_kleene(l, r).unwrap(),
            ref_or_kleene,
            &left_vec,
            &right_vec,
        );

        // `not` is unary, so it is checked separately over a few starting offsets.
        let left_array = BooleanArray::from(left_vec.clone());
        for offset in [0usize, 1, 3] {
            if offset >= len {
                continue;
            }
            let actual: Vec<Option<bool>> = not(&left_array.slice(offset, len - offset))
                .unwrap()
                .iter()
                .collect();
            let expected: Vec<Option<bool>> =
                left_vec[offset..].iter().copied().map(ref_not).collect();
            assert_eq!(actual, expected, "not mismatch (offset {})", offset);
        }
    }
}

#[test]
fn test_boolean_array_byte_boundary_regressions() {
    // Exercises `BooleanArray::unary` / `BooleanArray::binary` on slices of a 10-element
    // array [T, F, None, T, F, None, T, F, None, T]; the later windows reach or pass
    // bit 8 of the underlying bitmaps.
    let data = vec![
        Some(true),
        Some(false),
        None,
        Some(true),
        Some(false),
        None,
        Some(true),
        Some(false),
        None,
        Some(true),
    ];
    let array = BooleanArray::from(data.clone());

    // (slice_start, slice_len, description)
    let slice_cases = [
        (0, 5, "start=0, len=5"),
        (1, 4, "start=1, len=4 (offset+len=5)"),
        (3, 5, "start=3, len=5 (cross potential boundary)"),
        (5, 5, "start=5, len=5"),
    ];

    for (start, len, desc) in slice_cases {
        let window = array.slice(start, len);
        let window_data = &data[start..start + len];

        // Unary NOT: nulls are kept, known values are flipped.
        let negated: Vec<Option<bool>> = window.unary(|a| !a).iter().collect();
        let negated_expected: Vec<Option<bool>> =
            window_data.iter().map(|x| x.map(|b| !b)).collect();
        assert_eq!(negated, negated_expected, "NOT {} mismatch", desc);

        // Binary AND of the window with itself: known values are unchanged.
        let anded: Vec<Option<bool>> = window
            .binary(&window, |a, b| a & b)
            .unwrap()
            .iter()
            .collect();
        let anded_expected: Vec<Option<bool>> =
            window_data.iter().map(|x| x.map(|b| b & b)).collect();
        assert_eq!(anded, anded_expected, "AND self {} mismatch", desc);

        // Binary OR of the window with itself: known values are unchanged.
        let ored: Vec<Option<bool>> = window
            .binary(&window, |a, b| a | b)
            .unwrap()
            .iter()
            .collect();
        let ored_expected: Vec<Option<bool>> =
            window_data.iter().map(|x| x.map(|b| b | b)).collect();
        assert_eq!(ored, ored_expected, "OR self {} mismatch", desc);
    }
}

#[test]
fn test_and_kleene_byte_boundary_regressions() {
    // Checks `and_kleene` on slices of 10-element operands against the scalar
    // `ref_and_kleene` model.
    let left_data = vec![
        Some(true),
        Some(false),
        None,
        Some(true),
        Some(false),
        None,
        Some(true),
        Some(false),
        None,
        Some(true),
    ];
    let left = BooleanArray::from(left_data.clone());

    // Two right-hand operands: the first has no null entries, the second has nulls
    // placed so that null/null, true/null and false/null pairs all occur.
    let right_variants = [
        vec![
            Some(false),
            Some(true),
            Some(true),
            Some(false),
            Some(true),
            Some(false),
            Some(true),
            Some(false),
            Some(true),
            Some(false),
        ],
        vec![
            Some(false),
            None,
            None,
            None,
            Some(true),
            Some(false),
            None,
            Some(false),
            Some(true),
            None,
        ],
    ];

    // (slice_start, slice_len)
    let slice_cases = [(0, 5), (1, 4), (3, 5), (5, 5)];

    for (variant, right_data) in right_variants.iter().enumerate() {
        let right = BooleanArray::from(right_data.clone());

        for (start, len) in slice_cases {
            let result = and_kleene(&left.slice(start, len), &right.slice(start, len)).unwrap();
            let result_vec: Vec<Option<bool>> = result.iter().collect();

            let expected: Vec<Option<bool>> = left_data[start..start + len]
                .iter()
                .zip(&right_data[start..start + len])
                .map(|(a, b)| ref_and_kleene(*a, *b))
                .collect();

            assert_eq!(
                result_vec, expected,
                "and_kleene slice start={}, len={} mismatch (right variant {})",
                start, len, variant
            );
        }
    }
}
}
Loading
Loading