diff --git a/arrow-arith/Cargo.toml b/arrow-arith/Cargo.toml index f2a4604c116e..6a02f543ecda 100644 --- a/arrow-arith/Cargo.toml +++ b/arrow-arith/Cargo.toml @@ -42,3 +42,6 @@ arrow-data = { workspace = true } arrow-schema = { workspace = true } chrono = { workspace = true } num-traits = { version = "0.2.19", default-features = false, features = ["std"] } + +[dev-dependencies] +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs index d94df49de256..ef9f085284a6 100644 --- a/arrow-arith/src/boolean.rs +++ b/arrow-arith/src/boolean.rs @@ -24,7 +24,7 @@ use arrow_array::*; use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_quaternary_op_helper}; -use arrow_buffer::{BooleanBuffer, NullBuffer, buffer_bin_and_not}; +use arrow_buffer::{BooleanBuffer, NullBuffer}; use arrow_schema::ArrowError; /// Logical 'and' boolean values with Kleene logic @@ -252,7 +252,7 @@ where /// assert_eq!(and_ab, BooleanArray::from(vec![Some(false), Some(true), None])); /// ``` pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result { - binary_boolean_kernel(left, right, |a, b| a & b) + left.binary(right, |a, b| a & b) } /// Performs `OR` operation on two arrays. If either left or right value is null then the @@ -269,7 +269,7 @@ pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result Result { - binary_boolean_kernel(left, right, |a, b| a | b) + left.binary(right, |a, b| a | b) } /// Performs `AND_NOT` operation on two arrays. If either left or right value is null then the @@ -287,10 +287,7 @@ pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result Result { - binary_boolean_kernel(left, right, |a, b| { - let buffer = buffer_bin_and_not(a.inner(), b.offset(), b.inner(), a.offset(), a.len()); - BooleanBuffer::new(buffer, left.offset(), left.len()) - }) + left.binary(right, |a, b| a & !b) } /// Performs unary `NOT` operation on an arrays. If value is null then the result is also @@ -306,9 +303,7 @@ pub fn and_not(left: &BooleanArray, right: &BooleanArray) -> Result Result { - let nulls = left.nulls().cloned(); - let values = !left.values(); - Ok(BooleanArray::new(values, nulls)) + Ok(left.unary(|a| !a)) } /// Returns a non-null [BooleanArray] with whether each value of the array is null. @@ -971,4 +966,336 @@ mod tests { .into_iter() .collect() } + + #[test] + fn test_boolean_kernels_with_nulls_and_offsets() { + // Construct BooleanArrays with mixed values and nulls + let left = BooleanArray::from(vec![ + Some(true), Some(false), None, Some(true), Some(false), None, Some(true) + ]); + let right = BooleanArray::from(vec![ + None, Some(true), Some(false), None, Some(true), Some(false), Some(true) + ]); + + // Create sliced views with non-zero offsets + let left_sliced = left.slice(1, 5); // Some(false), None, Some(true), Some(false), None + let right_sliced = right.slice(2, 5); // Some(false), None, Some(true), Some(false), Some(true) + + // Test and + let result_full = and(&left, &right).unwrap(); + let result_sliced = and(&left_sliced, &right_sliced).unwrap(); + + let expected_full = BooleanArray::from(vec![ + None, Some(false), None, None, Some(false), None, Some(true) + ]); + let expected_sliced = BooleanArray::from(vec![ + Some(false), None, Some(true), Some(false), None + ]); + + assert_eq!(result_full, expected_full); + assert_eq!(result_sliced, expected_sliced); + + // Test or + let result_full = or(&left, &right).unwrap(); + let result_sliced = or(&left_sliced, &right_sliced).unwrap(); + + let expected_full = BooleanArray::from(vec![ + None, Some(true), None, None, Some(true), None, Some(true) + ]); + let expected_sliced = BooleanArray::from(vec![ + Some(false), None, Some(true), Some(false), None + ]); + + assert_eq!(result_full, expected_full); + assert_eq!(result_sliced, expected_sliced); + + // Test and_kleene: true if both true, false if either false, null otherwise + let result_full = and_kleene(&left, &right).unwrap(); + let result_sliced = and_kleene(&left_sliced, &right_sliced).unwrap(); + + let expected_full = BooleanArray::from(vec![ + None, Some(false), Some(false), None, Some(false), Some(false), Some(true) + ]); + let expected_sliced = BooleanArray::from(vec![ + Some(false), None, Some(true), Some(false), None + ]); + + assert_eq!(result_full, expected_full); + assert_eq!(result_sliced, expected_sliced); + + // Test or_kleene: false if both false, true if either true, null otherwise + let result_full = or_kleene(&left, &right).unwrap(); + let result_sliced = or_kleene(&left_sliced, &right_sliced).unwrap(); + + let expected_full = BooleanArray::from(vec![ + Some(true), Some(true), None, Some(true), Some(true), None, Some(true) + ]); + let expected_sliced = BooleanArray::from(vec![ + Some(false), None, Some(true), Some(false), Some(true) + ]); + + assert_eq!(result_full, expected_full); + assert_eq!(result_sliced, expected_sliced); + + // Test not + let result_full = not(&left).unwrap(); + let result_sliced = not(&left_sliced).unwrap(); + + let expected_full = BooleanArray::from(vec![ + Some(false), Some(true), None, Some(false), Some(true), None, Some(false) + ]); + let expected_sliced = BooleanArray::from(vec![ + Some(true), None, Some(false), Some(true), None + ]); + + assert_eq!(result_full, expected_full); + assert_eq!(result_sliced, expected_sliced); + } + + #[test] + fn test_boolean_kernels_zero_length_and_all_null() { + // Empty arrays + let empty = BooleanArray::from(Vec::>::new()); + let result_and = and(&empty, &empty).unwrap(); + let result_or = or(&empty, &empty).unwrap(); + let result_not = not(&empty).unwrap(); + let result_and_kleene = and_kleene(&empty, &empty).unwrap(); + let result_or_kleene = or_kleene(&empty, &empty).unwrap(); + + assert_eq!(result_and.len(), 0); + assert_eq!(result_or.len(), 0); + assert_eq!(result_not.len(), 0); + assert_eq!(result_and_kleene.len(), 0); + assert_eq!(result_or_kleene.len(), 0); + + // All-null arrays + let all_null = BooleanArray::new_null(5); + let result_and = and(&all_null, &all_null).unwrap(); + let result_or = or(&all_null, &all_null).unwrap(); + let result_not = not(&all_null).unwrap(); + let result_and_kleene = and_kleene(&all_null, &all_null).unwrap(); + let result_or_kleene = or_kleene(&all_null, &all_null).unwrap(); + + assert_eq!(result_and, all_null); + assert_eq!(result_or, all_null); + assert_eq!(result_not, all_null); + assert_eq!(result_and_kleene, all_null); + assert_eq!(result_or_kleene, all_null); + + // Array with only first element non-null + let partial = BooleanArray::from(vec![Some(true), None, None, None, None]); + let result_not = not(&partial).unwrap(); + let expected_not = BooleanArray::from(vec![Some(false), None, None, None, None]); + assert_eq!(result_not, expected_not); + + // Array with only last element non-null + let partial = BooleanArray::from(vec![None, None, None, None, Some(false)]); + let result_not = not(&partial).unwrap(); + let expected_not = BooleanArray::from(vec![None, None, None, None, Some(true)]); + assert_eq!(result_not, expected_not); + } + + // Helper functions for reference implementations + fn ref_and_sql(a: Option, b: Option) -> Option { + match (a, b) { + (Some(a), Some(b)) => Some(a & b), + _ => None, + } + } + + fn ref_or_sql(a: Option, b: Option) -> Option { + match (a, b) { + (Some(a), Some(b)) => Some(a | b), + _ => None, + } + } + + fn ref_and_kleene(a: Option, b: Option) -> Option { + match (a, b) { + (Some(a), Some(b)) => Some(a & b), + (None, Some(b)) => if !b { Some(false) } else { None }, + (Some(a), None) => if !a { Some(false) } else { None }, + (None, None) => None, + } + } + + fn ref_or_kleene(a: Option, b: Option) -> Option { + match (a, b) { + (Some(a), Some(b)) => Some(a | b), + (None, Some(b)) => if b { Some(true) } else { None }, + (Some(a), None) => if a { Some(true) } else { None }, + (None, None) => None, + } + } + + fn ref_not(a: Option) -> Option { + a.map(|x| !x) + } + + #[test] + fn test_boolean_kernels_random_equivalence() { + use rand::{Rng, SeedableRng}; + + // Use a fixed seed for reproducible tests + let mut rng = rand::rngs::StdRng::from_seed([48u8; 32]); + + for _ in 0..20 { // 20 random iterations + // Pick random length 1..64 + let len = rng.random_range(1..=64); + + // Generate random Vec> for left and right + let mut left_vec = Vec::with_capacity(len); + let mut right_vec = Vec::with_capacity(len); + for _ in 0..len { + let is_null = rng.random_bool(0.2); // 20% chance of null + let val = if is_null { None } else { Some(rng.random_bool(0.5)) }; + left_vec.push(val); + let is_null = rng.random_bool(0.2); + let val = if is_null { None } else { Some(rng.random_bool(0.5)) }; + right_vec.push(val); + } + + // Construct BooleanArrays + let left = BooleanArray::from(left_vec.clone()); + let right = BooleanArray::from(right_vec.clone()); + + // Construct sliced variants if possible + let (left_slice, right_slice) = if len > 1 { + let slice_len = len - 1; + (left.slice(1, slice_len), right.slice(1, slice_len)) + } else { + (left.clone(), right.clone()) // fallback for len=1 + }; + + // Test each kernel + let kernels = vec![ + ("and", Box::new(|l: &BooleanArray, r: &BooleanArray| and(l, r).unwrap()) as Box BooleanArray>), + ("or", Box::new(|l, r| or(l, r).unwrap())), + ("and_kleene", Box::new(|l, r| and_kleene(l, r).unwrap())), + ("or_kleene", Box::new(|l, r| or_kleene(l, r).unwrap())), + ]; + + for (name, kernel) in kernels { + // Full arrays + let result = kernel(&left, &right); + let expected: Vec> = left_vec.iter().zip(&right_vec).map(|(a, b)| match name { + "and" => ref_and_sql(*a, *b), + "or" => ref_or_sql(*a, *b), + "and_kleene" => ref_and_kleene(*a, *b), + "or_kleene" => ref_or_kleene(*a, *b), + _ => unreachable!(), + }).collect(); + let result_vec: Vec> = result.iter().collect(); + assert_eq!(result_vec, expected, "Full {} mismatch", name); + + // Sliced arrays + if len > 1 { + let result_slice = kernel(&left_slice, &right_slice); + let expected_slice: Vec> = left_vec[1..].iter().zip(&right_vec[1..]).map(|(a, b)| match name { + "and" => ref_and_sql(*a, *b), + "or" => ref_or_sql(*a, *b), + "and_kleene" => ref_and_kleene(*a, *b), + "or_kleene" => ref_or_kleene(*a, *b), + _ => unreachable!(), + }).collect(); + let result_slice_vec: Vec> = result_slice.iter().collect(); + assert_eq!(result_slice_vec, expected_slice, "Sliced {} mismatch", name); + } + } + + // Test not separately + let result_not = not(&left).unwrap(); + let expected_not: Vec> = left_vec.iter().map(|a| ref_not(*a)).collect(); + let result_not_vec: Vec> = result_not.iter().collect(); + assert_eq!(result_not_vec, expected_not, "Full not mismatch"); + + if len > 1 { + let result_not_slice = not(&left_slice).unwrap(); + let expected_not_slice: Vec> = left_vec[1..].iter().map(|a| ref_not(*a)).collect(); + let result_not_slice_vec: Vec> = result_not_slice.iter().collect(); + assert_eq!(result_not_slice_vec, expected_not_slice, "Sliced not mismatch"); + } + } + } + + #[test] + fn test_boolean_array_byte_boundary_regressions() { + // Test historically dangerous bitmap patterns for BooleanArray binary/unary operations + // Construct BooleanArray from Vec> with length 10: [T, F, None, T, F, None, T, F, None, T] + // Underlying bitmap: bits for values and nulls + let data = vec![Some(true), Some(false), None, Some(true), Some(false), None, Some(true), Some(false), None, Some(true)]; + let array = BooleanArray::from(data.clone()); + + // Slice cases: (slice_start, slice_len, description) + let slice_cases = vec![ + (0, 5, "start=0, len=5"), + (1, 4, "start=1, len=4 (offset+len=5)"), + (3, 5, "start=3, len=5 (cross potential boundary)"), + (5, 5, "start=5, len=5"), + ]; + + for (start, len, desc) in slice_cases { + let slice = array.slice(start, len); + let slice_data = &data[start..start+len]; + + // Test unary NOT + let result_not = slice.unary(|a| !a); + let expected_not: Vec> = slice_data.iter().map(|x| x.map(|b| !b)).collect(); + let result_not_vec: Vec> = result_not.iter().collect(); + assert_eq!(result_not_vec, expected_not, "NOT {} mismatch", desc); + + // For binary, need another slice; use the same slice for simplicity, but with different op + // Test binary AND with itself (should be identity for non-null) + let result_and = slice.binary(&slice, |a, b| a & b).unwrap(); + let expected_and: Vec> = slice_data.iter().map(|x| match x { + Some(b) => Some(b & b), + None => None, + }).collect(); + let result_and_vec: Vec> = result_and.iter().collect(); + assert_eq!(result_and_vec, expected_and, "AND self {} mismatch", desc); + + // Test binary OR with itself + let result_or = slice.binary(&slice, |a, b| a | b).unwrap(); + let expected_or: Vec> = slice_data.iter().map(|x| match x { + Some(b) => Some(b | b), + None => None, + }).collect(); + let result_or_vec: Vec> = result_or.iter().collect(); + assert_eq!(result_or_vec, expected_or, "OR self {} mismatch", desc); + } + } + + #[test] + fn test_and_kleene_byte_boundary_regressions() { + // Test and_kleene with slices that hit byte boundaries + let left_data = vec![Some(true), Some(false), None, Some(true), Some(false), None, Some(true), Some(false), None, Some(true)]; + let right_data = vec![Some(false), Some(true), Some(true), Some(false), Some(true), Some(false), Some(true), Some(false), Some(true), Some(false)]; + let left = BooleanArray::from(left_data.clone()); + let right = BooleanArray::from(right_data.clone()); + + // Slice cases + let slice_cases = vec![ + (0, 5), + (1, 4), + (3, 5), + (5, 5), + ]; + + for (start, len) in slice_cases { + let left_slice = left.slice(start, len); + let right_slice = right.slice(start, len); + let left_slice_data = &left_data[start..start+len]; + let right_slice_data = &right_data[start..start+len]; + + let result = and_kleene(&left_slice, &right_slice).unwrap(); + let expected: Vec> = left_slice_data.iter().zip(right_slice_data).map(|(a, b)| match (a, b) { + (Some(a), Some(b)) => Some(a & b), + (None, Some(b)) => if !b { Some(false) } else { None }, + (Some(a), None) => if !a { Some(false) } else { None }, + (None, None) => None, + }).collect(); + let result_vec: Vec> = result.iter().collect(); + assert_eq!(result_vec, expected, "and_kleene slice start={}, len={} mismatch", start, len); + } + } } diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 7967084aa7ab..bb54a5f0f823 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -21,7 +21,7 @@ use crate::iterator::BooleanIter; use crate::{Array, ArrayAccessor, ArrayRef, Scalar}; use arrow_buffer::{BooleanBuffer, Buffer, MutableBuffer, NullBuffer, bit_util}; use arrow_data::{ArrayData, ArrayDataBuilder}; -use arrow_schema::DataType; +use arrow_schema::{ArrowError, DataType}; use std::any::Any; use std::sync::Arc; @@ -284,6 +284,52 @@ impl BooleanArray { pub fn into_parts(self) -> (BooleanBuffer, Option) { (self.values, self.nulls) } + + /// Apply a bitwise unary operation to this array, returning a new array. + /// + /// The operation is applied to the values, and nulls are cloned. + /// + /// # Arguments + /// + /// * `op` - The unary operation to apply. + pub fn unary(&self, mut op: F) -> Self + where + F: FnMut(u64) -> u64, + { + let buffer = self.values().inner().bitwise_unary(self.values().offset(), self.len(), op); + let values = BooleanBuffer::new(buffer, 0, self.len()); + let nulls = self.nulls().cloned(); + Self::new(values, nulls) + } + + /// Apply a bitwise binary operation between this array and another, returning a new array. + /// + /// The operation is applied to the values, and nulls are combined as union. + /// + /// # Arguments + /// + /// * `other` - The other array. + /// * `op` - The binary operation to apply. + pub fn binary(&self, other: &BooleanArray, mut op: F) -> Result + where + F: FnMut(u64, u64) -> u64, + { + if self.len() != other.len() { + return Err(ArrowError::ComputeError( + "Cannot perform bitwise operation on arrays of different length".to_string(), + )); + } + let buffer = self.values().inner().bitwise_binary( + other.values().inner(), + self.values().offset(), + other.values().offset(), + self.len(), + op, + ); + let values = BooleanBuffer::new(buffer, 0, self.len()); + let nulls = NullBuffer::union(self.nulls(), other.nulls()); + Ok(Self::new(values, nulls)) + } } impl Array for BooleanArray { @@ -829,4 +875,45 @@ mod tests { assert_eq!(values.values(), &[0b1000_0000]); assert!(nulls.is_none()); } + + #[test] + fn test_boolean_array_binary_nulls() { + // Test BooleanArray::binary with nulls + let left = BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]); + let right = BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]); + + // Test and + let result = BooleanArray::binary(&left, &right, |a, b| a & b).unwrap(); + let expected = BooleanArray::from(vec![Some(false), None, None, Some(false)]); + assert_eq!(result.iter().collect::>(), expected.iter().collect::>()); + + // Test or + let result = BooleanArray::binary(&left, &right, |a, b| a | b).unwrap(); + let expected = BooleanArray::from(vec![Some(true), None, None, Some(true)]); + assert_eq!(result.iter().collect::>(), expected.iter().collect::>()); + + // Test with offsets (sliced arrays) + let left_sliced = left.slice(1, 3); // [None, false, true] + let right_sliced = right.slice(1, 3); // [true, None, false] + + let result = BooleanArray::binary(&left_sliced, &right_sliced, |a, b| a & b).unwrap(); + let expected = BooleanArray::from(vec![None, None, Some(false)]); + assert_eq!(result.iter().collect::>(), expected.iter().collect::>()); + } + + #[test] + fn test_boolean_array_unary_nulls() { + // Test BooleanArray::unary with nulls + let array = BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]); + + let result = BooleanArray::unary(&array, |a| !a); + let expected = BooleanArray::from(vec![Some(false), None, Some(true), Some(false)]); + assert_eq!(result.iter().collect::>(), expected.iter().collect::>()); + + // Test with offsets + let sliced = array.slice(1, 3); // [None, false, true] + let result = BooleanArray::unary(&sliced, |a| !a); + let expected = BooleanArray::from(vec![None, Some(true), Some(false)]); + assert_eq!(result.iter().collect::>(), expected.iter().collect::>()); + } } diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index eebd6434986c..3bbb8f6e59ad 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -28,7 +28,6 @@ use crate::{bit_util, bytes::Bytes, native::ArrowNativeType}; #[cfg(feature = "pool")] use crate::pool::MemoryPool; -use super::ops::bitwise_unary_op_helper; use super::{MutableBuffer, ScalarBuffer}; /// A contiguous memory region that can be shared with other buffers and across @@ -344,7 +343,7 @@ impl Buffer { return self.slice_with_length(offset / 8, bit_util::ceil(len, 8)); } - bitwise_unary_op_helper(self, offset, len, |a| a) + self.bitwise_unary(offset, len, |a| a) } /// Returns a `BitChunks` instance which can be used to iterate over this buffers bits @@ -365,21 +364,6 @@ impl Buffer { /// it is not allocated with alignment [`ALIGNMENT`] /// /// # Example: Creating a [`MutableBuffer`] from a [`Buffer`] - /// ``` - /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; - /// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]); - /// // Only possible to convert a Buffer into a MutableBuffer if uniquely owned - /// // (i.e., there are no other references to it). - /// let mut mutable_buffer = match buffer.into_mutable() { - /// Ok(mutable) => mutable, - /// Err(orig_buffer) => { - /// panic!("buffer was not uniquely owned"); - /// } - /// }; - /// mutable_buffer.push(5u8); - /// let buffer = Buffer::from(mutable_buffer); - /// assert_eq!(buffer.as_slice(), &[1u8, 2, 3, 4, 5]) - /// ``` /// /// [`ALIGNMENT`]: crate::alloc::ALIGNMENT pub fn into_mutable(self) -> Result { @@ -461,6 +445,77 @@ impl Buffer { pub fn claim(&self, pool: &dyn MemoryPool) { self.data.claim(pool) } + + /// Apply a bitwise unary operation to this buffer, returning a new buffer. + /// + /// The operation is applied to `len` bits starting at `offset` bits. + /// + /// # Arguments + /// + /// * `offset` - The bit offset to start the operation. + /// * `len` - The number of bits to process. + /// * `op` - The unary operation to apply. + pub fn bitwise_unary(&self, offset: usize, len: usize, mut op: F) -> Buffer + where + F: FnMut(u64) -> u64, + { + // reserve capacity and set length so we can get a typed view of u64 chunks + let mut result = + MutableBuffer::new(crate::util::bit_util::ceil(len, 8)).with_bitset(len / 64 * 8, false); + + let left_chunks = self.bit_chunks(offset, len); + + let result_chunks = result.typed_data_mut::().iter_mut(); + + result_chunks + .zip(left_chunks.iter()) + .for_each(|(res, left)| { + *res = op(left); + }); + + let remainder_bytes = crate::util::bit_util::ceil(left_chunks.remainder_len(), 8); + let rem = op(left_chunks.remainder_bits()); + // we are counting its starting from the least significant bit, to to_le_bytes should be correct + let rem = &rem.to_le_bytes()[0..remainder_bytes]; + result.extend_from_slice(rem); + + result.into() + } + + /// Apply a bitwise binary operation between this buffer and another, returning a new buffer. + /// + /// The operation is applied to `len` bits starting at `self_offset` in self and `other_offset` in other. + /// + /// # Arguments + /// + /// * `other` - The other buffer. + /// * `self_offset` - The bit offset in self. + /// * `other_offset` - The bit offset in other. + /// * `len` - The number of bits to process. + /// * `op` - The binary operation to apply. + pub fn bitwise_binary(&self, other: &Buffer, self_offset: usize, other_offset: usize, len: usize, mut op: F) -> Buffer + where + F: FnMut(u64, u64) -> u64, + { + let left_chunks = self.bit_chunks(self_offset, len); + let right_chunks = other.bit_chunks(other_offset, len); + + let chunks = left_chunks + .iter() + .zip(right_chunks.iter()) + .map(|(left, right)| op(left, right)); + // Soundness: `BitChunks` is a `BitChunks` iterator which + // correctly reports its upper bound + let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) }; + + let remainder_bytes = crate::util::bit_util::ceil(left_chunks.remainder_len(), 8); + let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits()); + // we are counting its starting from the least significant bit, to to_le_bytes should be correct + let rem = &rem.to_le_bytes()[0..remainder_bytes]; + buffer.extend_from_slice(rem); + + buffer.into() + } } /// Note that here we deliberately do not implement @@ -615,6 +670,7 @@ impl FromIterator for Buffer { #[cfg(test)] mod tests { use crate::i256; + use rand::{Rng, SeedableRng}; use std::panic::{RefUnwindSafe, UnwindSafe}; use std::thread; @@ -1082,4 +1138,252 @@ mod tests { drop(capture); assert_eq!(buffer2.strong_count(), 1); } + + #[test] + fn test_buffer_bitwise_binary_offsets() { + use crate::buffer::ops::{buffer_bin_and, buffer_bin_or}; + + let left = Buffer::from(vec![0b10101010u8, 0b01010101u8]); + let right = Buffer::from(vec![0b11110000u8, 0b00001111u8]); + + // Test with offset 0, len full (16 bits) + let new_result = left.bitwise_binary(&right, 0, 0, 16, |a, b| a & b); + let old_result = buffer_bin_and(&left, 0, &right, 0, 16); + assert_eq!(new_result, old_result); + + // Test with offset 1, len 7 (crosses byte boundary) + let new_result = left.bitwise_binary(&right, 1, 1, 7, |a, b| a & b); + let old_result = buffer_bin_and(&left, 1, &right, 1, 7); + assert_eq!(new_result, old_result); + + // Test tail bits, len not multiple of 8 (10 bits) + let new_result = left.bitwise_binary(&right, 0, 0, 10, |a, b| a & b); + let old_result = buffer_bin_and(&left, 0, &right, 0, 10); + assert_eq!(new_result, old_result); + + // Test or operation + let new_result = left.bitwise_binary(&right, 0, 0, 16, |a, b| a | b); + let old_result = buffer_bin_or(&left, 0, &right, 0, 16); + assert_eq!(new_result, old_result); + } + + #[test] + fn test_buffer_bitwise_unary_offsets() { + use crate::buffer::ops::buffer_unary_not; + + let buffer = Buffer::from(vec![0b10101010u8, 0b01010101u8]); + + // Test with offset 0, len full + let new_result = buffer.bitwise_unary(0, 16, |a| !a); + let old_result = buffer_unary_not(&buffer, 0, 16); + assert_eq!(new_result, old_result); + + // Test with offset 2, len 10 (crosses byte, tail bits) + let new_result = buffer.bitwise_unary(2, 10, |a| !a); + let old_result = buffer_unary_not(&buffer, 2, 10); + assert_eq!(new_result, old_result); + } + + #[test] + fn test_buffer_bitwise_binary_random_equivalence() { + use crate::buffer::ops::{buffer_bin_and, buffer_bin_or, buffer_bin_xor}; + + // Use a fixed seed for reproducible tests + let mut rng = rand::rngs::StdRng::from_seed([42u8; 32]); + + let buffer_sizes = [8, 16, 32, 64]; + let offsets = [0, 1, 3, 7, 8, 13]; + let lengths = [1, 2, 7, 8, 9, 15, 16]; + + for &size in &buffer_sizes { + for _ in 0..5 { // Generate 5 random pairs per size + let left_vec: Vec = (0..size).map(|_| rng.random::()).collect(); + let right_vec: Vec = (0..size).map(|_| rng.random::()).collect(); + let left = Buffer::from(left_vec); + let right = Buffer::from(right_vec); + let buffer_bits = left.len() * 8; + + for &offset in &offsets { + if offset >= buffer_bits { continue; } + for &len in &lengths { + if offset + len > buffer_bits { continue; } + + // Test AND + let new_and = left.bitwise_binary(&right, offset, offset, len, |a, b| a & b); + let old_and = buffer_bin_and(&left, offset, &right, offset, len); + assert_eq!(new_and, old_and, "AND failed for offset={}, len={}", offset, len); + + // Test OR + let new_or = left.bitwise_binary(&right, offset, offset, len, |a, b| a | b); + let old_or = buffer_bin_or(&left, offset, &right, offset, len); + assert_eq!(new_or, old_or, "OR failed for offset={}, len={}", offset, len); + + // Test XOR + let new_xor = left.bitwise_binary(&right, offset, offset, len, |a, b| a ^ b); + let old_xor = buffer_bin_xor(&left, offset, &right, offset, len); + assert_eq!(new_xor, old_xor, "XOR failed for offset={}, len={}", offset, len); + } + } + } + } + } + + #[test] + fn test_buffer_bitwise_unary_random_equivalence() { + use crate::buffer::ops::buffer_unary_not; + + // Use a fixed seed for reproducible tests + let mut rng = rand::rngs::StdRng::from_seed([43u8; 32]); + + let buffer_sizes = [8, 16, 32, 64]; + let offsets = [0, 1, 3, 7, 8, 13]; + let lengths = [1, 2, 7, 8, 9, 15, 16]; + + for &size in &buffer_sizes { + for _ in 0..5 { // Generate 5 random buffers per size + let vec: Vec = (0..size).map(|_| rng.random::()).collect(); + let buffer = Buffer::from(vec); + let buffer_bits = buffer.len() * 8; + + for &offset in &offsets { + if offset >= buffer_bits { continue; } + for &len in &lengths { + if offset + len > buffer_bits { continue; } + + let new_not = buffer.bitwise_unary(offset, len, |a| !a); + let old_not = buffer_unary_not(&buffer, offset, len); + assert_eq!(new_not, old_not, "NOT failed for offset={}, len={}", offset, len); + } + } + } + } + } + + #[test] + fn test_buffer_bitwise_boundaries() { + // Use a fixed seed for reproducible tests + let mut rng = rand::rngs::StdRng::from_seed([45u8; 32]); + + // Create a large buffer: 1024 bytes + let data: Vec = (0..1024).map(|_| rng.random::()).collect(); + let buffer = Buffer::from(data); + let total_bits = buffer.len() * 8; + + // Boundary configurations for unary + let boundary_configs = vec![ + (0, 0), // zero length + (0, total_bits), // full length + (1, total_bits - 1), // offset 1, to end + (7, total_bits - 7), // offset 7, crosses byte + (8, total_bits - 8), // offset at byte boundary + (total_bits - 1, 1), // last bit + (total_bits - 8, 8), // last byte + (total_bits - 9, 9), // last byte plus one + ]; + + for (offset, len) in boundary_configs { + if offset + len > total_bits { + continue; // skip invalid + } + + // Test bitwise_unary + let result = buffer.bitwise_unary(offset, len, |a| !a); + let expected_len = (len + 7) / 8; + assert_eq!(result.len(), expected_len, "Wrong length for offset={}, len={}", offset, len); + + // Idempotence: NOT twice should be identity, but since result is packed, compare lengths + let result_twice = Buffer::from(result.clone()).bitwise_unary(0, len, |a| !a); + assert_eq!(result_twice.len(), expected_len, "NOT twice length mismatch for offset={}, len={}", offset, len); + } + } + + #[test] + fn test_buffer_bitwise_binary_boundaries() { + use crate::buffer::ops::{buffer_bin_and, buffer_bin_or}; + + // Use a fixed seed for reproducible tests + let mut rng = rand::rngs::StdRng::from_seed([46u8; 32]); + + // Create two large buffers: 1024 bytes each + let data_left: Vec = (0..1024).map(|_| rng.random::()).collect(); + let data_right: Vec = (0..1024).map(|_| rng.random::()).collect(); + let left = Buffer::from(data_left); + let right = Buffer::from(data_right); + let total_bits = left.len() * 8; + + // Boundary configurations for binary + let boundary_configs = vec![ + (0, 0), // zero length + (0, total_bits), // full length + (1, total_bits - 1), // offset 1, to end + (7, total_bits - 7), // offset 7, crosses byte + (8, total_bits - 8), // offset at byte boundary + (total_bits - 1, 1), // last bit + (total_bits - 8, 8), // last byte + (total_bits - 9, 9), // last byte plus one + ]; + + for (offset, len) in boundary_configs { + if offset + len > total_bits { + continue; // skip invalid + } + + // Test bitwise_binary AND + let result_and = left.bitwise_binary(&right, offset, offset, len, |a, b| a & b); + let expected_len = (len + 7) / 8; + assert_eq!(result_and.len(), expected_len, "AND wrong length for offset={}, len={}", offset, len); + + // Compare with legacy for a few cases + if len <= 64 { // to keep test fast + let old_and = buffer_bin_and(&left, offset, &right, offset, len); + assert_eq!(result_and, old_and, "AND mismatch with legacy for offset={}, len={}", offset, len); + } + + // Test bitwise_binary OR + let result_or = left.bitwise_binary(&right, offset, offset, len, |a, b| a | b); + assert_eq!(result_or.len(), expected_len, "OR wrong length for offset={}, len={}", offset, len); + + // Compare with legacy for a few cases + if len <= 64 { + let old_or = buffer_bin_or(&left, offset, &right, offset, len); + assert_eq!(result_or, old_or, "OR mismatch with legacy for offset={}, len={}", offset, len); + } + } + } + + + + #[test] + fn test_buffer_bitwise_byte_boundary_regressions() { + use crate::buffer::ops::{buffer_bin_and, buffer_bin_or, buffer_unary_not}; + + // Construct small buffers + let left = Buffer::from(vec![0b11110000u8, 0b00001111u8]); // 240, 15 + let right = Buffer::from(vec![0b10101010u8, 0b01010101u8]); // 170, 85 + + // (offset, len, description) + let cases: &[(usize, usize, &str)] = &[ + (0, 8, "(offset=0, len=8) exact byte"), + (1, 7, "(offset=1, len=7) (offset+len)%8 == 0"), + (3, 5, "(offset=3, len=5) partial byte"), + (4, 8, "(offset=4, len=8) cross-byte"), + ]; + + for &(offset, len, desc) in cases { + // New AND vs legacy AND + let result_and = left.bitwise_binary(&right, offset, offset, len, |a, b| a & b); + let legacy_and = buffer_bin_and(&left, offset, &right, offset, len); + assert_eq!(result_and.as_slice(), &legacy_and[..], "AND {}", desc); + + // New OR vs legacy OR + let result_or = left.bitwise_binary(&right, offset, offset, len, |a, b| a | b); + let legacy_or = buffer_bin_or(&left, offset, &right, offset, len); + assert_eq!(result_or.as_slice(), &legacy_or[..], "OR {}", desc); + + // New NOT vs legacy NOT (unary, using only `left`) + let result_not = left.bitwise_unary(offset, len, |a| !a); + let legacy_not = buffer_unary_not(&left, offset, len); + assert_eq!(result_not.as_slice(), &legacy_not[..], "NOT {}", desc); + } + } }