From efcb5655eb8c8711bf0ccf546aa7da30b466cccd Mon Sep 17 00:00:00 2001 From: amorynan Date: Sat, 27 Dec 2025 17:00:30 +0800 Subject: [PATCH 1/2] feat: Add push_validity_into_children methods to StructArray Add methods to push struct-level validity into child fields: - push_validity_into_children(preserve_struct_validity: bool) - push_validity_into_children_default() - convenience method with preserve=false The functionality propagates null information from struct level down to individual fields, with options to preserve or remove the struct-level validity. Includes comprehensive tests covering all scenarios: - preserve_struct_validity = true - preserve_struct_validity = false (default) - no nulls edge case Signed-off-by: amorynan --- vortex-array/src/arrays/struct_/array.rs | 109 ++++++++++++++ vortex-array/src/arrays/struct_/tests.rs | 178 +++++++++++++++++++++++ 2 files changed, 287 insertions(+) diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index a0b2418b903..bf91b52101f 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -13,6 +13,7 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_err; +use vortex_mask::Mask; use crate::Array; use crate::ArrayRef; @@ -451,4 +452,112 @@ impl StructArray { Self::try_new_with_dtype(children, new_fields, self.len, self.validity.clone()) } + + /// Push the struct-level validity into the children fields. + /// + /// This method takes the top-level validity of the struct array and applies it to each child field + /// using a mask operation. The resulting struct array will have the struct-level nulls propagated + /// down to the individual fields. + /// + /// # Parameters + /// + /// * `preserve_struct_validity` - If true, the new struct array retains the original struct-level + /// validity. If false, the new struct array has `Validity::AllValid` since all null information + /// is now contained within the individual fields. + /// + /// # Returns + /// + /// A new `StructArray` where each child field has been masked with the struct's validity. + /// + /// # Examples + /// + /// ``` + /// use vortex_array::arrays::StructArray; + /// use vortex_array::validity::Validity; + /// use vortex_array::IntoArray; + /// use vortex_buffer::buffer; + /// + /// // Create struct with top-level nulls + /// let struct_array = StructArray::try_new( + /// ["a", "b"].into(), + /// vec![ + /// buffer![1i32, 2i32, 3i32].into_array(), + /// buffer![10i32, 20i32, 30i32].into_array(), + /// ], + /// 3, + /// Validity::from_iter([true, false, true]), // row 1 is null + /// ).unwrap(); + /// + /// // Push validity into children, preserving struct validity + /// let pushed = struct_array.push_validity_into_children(true).unwrap(); + /// // pushed.fields()[0] now has nulls at position 1 + /// // pushed.fields()[1] now has nulls at position 1 + /// // pushed.validity still shows row 1 as null + /// + /// // Push validity into children, removing struct validity + /// let pushed_no_struct = struct_array.push_validity_into_children(false).unwrap(); + /// // pushed_no_struct.fields()[0] now has nulls at position 1 + /// // pushed_no_struct.fields()[1] now has nulls at position 1 + /// // pushed_no_struct.validity is AllValid + /// ``` + /// Push validity into children with default behavior (preserve_struct_validity = false). + /// + /// This is equivalent to calling `push_validity_into_children(false)`. + pub fn push_validity_into_children_default(&self) -> VortexResult { + self.push_validity_into_children(false) + } + + + pub fn push_validity_into_children(&self, preserve_struct_validity: bool) -> VortexResult { + use crate::compute::mask; + + // Get the struct-level validity mask + let struct_validity_mask = self.validity_mask(); + + // If the struct has no nulls, return a clone + if struct_validity_mask.all_true() { + return if preserve_struct_validity { + Ok(self.clone()) + } else { + // Remove struct validity if requested + Self::try_new( + self.names().clone(), + self.fields().clone(), + self.len(), + Validity::AllValid, + ) + }; + } + + // Apply the struct validity mask to each child field + // We want to set nulls where the struct is null (i.e., where struct_validity_mask is false) + // So we need to invert the mask: where struct is invalid, set child to invalid + let null_mask = struct_validity_mask.iter_bools(|iter| { + Mask::from_iter(iter.map(|valid| !valid)) // invert: valid->invalid, invalid->valid + }); + + let masked_fields: Vec = self + .fields() + .iter() + .map(|field| { + // Use the mask function to apply null positions to each field + mask(field.as_ref(), &null_mask) + }) + .collect::>>()?; + + // Determine the new struct validity (default to false = remove struct validity) + let new_struct_validity = if preserve_struct_validity { + self.validity.clone() + } else { + Validity::AllValid + }; + + // Construct the new struct array + Self::try_new( + self.names().clone(), + masked_fields, + self.len(), + new_struct_validity, + ) + } } diff --git a/vortex-array/src/arrays/struct_/tests.rs b/vortex-array/src/arrays/struct_/tests.rs index d768adff136..5e3575128e7 100644 --- a/vortex-array/src/arrays/struct_/tests.rs +++ b/vortex-array/src/arrays/struct_/tests.rs @@ -7,6 +7,7 @@ use vortex_dtype::FieldName; use vortex_dtype::FieldNames; use vortex_dtype::Nullability; use vortex_dtype::PType; +use vortex_scalar::Scalar; use crate::Array; use crate::IntoArray; @@ -150,3 +151,180 @@ fn test_uncompressed_size_in_bytes() { assert_eq!(canonical_size, 2); assert_eq!(uncompressed_size, Some(4000)); } + +#[test] +fn test_push_validity_into_children_preserve_struct() { + // Create struct with top-level nulls + // structArray : [a, b] + // fields: [1, 2, 3] (a), [10, 20, 30] (b) + // validity: [true, false, true] + // row 1 is null at struct level + let struct_array = StructArray::try_new( + ["a", "b"].into(), + vec![ + buffer![1i32, 2i32, 3i32].into_array(), + buffer![10i32, 20i32, 30i32].into_array(), + ], + 3, + Validity::from_iter([true, false, true]), // row 1 is null at struct level + ) + .unwrap(); + + // Push validity into children, preserving struct validity + let pushed = struct_array.push_validity_into_children(true).unwrap(); + + // Check that struct validity is preserved + assert_eq!(pushed.validity_mask(), struct_array.validity_mask()); + + // Check that children now have nulls where struct was null + let field_a = pushed.fields()[0].as_ref(); + let field_b = pushed.fields()[1].as_ref(); + + + assert!(field_a.is_valid(0)); + assert!(!field_a.is_valid(1)); // Should be null due to struct null + assert!(field_a.is_valid(2)); + + assert!(field_b.is_valid(0)); + assert!(!field_b.is_valid(1)); // Should be null due to struct null + assert!(field_b.is_valid(2)); + + + // Original values should be preserved where valid + assert_eq!(field_a.scalar_at(0), 1i32.into()); + assert_eq!(field_a.scalar_at(2), 3i32.into()); + assert_eq!(field_b.scalar_at(0), 10i32.into()); + assert_eq!(field_b.scalar_at(2), 30i32.into()); + + + // Verify pushed struct array values (preserve_struct_validity = true) + assert!(pushed.is_valid(0)); // Row 0 should be valid + assert!(!pushed.is_valid(1)); // Row 1 should be null (preserved) + assert!(pushed.is_valid(2)); // Row 2 should be valid + + // Row 0: {a: 1, b: 10} - should be valid struct with valid fields + let row0 = pushed.scalar_at(0); + assert!(row0.is_valid()); + + // Row 1: null - should be null struct (preserved from original) + let row1 = pushed.scalar_at(1); + assert!(!row1.is_valid()); + + // Row 2: {a: 3, b: 30} - should be valid struct with valid fields + let row2 = pushed.scalar_at(2); + assert!(row2.is_valid()); + +} + +#[test] +fn test_push_validity_into_children_remove_struct() { + + // Create struct with top-level nulls + let struct_array = StructArray::try_new( + ["a", "b"].into(), + vec![ + buffer![1i32, 2i32, 3i32].into_array(), + buffer![10i32, 20i32, 30i32].into_array(), + ], + 3, + Validity::from_iter([true, false, true]), // row 1 is null at struct level + ) + .unwrap(); + + + // Push validity into children, removing struct validity when default behavior is used (preserve_struct_validity = false) + let pushed = struct_array.push_validity_into_children_default().unwrap(); + + + // Check that struct validity is now AllValid + assert!(pushed.validity_mask().all_true()); + + // Check that children still have nulls where struct was null + let field_a = pushed.fields()[0].as_ref(); + let field_b = pushed.fields()[1].as_ref(); + + + assert!(field_a.is_valid(0)); + assert!(!field_a.is_valid(1)); // Should be null due to struct null + assert!(field_a.is_valid(2)); + + assert!(field_b.is_valid(0)); + assert!(!field_b.is_valid(1)); // Should be null due to struct null + assert!(field_b.is_valid(2)); + + + // Original values should be preserved where valid + assert_eq!(field_a.scalar_at(0), 1i32.into()); + assert_eq!(field_a.scalar_at(2), 3i32.into()); + assert_eq!(field_b.scalar_at(0), 10i32.into()); + assert_eq!(field_b.scalar_at(2), 30i32.into()); + + // Verify null values using proper null scalar comparison + use vortex_dtype::{DType, Nullability, PType}; + let null_i32_scalar = Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable)); + assert_eq!(field_a.scalar_at(1), null_i32_scalar); + assert_eq!(field_b.scalar_at(1), null_i32_scalar); + + // Alternative: check if the scalar is null + assert!(!field_a.scalar_at(1).is_valid()); + assert!(!field_b.scalar_at(1).is_valid()); + + // Verify pushed struct array values (preserve_struct_validity = false) + assert!(pushed.is_valid(0)); // Row 0 should be valid + assert!(pushed.is_valid(1)); // Row 1 should be valid (validity removed) + assert!(pushed.is_valid(2)); // Row 2 should be valid + + // Row 0: {a: 1, b: 10} - should be valid struct with valid fields + let row0 = pushed.scalar_at(0); + assert!(row0.is_valid()); + + // Row 1: {a: null, b: null} - should be valid struct but with null fields + let row1 = pushed.scalar_at(1); + assert!(row1.is_valid()); // Struct is valid, but fields are null + + // Row 2: {a: 3, b: 30} - should be valid struct with valid fields + let row2 = pushed.scalar_at(2); + assert!(row2.is_valid()); + +} + +#[test] +fn test_push_validity_into_children_no_nulls() { + // Create struct without any nulls + let struct_array = StructArray::try_new( + ["a", "b"].into(), + vec![ + buffer![1i32, 2i32, 3i32].into_array(), + buffer![10i32, 20i32, 30i32].into_array(), + ], + 3, + Validity::AllValid, + ) + .unwrap(); + + + // Push validity into children (should be no-op when preserve=true) + let pushed_preserve = struct_array.push_validity_into_children(true).unwrap(); + assert_eq!(pushed_preserve.validity_mask(), struct_array.validity_mask()); + + // Push validity into children (should change validity to AllValid when preserve=false) + let pushed_remove = struct_array.push_validity_into_children(false).unwrap(); + assert!(pushed_remove.validity_mask().all_true()); + + // Fields should remain unchanged + for i in 0..struct_array.fields().len() { + assert_eq!( + pushed_preserve.fields()[i].scalar_at(0), + struct_array.fields()[i].scalar_at(0) + ); + assert_eq!( + pushed_preserve.fields()[i].scalar_at(1), + struct_array.fields()[i].scalar_at(1) + ); + assert_eq!( + pushed_preserve.fields()[i].scalar_at(2), + struct_array.fields()[i].scalar_at(2) + ); + } + +} From cefd1e156f04b072a835015508dd4cd25b6a1480 Mon Sep 17 00:00:00 2001 From: amorynan Date: Tue, 30 Dec 2025 00:02:15 +0800 Subject: [PATCH 2/2] fix comments --- vortex-array/src/arrays/struct_/array.rs | 24 +++++++++--------------- vortex-array/src/arrays/struct_/tests.rs | 4 ++-- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index bf91b52101f..ae82e00a226 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -501,25 +501,19 @@ impl StructArray { /// // pushed_no_struct.validity is AllValid /// ``` /// Push validity into children with default behavior (preserve_struct_validity = false). - /// - /// This is equivalent to calling `push_validity_into_children(false)`. - pub fn push_validity_into_children_default(&self) -> VortexResult { - self.push_validity_into_children(false) - } - pub fn push_validity_into_children(&self, preserve_struct_validity: bool) -> VortexResult { use crate::compute::mask; - // Get the struct-level validity mask + // Get the struct-level validity mask. let struct_validity_mask = self.validity_mask(); - // If the struct has no nulls, return a clone + // If the struct has no nulls, return a clone. if struct_validity_mask.all_true() { return if preserve_struct_validity { Ok(self.clone()) } else { - // Remove struct validity if requested + // Remove struct validity if requested. Self::try_new( self.names().clone(), self.fields().clone(), @@ -529,9 +523,9 @@ impl StructArray { }; } - // Apply the struct validity mask to each child field - // We want to set nulls where the struct is null (i.e., where struct_validity_mask is false) - // So we need to invert the mask: where struct is invalid, set child to invalid + // Apply the struct validity mask to each child field. + // We want to set nulls where the struct is null (i.e., where struct_validity_mask is false). + // So we need to invert the mask: where struct is invalid, set child to invalid. let null_mask = struct_validity_mask.iter_bools(|iter| { Mask::from_iter(iter.map(|valid| !valid)) // invert: valid->invalid, invalid->valid }); @@ -540,19 +534,19 @@ impl StructArray { .fields() .iter() .map(|field| { - // Use the mask function to apply null positions to each field + // Use the mask function to apply null positions to each field. mask(field.as_ref(), &null_mask) }) .collect::>>()?; - // Determine the new struct validity (default to false = remove struct validity) + // Determine the new struct validity (default to false = remove struct validity). let new_struct_validity = if preserve_struct_validity { self.validity.clone() } else { Validity::AllValid }; - // Construct the new struct array + // Construct the new struct array. Self::try_new( self.names().clone(), masked_fields, diff --git a/vortex-array/src/arrays/struct_/tests.rs b/vortex-array/src/arrays/struct_/tests.rs index 5e3575128e7..cc141c1c51c 100644 --- a/vortex-array/src/arrays/struct_/tests.rs +++ b/vortex-array/src/arrays/struct_/tests.rs @@ -232,8 +232,8 @@ fn test_push_validity_into_children_remove_struct() { .unwrap(); - // Push validity into children, removing struct validity when default behavior is used (preserve_struct_validity = false) - let pushed = struct_array.push_validity_into_children_default().unwrap(); + // Push validity into children, removing struct validity (preserve_struct_validity = false) + let pushed = struct_array.push_validity_into_children(false).unwrap(); // Check that struct validity is now AllValid