Skip to content
12 changes: 10 additions & 2 deletions fuzz/src/array/compare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,16 @@ pub fn compare_canonical_array(array: &dyn Array, value: &Scalar, operator: Oper
)
.into_array()
}
d @ (DType::Null | DType::Extension(_)) => {
unreachable!("DType {d} not supported for fuzzing")
DType::Null => {
unreachable!("DType null not supported for fuzzing")
}
DType::Extension(..) => {
// Extension arrays delegate comparison to their storage type
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: periods at the end of sentences (this is true everywhere)

compare_canonical_array(
array.to_extension().storage(),
&value.as_extension().storage(),
operator,
)
}
}
}
Expand Down
33 changes: 28 additions & 5 deletions fuzz/src/array/fill_null.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::sync::Arc;

use vortex_array::ArrayRef;
use vortex_array::Canonical;
use vortex_array::IntoArray;
use vortex_array::ToCanonical;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::ConstantArray;
use vortex_array::arrays::DecimalArray;
use vortex_array::arrays::ExtensionArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::VarBinViewArray;
use vortex_array::compute::fill_null;
Expand All @@ -16,13 +19,13 @@ use vortex_array::vtable::ValidityHelper;
use vortex_buffer::Buffer;
use vortex_buffer::BufferMut;
use vortex_dtype::DType;
use vortex_dtype::ExtDType;
use vortex_dtype::Nullability;
use vortex_dtype::match_each_decimal_value_type;
use vortex_dtype::match_each_native_ptype;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_scalar::Scalar;

/// Apply fill_null on the canonical form of the array to get a consistent baseline.
/// This implementation manually fills null values for each canonical type
/// without using the fill_null method, to serve as an independent baseline for testing.
Expand All @@ -40,13 +43,33 @@ pub fn fill_null_canonical_array(
Canonical::VarBinView(array) => {
fill_varbinview_array(&array, fill_value, result_nullability)
}
Canonical::Struct(_)
| Canonical::List(_)
| Canonical::FixedSizeList(_)
| Canonical::Extension(_) => fill_null(canonical.as_ref(), fill_value)?,
Canonical::Extension(array) => fill_extension_array(&array, fill_value),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Move this match arm to the bottom of the match statement

Canonical::Struct(_) | Canonical::List(_) | Canonical::FixedSizeList(_) => {
fill_null(canonical.as_ref(), fill_value)?
}
})
}

fn fill_extension_array(array: &ExtensionArray, fill_value: &Scalar) -> ArrayRef {
let filled_storage = fill_null_canonical_array(
array.storage().to_canonical(),
&fill_value.as_extension().storage(),
)
.vortex_expect("fill_null should succeed in canonical form");

if filled_storage.dtype().nullability() == array.ext_dtype().storage_dtype().nullability() {
ExtensionArray::new(array.ext_dtype().clone(), filled_storage).into_array()
} else {
let new_ext_dtype = Arc::new(ExtDType::new(
array.ext_dtype().id().clone(),
Arc::new(filled_storage.dtype().clone()),
array.ext_dtype().metadata().cloned(),
));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A comment here would be nice to explain why we need to do this


ExtensionArray::new(new_ext_dtype, filled_storage).into_array()
}
}

fn fill_bool_array(
array: &BoolArray,
fill_value: &Scalar,
Expand Down
23 changes: 21 additions & 2 deletions fuzz/src/array/filter.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::sync::Arc;

use vortex_array::Array;
use vortex_array::ArrayRef;
use vortex_array::IntoArray;
use vortex_array::ToCanonical;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::DecimalArray;
use vortex_array::arrays::ExtensionArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::StructArray;
use vortex_array::arrays::VarBinViewArray;
use vortex_array::validity::Validity;
use vortex_buffer::BitBuffer;
use vortex_buffer::Buffer;
use vortex_dtype::DType;
use vortex_dtype::ExtDType;
use vortex_dtype::match_each_decimal_value_type;
use vortex_dtype::match_each_native_ptype;
use vortex_error::VortexResult;
Expand Down Expand Up @@ -115,8 +119,23 @@ pub fn filter_canonical_array(array: &dyn Array, filter: &[bool]) -> VortexResul
}
take_canonical_array_non_nullable_indices(array, indices.as_slice())
}
d @ (DType::Null | DType::Extension(_)) => {
unreachable!("DType {d} not supported for fuzzing")
DType::Extension(ext_dtype) => {
// Extension arrays delegate filter to their storage type
let filtered_storage = filter_canonical_array(array.to_extension().storage(), filter)?;

if filtered_storage.dtype().nullability() == ext_dtype.storage_dtype().nullability() {
Ok(ExtensionArray::new(ext_dtype.clone(), filtered_storage).into_array())
} else {
let new_ext_dtype = Arc::new(ExtDType::new(
ext_dtype.id().clone(),
Arc::new(filtered_storage.dtype().clone()),
ext_dtype.metadata().cloned(),
));
Ok(ExtensionArray::new(new_ext_dtype, filtered_storage).into_array())
}
}
DType::Null => {
unreachable!("Cannot search sorted on Null array")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not true

}
}
}
12 changes: 10 additions & 2 deletions fuzz/src/array/search_sorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,16 @@ pub fn search_sorted_canonical_array(
let scalar_vals = (0..array.len()).map(|i| array.scalar_at(i)).collect_vec();
Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype())?, side))
}
d @ (DType::Null | DType::Extension(_)) => {
unreachable!("DType {d} not supported for fuzzing")
DType::Extension(..) => {
// Extension arrays delegate search to their storage type
search_sorted_canonical_array(
array.to_extension().storage(),
&scalar.as_extension().storage(),
side,
)
}
DType::Null => {
unreachable!("Cannot search sorted on Null array")
}
}
}
23 changes: 21 additions & 2 deletions fuzz/src/array/slice.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
use std::sync::Arc;

use vortex_array::Array;
use vortex_array::ArrayRef;
Expand All @@ -8,13 +9,15 @@ use vortex_array::ToCanonical;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::DecimalArray;
use vortex_array::arrays::ExtensionArray;
use vortex_array::arrays::FixedSizeListArray;
use vortex_array::arrays::ListViewArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::StructArray;
use vortex_array::arrays::VarBinViewArray;
use vortex_array::validity::Validity;
use vortex_dtype::DType;
use vortex_dtype::ExtDType;
use vortex_dtype::match_each_decimal_value_type;
use vortex_dtype::match_each_native_ptype;
use vortex_error::VortexResult;
Expand Down Expand Up @@ -113,8 +116,24 @@ pub fn slice_canonical_array(
.to_array(),
)
}
d @ (DType::Null | DType::Extension(_)) => {
unreachable!("DType {d} not supported for fuzzing")
DType::Extension(ext_dtype) => {
// Extension arrays delegate slicing to their storage type
let sliced_storage =
slice_canonical_array(array.to_extension().storage(), start, stop)?;

if sliced_storage.dtype().nullability() == ext_dtype.storage_dtype().nullability() {
Ok(ExtensionArray::new(ext_dtype.clone(), sliced_storage).into_array())
} else {
let new_ext_dtype = Arc::new(ExtDType::new(
ext_dtype.id().clone(),
Arc::new(sliced_storage.dtype().clone()),
ext_dtype.metadata().cloned(),
));
Ok(ExtensionArray::new(new_ext_dtype, sliced_storage).into_array())
}
Comment on lines 123 to 136
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually this seems to be duplicated in a few files, maybe we can extract this out into a helper method and put it in fuzz/src/array/mod.rs?

}
DType::Null => {
unreachable!("Cannot search sorted on Null array")
}
}
}
23 changes: 21 additions & 2 deletions fuzz/src/array/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::cmp::Ordering;
use std::sync::Arc;

use vortex_array::Array;
use vortex_array::ArrayRef;
Expand All @@ -10,9 +11,11 @@ use vortex_array::ToCanonical;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::DecimalArray;
use vortex_array::arrays::ExtensionArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::VarBinViewArray;
use vortex_dtype::DType;
use vortex_dtype::ExtDType;
use vortex_dtype::NativePType;
use vortex_dtype::match_each_decimal_value_type;
use vortex_dtype::match_each_native_ptype;
Expand Down Expand Up @@ -80,8 +83,24 @@ pub fn sort_canonical_array(array: &dyn Array) -> VortexResult<ArrayRef> {
});
take_canonical_array_non_nullable_indices(array, &sort_indices)
}
d @ (DType::Null | DType::Extension(_)) => {
unreachable!("DType {d} not supported for fuzzing")
DType::Null => {
// Null arrays don't need sorting - all elements are null
Ok(array.to_array())
}
DType::Extension(ext_dtype) => {
// Extension arrays delegate sorting to their storage type
let sorted_storage = sort_canonical_array(array.to_extension().storage())?;

if sorted_storage.dtype().nullability() == ext_dtype.storage_dtype().nullability() {
Ok(ExtensionArray::new(ext_dtype.clone(), sorted_storage).into_array())
} else {
let new_ext_dtype = Arc::new(ExtDType::new(
ext_dtype.id().clone(),
Arc::new(sorted_storage.dtype().clone()),
ext_dtype.metadata().cloned(),
));
Ok(ExtensionArray::new(new_ext_dtype, sorted_storage).into_array())
}
}
}
}
Expand Down
24 changes: 21 additions & 3 deletions fuzz/src/array/take.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::sync::Arc;

use vortex_array::Array;
use vortex_array::ArrayRef;
use vortex_array::IntoArray;
use vortex_array::ToCanonical;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::DecimalArray;
use vortex_array::arrays::ExtensionArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::StructArray;
use vortex_array::arrays::VarBinViewArray;
Expand All @@ -16,14 +19,14 @@ use vortex_array::validity::Validity;
use vortex_buffer::Buffer;
use vortex_dtype::DType;
use vortex_dtype::DecimalDType;
use vortex_dtype::ExtDType;
use vortex_dtype::NativeDecimalType;
use vortex_dtype::NativePType;
use vortex_dtype::Nullability;
use vortex_dtype::match_each_decimal_value_type;
use vortex_dtype::match_each_native_ptype;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;

pub fn take_canonical_array_non_nullable_indices(
array: &dyn Array,
indices: &[usize],
Expand Down Expand Up @@ -141,8 +144,23 @@ pub fn take_canonical_array(
}
Ok(builder.finish())
}
d @ (DType::Null | DType::Extension(_)) => {
unreachable!("DType {d} not supported for fuzzing")
DType::Extension(ext_dtype) => {
// Extension arrays delegate take to their storage type
let taken_storage = take_canonical_array(array.to_extension().storage(), indices)?;

if taken_storage.dtype().nullability() == ext_dtype.storage_dtype().nullability() {
Ok(ExtensionArray::new(ext_dtype.clone(), taken_storage).into_array())
} else {
let new_ext_dtype = Arc::new(ExtDType::new(
ext_dtype.id().clone(),
Arc::new(taken_storage.dtype().clone()),
ext_dtype.metadata().cloned(),
));
Ok(ExtensionArray::new(new_ext_dtype, taken_storage).into_array())
}
}
DType::Null => {
unreachable!("Null type not supported for fuzzing")
}
}
}
Expand Down
37 changes: 34 additions & 3 deletions vortex-array/src/arrays/arbitrary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use arbitrary::Unstructured;
use vortex_buffer::BitBuffer;
use vortex_buffer::Buffer;
use vortex_dtype::DType;
use vortex_dtype::ExtDType;
use vortex_dtype::IntegerPType;
use vortex_dtype::NativePType;
use vortex_dtype::Nullability;
Expand Down Expand Up @@ -164,12 +165,42 @@ fn random_array_chunk(
DType::FixedSizeList(elem_dtype, list_size, null) => {
random_fixed_size_list(u, elem_dtype, *list_size, *null, chunk_len)
}
DType::Extension(..) => {
todo!("Extension arrays are not implemented")
}
DType::Extension(ext_dtype) => random_extension(u, ext_dtype, chunk_len),
}
}

/// Creates a random extension array.
///
/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
fn random_extension(
u: &mut Unstructured,
ext_dtype: &Arc<ExtDType>,
chunk_len: Option<usize>,
) -> Result<ArrayRef> {
use crate::builders::ExtensionBuilder;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you put this import at the top?


// Determine array length
let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);

// Create builder for the extension array
let mut builder = ExtensionBuilder::with_capacity(ext_dtype.clone(), array_length);

// Generate random values
for _ in 0..array_length {
// Wrap in extension scalar using Scalar::extension()
let ext_scalar = Scalar::extension(
ext_dtype.clone(),
random_scalar(u, ext_dtype.storage_dtype())?,
);

// Append to builder
builder
.append_scalar(&ext_scalar)
.vortex_expect("can append extension scalar");
}

Ok(builder.finish())
}
/// Creates a random fixed-size list array.
///
/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
Expand Down
Loading
Loading