Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ harness = false
name = "chunk_array_builder"
harness = false

[[bench]]
name = "chunked_fsl_canonicalize"
harness = false

[[bench]]
name = "scalar_at_struct"
harness = false
Expand Down
67 changes: 67 additions & 0 deletions vortex-array/benches/chunked_fsl_canonicalize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Benchmarks canonicalizing a [`ChunkedArray`] of [`FixedSizeListArray`] chunks.
//!
//! Parameterized over:
//! - Number of chunks
//! - Fixed size list length (elements per list)

#![expect(clippy::cast_possible_truncation)]
#![expect(clippy::unwrap_used)]

use divan::Bencher;
use vortex_array::Canonical;
use vortex_array::IntoArray;
use vortex_array::LEGACY_SESSION;
use vortex_array::VortexSessionExecute;
use vortex_array::arrays::ChunkedArray;
use vortex_array::arrays::FixedSizeListArray;
use vortex_array::validity::Validity;
use vortex_buffer::Buffer;

/// Benchmark binary entry point.
///
/// The bench target is declared with `harness = false`, so this function simply hands control
/// to `divan::main`, which is expected to run the `#[divan::bench]` functions in this file.
fn main() {
divan::main();
}

/// Number of lists in each chunk.
///
/// Passed as the list count when building every chunk of the benchmarked array.
const LISTS_PER_CHUNK: usize = 1_000;

/// Number of chunks in the source array.
///
/// Supplied to the benchmark as its runtime `args`, one benchmark case per value.
const NUM_CHUNKS: &[usize] = &[2, 8, 32];

/// Fixed size list lengths (elements per list).
///
/// Supplied to the benchmark as its `consts`, i.e. the values of the `LIST_SIZE` const generic.
const LIST_SIZES: &[usize] = &[16, 256, 1024];

/// Builds a non-nullable `FixedSizeListArray` of `num_lists` lists, each `list_size` elements
/// wide.
///
/// The element child is an `i64` buffer holding the ascending sequence
/// `0..list_size * num_lists`.
fn create_fsl(list_size: usize, num_lists: usize) -> FixedSizeListArray {
    let element_count = list_size * num_lists;
    let values = (0..element_count as i64).collect::<Buffer<i64>>();
    let width = list_size as u32;

    FixedSizeListArray::new(values.into_array(), width, Validity::NonNullable, num_lists)
}

/// Assembles a `ChunkedArray` out of `num_chunks` identical `FixedSizeListArray` chunks.
///
/// A single chunk of `LISTS_PER_CHUNK` lists is built once and cloned into every chunk slot;
/// the chunked array takes its dtype from that chunk.
fn create_chunked_fsl(list_size: usize, num_chunks: usize) -> ChunkedArray {
    let template = create_fsl(list_size, LISTS_PER_CHUNK);
    let chunk_dtype = template.dtype().clone();

    let repeated = std::iter::repeat_with(|| template.clone().into_array())
        .take(num_chunks)
        .collect::<Vec<_>>();

    ChunkedArray::try_new(repeated, chunk_dtype).unwrap()
}

/// Benchmarks executing a chunked fixed-size-list array into its `Canonical` form.
///
/// `LIST_SIZE` is the number of elements per list and `num_chunks` the number of chunks in the
/// source array. The source array is built once up front; each benchmark input pairs a
/// reference to it with a freshly created execution context, and the benchmarked closure
/// clones the array before executing it.
#[divan::bench(args = NUM_CHUNKS, consts = LIST_SIZES)]
fn canonicalize<const LIST_SIZE: usize>(bencher: Bencher, num_chunks: usize) {
    let source = create_chunked_fsl(LIST_SIZE, num_chunks).into_array();

    bencher
        .with_inputs(|| (&source, LEGACY_SESSION.create_execution_ctx()))
        .bench_refs(|(input, ctx)| input.clone().execute::<Canonical>(ctx).unwrap());
}
86 changes: 86 additions & 0 deletions vortex-array/src/arrays/chunked/vtable/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ use crate::IntoArray;
use crate::array::ArrayView;
use crate::arrays::Chunked;
use crate::arrays::ChunkedArray;
use crate::arrays::FixedSizeListArray;
use crate::arrays::ListViewArray;
use crate::arrays::PrimitiveArray;
use crate::arrays::StructArray;
use crate::arrays::VariantArray;
use crate::arrays::chunked::ChunkedArrayExt;
use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
use crate::arrays::listview::ListViewArrayExt;
use crate::arrays::listview::ListViewRebuildMode;
use crate::arrays::variant::VariantArrayExt;
Expand Down Expand Up @@ -58,6 +60,15 @@ pub(super) fn _canonicalize(
elem_dtype,
ctx,
)?),
DType::FixedSizeList(elem_dtype, list_size, _) => {
Canonical::FixedSizeList(swizzle_fixed_size_list_chunks(
&owned_chunks,
array.array().validity()?,
elem_dtype,
*list_size,
ctx,
)?)
}
DType::Variant(_) => Canonical::Variant(pack_variant_chunks(owned_chunks, ctx)?),
_ => {
let mut builder = builder_with_capacity_in(ctx.allocator(), array.dtype(), array.len());
Expand Down Expand Up @@ -240,6 +251,37 @@ fn swizzle_list_chunks(
})
}

/// Packs [`FixedSizeListArray`]s together into a single [`FixedSizeListArray`] whose `elements`
/// child is a [`ChunkedArray`].
///
/// Every chunk shares the same `list_size`, and each chunk's `elements` child is exactly
/// `list_size * chunk.len()` long and starts at the first list, so we can reuse the chunks'
/// `elements` children directly as the chunks of a combined `elements` array without copying.
///
/// # Arguments
///
/// - `chunks`: the chunks to pack, in order. The caller guarantees there are at least 2.
/// - `validity`: validity of the combined array, covering all lists across all chunks.
/// - `elem_dtype`: dtype of the list elements, used for the combined `elements` child.
/// - `list_size`: number of elements in every list.
/// - `ctx`: execution context used to execute each chunk into a [`FixedSizeListArray`].
///
/// # Errors
///
/// Returns an error if any chunk fails to execute into a [`FixedSizeListArray`], or if building
/// the combined `elements` array or the resulting [`FixedSizeListArray`] fails.
fn swizzle_fixed_size_list_chunks(
    chunks: &[ArrayRef],
    validity: Validity,
    elem_dtype: &DType,
    list_size: u32,
    ctx: &mut ExecutionCtx,
) -> VortexResult<FixedSizeListArray> {
    let len: usize = chunks.iter().map(|c| c.len()).sum();

    // A canonical `FixedSizeListArray` keeps its `elements` child trimmed to exactly
    // `list_size * chunk.len()` starting at the first list, so the children concatenate
    // cleanly into the combined `elements` array. Chunks are executed in order and the first
    // failure short-circuits the collection.
    let element_chunks = chunks
        .iter()
        .map(|chunk| {
            chunk
                .clone()
                .execute::<FixedSizeListArray>(ctx)
                .map(|chunk_array| chunk_array.elements().clone())
        })
        .collect::<VortexResult<Vec<_>>>()?;

    let chunked_elements = ChunkedArray::try_new(element_chunks, elem_dtype.clone())?.into_array();

    FixedSizeListArray::try_new(chunked_elements, list_size, validity, len)
}

#[cfg(test)]
mod tests {
use std::sync::Arc;
Expand All @@ -263,6 +305,7 @@ mod tests {
use crate::accessor::ArrayAccessor;
use crate::arrays::ChunkedArray;
use crate::arrays::ConstantArray;
use crate::arrays::FixedSizeListArray;
use crate::arrays::ListArray;
use crate::arrays::PrimitiveArray;
use crate::arrays::StructArray;
Expand Down Expand Up @@ -551,6 +594,49 @@ mod tests {
);
}

/// Canonicalizing a chunked array of fixed-size lists must yield a single `FixedSizeListArray`
/// whose lists are the chunks' lists in order.
#[test]
fn pack_fixed_size_lists() -> VortexResult<()> {
    let f1 = FixedSizeListArray::try_new(
        buffer![1, 2, 3, 4, 5, 6].into_array(),
        2,
        Validity::NonNullable,
        3,
    )?;
    let f2 = FixedSizeListArray::try_new(
        buffer![7, 8, 9, 10].into_array(),
        2,
        Validity::NonNullable,
        2,
    )?;
    let dtype = f1.dtype().clone();

    let chunked =
        ChunkedArray::try_new(vec![f1.into_array(), f2.into_array()], dtype)?.into_array();

    let canonical = chunked
        .clone()
        .execute::<Canonical>(&mut LEGACY_SESSION.create_execution_ctx())?;
    let fsl = match canonical {
        Canonical::FixedSizeList(fsl) => fsl,
        other => vortex_bail!("expected FixedSizeList canonical array, got {other:?}"),
    };

    assert_eq!(fsl.len(), 5);
    let expected = FixedSizeListArray::try_new(
        buffer![1, 2, 3, 4, 5, 6, 7, 8, 9, 10].into_array(),
        2,
        Validity::NonNullable,
        5,
    )?;
    for idx in 0..5 {
        let want = expected.execute_scalar(idx, &mut LEGACY_SESSION.create_execution_ctx())?;
        // Check the canonicalized output itself, not just the chunked source array.
        assert_eq!(
            fsl.execute_scalar(idx, &mut LEGACY_SESSION.create_execution_ctx())?,
            want,
        );
        assert_eq!(
            chunked.execute_scalar(idx, &mut LEGACY_SESSION.create_execution_ctx())?,
            want,
        );
    }
    Ok(())
}

#[test]
fn list_canonicalize_uses_memory_session_allocator() {
let allocations = Arc::new(AtomicUsize::new(0));
Expand Down
5 changes: 3 additions & 2 deletions vortex-array/src/arrays/chunked/vtable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,9 @@ impl VTable for Chunked {

fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
match array.dtype() {
// Struct, List, and Variant need child swizzling that the builder path cannot express.
DType::Struct(..) | DType::List(..) | DType::Variant(..) => {
// Struct, List, FixedSizeList, and Variant need child swizzling that the builder path
// cannot express.
DType::Struct(..) | DType::List(..) | DType::FixedSizeList(..) | DType::Variant(..) => {
// TODO(joe)[#7674]: iterative execution here too
Ok(ExecutionResult::done(_canonicalize(array.as_view(), ctx)?))
}
Expand Down
Loading