diff --git a/Cargo.lock b/Cargo.lock index 23ca027d592..86cec8f1247 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11043,6 +11043,7 @@ dependencies = [ "arrow-schema 58.2.0", "bytes", "codspeed-divan-compat", + "inventory", "mimalloc", "rand 0.10.1", "rstest", @@ -11052,6 +11053,7 @@ dependencies = [ "vortex-error", "vortex-mask", "vortex-session", + "vortex-utils", ] [[package]] diff --git a/vortex-row/Cargo.toml b/vortex-row/Cargo.toml index 50d6547474a..947b1df47da 100644 --- a/vortex-row/Cargo.toml +++ b/vortex-row/Cargo.toml @@ -18,12 +18,14 @@ workspace = true [dependencies] bytes = { workspace = true } +inventory = { workspace = true } smallvec = { workspace = true } vortex-array = { workspace = true } vortex-buffer = { workspace = true } vortex-error = { workspace = true } vortex-mask = { workspace = true } vortex-session = { workspace = true } +vortex-utils = { workspace = true, features = ["dyn-traits"] } [dev-dependencies] arrow-array = { workspace = true } diff --git a/vortex-row/public-api.lock b/vortex-row/public-api.lock index a7221cd91ee..bded46f3c30 100644 --- a/vortex-row/public-api.lock +++ b/vortex-row/public-api.lock @@ -240,6 +240,24 @@ impl core::marker::StructuralPartialEq for vortex_row::options::SortField pub const vortex_row::options::FIELDS_INLINE: usize +pub mod vortex_row::registry + +pub struct vortex_row::registry::RowEncodeRegistration + +pub vortex_row::registry::RowEncodeRegistration::encode: vortex_row::registry::DynEncodeFn + +pub vortex_row::registry::RowEncodeRegistration::id: fn() -> vortex_array::array::ArrayId + +pub vortex_row::registry::RowEncodeRegistration::size: vortex_row::registry::DynSizeFn + +impl inventory::Collect for vortex_row::registry::RowEncodeRegistration + +pub fn vortex_row::registry::lookup(&vortex_array::array::ArrayId) -> core::option::Option<(vortex_row::registry::DynSizeFn, vortex_row::registry::DynEncodeFn)> + +pub type vortex_row::registry::DynEncodeFn = fn(&vortex_array::array::erased::ArrayRef, 
vortex_row::options::SortField, &[u32], &mut [u32], &mut [u8], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> + +pub type vortex_row::registry::DynSizeFn = fn(&vortex_array::array::erased::ArrayRef, vortex_row::options::SortField, &mut [u32], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> + pub mod vortex_row::size pub struct vortex_row::size::RowSize @@ -356,6 +374,16 @@ pub fn vortex_row::options::RowEncodeOptions::hash<__H: core::hash::Hasher>(&sel impl core::marker::StructuralPartialEq for vortex_row::options::RowEncodeOptions +pub struct vortex_row::RowEncodeRegistration + +pub vortex_row::RowEncodeRegistration::encode: vortex_row::registry::DynEncodeFn + +pub vortex_row::RowEncodeRegistration::id: fn() -> vortex_array::array::ArrayId + +pub vortex_row::RowEncodeRegistration::size: vortex_row::registry::DynSizeFn + +impl inventory::Collect for vortex_row::registry::RowEncodeRegistration + pub struct vortex_row::RowSize impl core::clone::Clone for vortex_row::size::RowSize diff --git a/vortex-row/src/encode.rs b/vortex-row/src/encode.rs index 7510b78bc9f..82bec7a0f47 100644 --- a/vortex-row/src/encode.rs +++ b/vortex-row/src/encode.rs @@ -48,6 +48,7 @@ use crate::options::RowEncodeOptions; use crate::options::SortField; use crate::options::deserialize_row_encode_options; use crate::options::serialize_row_encode_options; +use crate::registry; use crate::size::ColKind; use crate::size::compute_sizes; @@ -490,6 +491,11 @@ pub fn dispatch_encode( { return Ok(()); } + if let Some((_, encode_fn)) = registry::lookup(&col.encoding_id()) + && encode_fn(col, field, offsets, cursors, out, ctx)?.is_some() + { + return Ok(()); + } let canonical = col.clone().execute::(ctx)?; codec::field_encode(&canonical, field, offsets, cursors, out, ctx) } diff --git a/vortex-row/src/lib.rs b/vortex-row/src/lib.rs index 2896ae05acf..bd583bf77ad 100644 --- a/vortex-row/src/lib.rs +++ b/vortex-row/src/lib.rs @@ -30,6 +30,7 @@ pub mod 
convert;
 pub mod encode;
 mod kernels;
 pub mod options;
+pub mod registry;
 pub mod size;
 
 #[cfg(test)]
@@ -41,6 +42,7 @@ pub use encode::RowEncode;
 pub use encode::RowEncodeKernel;
 pub use options::RowEncodeOptions;
 pub use options::SortField;
+pub use registry::RowEncodeRegistration;
 pub use size::RowSize;
 pub use size::RowSizeKernel;
 use vortex_array::scalar_fn::session::ScalarFnSessionExt;
diff --git a/vortex-row/src/registry.rs b/vortex-row/src/registry.rs
new file mode 100644
index 00000000000..d806d090e0a
--- /dev/null
+++ b/vortex-row/src/registry.rs
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Registry for per-encoding row-encode fast paths from downstream crates.
+//!
+//! Encodings that live outside `vortex-array` (such as `RunEnd` in `encodings/runend`) cannot
+//! be directly downcast from inside the variadic [`RowSize`] / [`RowEncode`] dispatch loops.
+//! Instead, they submit a [`RowEncodeRegistration`] via the `inventory` crate, and the
+//! dispatch loop looks them up by [`ArrayId`].
+//!
+//! [`RowSize`]: super::size::RowSize
+//! [`RowEncode`]: super::encode::RowEncode
+
+use std::sync::OnceLock;
+
+use vortex_array::ArrayId;
+use vortex_array::ArrayRef;
+use vortex_array::ExecutionCtx;
+use vortex_error::VortexResult;
+use vortex_utils::aliases::hash_map::HashMap;
+
+use crate::options::SortField;
+
+/// Function pointer signature for an encoding's per-row size contribution.
+///
+/// Returning `Ok(None)` declines the column: `dispatch_size` then falls through to its
+/// generic path, which executes the column and calls `codec::field_size`.
+pub type DynSizeFn =
+    fn(&ArrayRef, SortField, &mut [u32], &mut ExecutionCtx) -> VortexResult<Option<()>>;
+
+/// Function pointer signature for an encoding's per-row byte encoding.
+///
+/// Returning `Ok(None)` declines the column: `dispatch_encode` then falls through to its
+/// generic path, which executes the column and calls `codec::field_encode`.
+pub type DynEncodeFn = fn(
+    &ArrayRef,
+    SortField,
+    &[u32],
+    &mut [u32],
+    &mut [u8],
+    &mut ExecutionCtx,
+) -> VortexResult<Option<()>>;
+
+/// A registration submitted by an encoding crate to plug into the row encoder.
+///
+/// Because [`ArrayId`] requires runtime string interning, the encoding id is passed as a
+/// function pointer that is called once at registry initialization time.
+pub struct RowEncodeRegistration {
+    /// Returns the [`ArrayId`] of the encoding this registration applies to.
+    pub id: fn() -> ArrayId,
+    /// Per-row size contribution function.
+    pub size: DynSizeFn,
+    /// Per-row encoding function.
+    pub encode: DynEncodeFn,
+}
+
+inventory::collect!(RowEncodeRegistration);
+
+/// Look up the `(size, encode)` function pair registered for the given encoding id.
+///
+/// The table is built on the first call from every [`RowEncodeRegistration`] collected by
+/// `inventory` and is reused by later calls. Returns `None` when `id` has no registration.
+/// If several registrations report the same id, only one of them is kept.
+pub fn lookup(id: &ArrayId) -> Option<(DynSizeFn, DynEncodeFn)> {
+    static MAP: OnceLock<HashMap<ArrayId, (DynSizeFn, DynEncodeFn)>> = OnceLock::new();
+    let map = MAP.get_or_init(|| {
+        inventory::iter::<RowEncodeRegistration>
+            .into_iter()
+            .map(|r| ((r.id)(), (r.size, r.encode)))
+            .collect()
+    });
+    map.get(id).copied()
+}
diff --git a/vortex-row/src/size.rs b/vortex-row/src/size.rs
index 8fb1bdbcf08..0b54f19de4e 100644
--- a/vortex-row/src/size.rs
+++ b/vortex-row/src/size.rs
@@ -42,6 +42,7 @@ use crate::options::RowEncodeOptions;
 use crate::options::SortField;
 use crate::options::deserialize_row_encode_options;
 use crate::options::serialize_row_encode_options;
+use crate::registry;
 
 /// Classification of a single input column for the size pass.
 ///
@@ -283,6 +284,11 @@ pub fn dispatch_size(
     {
         return Ok(());
     }
+    if let Some((size_fn, _)) = registry::lookup(&col.encoding_id())
+        && size_fn(col, field, sizes, ctx)?.is_some()
+    {
+        return Ok(());
+    }
     let canonical = col.clone().execute::(ctx)?;
     codec::field_size(&canonical, field, sizes, ctx)
 }