From e5ce2fa3ddd0297392064c71f841b5e53619659d Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 27 May 2026 11:36:29 +0300 Subject: [PATCH 01/10] Draft registry API --- Cargo.toml | 3 + crates/numcodecs-registry/Cargo.toml | 25 ++ crates/numcodecs-registry/LICENSE | 1 + crates/numcodecs-registry/README.md | 32 +++ crates/numcodecs-registry/src/lib.rs | 186 +++++++++++++++ crates/numcodecs/Cargo.toml | 1 + crates/numcodecs/src/codec.rs | 333 ++++++++++++++++++++++++++- crates/numcodecs/src/lib.rs | 5 +- 8 files changed, 583 insertions(+), 3 deletions(-) create mode 100644 crates/numcodecs-registry/Cargo.toml create mode 120000 crates/numcodecs-registry/LICENSE create mode 100644 crates/numcodecs-registry/README.md create mode 100644 crates/numcodecs-registry/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 2a179bfc0..ee9246b00 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ resolver = "2" members = [ "crates/numcodecs", "crates/numcodecs-python", + "crates/numcodecs-registry", "crates/numcodecs-wasm-builder", "crates/numcodecs-wasm-logging", "crates/numcodecs-wasm-guest", @@ -49,6 +50,7 @@ rust-version = "1.87" # workspace-internal numcodecs crates numcodecs = { version = "0.3.1", path = "crates/numcodecs", default-features = false } numcodecs-python = { version = "0.7.1", path = "crates/numcodecs-python", default-features = false } +numcodecs-registry = { version = "0.1", path = "crates/numcodecs-registry", default-features = false } numcodecs-wasm-builder = { version = "0.2", path = "crates/numcodecs-wasm-builder", default-features = false } numcodecs-wasm-guest = { version = "0.3", path = "crates/numcodecs-wasm-guest", default-features = false } numcodecs-wasm-host = { version = "0.2", path = "crates/numcodecs-wasm-host", default-features = false } @@ -90,6 +92,7 @@ burn = { version = "0.18", default-features = false } clap = { version = "4.6", default-features = false } convert_case = { version = "0.8", default-features = false } ebcc = { version = "0.3.0-alpha", default-features = false } +erased-serde = { version = "0.4", default-features = false } format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } diff --git a/crates/numcodecs-registry/Cargo.toml b/crates/numcodecs-registry/Cargo.toml new file mode 100644 index 000000000..d3c137b68 --- /dev/null +++ b/crates/numcodecs-registry/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "numcodecs-registry" +version = "0.1.0" +edition = { workspace = true } +authors = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +rust-version = { workspace = true } + +description = "registries for numcodecs codecs" +readme = "README.md" +categories = ["compression", "encoding"] +keywords = ["numcodecs", "registry", "compression", "encoding"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +erased-serde = { workspace = true, features = ["std"] } +numcodecs = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true, features = ["std"] } +thiserror = { workspace = true } + +[lints] +workspace = true diff --git a/crates/numcodecs-registry/LICENSE b/crates/numcodecs-registry/LICENSE new file mode 120000 index 000000000..30cff7403 --- /dev/null +++ b/crates/numcodecs-registry/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/crates/numcodecs-registry/README.md b/crates/numcodecs-registry/README.md new file mode 100644 index 000000000..c0ada0e18 --- /dev/null +++ b/crates/numcodecs-registry/README.md @@ -0,0 +1,32 @@ +[![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs] + +[CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main +[workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain + +[MSRV]: https://img.shields.io/badge/MSRV-1.87.0-blue +[repo]: https://github.com/juntyr/numcodecs-rs + +[Latest Version]: https://img.shields.io/crates/v/numcodecs-registry +[crates.io]: https://crates.io/crates/numcodecs-registry + +[Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-registry +[docs.rs]: https://docs.rs/numcodecs-registry/ + +[Rust Doc Main]: https://img.shields.io/badge/docs-main-blue +[docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_registry + +# numcodecs-registry + +Registries for compression codecs implementing the [`numcodecs`] API. + +[`numcodecs`]: https://numcodecs.readthedocs.io/en/stable/ + +## License + +Licensed under the Mozilla Public License, Version 2.0 ([LICENSE](LICENSE) or https://www.mozilla.org/en-US/MPL/2.0/). + +## Funding + +The `numcodecs-registry` crate has been developed as part of [ESiWACE3](https://www.esiwace.eu), the third phase of the Centre of Excellence in Simulation of Weather and Climate in Europe. + +Funded by the European Union. This work has received funding from the European High Performance Computing Joint Undertaking (JU) under grant agreement No 101093054. diff --git a/crates/numcodecs-registry/src/lib.rs b/crates/numcodecs-registry/src/lib.rs new file mode 100644 index 000000000..2981b80e0 --- /dev/null +++ b/crates/numcodecs-registry/src/lib.rs @@ -0,0 +1,186 @@ +//! [![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs] +//! +//! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main +//! [workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain +//! +//! [MSRV]: https://img.shields.io/badge/MSRV-1.87.0-blue +//! [repo]: https://github.com/juntyr/numcodecs-rs +//! +//! [Latest Version]: https://img.shields.io/crates/v/numcodecs-registry +//! [crates.io]: https://crates.io/crates/numcodecs-registry +//! +//! [Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-registry +//! [docs.rs]: https://docs.rs/numcodecs-registry/ +//! +//! [Rust Doc Main]: https://img.shields.io/badge/docs-main-blue +//! [docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_registry +//! +//! Registries for compression codecs implementing the [`numcodecs`] API. +//! +//! [`numcodecs`]: https://numcodecs.readthedocs.io/en/stable/ + +#![expect(missing_docs, clippy::missing_errors_doc)] // FIXME + +use std::{ + collections::HashMap, + error::Error, + sync::{LazyLock, RwLock}, +}; + +use numcodecs::{DynCodec, DynCodecType, ErasedDynCodec, ErasedDynCodecType, ErasedError}; +use serde::{Deserialize, Deserializer}; +use serde_json::{Map, Value}; + +pub trait Registry: 'static + Send + Sync { + /// Error type that may be returned during + /// [`get_codec`][`Registry::get_codec`] and + /// and [`register_codec`][`Codec::Registry`]. + type Error: 'static + Send + Sync + Error; + + fn get_codec<'de, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result; + + fn get_codec_typed<'de, T: DynCodec, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result, Self::Error> { + self.get_codec(config).map(|codec| codec.downcast().ok()) + } + + fn register_codec( + &mut self, + ty: T, + ) -> Result, Self::Error>; +} + +pub struct ErasedRegistry { + registry: Box, +} + +impl ErasedRegistry { + pub fn new(registry: T) -> Self { + Self { + registry: Box::new(registry), + } + } +} + +impl Registry for ErasedRegistry { + type Error = ErasedError; + + fn get_codec<'de, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result { + self.registry + .erased_get_codec(&mut ::erase(config)) + } + + fn register_codec( + &mut self, + ty: T, + ) -> Result, Self::Error> { + self.registry + .erased_register_codec(ErasedDynCodecType::new(ty)) + } +} + +trait ErasedRegistryDispatch: 'static + Send + Sync { + fn erased_get_codec( + &self, + config: &mut dyn erased_serde::Deserializer, + ) -> Result; + + fn erased_register_codec( + &mut self, + ty: ErasedDynCodecType, + ) -> Result, ErasedError>; +} + +impl ErasedRegistryDispatch for T { + fn erased_get_codec( + &self, + config: &mut dyn erased_serde::Deserializer, + ) -> Result { + match self.get_codec(config) { + Ok(codec) => Ok(codec), + Err(err) => Err(ErasedError::new(err)), + } + } + + fn erased_register_codec( + &mut self, + ty: ErasedDynCodecType, + ) -> Result, ErasedError> { + match self.register_codec(ty) { + Ok(codec) => Ok(codec), + Err(err) => Err(ErasedError::new(err)), + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum LocalRegistryError { + #[error("codec not found")] + CodecNotFound, + #[error("invalid codec config")] + InvalidCodecConfig, +} + +pub struct LocalRegistry { + tys: HashMap, +} + +impl LocalRegistry { + pub fn new() -> Self { + Self { + tys: HashMap::new(), + } + } +} + +impl Registry for LocalRegistry { + type Error = LocalRegistryError; + + fn get_codec<'de, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result { + let mut config = Map::::deserialize(config).unwrap(); + + let Some(id) = config.remove("id") else { + panic!("missing codec `id`"); + }; + + let Value::String(id) = id else { + panic!("codec `id` must be a string"); + }; + + let Some(ty) = self.tys.get(&id) else { + panic!("unknown codec with id `{id}`"); + }; + + let codec = ty.codec_from_config(config).unwrap(); + + Ok(codec) + } + + fn register_codec( + &mut self, + ty: T, + ) -> Result, Self::Error> { + Ok(self + .tys + .insert(String::from(ty.codec_id()), ErasedDynCodecType::new(ty))) + } +} + +static REGISTRY: LazyLock> = + LazyLock::new(|| RwLock::new(ErasedRegistry::new(LocalRegistry::new()))); + +#[must_use] +pub fn global_registry() -> &'static RwLock { + LazyLock::force(®ISTRY) +} diff --git a/crates/numcodecs/Cargo.toml b/crates/numcodecs/Cargo.toml index daf48e7b5..70bd6b1b2 100644 --- a/crates/numcodecs/Cargo.toml +++ b/crates/numcodecs/Cargo.toml @@ -15,6 +15,7 @@ keywords = ["numcodecs", "compression", "encoding"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +erased-serde = { workspace = true, features = ["std"] } ndarray = { workspace = true } schemars = { workspace = true, features = ["derive"] } semver = { workspace = true, features = ["std", "serde"] } diff --git a/crates/numcodecs/src/codec.rs b/crates/numcodecs/src/codec.rs index ee87e241d..b17816d99 100644 --- a/crates/numcodecs/src/codec.rs +++ b/crates/numcodecs/src/codec.rs @@ -1,4 +1,4 @@ -use std::{borrow::Cow, error::Error, fmt, marker::PhantomData}; +use std::{any::Any, borrow::Cow, error::Error, fmt, marker::PhantomData}; use schemars::{JsonSchema, Schema, SchemaGenerator, generate::SchemaSettings, json_schema}; use semver::{Version, VersionReq}; @@ -169,6 +169,337 @@ impl DynCodecType for StaticCodecType { } } +pub struct ErasedError { + error: Box, +} + +impl ErasedError { + pub fn new(err: T) -> Self { + Self { + error: Box::new(err), + } + } +} + +impl fmt::Debug for ErasedError { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(&self.error, fmt) + } +} + +impl fmt::Display for ErasedError { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.error, fmt) + } +} + +impl Error for ErasedError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + self.error.source() + } +} + +pub struct ErasedDynCodec { + codec: Box, +} + +impl ErasedDynCodec { + pub fn new(codec: T) -> Self { + Self { + codec: Box::new(codec), + } + } + + pub fn downcast(self) -> Result { + if self.codec.erased_as_any().is::() { + let raw = Box::into_raw(self.codec); + #[expect(unsafe_code)] + // SAFETY: we have checked that self.codec is of type T + let codec = unsafe { Box::from_raw(raw.cast::()) }; + Ok(*codec) + } else { + Err(self) + } + } + + pub fn downcast_ref(&self) -> Option<&T> { + self.codec.erased_as_any().downcast_ref() + } + + pub fn downcast_mut(&mut self) -> Option<&mut T> { + self.codec.erased_as_any_mut().downcast_mut() + } +} + +impl Clone for ErasedDynCodec { + fn clone(&self) -> Self { + ErasedDynCodec { + codec: self.codec.erased_clone(), + } + } +} + +impl Codec for ErasedDynCodec { + type Error = ErasedError; + + fn encode(&self, data: AnyCowArray) -> Result { + self.codec.erased_encode(data) + } + + fn decode(&self, encoded: AnyCowArray) -> Result { + self.codec.erased_decode(encoded) + } + + fn decode_into( + &self, + encoded: AnyArrayView, + decoded: AnyArrayViewMut, + ) -> Result<(), Self::Error> { + self.codec.erased_decode_into(encoded, decoded) + } +} + +impl DynCodec for ErasedDynCodec { + type Type = ErasedDynCodecType; + + fn ty(&self) -> Self::Type { + ErasedDynCodecType { + ty: self.codec.erased_ty(), + } + } + + fn get_config(&self, serializer: S) -> Result { + erased_serde::serialize(self.codec.erased_as_serialize(), serializer) + } +} + +pub struct ErasedDynCodecType { + ty: Box, +} + +impl ErasedDynCodecType { + pub fn new(ty: T) -> Self { + Self { ty: Box::new(ty) } + } + + pub fn downcast(self) -> Result { + if self.ty.erased_as_any().is::() { + let raw = Box::into_raw(self.ty); + #[expect(unsafe_code)] + // SAFETY: we have checked that self.ty is of type T + let ty = unsafe { Box::from_raw(raw.cast::()) }; + Ok(*ty) + } else { + Err(self) + } + } + + pub fn downcast_ref(&self) -> Option<&T> { + self.ty.erased_as_any().downcast_ref() + } + + pub fn downcast_mut(&mut self) -> Option<&mut T> { + self.ty.erased_as_any_mut().downcast_mut() + } +} + +impl DynCodecType for ErasedDynCodecType { + type Codec = ErasedDynCodec; + + fn codec_id(&self) -> &str { + self.ty.erased_codec_id() + } + + fn codec_config_schema(&self) -> Schema { + self.ty.erased_codec_config_schema() + } + + fn codec_from_config<'de, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result { + match self + .ty + .erased_codec_from_config(&mut ::erase(config)) + { + Ok(codec) => Ok(ErasedDynCodec { codec }), + Err(err) => Err(serde::de::Error::custom(err)), // TODO: improve + } + } +} + +/// Dyn-compatible type-erased dynamically typed compression codec. +/// +/// Every codec that implements [`DynCodec`], which includes every codec that +/// implements [`StaticCodec`], also implements [`ErasedCodec`]. +trait ErasedDynCodecDispatch: 'static + Send + Sync { + /// Encodes the `data` and returns the result. + /// + /// # Errors + /// + /// Errors if encoding the buffer fails. + fn erased_encode(&self, data: AnyCowArray) -> Result; + + /// Decodes the `encoded` data and returns the result. + /// + /// # Errors + /// + /// Errors if decoding the buffer fails. + fn erased_decode(&self, encoded: AnyCowArray) -> Result; + + /// Decodes the `encoded` data and writes the result into the provided + /// `decoded` output. + /// + /// The output must have the correct type and shape. + /// + /// # Errors + /// + /// Errors if decoding the buffer fails. + fn erased_decode_into( + &self, + encoded: AnyArrayView, + decoded: AnyArrayViewMut, + ) -> Result<(), ErasedError>; + + // Serializes the configuration parameters for this codec. + + // The config include an `id` field with the + // [`ErasedCodecType::codec_id`]. + + // # Errors + + // Errors if serializing the codec configuration fails. + // fn erased_get_config( + // &self, + // ser: &mut dyn erased_serde::Serializer, + // ) -> Result<(), erased_serde::Error>; + + fn erased_as_serialize(&self) -> &dyn erased_serde::Serialize; + + /// Returns the type object for this codec. + fn erased_ty(&self) -> Box; + + fn erased_clone(&self) -> Box; + + fn erased_as_any(&self) -> &dyn Any; + + fn erased_as_any_mut(&mut self) -> &mut dyn Any; +} + +/// Dyn-compatible type-erased type object for dynamically typed compression +/// codecs. +/// +/// Every codec type that implements [`DynCodecType`] also implements +/// [`ErasedCodecType`]. +trait ErasedDynCodecTypeDispatch: 'static + Send + Sync { + /// Codec identifier. + fn erased_codec_id(&self) -> &str; + + /// JSON schema for the codec's configuration. + fn erased_codec_config_schema(&self) -> Schema; + + /// Instantiate a codec of this type from a JSON `config`uration. + /// + /// The `config` must *not* contain an `id` field. If the `config` *may* + /// contain one, use the [`erased_codec_from_config_with_id`] helper + /// function. + /// + /// # Errors + /// + /// Errors if constructing the codec fails. + fn erased_codec_from_config<'de>( + &self, + config: &mut dyn erased_serde::Deserializer<'de>, + ) -> Result, erased_serde::Error>; + + fn erased_as_any(&self) -> &dyn Any; + + fn erased_as_any_mut(&mut self) -> &mut dyn Any; +} + +impl ErasedDynCodecDispatch for T { + fn erased_encode(&self, data: AnyCowArray) -> Result { + Codec::encode(self, data).map_err(ErasedError::new) + } + + fn erased_decode(&self, encoded: AnyCowArray) -> Result { + Codec::decode(self, encoded).map_err(ErasedError::new) + } + + fn erased_decode_into( + &self, + encoded: AnyArrayView, + decoded: AnyArrayViewMut, + ) -> Result<(), ErasedError> { + Codec::decode_into(self, encoded, decoded).map_err(ErasedError::new) + } + + // fn erased_get_config(&self) -> Result { + // DynCodec::get_config(self, serde_json::value::Serializer).map_err(ErasedError::new) + // } + + fn erased_as_serialize(&self) -> &dyn erased_serde::Serialize { + #[repr(transparent)] + struct Wrapper(T); + + impl Serialize for Wrapper { + fn serialize(&self, serializer: S) -> Result { + DynCodec::get_config(&self.0, serializer) + } + } + + #[expect(unsafe_code)] + // SAFETY: Wrapper is a transparent newtype around Self + unsafe { + &*std::ptr::from_ref(self).cast::>() + } + } + + fn erased_ty(&self) -> Box { + Box::new(DynCodec::ty(self)) + } + + fn erased_clone(&self) -> Box { + Box::new(Clone::clone(self)) + } + + fn erased_as_any(&self) -> &dyn Any { + self + } + + fn erased_as_any_mut(&mut self) -> &mut dyn Any { + self + } +} + +impl ErasedDynCodecTypeDispatch for T { + fn erased_codec_id(&self) -> &str { + DynCodecType::codec_id(self) + } + + fn erased_codec_config_schema(&self) -> Schema { + DynCodecType::codec_config_schema(self) + } + + fn erased_codec_from_config<'de>( + &self, + config: &mut dyn erased_serde::Deserializer<'de>, + ) -> Result, erased_serde::Error> { + match DynCodecType::codec_from_config(self, config) { + Ok(codec) => Ok(Box::new(codec)), + Err(err) => Err(err), + } + } + + fn erased_as_any(&self) -> &dyn Any { + self + } + + fn erased_as_any_mut(&mut self) -> &mut dyn Any { + self + } +} + /// Utility struct to serialize a [`StaticCodec`]'s [`StaticCodec::Config`] /// together with its [`StaticCodec::CODEC_ID`] #[derive(Serialize, Deserialize)] diff --git a/crates/numcodecs/src/lib.rs b/crates/numcodecs/src/lib.rs index b4db09d2b..d6d48221a 100644 --- a/crates/numcodecs/src/lib.rs +++ b/crates/numcodecs/src/lib.rs @@ -27,6 +27,7 @@ pub use array::{ AnyArrayViewMut, AnyCowArray, AnyRawData, ArrayDType, ArrayDataMutExt, }; pub use codec::{ - Codec, DynCodec, DynCodecType, StaticCodec, StaticCodecConfig, StaticCodecType, - StaticCodecVersion, codec_from_config_with_id, serialize_codec_config_with_id, + Codec, DynCodec, DynCodecType, ErasedDynCodec, ErasedDynCodecType, ErasedError, StaticCodec, + StaticCodecConfig, StaticCodecType, StaticCodecVersion, codec_from_config_with_id, + serialize_codec_config_with_id, }; From 1edab74393fb997f349386ced2ba58900e139561 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 27 May 2026 15:00:10 +0300 Subject: [PATCH 02/10] Clean up --- crates/numcodecs-python/Cargo.toml | 1 + crates/numcodecs-python/src/registry.rs | 51 +++++++- crates/numcodecs-registry/Cargo.toml | 2 - crates/numcodecs-registry/src/lib.rs | 161 +++++++++++++----------- crates/numcodecs/src/codec.rs | 141 ++++++++------------- 5 files changed, 190 insertions(+), 166 deletions(-) diff --git a/crates/numcodecs-python/Cargo.toml b/crates/numcodecs-python/Cargo.toml index c8ed8daf5..a2d431ee1 100644 --- a/crates/numcodecs-python/Cargo.toml +++ b/crates/numcodecs-python/Cargo.toml @@ -18,6 +18,7 @@ keywords = ["numcodecs", "compression", "encoding", "python", "pyo3"] convert_case = { workspace = true } ndarray = { workspace = true } numcodecs = { workspace = true } +numcodecs-registry = { workspace = true } numpy = { workspace = true } pyo3 = { workspace = true } pyo3-error = { workspace = true } diff --git a/crates/numcodecs-python/src/registry.rs b/crates/numcodecs-python/src/registry.rs index e3f121ad3..8ad79835d 100644 --- a/crates/numcodecs-python/src/registry.rs +++ b/crates/numcodecs-python/src/registry.rs @@ -1,8 +1,13 @@ +use numcodecs::{DynCodec, DynCodecType, ErasedDynCodec}; +use numcodecs_registry::Registry; use pyo3::{prelude::*, sync::PyOnceLock, types::PyDict}; +use pythonize::Pythonizer; +use serde::Deserializer; +use serde_transcode::transcode; #[expect(unused_imports)] // FIXME: use expect, only used in docs use crate::PyCodecClassMethods; -use crate::{PyCodec, PyCodecClass}; +use crate::{PyCodec, PyCodecAdapter, PyCodecClass, export_codec_class}; /// Dynamic registry of codec classes. pub struct PyCodecRegistry { @@ -56,3 +61,47 @@ impl PyCodecRegistry { Ok(()) } } + +impl Registry for PyCodecRegistry { + type Error = PyErr; + + fn get_codec<'de, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result { + Python::attach(|py| { + let config = transcode(config, Pythonizer::new(py))?; + let config: Bound = config.extract()?; + + let codec = Self::get_codec(config.as_borrowed())?; + let codec = PyCodecAdapter::from_codec(codec)?; + + Ok(ErasedDynCodec::new(codec)) + }) + } + + fn get_codec_typed<'de, T: DynCodec, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result, Self::Error> { + Python::attach(|py| { + let config = transcode(config, Pythonizer::new(py))?; + let config: Bound = config.extract()?; + + let codec = Self::get_codec(config.as_borrowed())?; + // clone is necessary since we cannot move out of a PyCodec + let codec = PyCodecAdapter::with_downcast(py, &codec, |codec: &T| codec.clone()); + + Ok(codec) + }) + } + + fn register_codec(&mut self, ty: T) -> Result<(), Self::Error> { + Python::attach(|py| { + let module = PyModule::new(py, "codec")?; + let class = export_codec_class(py, ty, module.as_borrowed())?; + Self::register_codec(class.as_borrowed(), None)?; + Ok(()) + }) + } +} diff --git a/crates/numcodecs-registry/Cargo.toml b/crates/numcodecs-registry/Cargo.toml index d3c137b68..da035e02a 100644 --- a/crates/numcodecs-registry/Cargo.toml +++ b/crates/numcodecs-registry/Cargo.toml @@ -18,8 +18,6 @@ keywords = ["numcodecs", "registry", "compression", "encoding"] erased-serde = { workspace = true, features = ["std"] } numcodecs = { workspace = true } serde = { workspace = true } -serde_json = { workspace = true, features = ["std"] } -thiserror = { workspace = true } [lints] workspace = true diff --git a/crates/numcodecs-registry/src/lib.rs b/crates/numcodecs-registry/src/lib.rs index 2981b80e0..5efad36f1 100644 --- a/crates/numcodecs-registry/src/lib.rs +++ b/crates/numcodecs-registry/src/lib.rs @@ -19,29 +19,42 @@ //! //! [`numcodecs`]: https://numcodecs.readthedocs.io/en/stable/ -#![expect(missing_docs, clippy::missing_errors_doc)] // FIXME - -use std::{ - collections::HashMap, - error::Error, - sync::{LazyLock, RwLock}, -}; +use std::{error::Error, sync::RwLock}; use numcodecs::{DynCodec, DynCodecType, ErasedDynCodec, ErasedDynCodecType, ErasedError}; -use serde::{Deserialize, Deserializer}; -use serde_json::{Map, Value}; +use serde::Deserializer; +/// Registry of codec types. pub trait Registry: 'static + Send + Sync { /// Error type that may be returned during /// [`get_codec`][`Registry::get_codec`] and /// and [`register_codec`][`Codec::Registry`]. type Error: 'static + Send + Sync + Error; + /// Instantiate a codec of any type from its `config`uration. + /// + /// The config *must* include the `id` field with the + /// [`DynCodecType::codec_id`]. + /// + /// # Errors + /// + /// Errors if no codec with a matching `id` has been registered, or if + /// constructing the codec fails. fn get_codec<'de, D: Deserializer<'de>>( &self, config: D, ) -> Result; + /// Instantiate a codec with a concrete type from its `config`uration. + /// + /// The config *must* include the `id` field with the + /// [`DynCodecType::codec_id`]. + /// + /// # Errors + /// + /// Errors if no codec with a matching `id` has been registered, if + /// constructing the codec fails, or if the constructed codec is not of the + /// concrete type. fn get_codec_typed<'de, T: DynCodec, D: Deserializer<'de>>( &self, config: D, @@ -49,17 +62,24 @@ pub trait Registry: 'static + Send + Sync { self.get_codec(config).map(|codec| codec.downcast().ok()) } - fn register_codec( - &mut self, - ty: T, - ) -> Result, Self::Error>; + /// Register a codec type. + /// + /// When a codec type is registered, it will replace any type previously + /// registered under the same codec identifier, if present. + /// + /// # Errors + /// + /// Errors if registering the codec type fails. + fn register_codec(&mut self, ty: T) -> Result<(), Self::Error>; } +/// Type-erased [`Registry`]. pub struct ErasedRegistry { registry: Box, } impl ErasedRegistry { + /// Erase the type information of the concrete `registry`. pub fn new(registry: T) -> Self { Self { registry: Box::new(registry), @@ -78,10 +98,7 @@ impl Registry for ErasedRegistry { .erased_get_codec(&mut ::erase(config)) } - fn register_codec( - &mut self, - ty: T, - ) -> Result, Self::Error> { + fn register_codec(&mut self, ty: T) -> Result<(), Self::Error> { self.registry .erased_register_codec(ErasedDynCodecType::new(ty)) } @@ -93,10 +110,7 @@ trait ErasedRegistryDispatch: 'static + Send + Sync { config: &mut dyn erased_serde::Deserializer, ) -> Result; - fn erased_register_codec( - &mut self, - ty: ErasedDynCodecType, - ) -> Result, ErasedError>; + fn erased_register_codec(&mut self, ty: ErasedDynCodecType) -> Result<(), ErasedError>; } impl ErasedRegistryDispatch for T { @@ -110,77 +124,76 @@ impl ErasedRegistryDispatch for T { } } - fn erased_register_codec( - &mut self, - ty: ErasedDynCodecType, - ) -> Result, ErasedError> { + fn erased_register_codec(&mut self, ty: ErasedDynCodecType) -> Result<(), ErasedError> { match self.register_codec(ty) { - Ok(codec) => Ok(codec), + Ok(()) => Ok(()), Err(err) => Err(ErasedError::new(err)), } } } -#[derive(Debug, thiserror::Error)] -pub enum LocalRegistryError { - #[error("codec not found")] - CodecNotFound, - #[error("invalid codec config")] - InvalidCodecConfig, -} - -pub struct LocalRegistry { - tys: HashMap, -} - -impl LocalRegistry { - pub fn new() -> Self { - Self { - tys: HashMap::new(), +/// Global registry singleton. +/// +/// If the global registry is used, its backing registry must be provided +/// exactly once using [`export_global`]. +/// +/// The global registry must not be used to provide the backing of itself, +/// which would result in an infinite loop at runtime. +pub struct GlobalRegistry; + +impl GlobalRegistry { + fn get() -> &'static RwLock { + #[expect(unsafe_code)] + unsafe extern "C" { + #[expect(improper_ctypes)] + safe fn _numcodecs_registry_get_global_registry() -> &'static RwLock; } + + _numcodecs_registry_get_global_registry() } } -impl Registry for LocalRegistry { - type Error = LocalRegistryError; +impl Registry for GlobalRegistry { + type Error = ErasedError; fn get_codec<'de, D: Deserializer<'de>>( &self, config: D, ) -> Result { - let mut config = Map::::deserialize(config).unwrap(); - - let Some(id) = config.remove("id") else { - panic!("missing codec `id`"); - }; - - let Value::String(id) = id else { - panic!("codec `id` must be a string"); - }; - - let Some(ty) = self.tys.get(&id) else { - panic!("unknown codec with id `{id}`"); - }; - - let codec = ty.codec_from_config(config).unwrap(); - - Ok(codec) + #[expect(clippy::expect_used)] + let registry = Self::get().read().expect("global registry was poisoned"); + registry.get_codec(config) } - fn register_codec( - &mut self, - ty: T, - ) -> Result, Self::Error> { - Ok(self - .tys - .insert(String::from(ty.codec_id()), ErasedDynCodecType::new(ty))) + fn register_codec(&mut self, ty: T) -> Result<(), Self::Error> { + #[expect(clippy::expect_used)] + let mut registry = Self::get().write().expect("global registry was poisoned"); + registry.register_codec(ty) } } -static REGISTRY: LazyLock> = - LazyLock::new(|| RwLock::new(ErasedRegistry::new(LocalRegistry::new()))); - -#[must_use] -pub fn global_registry() -> &'static RwLock { - LazyLock::force(®ISTRY) +#[macro_export] +/// `export_global!(registry: ty = expr)` exports the provided registry as the +/// global registry singleton. +/// +/// This macro must only be used at most once in every binary or shared +/// library. +macro_rules! export_global { + (registry: $ty:ty = $init:expr) => { + const _: () = { + use std::sync::{LazyLock, RwLock}; + + use $crate::ErasedRegistry; + + static _GLOBAL_REGISTRY: LazyLock> = + LazyLock::new(|| RwLock::new(ErasedRegistry::new($init))); + + #[allow(improper_ctypes, unsafe_code)] + #[unsafe(no_mangle)] + extern "C" fn _numcodecs_registry_get_global_registry() + -> &'static RwLock { + LazyLock::force(&_GLOBAL_REGISTRY) + } + }; + }; } diff --git a/crates/numcodecs/src/codec.rs b/crates/numcodecs/src/codec.rs index b17816d99..fc96940a3 100644 --- a/crates/numcodecs/src/codec.rs +++ b/crates/numcodecs/src/codec.rs @@ -169,11 +169,13 @@ impl DynCodecType for StaticCodecType { } } +/// Type-erased [`Error`] type. pub struct ErasedError { error: Box, } impl ErasedError { + /// Erase the type information of the concrete `err`or. pub fn new(err: T) -> Self { Self { error: Box::new(err), @@ -199,17 +201,24 @@ impl Error for ErasedError { } } +/// Type-erased dynamically typed compression codec. pub struct ErasedDynCodec { codec: Box, } impl ErasedDynCodec { + /// Erase the type information of the concrete `codec`. pub fn new(codec: T) -> Self { Self { codec: Box::new(codec), } } + /// Try to downcast into a concretely-typed codec. + /// + /// # Errors + /// + /// Returns `self` if the type-erased codec is not of the concrete type. pub fn downcast(self) -> Result { if self.codec.erased_as_any().is::() { let raw = Box::into_raw(self.codec); @@ -222,10 +231,14 @@ impl ErasedDynCodec { } } + /// Try to downcast to a concretely-typed codec reference. + #[must_use] pub fn downcast_ref(&self) -> Option<&T> { self.codec.erased_as_any().downcast_ref() } + /// Try to downcast to a concretely-typed mutable codec reference. + #[must_use] pub fn downcast_mut(&mut self) -> Option<&mut T> { self.codec.erased_as_any_mut().downcast_mut() } @@ -233,7 +246,7 @@ impl ErasedDynCodec { impl Clone for ErasedDynCodec { fn clone(&self) -> Self { - ErasedDynCodec { + Self { codec: self.codec.erased_clone(), } } @@ -273,15 +286,23 @@ impl DynCodec for ErasedDynCodec { } } +/// Type-erased dynamically typed compression codec type. pub struct ErasedDynCodecType { ty: Box, } impl ErasedDynCodecType { + /// Erase the type information of the concrete codec `ty`pe. pub fn new(ty: T) -> Self { Self { ty: Box::new(ty) } } + /// Try to downcast into a concretely-typed codec type. + /// + /// # Errors + /// + /// Returns `self` if the type-erased codec type is not of the concrete + /// type. pub fn downcast(self) -> Result { if self.ty.erased_as_any().is::() { let raw = Box::into_raw(self.ty); @@ -294,10 +315,14 @@ impl ErasedDynCodecType { } } + /// Try to downcast to a concretely-typed codec type reference. + #[must_use] pub fn downcast_ref(&self) -> Option<&T> { self.ty.erased_as_any().downcast_ref() } + /// Try to downcast to a concretely-typed mutable codec type reference. + #[must_use] pub fn downcast_mut(&mut self) -> Option<&mut T> { self.ty.erased_as_any_mut().downcast_mut() } @@ -328,92 +353,34 @@ impl DynCodecType for ErasedDynCodecType { } } -/// Dyn-compatible type-erased dynamically typed compression codec. -/// -/// Every codec that implements [`DynCodec`], which includes every codec that -/// implements [`StaticCodec`], also implements [`ErasedCodec`]. trait ErasedDynCodecDispatch: 'static + Send + Sync { - /// Encodes the `data` and returns the result. - /// - /// # Errors - /// - /// Errors if encoding the buffer fails. fn erased_encode(&self, data: AnyCowArray) -> Result; - - /// Decodes the `encoded` data and returns the result. - /// - /// # Errors - /// - /// Errors if decoding the buffer fails. fn erased_decode(&self, encoded: AnyCowArray) -> Result; - - /// Decodes the `encoded` data and writes the result into the provided - /// `decoded` output. - /// - /// The output must have the correct type and shape. - /// - /// # Errors - /// - /// Errors if decoding the buffer fails. fn erased_decode_into( &self, encoded: AnyArrayView, decoded: AnyArrayViewMut, ) -> Result<(), ErasedError>; - // Serializes the configuration parameters for this codec. - - // The config include an `id` field with the - // [`ErasedCodecType::codec_id`]. - - // # Errors - - // Errors if serializing the codec configuration fails. - // fn erased_get_config( - // &self, - // ser: &mut dyn erased_serde::Serializer, - // ) -> Result<(), erased_serde::Error>; - - fn erased_as_serialize(&self) -> &dyn erased_serde::Serialize; + fn erased_clone(&self) -> Box; - /// Returns the type object for this codec. fn erased_ty(&self) -> Box; - fn erased_clone(&self) -> Box; - fn erased_as_any(&self) -> &dyn Any; - fn erased_as_any_mut(&mut self) -> &mut dyn Any; + + fn erased_as_serialize(&self) -> &dyn erased_serde::Serialize; } -/// Dyn-compatible type-erased type object for dynamically typed compression -/// codecs. -/// -/// Every codec type that implements [`DynCodecType`] also implements -/// [`ErasedCodecType`]. trait ErasedDynCodecTypeDispatch: 'static + Send + Sync { - /// Codec identifier. fn erased_codec_id(&self) -> &str; - - /// JSON schema for the codec's configuration. fn erased_codec_config_schema(&self) -> Schema; - - /// Instantiate a codec of this type from a JSON `config`uration. - /// - /// The `config` must *not* contain an `id` field. If the `config` *may* - /// contain one, use the [`erased_codec_from_config_with_id`] helper - /// function. - /// - /// # Errors - /// - /// Errors if constructing the codec fails. - fn erased_codec_from_config<'de>( + fn erased_codec_from_config( &self, - config: &mut dyn erased_serde::Deserializer<'de>, + config: &mut dyn erased_serde::Deserializer, ) -> Result, erased_serde::Error>; fn erased_as_any(&self) -> &dyn Any; - fn erased_as_any_mut(&mut self) -> &mut dyn Any; } @@ -434,35 +401,14 @@ impl ErasedDynCodecDispatch for T { Codec::decode_into(self, encoded, decoded).map_err(ErasedError::new) } - // fn erased_get_config(&self) -> Result { - // DynCodec::get_config(self, serde_json::value::Serializer).map_err(ErasedError::new) - // } - - fn erased_as_serialize(&self) -> &dyn erased_serde::Serialize { - #[repr(transparent)] - struct Wrapper(T); - - impl Serialize for Wrapper { - fn serialize(&self, serializer: S) -> Result { - DynCodec::get_config(&self.0, serializer) - } - } - - #[expect(unsafe_code)] - // SAFETY: Wrapper is a transparent newtype around Self - unsafe { - &*std::ptr::from_ref(self).cast::>() - } + fn erased_clone(&self) -> Box { + Box::new(Clone::clone(self)) } fn erased_ty(&self) -> Box { Box::new(DynCodec::ty(self)) } - fn erased_clone(&self) -> Box { - Box::new(Clone::clone(self)) - } - fn erased_as_any(&self) -> &dyn Any { self } @@ -470,6 +416,23 @@ impl ErasedDynCodecDispatch for T { fn erased_as_any_mut(&mut self) -> &mut dyn Any { self } + + fn erased_as_serialize(&self) -> &dyn erased_serde::Serialize { + #[repr(transparent)] + struct SerializeDynCodec(T); + + impl Serialize for SerializeDynCodec { + fn serialize(&self, serializer: S) -> Result { + DynCodec::get_config(&self.0, serializer) + } + } + + #[expect(unsafe_code)] + // SAFETY: SerializeDynCodec is a transparent newtype around Self + unsafe { + &*std::ptr::from_ref(self).cast::>() + } + } } impl ErasedDynCodecTypeDispatch for T { @@ -481,9 +444,9 @@ impl ErasedDynCodecTypeDispatch for T { DynCodecType::codec_config_schema(self) } - fn erased_codec_from_config<'de>( + fn erased_codec_from_config( &self, - config: &mut dyn erased_serde::Deserializer<'de>, + config: &mut dyn erased_serde::Deserializer, ) -> Result, erased_serde::Error> { match DynCodecType::codec_from_config(self, config) { Ok(codec) => Ok(Box::new(codec)), From f0c0af1215ad3e245486777156a28df21a5caf4c Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 27 May 2026 15:06:07 +0300 Subject: [PATCH 03/10] export python registry in numcodecs-wasm --- crates/numcodecs-python/src/registry.rs | 4 +--- py/numcodecs-wasm/Cargo.toml | 1 + py/numcodecs-wasm/src/lib.rs | 4 ++++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/numcodecs-python/src/registry.rs b/crates/numcodecs-python/src/registry.rs index 8ad79835d..c32931c9f 100644 --- a/crates/numcodecs-python/src/registry.rs +++ b/crates/numcodecs-python/src/registry.rs @@ -10,9 +10,7 @@ use crate::PyCodecClassMethods; use crate::{PyCodec, PyCodecAdapter, PyCodecClass, export_codec_class}; /// Dynamic registry of codec classes. -pub struct PyCodecRegistry { - _private: (), -} +pub struct PyCodecRegistry; impl PyCodecRegistry { /// Instantiate a codec from a configuration dictionary. diff --git a/py/numcodecs-wasm/Cargo.toml b/py/numcodecs-wasm/Cargo.toml index 2f3a65124..4d8ea9f21 100644 --- a/py/numcodecs-wasm/Cargo.toml +++ b/py/numcodecs-wasm/Cargo.toml @@ -29,6 +29,7 @@ anyhow = { workspace = true } # FIXME: https://github.com/bytecodealliance/rustix/issues/1620 memfd = { version = "0.6.5", default-features = false } numcodecs-python = { workspace = true } +numcodecs-registry = { workspace = true } numcodecs-wasm-host-reproducible = { workspace = true } pyo3 = { workspace = true, features = ["macros", "abi3-py310"] } pyo3-error = { workspace = true } diff --git a/py/numcodecs-wasm/src/lib.rs b/py/numcodecs-wasm/src/lib.rs index 66010b32e..923878a9e 100644 --- a/py/numcodecs-wasm/src/lib.rs +++ b/py/numcodecs-wasm/src/lib.rs @@ -60,3 +60,7 @@ fn read_codec_instruction_counter<'py>( Ok(instruction_counter.0) } + +numcodecs_registry::export_global! { + registry: numcodecs_python::PyCodecRegistry = numcodecs_python::PyCodecRegistry +} From 30125a7bc3ee593c0df88db38a2c363a58205fd1 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 27 May 2026 23:25:33 +0300 Subject: [PATCH 04/10] draft wit bridge for external codec --- crates/numcodecs-wasm-guest/Cargo.toml | 6 + crates/numcodecs-wasm-guest/src/convert.rs | 26 ++- crates/numcodecs-wasm-guest/src/lib.rs | 194 ++++++++++++++++++++- wit/codecs.wit | 43 ++++- wit/world.wit | 2 + 5 files changed, 267 insertions(+), 4 deletions(-) diff --git a/crates/numcodecs-wasm-guest/Cargo.toml b/crates/numcodecs-wasm-guest/Cargo.toml index 31df535e3..7344a11cb 100644 --- a/crates/numcodecs-wasm-guest/Cargo.toml +++ b/crates/numcodecs-wasm-guest/Cargo.toml @@ -16,6 +16,12 @@ keywords = ["numcodecs", "compression", "encoding", "wasm-component", "wasm-bind numcodecs = { workspace = true } wit-bindgen = { workspace = true, features = ["macros", "realloc"] } +schemars = { workspace = true } +serde = { workspace = true } +serde-transcode = { workspace = true } +serde_json = { workspace = true, features = ["std"] } +thiserror = { workspace = true } + [target.'cfg(target_arch = "wasm32")'.dependencies] format_serde_error = { workspace = true, features = ["serde_json"] } ndarray = { workspace = true, features = ["std"] } diff --git a/crates/numcodecs-wasm-guest/src/convert.rs b/crates/numcodecs-wasm-guest/src/convert.rs index 08a2440a4..8882e5c2d 100644 --- a/crates/numcodecs-wasm-guest/src/convert.rs +++ b/crates/numcodecs-wasm-guest/src/convert.rs @@ -74,6 +74,28 @@ pub fn into_wit_any_array(array: AnyArray) -> Result Result { + let dtype = match dtype { + AnyArrayDType::U8 => wit::AnyArrayDType::U8, + AnyArrayDType::U16 => wit::AnyArrayDType::U16, + AnyArrayDType::U32 => wit::AnyArrayDType::U32, + AnyArrayDType::U64 => wit::AnyArrayDType::U64, + AnyArrayDType::I8 => wit::AnyArrayDType::I8, + AnyArrayDType::I16 => wit::AnyArrayDType::I16, + AnyArrayDType::I32 => wit::AnyArrayDType::I32, + AnyArrayDType::I64 => wit::AnyArrayDType::I64, + AnyArrayDType::F32 => wit::AnyArrayDType::F32, + AnyArrayDType::F64 => wit::AnyArrayDType::F64, + array => { + return Err(AnyArrayConversionError::UnsupportedDtype { + dtype, + }); + } + }; + + Ok(dtype) +} + #[derive(Debug, Error)] pub enum AnyArrayConversionError { #[error("numcodecs-wasm-guest received an array of an invalid shape")] @@ -108,11 +130,11 @@ pub fn into_wit_error(err: T) -> wit::Error { #[expect(clippy::cast_possible_truncation)] #[must_use] -fn usize_as_u32_slice(slice: &[usize]) -> Vec { +pub(crate) fn usize_as_u32_slice(slice: &[usize]) -> Vec { slice.iter().map(|x| *x as u32).collect() } #[must_use] -fn u32_as_usize_vec(vec: Vec) -> Vec { +pub(crate) fn u32_as_usize_vec(vec: Vec) -> Vec { vec.into_iter().map(|x| x as usize).collect() } diff --git a/crates/numcodecs-wasm-guest/src/lib.rs b/crates/numcodecs-wasm-guest/src/lib.rs index 3462f2fa6..96e0cd70f 100644 --- a/crates/numcodecs-wasm-guest/src/lib.rs +++ b/crates/numcodecs-wasm-guest/src/lib.rs @@ -32,7 +32,7 @@ use ::{ serde::Deserialize, }; -#[cfg(target_arch = "wasm32")] +// #[cfg(target_arch = "wasm32")] mod convert; #[cfg(target_arch = "wasm32")] @@ -182,3 +182,195 @@ impl wit::GuestCodec for T { } } } + +pub fn get_codec<'de, D: serde::Deserializer<'de>>( + config: D, +) -> Result { + let mut config_bytes = Vec::new(); + serde_transcode::transcode(config, &mut serde_json::Serializer::new(&mut config_bytes)) + .map_err(convert::into_wit_error) + .map_err(ExternalError::new)?; + let config = String::from_utf8(config_bytes) + .map_err(convert::into_wit_error) + .map_err(ExternalError::new)?; + + let codec = + bindings::numcodecs::abc::registry::get_codec(&config).map_err(ExternalError::new)?; + let ty = codec.ty(); + + let codec_id = ty.codec_id(); + let schema: schemars::Schema = serde_json::from_str(&ty.codec_config_schema()) + .map_err(convert::into_wit_error) + .map_err(ExternalError::new)?; + + Ok(ExternalCodec { + codec, + ty: ExternalCodecType { + ty: std::sync::Arc::new(ty), + codec_id: codec_id.into(), + schema: std::sync::Arc::new(schema), + }, + }) +} + +pub struct ExternalCodec { + codec: bindings::numcodecs::abc::registry::ErasedDynCodec, + ty: ExternalCodecType, +} + +impl Clone for ExternalCodec { + fn clone(&self) -> Self { + Self { + codec: self.codec.clone(), + ty: ExternalCodecType { + ty: self.ty.ty.clone(), + codec_id: self.ty.codec_id.clone(), + schema: self.ty.schema.clone(), + }, + } + } +} + +impl numcodecs::Codec for ExternalCodec { + type Error = ExternalError; + + fn encode(&self, data: numcodecs::AnyCowArray) -> Result { + match self + .codec + .encode(&convert::into_wit_any_array(data.into_owned())) + { + Ok(encoded) => match convert::from_wit_any_array(encoded) { + Ok(encoded) => Ok(encoded), + Err(err) => Err(ExternalError::new(convert::into_wit_error(err))), + }, + Err(err) => Err(ExternalError::new(err)), + } + } + + fn decode(&self, encoded: numcodecs::AnyCowArray) -> Result { + match self + .codec + .decode(&convert::into_wit_any_array(encoded.into_owned())) + { + Ok(decoded) => match convert::from_wit_any_array(decoded) { + Ok(decoded) => Ok(decoded), + Err(err) => Err(ExternalError::new(convert::into_wit_error(err))), + }, + Err(err) => Err(ExternalError::new(err)), + } + } + + fn decode_into( + &self, + encoded: numcodecs::AnyArrayView, + mut decoded: numcodecs::AnyArrayViewMut, + ) -> Result<(), Self::Error> { + match self.codec.decode_into( + &convert::into_wit_any_array(encoded.into_owned()), + &bindings::numcodecs::abc::types::AnyArrayPrototype { + dtype: match convert::into_wit_any_array_dtype(decoded.dtype()) { + Ok(dtype) => dtype, + Err(err) => return Err(ExternalError::new(convert::into_wit_error(err))), + }, + shape: convert::usize_as_u32_slice(decoded.shape()), + }, + ) { + Ok(dec) => match convert::from_wit_any_array(dec) { + Ok(dec) => { + decoded.assign(&dec); + Ok(()) + } + Err(err) => Err(ExternalError::new(convert::into_wit_error(err))), + }, + Err(err) => Err(ExternalError::new(err)), + } + } +} + +impl numcodecs::DynCodec for ExternalCodec { + type Type = ExternalCodecType; + + fn get_config(&self, serializer: S) -> Result { + let config = self + .codec + .get_config() + .map_err(ExternalError::new) + .map_err(serde::ser::Error::custom)?; + serde_transcode::transcode(&mut serde_json::Deserializer::from_str(&config), serializer) + } + + fn ty(&self) -> Self::Type { + ExternalCodecType { + ty: self.ty.ty.clone(), + codec_id: self.ty.codec_id.clone(), + schema: self.ty.schema.clone(), + } + } +} + +pub struct ExternalCodecType { + ty: std::sync::Arc, + codec_id: std::sync::Arc, + schema: std::sync::Arc, +} + +impl numcodecs::DynCodecType for ExternalCodecType { + type Codec = ExternalCodec; + + fn codec_id(&self) -> &str { + &*self.codec_id + } + + fn codec_config_schema(&self) -> schemars::Schema { + (*self.schema).clone() + } + + fn codec_from_config<'de, D: serde::Deserializer<'de>>( + &self, + config: D, + ) -> Result { + let mut config_bytes = Vec::new(); + serde_transcode::transcode(config, &mut serde_json::Serializer::new(&mut config_bytes)) + .map_err(serde::de::Error::custom)?; + let config = String::from_utf8(config_bytes).map_err(serde::de::Error::custom)?; + + let codec = self + .ty + .from_config(&config) + .map_err(ExternalError::new) + .map_err(serde::de::Error::custom)?; + + Ok(ExternalCodec { + codec, + ty: ExternalCodecType { + ty: self.ty.clone(), + codec_id: self.codec_id.clone(), + schema: self.schema.clone(), + }, + }) + } +} + +#[derive(Debug, thiserror::Error)] +#[error("{msg}")] +pub struct ExternalError { + msg: String, + source: Option>, +} + +impl ExternalError { + pub(crate) fn new(error: bindings::numcodecs::abc::types::Error) -> Self { + let mut root = Self { + msg: error.message, + source: None, + }; + + let mut err = &mut root; + + for msg in error.chain { + err = &mut *err.source.insert(Box::new(Self { msg, source: None })); + } + + root + } +} diff --git a/wit/codecs.wit b/wit/codecs.wit index 863623366..c319c25f1 100644 --- a/wit/codecs.wit +++ b/wit/codecs.wit @@ -1,6 +1,8 @@ package numcodecs:abc@0.1.1; -interface codec { +// TODO: major version bump + +interface types { type json = string; type json-schema = json; type usize = u32; @@ -47,6 +49,13 @@ interface codec { message: string, chain: list, } +} + +interface codec { + use types.{ + any-array, any-array-prototype, any-array-data, any-array-dtype, error, + json, json-schema, usize, + }; resource codec { from-config: static func(config: json) -> result; @@ -65,3 +74,35 @@ interface codec { codec-config-schema: func() -> json-schema; } + +interface registry { + use types.{ + any-array, any-array-prototype, any-array-data, any-array-dtype, error, + json, json-schema, usize, + }; + + resource erased-dyn-codec { + encode: func(data: any-array) -> result; + + decode: func(encoded: any-array) -> result; + + @since(version = 0.1.1) + decode-into: func(encoded: any-array, decoded: any-array-prototype) -> result; + + clone: func() -> erased-dyn-codec; + + get-config: func() -> result; + + ty: func() -> erased-dyn-codec-type; + } + + resource erased-dyn-codec-type { + codec-id: func() -> string; + + codec-config-schema: func() -> json-schema; + + from-config: func(config: json) -> result; + } + + get-codec: func(config: json) -> result; +} diff --git a/wit/world.wit b/wit/world.wit index 44c9b58a9..1b5e1704d 100644 --- a/wit/world.wit +++ b/wit/world.wit @@ -2,8 +2,10 @@ package numcodecs:abc@0.1.1; world imports { import codec; + export registry; } world exports { + import registry; export codec; } From 8a9dac573e47ade099fb93ec39cac6fb3e3a0c72 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 28 May 2026 09:36:43 +0300 Subject: [PATCH 05/10] only allow lookup no updates through registry trait --- crates/numcodecs-python/src/registry.rs | 13 +- crates/numcodecs-registry/src/lib.rs | 54 ++--- crates/numcodecs-wasm-guest/Cargo.toml | 8 +- crates/numcodecs-wasm-guest/src/convert.rs | 120 ++++++---- crates/numcodecs-wasm-guest/src/external.rs | 214 +++++++++++++++++ crates/numcodecs-wasm-guest/src/lib.rs | 243 +++----------------- wit/codecs.wit | 4 +- 7 files changed, 338 insertions(+), 318 deletions(-) create mode 100644 crates/numcodecs-wasm-guest/src/external.rs diff --git a/crates/numcodecs-python/src/registry.rs b/crates/numcodecs-python/src/registry.rs index c32931c9f..9adbaa6c3 100644 --- a/crates/numcodecs-python/src/registry.rs +++ b/crates/numcodecs-python/src/registry.rs @@ -1,4 +1,4 @@ -use numcodecs::{DynCodec, DynCodecType, ErasedDynCodec}; +use numcodecs::{DynCodec, ErasedDynCodec}; use numcodecs_registry::Registry; use pyo3::{prelude::*, sync::PyOnceLock, types::PyDict}; use pythonize::Pythonizer; @@ -7,7 +7,7 @@ use serde_transcode::transcode; #[expect(unused_imports)] // FIXME: use expect, only used in docs use crate::PyCodecClassMethods; -use crate::{PyCodec, PyCodecAdapter, PyCodecClass, export_codec_class}; +use crate::{PyCodec, PyCodecAdapter, PyCodecClass}; /// Dynamic registry of codec classes. pub struct PyCodecRegistry; @@ -93,13 +93,4 @@ impl Registry for PyCodecRegistry { Ok(codec) }) } - - fn register_codec(&mut self, ty: T) -> Result<(), Self::Error> { - Python::attach(|py| { - let module = PyModule::new(py, "codec")?; - let class = export_codec_class(py, ty, module.as_borrowed())?; - Self::register_codec(class.as_borrowed(), None)?; - Ok(()) - }) - } } diff --git a/crates/numcodecs-registry/src/lib.rs b/crates/numcodecs-registry/src/lib.rs index 5efad36f1..4e79b80ce 100644 --- a/crates/numcodecs-registry/src/lib.rs +++ b/crates/numcodecs-registry/src/lib.rs @@ -19,9 +19,9 @@ //! //! [`numcodecs`]: https://numcodecs.readthedocs.io/en/stable/ -use std::{error::Error, sync::RwLock}; +use std::error::Error; -use numcodecs::{DynCodec, DynCodecType, ErasedDynCodec, ErasedDynCodecType, ErasedError}; +use numcodecs::{DynCodec, ErasedDynCodec, ErasedError}; use serde::Deserializer; /// Registry of codec types. @@ -61,16 +61,6 @@ pub trait Registry: 'static + Send + Sync { ) -> Result, Self::Error> { self.get_codec(config).map(|codec| codec.downcast().ok()) } - - /// Register a codec type. - /// - /// When a codec type is registered, it will replace any type previously - /// registered under the same codec identifier, if present. - /// - /// # Errors - /// - /// Errors if registering the codec type fails. - fn register_codec(&mut self, ty: T) -> Result<(), Self::Error>; } /// Type-erased [`Registry`]. @@ -97,11 +87,6 @@ impl Registry for ErasedRegistry { self.registry .erased_get_codec(&mut ::erase(config)) } - - fn register_codec(&mut self, ty: T) -> Result<(), Self::Error> { - self.registry - .erased_register_codec(ErasedDynCodecType::new(ty)) - } } trait ErasedRegistryDispatch: 'static + Send + Sync { @@ -109,8 +94,6 @@ trait ErasedRegistryDispatch: 'static + Send + Sync { &self, config: &mut dyn erased_serde::Deserializer, ) -> Result; - - fn erased_register_codec(&mut self, ty: ErasedDynCodecType) -> Result<(), ErasedError>; } impl ErasedRegistryDispatch for T { @@ -123,13 +106,6 @@ impl ErasedRegistryDispatch for T { Err(err) => Err(ErasedError::new(err)), } } - - fn erased_register_codec(&mut self, ty: ErasedDynCodecType) -> Result<(), ErasedError> { - match self.register_codec(ty) { - Ok(()) => Ok(()), - Err(err) => Err(ErasedError::new(err)), - } - } } /// Global registry singleton. @@ -142,11 +118,11 @@ impl ErasedRegistryDispatch for T { pub struct GlobalRegistry; impl GlobalRegistry { - fn get() -> &'static RwLock { + fn get() -> &'static ErasedRegistry { #[expect(unsafe_code)] unsafe extern "C" { #[expect(improper_ctypes)] - safe fn _numcodecs_registry_get_global_registry() -> &'static RwLock; + safe fn _numcodecs_registry_get_global_registry() -> &'static ErasedRegistry; } _numcodecs_registry_get_global_registry() @@ -160,15 +136,14 @@ impl Registry for GlobalRegistry { &self, config: D, ) -> Result { - #[expect(clippy::expect_used)] - let registry = Self::get().read().expect("global registry was poisoned"); - registry.get_codec(config) + Self::get().get_codec(config) } - fn register_codec(&mut self, ty: T) -> Result<(), Self::Error> { - #[expect(clippy::expect_used)] - let mut registry = Self::get().write().expect("global registry was poisoned"); - registry.register_codec(ty) + fn get_codec_typed<'de, T: DynCodec, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result, Self::Error> { + Self::get().get_codec_typed(config) } } @@ -181,17 +156,16 @@ impl Registry for GlobalRegistry { macro_rules! export_global { (registry: $ty:ty = $init:expr) => { const _: () = { - use std::sync::{LazyLock, RwLock}; + use std::sync::LazyLock; use $crate::ErasedRegistry; - static _GLOBAL_REGISTRY: LazyLock> = - LazyLock::new(|| RwLock::new(ErasedRegistry::new($init))); + static _GLOBAL_REGISTRY: LazyLock = + LazyLock::new(|| ErasedRegistry::new($init)); #[allow(improper_ctypes, unsafe_code)] #[unsafe(no_mangle)] - extern "C" fn _numcodecs_registry_get_global_registry() - -> &'static RwLock { + extern "C" fn _numcodecs_registry_get_global_registry() -> &'static ErasedRegistry { LazyLock::force(&_GLOBAL_REGISTRY) } }; diff --git a/crates/numcodecs-wasm-guest/Cargo.toml b/crates/numcodecs-wasm-guest/Cargo.toml index 7344a11cb..7582f84d0 100644 --- a/crates/numcodecs-wasm-guest/Cargo.toml +++ b/crates/numcodecs-wasm-guest/Cargo.toml @@ -16,17 +16,13 @@ keywords = ["numcodecs", "compression", "encoding", "wasm-component", "wasm-bind numcodecs = { workspace = true } wit-bindgen = { workspace = true, features = ["macros", "realloc"] } -schemars = { workspace = true } -serde = { workspace = true } -serde-transcode = { workspace = true } -serde_json = { workspace = true, features = ["std"] } -thiserror = { workspace = true } - [target.'cfg(target_arch = "wasm32")'.dependencies] format_serde_error = { workspace = true, features = ["serde_json"] } ndarray = { workspace = true, features = ["std"] } +numcodecs-registry = { workspace = true } schemars = { workspace = true } serde = { workspace = true } +serde-transcode = { workspace = true } serde_json = { workspace = true, features = ["std"] } thiserror = { workspace = true } diff --git a/crates/numcodecs-wasm-guest/src/convert.rs b/crates/numcodecs-wasm-guest/src/convert.rs index 8882e5c2d..8e21ed3f3 100644 --- a/crates/numcodecs-wasm-guest/src/convert.rs +++ b/crates/numcodecs-wasm-guest/src/convert.rs @@ -6,43 +6,47 @@ use thiserror::Error; use crate::wit; -pub fn from_wit_any_array(array: wit::AnyArray) -> Result { +pub fn from_wit_any_array( + array: wit::types::AnyArray, +) -> Result { let shape = u32_as_usize_vec(array.shape); let array = match array.data { - wit::AnyArrayData::U8(data) => AnyArray::U8(Array::from_shape_vec(shape, data)?), - wit::AnyArrayData::U16(data) => AnyArray::U16(Array::from_shape_vec(shape, data)?), - wit::AnyArrayData::U32(data) => AnyArray::U32(Array::from_shape_vec(shape, data)?), - wit::AnyArrayData::U64(data) => AnyArray::U64(Array::from_shape_vec(shape, data)?), - wit::AnyArrayData::I8(data) => AnyArray::I8(Array::from_shape_vec(shape, data)?), - wit::AnyArrayData::I16(data) => AnyArray::I16(Array::from_shape_vec(shape, data)?), - wit::AnyArrayData::I32(data) => AnyArray::I32(Array::from_shape_vec(shape, data)?), - wit::AnyArrayData::I64(data) => AnyArray::I64(Array::from_shape_vec(shape, data)?), - wit::AnyArrayData::F32(data) => AnyArray::F32(Array::from_shape_vec(shape, data)?), - wit::AnyArrayData::F64(data) => AnyArray::F64(Array::from_shape_vec(shape, data)?), + wit::types::AnyArrayData::U8(data) => AnyArray::U8(Array::from_shape_vec(shape, data)?), + wit::types::AnyArrayData::U16(data) => AnyArray::U16(Array::from_shape_vec(shape, data)?), + wit::types::AnyArrayData::U32(data) => AnyArray::U32(Array::from_shape_vec(shape, data)?), + wit::types::AnyArrayData::U64(data) => AnyArray::U64(Array::from_shape_vec(shape, data)?), + wit::types::AnyArrayData::I8(data) => AnyArray::I8(Array::from_shape_vec(shape, data)?), + wit::types::AnyArrayData::I16(data) => AnyArray::I16(Array::from_shape_vec(shape, data)?), + wit::types::AnyArrayData::I32(data) => AnyArray::I32(Array::from_shape_vec(shape, data)?), + wit::types::AnyArrayData::I64(data) => AnyArray::I64(Array::from_shape_vec(shape, data)?), + wit::types::AnyArrayData::F32(data) => AnyArray::F32(Array::from_shape_vec(shape, data)?), + wit::types::AnyArrayData::F64(data) => AnyArray::F64(Array::from_shape_vec(shape, data)?), }; Ok(array) } -pub fn zeros_from_wit_any_array_prototype(prototype: wit::AnyArrayPrototype) -> AnyArray { +pub fn zeros_from_wit_any_array_prototype(prototype: wit::types::AnyArrayPrototype) -> AnyArray { let shape = u32_as_usize_vec(prototype.shape); match prototype.dtype { - wit::AnyArrayDtype::U8 => AnyArray::U8(Array::zeros(shape)), - wit::AnyArrayDtype::U16 => AnyArray::U16(Array::zeros(shape)), - wit::AnyArrayDtype::U32 => AnyArray::U32(Array::zeros(shape)), - wit::AnyArrayDtype::U64 => AnyArray::U64(Array::zeros(shape)), - wit::AnyArrayDtype::I8 => AnyArray::I8(Array::zeros(shape)), - wit::AnyArrayDtype::I16 => AnyArray::I16(Array::zeros(shape)), - wit::AnyArrayDtype::I32 => AnyArray::I32(Array::zeros(shape)), - wit::AnyArrayDtype::I64 => AnyArray::I64(Array::zeros(shape)), - wit::AnyArrayDtype::F32 => AnyArray::F32(Array::zeros(shape)), - wit::AnyArrayDtype::F64 => AnyArray::F64(Array::zeros(shape)), + wit::types::AnyArrayDtype::U8 => AnyArray::U8(Array::zeros(shape)), + wit::types::AnyArrayDtype::U16 => AnyArray::U16(Array::zeros(shape)), + wit::types::AnyArrayDtype::U32 => AnyArray::U32(Array::zeros(shape)), + wit::types::AnyArrayDtype::U64 => AnyArray::U64(Array::zeros(shape)), + wit::types::AnyArrayDtype::I8 => AnyArray::I8(Array::zeros(shape)), + wit::types::AnyArrayDtype::I16 => AnyArray::I16(Array::zeros(shape)), + wit::types::AnyArrayDtype::I32 => AnyArray::I32(Array::zeros(shape)), + wit::types::AnyArrayDtype::I64 => AnyArray::I64(Array::zeros(shape)), + wit::types::AnyArrayDtype::F32 => AnyArray::F32(Array::zeros(shape)), + wit::types::AnyArrayDtype::F64 => AnyArray::F64(Array::zeros(shape)), } } -pub fn into_wit_any_array(array: AnyArray) -> Result { +pub fn into_wit_any_array( + array: AnyArray, +) -> Result { fn array_into_standard_layout_vec(array: ArrayD) -> Vec { if array.is_standard_layout() { array.into_raw_vec_and_offset().0 @@ -54,16 +58,32 @@ pub fn into_wit_any_array(array: AnyArray) -> Result wit::AnyArrayData::U8(array_into_standard_layout_vec(array)), - AnyArray::U16(array) => wit::AnyArrayData::U16(array_into_standard_layout_vec(array)), - AnyArray::U32(array) => wit::AnyArrayData::U32(array_into_standard_layout_vec(array)), - AnyArray::U64(array) => wit::AnyArrayData::U64(array_into_standard_layout_vec(array)), - AnyArray::I8(array) => wit::AnyArrayData::I8(array_into_standard_layout_vec(array)), - AnyArray::I16(array) => wit::AnyArrayData::I16(array_into_standard_layout_vec(array)), - AnyArray::I32(array) => wit::AnyArrayData::I32(array_into_standard_layout_vec(array)), - AnyArray::I64(array) => wit::AnyArrayData::I64(array_into_standard_layout_vec(array)), - AnyArray::F32(array) => wit::AnyArrayData::F32(array_into_standard_layout_vec(array)), - AnyArray::F64(array) => wit::AnyArrayData::F64(array_into_standard_layout_vec(array)), + AnyArray::U8(array) => wit::types::AnyArrayData::U8(array_into_standard_layout_vec(array)), + AnyArray::U16(array) => { + wit::types::AnyArrayData::U16(array_into_standard_layout_vec(array)) + } + AnyArray::U32(array) => { + wit::types::AnyArrayData::U32(array_into_standard_layout_vec(array)) + } + AnyArray::U64(array) => { + wit::types::AnyArrayData::U64(array_into_standard_layout_vec(array)) + } + AnyArray::I8(array) => wit::types::AnyArrayData::I8(array_into_standard_layout_vec(array)), + AnyArray::I16(array) => { + wit::types::AnyArrayData::I16(array_into_standard_layout_vec(array)) + } + AnyArray::I32(array) => { + wit::types::AnyArrayData::I32(array_into_standard_layout_vec(array)) + } + AnyArray::I64(array) => { + wit::types::AnyArrayData::I64(array_into_standard_layout_vec(array)) + } + AnyArray::F32(array) => { + wit::types::AnyArrayData::F32(array_into_standard_layout_vec(array)) + } + AnyArray::F64(array) => { + wit::types::AnyArrayData::F64(array_into_standard_layout_vec(array)) + } array => { return Err(AnyArrayConversionError::UnsupportedDtype { dtype: array.dtype(), @@ -71,25 +91,25 @@ pub fn into_wit_any_array(array: AnyArray) -> Result Result { +pub fn into_wit_any_array_dtype( + dtype: AnyArrayDType, +) -> Result { let dtype = match dtype { - AnyArrayDType::U8 => wit::AnyArrayDType::U8, - AnyArrayDType::U16 => wit::AnyArrayDType::U16, - AnyArrayDType::U32 => wit::AnyArrayDType::U32, - AnyArrayDType::U64 => wit::AnyArrayDType::U64, - AnyArrayDType::I8 => wit::AnyArrayDType::I8, - AnyArrayDType::I16 => wit::AnyArrayDType::I16, - AnyArrayDType::I32 => wit::AnyArrayDType::I32, - AnyArrayDType::I64 => wit::AnyArrayDType::I64, - AnyArrayDType::F32 => wit::AnyArrayDType::F32, - AnyArrayDType::F64 => wit::AnyArrayDType::F64, + AnyArrayDType::U8 => wit::types::AnyArrayDType::U8, + AnyArrayDType::U16 => wit::types::AnyArrayDType::U16, + AnyArrayDType::U32 => wit::types::AnyArrayDType::U32, + AnyArrayDType::U64 => wit::types::AnyArrayDType::U64, + AnyArrayDType::I8 => wit::types::AnyArrayDType::I8, + AnyArrayDType::I16 => wit::types::AnyArrayDType::I16, + AnyArrayDType::I32 => wit::types::AnyArrayDType::I32, + AnyArrayDType::I64 => wit::types::AnyArrayDType::I64, + AnyArrayDType::F32 => wit::types::AnyArrayDType::F32, + AnyArrayDType::F64 => wit::types::AnyArrayDType::F64, array => { - return Err(AnyArrayConversionError::UnsupportedDtype { - dtype, - }); + return Err(AnyArrayConversionError::UnsupportedDtype { dtype }); } }; @@ -108,10 +128,10 @@ pub enum AnyArrayConversionError { } #[must_use] -pub fn into_wit_error(err: T) -> wit::Error { +pub fn into_wit_error(err: T) -> wit::types::Error { let mut source: Option<&dyn Error> = err.source(); - let mut error = wit::Error { + let mut error = wit::types::Error { message: format!("{err}"), chain: if source.is_some() { Vec::with_capacity(4) diff --git a/crates/numcodecs-wasm-guest/src/external.rs b/crates/numcodecs-wasm-guest/src/external.rs new file mode 100644 index 000000000..b40e76088 --- /dev/null +++ b/crates/numcodecs-wasm-guest/src/external.rs @@ -0,0 +1,214 @@ +use std::sync::Arc; + +use numcodecs::{ + self, AnyArray, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, DynCodec, DynCodecType, + ErasedDynCodec, +}; +use numcodecs_registry::{self, Registry, export_global}; +use schemars::Schema; +use serde::{self, Deserializer, Serializer}; +use serde_transcode::transcode; + +use crate::{convert, wit}; + +pub struct ExternalCodec { + codec: wit::exports::ErasedDynCodec, + ty: ExternalCodecType, +} + +impl Clone for ExternalCodec { + fn clone(&self) -> Self { + Self { + codec: self.codec.clone(), + ty: ExternalCodecType { + ty: self.ty.ty.clone(), + codec_id: self.ty.codec_id.clone(), + schema: self.ty.schema.clone(), + }, + } + } +} + +impl Codec for ExternalCodec { + type Error = ExternalError; + + fn encode(&self, data: AnyCowArray) -> Result { + match self + .codec + .encode(&convert::into_wit_any_array(data.into_owned())) + { + Ok(encoded) => match convert::from_wit_any_array(encoded) { + Ok(encoded) => Ok(encoded), + Err(err) => Err(ExternalError::new(convert::into_wit_error(err))), + }, + Err(err) => Err(ExternalError::new(err)), + } + } + + fn decode(&self, encoded: AnyCowArray) -> Result { + match self + .codec + .decode(&convert::into_wit_any_array(encoded.into_owned())) + { + Ok(decoded) => match convert::from_wit_any_array(decoded) { + Ok(decoded) => Ok(decoded), + Err(err) => Err(ExternalError::new(convert::into_wit_error(err))), + }, + Err(err) => Err(ExternalError::new(err)), + } + } + + fn decode_into( + &self, + encoded: AnyArrayView, + mut decoded: AnyArrayViewMut, + ) -> Result<(), Self::Error> { + match self.codec.decode_into( + &convert::into_wit_any_array(encoded.into_owned()), + &wit::types::AnyArrayPrototype { + dtype: match convert::into_wit_any_array_dtype(decoded.dtype()) { + Ok(dtype) => dtype, + Err(err) => return Err(ExternalError::new(convert::into_wit_error(err))), + }, + shape: convert::usize_as_u32_slice(decoded.shape()), + }, + ) { + Ok(dec) => match convert::from_wit_any_array(dec) { + Ok(dec) => { + decoded.assign(&dec); + Ok(()) + } + Err(err) => Err(ExternalError::new(convert::into_wit_error(err))), + }, + Err(err) => Err(ExternalError::new(err)), + } + } +} + +impl DynCodec for ExternalCodec { + type Type = ExternalCodecType; + + fn get_config(&self, serializer: S) -> Result { + let config = self + .codec + .get_config() + .map_err(ExternalError::new) + .map_err(serde::ser::Error::custom)?; + transcode(&mut serde_json::Deserializer::from_str(&config), serializer) + } + + fn ty(&self) -> Self::Type { + ExternalCodecType { + ty: self.ty.ty.clone(), + codec_id: self.ty.codec_id.clone(), + schema: self.ty.schema.clone(), + } + } +} + +pub struct ExternalCodecType { + ty: Arc, + codec_id: Arc, + schema: Arc, +} + +impl DynCodecType for ExternalCodecType { + type Codec = ExternalCodec; + + fn codec_id(&self) -> &str { + &*self.codec_id + } + + fn codec_config_schema(&self) -> Schema { + (*self.schema).clone() + } + + fn codec_from_config<'de, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result { + let mut config_bytes = Vec::new(); + transcode(config, &mut serde_json::Serializer::new(&mut config_bytes)) + .map_err(serde::de::Error::custom)?; + let config = String::from_utf8(config_bytes).map_err(serde::de::Error::custom)?; + + let codec = self + .ty + .from_config(&config) + .map_err(ExternalError::new) + .map_err(serde::de::Error::custom)?; + + Ok(ExternalCodec { + codec, + ty: ExternalCodecType { + ty: self.ty.clone(), + codec_id: self.codec_id.clone(), + schema: self.schema.clone(), + }, + }) + } +} + +#[derive(Debug, thiserror::Error)] +#[error("{msg}")] +pub struct ExternalError { + msg: String, + source: Option>, +} + +impl ExternalError { + pub(crate) fn new(error: wit::types::Error) -> Self { + let mut root = Self { + msg: error.message, + source: None, + }; + + let mut err = &mut root; + + for msg in error.chain { + err = &mut *err.source.insert(Box::new(Self { msg, source: None })); + } + + root + } +} + +pub struct ExternalRegistry; + +impl Registry for ExternalRegistry { + type Error = ExternalError; + + fn get_codec<'de, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result { + let mut config_bytes = Vec::new(); + transcode(config, &mut serde_json::Serializer::new(&mut config_bytes)) + .map_err(convert::into_wit_error) + .map_err(ExternalError::new)?; + let config = String::from_utf8(config_bytes) + .map_err(convert::into_wit_error) + .map_err(ExternalError::new)?; + + let codec = wit::imports::get_codec(&config).map_err(ExternalError::new)?; + let ty = codec.ty(); + + let codec_id = ty.codec_id(); + let schema: Schema = serde_json::from_str(&ty.codec_config_schema()) + .map_err(convert::into_wit_error) + .map_err(ExternalError::new)?; + + let codec = ExternalCodec { + codec, + ty: ExternalCodecType { + ty: std::sync::Arc::new(ty), + codec_id: codec_id.into(), + schema: std::sync::Arc::new(schema), + }, + }; + + Ok(ErasedDynCodec::new(codec)) + } +} + +export_global! { registry: ExternalRegistry = ExternalRegistry } diff --git a/crates/numcodecs-wasm-guest/src/lib.rs b/crates/numcodecs-wasm-guest/src/lib.rs index 96e0cd70f..4eec5f3df 100644 --- a/crates/numcodecs-wasm-guest/src/lib.rs +++ b/crates/numcodecs-wasm-guest/src/lib.rs @@ -32,15 +32,14 @@ use ::{ serde::Deserialize, }; -// #[cfg(target_arch = "wasm32")] +#[cfg(target_arch = "wasm32")] mod convert; +#[cfg(target_arch = "wasm32")] +mod external; #[cfg(target_arch = "wasm32")] -use crate::{ - bindings::exports::numcodecs::abc::codec as wit, - convert::{ - from_wit_any_array, into_wit_any_array, into_wit_error, zeros_from_wit_any_array_prototype, - }, +use crate::convert::{ + from_wit_any_array, into_wit_any_array, into_wit_error, zeros_from_wit_any_array_prototype, }; #[doc(hidden)] @@ -55,6 +54,14 @@ pub mod bindings { }); } +#[cfg(target_arch = "wasm32")] +mod wit { + pub use crate::bindings::{ + exports::numcodecs::abc::codec as exports, numcodecs::abc::registry as imports, + numcodecs::abc::types, + }; +} + #[macro_export] /// Export a [`StaticCodec`] type using the WASM component model. /// @@ -96,14 +103,14 @@ macro_rules! export_codec { #[cfg(target_arch = "wasm32")] #[doc(hidden)] -impl wit::Guest for T { +impl wit::exports::Guest for T { type Codec = Self; fn codec_id() -> String { String::from(::CODEC_ID) } - fn codec_config_schema() -> wit::JsonSchema { + fn codec_config_schema() -> wit::types::JsonSchema { schema_for!(::Config<'static>) .as_value() .to_string() @@ -111,12 +118,16 @@ impl wit::Guest for T { } #[cfg(target_arch = "wasm32")] -impl wit::GuestCodec for T { - fn from_config(config: String) -> Result { +impl wit::exports::GuestCodec for T { + fn from_config(config: String) -> Result { let err = match ::Config::deserialize( &mut serde_json::Deserializer::from_str(&config), ) { - Ok(config) => return Ok(wit::Codec::new(::from_config(config))), + Ok(config) => { + return Ok(wit::exports::Codec::new( + ::from_config(config), + )); + } Err(err) => err, }; @@ -124,7 +135,10 @@ impl wit::GuestCodec for T { Err(into_wit_error(err)) } - fn encode(&self, data: wit::AnyArray) -> Result { + fn encode( + &self, + data: wit::types::AnyArray, + ) -> Result { let data = match from_wit_any_array(data) { Ok(data) => data, Err(err) => return Err(into_wit_error(err)), @@ -139,7 +153,10 @@ impl wit::GuestCodec for T { } } - fn decode(&self, encoded: wit::AnyArray) -> Result { + fn decode( + &self, + encoded: wit::types::AnyArray, + ) -> Result { let encoded = match from_wit_any_array(encoded) { Ok(encoded) => encoded, Err(err) => return Err(into_wit_error(err)), @@ -156,9 +173,9 @@ impl wit::GuestCodec for T { fn decode_into( &self, - encoded: wit::AnyArray, - decoded: wit::AnyArrayPrototype, - ) -> Result { + encoded: wit::types::AnyArray, + decoded: wit::types::AnyArrayPrototype, + ) -> Result { let encoded = match from_wit_any_array(encoded) { Ok(encoded) => encoded, Err(err) => return Err(into_wit_error(err)), @@ -175,202 +192,10 @@ impl wit::GuestCodec for T { } } - fn get_config(&self) -> Result { + fn get_config(&self) -> Result { match serde_json::to_string(&::get_config(self)) { Ok(config) => Ok(config), Err(err) => Err(into_wit_error(err)), } } } - -pub fn get_codec<'de, D: serde::Deserializer<'de>>( - config: D, -) -> Result { - let mut config_bytes = Vec::new(); - serde_transcode::transcode(config, &mut serde_json::Serializer::new(&mut config_bytes)) - .map_err(convert::into_wit_error) - .map_err(ExternalError::new)?; - let config = String::from_utf8(config_bytes) - .map_err(convert::into_wit_error) - .map_err(ExternalError::new)?; - - let codec = - bindings::numcodecs::abc::registry::get_codec(&config).map_err(ExternalError::new)?; - let ty = codec.ty(); - - let codec_id = ty.codec_id(); - let schema: schemars::Schema = serde_json::from_str(&ty.codec_config_schema()) - .map_err(convert::into_wit_error) - .map_err(ExternalError::new)?; - - Ok(ExternalCodec { - codec, - ty: ExternalCodecType { - ty: std::sync::Arc::new(ty), - codec_id: codec_id.into(), - schema: std::sync::Arc::new(schema), - }, - }) -} - -pub struct ExternalCodec { - codec: bindings::numcodecs::abc::registry::ErasedDynCodec, - ty: ExternalCodecType, -} - -impl Clone for ExternalCodec { - fn clone(&self) -> Self { - Self { - codec: self.codec.clone(), - ty: ExternalCodecType { - ty: self.ty.ty.clone(), - codec_id: self.ty.codec_id.clone(), - schema: self.ty.schema.clone(), - }, - } - } -} - -impl numcodecs::Codec for ExternalCodec { - type Error = ExternalError; - - fn encode(&self, data: numcodecs::AnyCowArray) -> Result { - match self - .codec - .encode(&convert::into_wit_any_array(data.into_owned())) - { - Ok(encoded) => match convert::from_wit_any_array(encoded) { - Ok(encoded) => Ok(encoded), - Err(err) => Err(ExternalError::new(convert::into_wit_error(err))), - }, - Err(err) => Err(ExternalError::new(err)), - } - } - - fn decode(&self, encoded: numcodecs::AnyCowArray) -> Result { - match self - .codec - .decode(&convert::into_wit_any_array(encoded.into_owned())) - { - Ok(decoded) => match convert::from_wit_any_array(decoded) { - Ok(decoded) => Ok(decoded), - Err(err) => Err(ExternalError::new(convert::into_wit_error(err))), - }, - Err(err) => Err(ExternalError::new(err)), - } - } - - fn decode_into( - &self, - encoded: numcodecs::AnyArrayView, - mut decoded: numcodecs::AnyArrayViewMut, - ) -> Result<(), Self::Error> { - match self.codec.decode_into( - &convert::into_wit_any_array(encoded.into_owned()), - &bindings::numcodecs::abc::types::AnyArrayPrototype { - dtype: match convert::into_wit_any_array_dtype(decoded.dtype()) { - Ok(dtype) => dtype, - Err(err) => return Err(ExternalError::new(convert::into_wit_error(err))), - }, - shape: convert::usize_as_u32_slice(decoded.shape()), - }, - ) { - Ok(dec) => match convert::from_wit_any_array(dec) { - Ok(dec) => { - decoded.assign(&dec); - Ok(()) - } - Err(err) => Err(ExternalError::new(convert::into_wit_error(err))), - }, - Err(err) => Err(ExternalError::new(err)), - } - } -} - -impl numcodecs::DynCodec for ExternalCodec { - type Type = ExternalCodecType; - - fn get_config(&self, serializer: S) -> Result { - let config = self - .codec - .get_config() - .map_err(ExternalError::new) - .map_err(serde::ser::Error::custom)?; - serde_transcode::transcode(&mut serde_json::Deserializer::from_str(&config), serializer) - } - - fn ty(&self) -> Self::Type { - ExternalCodecType { - ty: self.ty.ty.clone(), - codec_id: self.ty.codec_id.clone(), - schema: self.ty.schema.clone(), - } - } -} - -pub struct ExternalCodecType { - ty: std::sync::Arc, - codec_id: std::sync::Arc, - schema: std::sync::Arc, -} - -impl numcodecs::DynCodecType for ExternalCodecType { - type Codec = ExternalCodec; - - fn codec_id(&self) -> &str { - &*self.codec_id - } - - fn codec_config_schema(&self) -> schemars::Schema { - (*self.schema).clone() - } - - fn codec_from_config<'de, D: serde::Deserializer<'de>>( - &self, - config: D, - ) -> Result { - let mut config_bytes = Vec::new(); - serde_transcode::transcode(config, &mut serde_json::Serializer::new(&mut config_bytes)) - .map_err(serde::de::Error::custom)?; - let config = String::from_utf8(config_bytes).map_err(serde::de::Error::custom)?; - - let codec = self - .ty - .from_config(&config) - .map_err(ExternalError::new) - .map_err(serde::de::Error::custom)?; - - Ok(ExternalCodec { - codec, - ty: ExternalCodecType { - ty: self.ty.clone(), - codec_id: self.codec_id.clone(), - schema: self.schema.clone(), - }, - }) - } -} - -#[derive(Debug, thiserror::Error)] -#[error("{msg}")] -pub struct ExternalError { - msg: String, - source: Option>, -} - -impl ExternalError { - pub(crate) fn new(error: bindings::numcodecs::abc::types::Error) -> Self { - let mut root = Self { - msg: error.message, - source: None, - }; - - let mut err = &mut root; - - for msg in error.chain { - err = &mut *err.source.insert(Box::new(Self { msg, source: None })); - } - - root - } -} diff --git a/wit/codecs.wit b/wit/codecs.wit index c319c25f1..38cfeaf7a 100644 --- a/wit/codecs.wit +++ b/wit/codecs.wit @@ -1,6 +1,6 @@ package numcodecs:abc@0.1.1; -// TODO: major version bump +// TODO: major version bump + since annotations interface types { type json = string; @@ -90,7 +90,7 @@ interface registry { decode-into: func(encoded: any-array, decoded: any-array-prototype) -> result; clone: func() -> erased-dyn-codec; - + get-config: func() -> result; ty: func() -> erased-dyn-codec-type; From 3ead154fd8098cb3e85cfbd2b74cb80a3520867a Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 28 May 2026 10:07:26 +0300 Subject: [PATCH 06/10] some cleanup --- crates/numcodecs-wasm-guest/src/convert.rs | 22 +++---- crates/numcodecs-wasm-guest/src/external.rs | 65 +++++++++------------ crates/numcodecs-wasm-guest/src/lib.rs | 16 ++--- 3 files changed, 46 insertions(+), 57 deletions(-) diff --git a/crates/numcodecs-wasm-guest/src/convert.rs b/crates/numcodecs-wasm-guest/src/convert.rs index 8e21ed3f3..a845cf46c 100644 --- a/crates/numcodecs-wasm-guest/src/convert.rs +++ b/crates/numcodecs-wasm-guest/src/convert.rs @@ -98,17 +98,17 @@ pub fn into_wit_any_array_dtype( dtype: AnyArrayDType, ) -> Result { let dtype = match dtype { - AnyArrayDType::U8 => wit::types::AnyArrayDType::U8, - AnyArrayDType::U16 => wit::types::AnyArrayDType::U16, - AnyArrayDType::U32 => wit::types::AnyArrayDType::U32, - AnyArrayDType::U64 => wit::types::AnyArrayDType::U64, - AnyArrayDType::I8 => wit::types::AnyArrayDType::I8, - AnyArrayDType::I16 => wit::types::AnyArrayDType::I16, - AnyArrayDType::I32 => wit::types::AnyArrayDType::I32, - AnyArrayDType::I64 => wit::types::AnyArrayDType::I64, - AnyArrayDType::F32 => wit::types::AnyArrayDType::F32, - AnyArrayDType::F64 => wit::types::AnyArrayDType::F64, - array => { + AnyArrayDType::U8 => wit::types::AnyArrayDtype::U8, + AnyArrayDType::U16 => wit::types::AnyArrayDtype::U16, + AnyArrayDType::U32 => wit::types::AnyArrayDtype::U32, + AnyArrayDType::U64 => wit::types::AnyArrayDtype::U64, + AnyArrayDType::I8 => wit::types::AnyArrayDtype::I8, + AnyArrayDType::I16 => wit::types::AnyArrayDtype::I16, + AnyArrayDType::I32 => wit::types::AnyArrayDtype::I32, + AnyArrayDType::I64 => wit::types::AnyArrayDtype::I64, + AnyArrayDType::F32 => wit::types::AnyArrayDtype::F32, + AnyArrayDType::F64 => wit::types::AnyArrayDtype::F64, + dtype => { return Err(AnyArrayConversionError::UnsupportedDtype { dtype }); } }; diff --git a/crates/numcodecs-wasm-guest/src/external.rs b/crates/numcodecs-wasm-guest/src/external.rs index b40e76088..9b3bdb5f0 100644 --- a/crates/numcodecs-wasm-guest/src/external.rs +++ b/crates/numcodecs-wasm-guest/src/external.rs @@ -12,7 +12,7 @@ use serde_transcode::transcode; use crate::{convert, wit}; pub struct ExternalCodec { - codec: wit::exports::ErasedDynCodec, + codec: wit::registry::ErasedDynCodec, ty: ExternalCodecType, } @@ -33,27 +33,20 @@ impl Codec for ExternalCodec { type Error = ExternalError; fn encode(&self, data: AnyCowArray) -> Result { - match self - .codec - .encode(&convert::into_wit_any_array(data.into_owned())) - { - Ok(encoded) => match convert::from_wit_any_array(encoded) { - Ok(encoded) => Ok(encoded), - Err(err) => Err(ExternalError::new(convert::into_wit_error(err))), - }, + match self.codec.encode( + &convert::into_wit_any_array(data.into_owned()).map_err(ExternalError::from_error)?, + ) { + Ok(encoded) => convert::from_wit_any_array(encoded).map_err(ExternalError::from_error), Err(err) => Err(ExternalError::new(err)), } } fn decode(&self, encoded: AnyCowArray) -> Result { - match self - .codec - .decode(&convert::into_wit_any_array(encoded.into_owned())) - { - Ok(decoded) => match convert::from_wit_any_array(decoded) { - Ok(decoded) => Ok(decoded), - Err(err) => Err(ExternalError::new(convert::into_wit_error(err))), - }, + match self.codec.decode( + &convert::into_wit_any_array(encoded.into_owned()) + .map_err(ExternalError::from_error)?, + ) { + Ok(decoded) => convert::from_wit_any_array(decoded).map_err(ExternalError::from_error), Err(err) => Err(ExternalError::new(err)), } } @@ -64,21 +57,17 @@ impl Codec for ExternalCodec { mut decoded: AnyArrayViewMut, ) -> Result<(), Self::Error> { match self.codec.decode_into( - &convert::into_wit_any_array(encoded.into_owned()), + &convert::into_wit_any_array(encoded.into_owned()) + .map_err(ExternalError::from_error)?, &wit::types::AnyArrayPrototype { - dtype: match convert::into_wit_any_array_dtype(decoded.dtype()) { - Ok(dtype) => dtype, - Err(err) => return Err(ExternalError::new(convert::into_wit_error(err))), - }, + dtype: convert::into_wit_any_array_dtype(decoded.dtype()) + .map_err(ExternalError::from_error)?, shape: convert::usize_as_u32_slice(decoded.shape()), }, ) { Ok(dec) => match convert::from_wit_any_array(dec) { - Ok(dec) => { - decoded.assign(&dec); - Ok(()) - } - Err(err) => Err(ExternalError::new(convert::into_wit_error(err))), + Ok(dec) => decoded.assign(&dec).map_err(ExternalError::from_error), + Err(err) => Err(ExternalError::from_error(err)), }, Err(err) => Err(ExternalError::new(err)), } @@ -107,7 +96,7 @@ impl DynCodec for ExternalCodec { } pub struct ExternalCodecType { - ty: Arc, + ty: Arc, codec_id: Arc, schema: Arc, } @@ -157,7 +146,7 @@ pub struct ExternalError { } impl ExternalError { - pub(crate) fn new(error: wit::types::Error) -> Self { + fn new(error: wit::types::Error) -> Self { let mut root = Self { msg: error.message, source: None, @@ -171,6 +160,10 @@ impl ExternalError { root } + + fn from_error(err: impl std::error::Error) -> Self { + Self::new(convert::into_wit_error(err)) + } } pub struct ExternalRegistry; @@ -184,19 +177,15 @@ impl Registry for ExternalRegistry { ) -> Result { let mut config_bytes = Vec::new(); transcode(config, &mut serde_json::Serializer::new(&mut config_bytes)) - .map_err(convert::into_wit_error) - .map_err(ExternalError::new)?; - let config = String::from_utf8(config_bytes) - .map_err(convert::into_wit_error) - .map_err(ExternalError::new)?; + .map_err(ExternalError::from_error)?; + let config = String::from_utf8(config_bytes).map_err(ExternalError::from_error)?; - let codec = wit::imports::get_codec(&config).map_err(ExternalError::new)?; + let codec = wit::registry::get_codec(&config).map_err(ExternalError::new)?; let ty = codec.ty(); let codec_id = ty.codec_id(); - let schema: Schema = serde_json::from_str(&ty.codec_config_schema()) - .map_err(convert::into_wit_error) - .map_err(ExternalError::new)?; + let schema: Schema = + serde_json::from_str(&ty.codec_config_schema()).map_err(ExternalError::from_error)?; let codec = ExternalCodec { codec, diff --git a/crates/numcodecs-wasm-guest/src/lib.rs b/crates/numcodecs-wasm-guest/src/lib.rs index 4eec5f3df..a6bbc7bba 100644 --- a/crates/numcodecs-wasm-guest/src/lib.rs +++ b/crates/numcodecs-wasm-guest/src/lib.rs @@ -57,8 +57,8 @@ pub mod bindings { #[cfg(target_arch = "wasm32")] mod wit { pub use crate::bindings::{ - exports::numcodecs::abc::codec as exports, numcodecs::abc::registry as imports, - numcodecs::abc::types, + exports::numcodecs::abc::codec, + numcodecs::abc::{registry, types}, }; } @@ -103,7 +103,7 @@ macro_rules! export_codec { #[cfg(target_arch = "wasm32")] #[doc(hidden)] -impl wit::exports::Guest for T { +impl wit::codec::Guest for T { type Codec = Self; fn codec_id() -> String { @@ -118,15 +118,15 @@ impl wit::exports::Guest for T { } #[cfg(target_arch = "wasm32")] -impl wit::exports::GuestCodec for T { - fn from_config(config: String) -> Result { +impl wit::codec::GuestCodec for T { + fn from_config(config: String) -> Result { let err = match ::Config::deserialize( &mut serde_json::Deserializer::from_str(&config), ) { Ok(config) => { - return Ok(wit::exports::Codec::new( - ::from_config(config), - )); + return Ok(wit::codec::Codec::new(::from_config( + config, + ))); } Err(err) => err, }; From 766a338143f6532c4eb10f4f8b457ee4f9bb2bf7 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 29 May 2026 10:59:31 +0300 Subject: [PATCH 07/10] start adding host bindings for the wasm registry --- crates/numcodecs-registry/Cargo.toml | 1 + crates/numcodecs-registry/src/lib.rs | 64 +++++- .../Cargo.toml | 1 + .../src/codec.rs | 9 + .../src/tests.rs | 7 +- .../src/transform/mod.rs | 31 +-- crates/numcodecs-wasm-host/Cargo.toml | 1 + crates/numcodecs-wasm-host/src/lib.rs | 2 + crates/numcodecs-wasm-host/src/registry.rs | 185 ++++++++++++++++++ crates/numcodecs-wasm-host/src/wit.rs | 25 ++- py/numcodecs-wasm-template/pyproject.toml | 2 +- py/numcodecs-wasm/src/lib.rs | 3 +- 12 files changed, 307 insertions(+), 24 deletions(-) create mode 100644 crates/numcodecs-wasm-host/src/registry.rs diff --git a/crates/numcodecs-registry/Cargo.toml b/crates/numcodecs-registry/Cargo.toml index da035e02a..4797c456a 100644 --- a/crates/numcodecs-registry/Cargo.toml +++ b/crates/numcodecs-registry/Cargo.toml @@ -18,6 +18,7 @@ keywords = ["numcodecs", "registry", "compression", "encoding"] erased-serde = { workspace = true, features = ["std"] } numcodecs = { workspace = true } serde = { workspace = true } +thiserror = { workspace = true } [lints] workspace = true diff --git a/crates/numcodecs-registry/src/lib.rs b/crates/numcodecs-registry/src/lib.rs index 4e79b80ce..9bbb274be 100644 --- a/crates/numcodecs-registry/src/lib.rs +++ b/crates/numcodecs-registry/src/lib.rs @@ -19,7 +19,7 @@ //! //! [`numcodecs`]: https://numcodecs.readthedocs.io/en/stable/ -use std::error::Error; +use std::{error::Error, sync::Arc}; use numcodecs::{DynCodec, ErasedDynCodec, ErasedError}; use serde::Deserializer; @@ -63,6 +63,42 @@ pub trait Registry: 'static + Send + Sync { } } +impl Registry for Box { + type Error = R::Error; + + fn get_codec<'de, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result { + R::get_codec(self, config) + } + + fn get_codec_typed<'de, T: DynCodec, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result, Self::Error> { + R::get_codec_typed(self, config) + } +} + +impl Registry for Arc { + type Error = R::Error; + + fn get_codec<'de, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result { + R::get_codec(self, config) + } + + fn get_codec_typed<'de, T: DynCodec, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result, Self::Error> { + R::get_codec_typed(self, config) + } +} + /// Type-erased [`Registry`]. pub struct ErasedRegistry { registry: Box, @@ -171,3 +207,29 @@ macro_rules! export_global { }; }; } + +#[derive(Debug, thiserror::Error)] +#[error("codec not found")] +/// Codec was not found in the registry +pub struct CodecNotFoundError; + +/// Empty registry that contains no codecs +pub struct EmptyRegistry; + +impl Registry for EmptyRegistry { + type Error = CodecNotFoundError; + + fn get_codec<'de, D: Deserializer<'de>>( + &self, + _config: D, + ) -> Result { + Err(CodecNotFoundError) + } + + fn get_codec_typed<'de, T: DynCodec, D: Deserializer<'de>>( + &self, + _config: D, + ) -> Result, Self::Error> { + Err(CodecNotFoundError) + } +} diff --git a/crates/numcodecs-wasm-host-reproducible/Cargo.toml b/crates/numcodecs-wasm-host-reproducible/Cargo.toml index 75af6bd19..830e0f59e 100644 --- a/crates/numcodecs-wasm-host-reproducible/Cargo.toml +++ b/crates/numcodecs-wasm-host-reproducible/Cargo.toml @@ -26,6 +26,7 @@ numcodecs-wasm-host = { workspace = true } indexmap = { workspace = true, features = ["std"] } log = { workspace = true } numcodecs = { workspace = true } +numcodecs-registry = { workspace = true } polonius-the-crab = { workspace = true } schemars = { workspace = true } semver = { workspace = true } diff --git a/crates/numcodecs-wasm-host-reproducible/src/codec.rs b/crates/numcodecs-wasm-host-reproducible/src/codec.rs index 1328c22bd..9b4747a28 100644 --- a/crates/numcodecs-wasm-host-reproducible/src/codec.rs +++ b/crates/numcodecs-wasm-host-reproducible/src/codec.rs @@ -4,6 +4,7 @@ use std::sync::{Arc, Mutex}; use numcodecs::{ AnyArray, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, DynCodec, DynCodecType, }; +use numcodecs_registry::Registry; use numcodecs_wasm_host::{CodecError, RuntimeError, WasmCodec, WasmCodecComponent}; use schemars::Schema; use serde::Serializer; @@ -323,6 +324,7 @@ where pub fn new( engine: E, wasm_component: impl Into>, + registry: impl Registry, ) -> Result where E: Send + Sync, @@ -343,6 +345,8 @@ where } })?; + let registry = Arc::new(registry); + let component_instantiater = Arc::new(move |component: &Component, codec_id: &str| { let mut store = Store::new(&engine, ()); @@ -359,6 +363,11 @@ where source: RuntimeError::from(err), } })?; + numcodecs_wasm_host::add_registry_to_linker(&mut linker, &mut store, registry.clone()) + .map_err(|err| ReproducibleWasmCodecError::Runtime { + codec_id: Arc::from(codec_id), + source: RuntimeError::from(err), + })?; let instance = linker.instantiate(&mut store, component).map_err(|err| { ReproducibleWasmCodecError::Runtime { diff --git a/crates/numcodecs-wasm-host-reproducible/src/tests.rs b/crates/numcodecs-wasm-host-reproducible/src/tests.rs index d22978864..8addebfb9 100644 --- a/crates/numcodecs-wasm-host-reproducible/src/tests.rs +++ b/crates/numcodecs-wasm-host-reproducible/src/tests.rs @@ -2,6 +2,7 @@ use ndarray::Array; use ndarray_rand::RandomExt; use ndarray_rand::rand_distr::Normal; use numcodecs::{Codec, DynCodecType}; +use numcodecs_registry::EmptyRegistry; use crate::ReproducibleWasmCodecType; @@ -53,7 +54,11 @@ fn codec_roundtrip() { let engine = wasmtime_runtime_layer::Engine::new(wasmtime::Engine::new(&config).unwrap()); - let ty = match ReproducibleWasmCodecType::new(engine, include_bytes!("../tests/round.wasm")) { + let ty = match ReproducibleWasmCodecType::new( + engine, + include_bytes!("../tests/round.wasm"), + EmptyRegistry, + ) { Ok(ty) => ty, Err(err) => panic!( "ReproducibleWasmCodecType::new:\n===\n{err}\n===\n{err:?}\n===\n{err:#}\n===\n{err:#?}\n===\n" diff --git a/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs b/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs index bcf0a663d..73e8f3c85 100644 --- a/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs +++ b/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs @@ -12,7 +12,10 @@ pub mod nan; #[expect(clippy::too_many_lines)] // FIXME pub fn transform_wasm_component(wasm_component: impl Into>) -> Result, Error> { let NumcodecsWitInterfaces { + package, codec: codec_interface, + registry: registry_interface, + types: types_interface, .. } = NumcodecsWitInterfaces::get(); @@ -24,33 +27,37 @@ pub fn transform_wasm_component(wasm_component: impl Into>) -> Result>) -> Result>(); @@ -150,7 +157,7 @@ pub fn transform_wasm_component(wasm_component: impl Into>) -> Result Result<(), anyhow::Error> { + let NumcodecsWitInterfaces { + registry: numcodecs_registry_interface, + types: numcodecs_types_interface, + .. + } = NumcodecsWitInterfaces::get(); + + let registry = Arc::new(registry); + + let numcodecs_types_error_record = RecordType::new( + Some(TypeIdentifier::new( + "error", + Some(numcodecs_types_interface.clone()), + )), + [ + ("message", ValueType::String), + ("chain", ValueType::List(ListType::new(ValueType::String))), + ], + )?; + + let numcodecs_registry_instance = + linker.define_instance(numcodecs_registry_interface.clone())?; + + let numcodecs_registry_codec_resource = ResourceType::with_destructor( + ctx.as_context_mut(), + Some(TypeIdentifier::new( + "erased-dyn-codec", + Some(numcodecs_registry_interface.clone()), + )), + |_ctx, codec: ErasedDynCodec| { + std::mem::drop(codec); + Ok(()) + }, + )?; + + numcodecs_registry_instance.define_resource( + "erased-dyn-codec", + numcodecs_registry_codec_resource.clone(), + )?; + + let numcodecs_registry_codec_type_resource = ResourceType::with_destructor( + ctx.as_context_mut(), + Some(TypeIdentifier::new( + "erased-dyn-codec-type", + Some(numcodecs_registry_interface.clone()), + )), + |_ctx, codec_ty: ErasedDynCodecType| { + std::mem::drop(codec_ty); + Ok(()) + }, + )?; + + numcodecs_registry_instance.define_resource( + "erased-dyn-codec-type", + numcodecs_registry_codec_type_resource, + )?; + + let my_numcodecs_registry_codec_resource = numcodecs_registry_codec_resource.clone(); + let codec_clone = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ValueType::Borrow(numcodecs_registry_codec_resource.clone())], + [ValueType::Own(numcodecs_registry_codec_resource.clone())], + ), + move |ctx, args, results| { + let [Value::Borrow(codec)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.clone arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.clone results" + ); + }; + + let codec = { + let codec_ctx = ctx.as_context(); + let codec: &ErasedDynCodec = codec.rep(&codec_ctx)?; + codec.clone() + }; + + *result = Value::Own(ResourceOwn::new( + ctx, + codec, + my_numcodecs_registry_codec_resource.clone(), + )?); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func("[method]erased-dyn-codec.clone", codec_clone)?; + + let get_codec_result = ResultType::new( + Some(ValueType::Own(numcodecs_registry_codec_resource.clone())), + Some(ValueType::Record(numcodecs_types_error_record.clone())), + ); + + let my_numcodecs_registry_codec_resource = numcodecs_registry_codec_resource; + let get_codec = Func::new( + ctx, + FuncType::new( + [ValueType::String], + [ValueType::Result(get_codec_result.clone())], + ), + move |ctx, args, results| { + let [Value::String(config)] = args else { + anyhow::bail!("invalid numcodecs:abc/registry#get-codec arguments"); + }; + + let [result] = results else { + anyhow::bail!("invalid numcodecs:abc/registry#get-codec results"); + }; + + let res = match registry.get_codec(&mut serde_json::Deserializer::from_str(config)) { + Ok(codec) => Ok(Some(Value::Own(ResourceOwn::new( + ctx, + codec, + my_numcodecs_registry_codec_resource.clone(), + )?))), + Err(err) => Err(Some(Value::Record(into_wit_error( + err, + &numcodecs_types_error_record, + )?))), + }; + + *result = Value::Result(ResultValue::new(get_codec_result.clone(), res)?); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func("get-codec", get_codec)?; + + Ok(()) +} + +fn into_wit_error(err: T, ty: &RecordType) -> Result { + let mut source: Option<&dyn Error> = err.source(); + + let message = Value::String(Arc::from(format!("{err}"))); + let mut chain = if source.is_some() { + Vec::with_capacity(4) + } else { + Vec::new() + }; + + while let Some(err) = source.take() { + chain.push(Value::String(Arc::from(format!("{err}")))); + source = err.source(); + } + + Record::new( + ty.clone(), + [ + ("message", message), + ( + "chain", + Value::List(List::new(ListType::new(ValueType::String), chain)?), + ), + ], + ) +} diff --git a/crates/numcodecs-wasm-host/src/wit.rs b/crates/numcodecs-wasm-host/src/wit.rs index d8fdc0a18..1204ad78d 100644 --- a/crates/numcodecs-wasm-host/src/wit.rs +++ b/crates/numcodecs-wasm-host/src/wit.rs @@ -8,8 +8,14 @@ use crate::error::{CodecError, RuntimeError}; /// WebAssembly Interface Type (WIT) interfaces for `numcodecs` #[non_exhaustive] pub struct NumcodecsWitInterfaces { + /// The `numcodecs:abc` package + pub package: PackageIdentifier, /// The `numcodecs:abc/codec` interface pub codec: InterfaceIdentifier, + /// The `numcodecs:abc/registry` interface + pub registry: InterfaceIdentifier, + /// The `numcodecs:abc/types` interface + pub types: InterfaceIdentifier, } impl NumcodecsWitInterfaces { @@ -18,14 +24,17 @@ impl NumcodecsWitInterfaces { pub fn get() -> &'static Self { static NUMCODECS_WIT_INTERFACES: OnceLock = OnceLock::new(); - NUMCODECS_WIT_INTERFACES.get_or_init(|| Self { - codec: InterfaceIdentifier::new( - PackageIdentifier::new( - PackageName::new("numcodecs", "abc"), - Some(Version::new(0, 1, 1)), - ), - "codec", - ), + NUMCODECS_WIT_INTERFACES.get_or_init(|| { + let package = PackageIdentifier::new( + PackageName::new("numcodecs", "abc"), + Some(Version::new(0, 1, 1)), + ); + Self { + package: package.clone(), + codec: InterfaceIdentifier::new(package.clone(), "codec"), + registry: InterfaceIdentifier::new(package.clone(), "registry"), + types: InterfaceIdentifier::new(package, "types"), + } }) } } diff --git a/py/numcodecs-wasm-template/pyproject.toml b/py/numcodecs-wasm-template/pyproject.toml index f317b8209..5e44ec501 100644 --- a/py/numcodecs-wasm-template/pyproject.toml +++ b/py/numcodecs-wasm-template/pyproject.toml @@ -16,7 +16,7 @@ license = { file = "LICENSE" } requires-python = ">=3.10" dependencies = [ - "numcodecs-wasm~=0.2.4", # wasi 0.2.6 + "numcodecs-wasm @ file:///Users/junityre/numcodecs-rs/py/numcodecs-wasm/dist/numcodecs_wasm-0.2.4-cp310-abi3-macosx_11_0_arm64.whl", # wasi 0.2.6 ] [project.entry-points."numcodecs.codecs"] diff --git a/py/numcodecs-wasm/src/lib.rs b/py/numcodecs-wasm/src/lib.rs index 923878a9e..02de59600 100644 --- a/py/numcodecs-wasm/src/lib.rs +++ b/py/numcodecs-wasm/src/lib.rs @@ -32,7 +32,8 @@ fn create_codec_class<'py>( ) -> Result, PyErr> { let engine = default_engine(py)?; - let codec_ty = ReproducibleWasmCodecType::new(engine, wasm) + // TODO: we should allow restricting the codecs that the reproducible codec can 'see' + let codec_ty = ReproducibleWasmCodecType::new(engine, wasm, numcodecs_python::PyCodecRegistry) .map_err(|err| pyo3_error::PyErrChain::new(py, err))?; let codec_class = numcodecs_python::export_codec_class(py, codec_ty, module.as_borrowed())?; From c78a230f29b30352f0c6dc8e8da515f7f6759082 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 29 May 2026 12:27:35 +0300 Subject: [PATCH 08/10] add more host functions, codec encode/decode missing --- crates/numcodecs-python/tests/schema.rs | 2 +- crates/numcodecs-wasm-guest/src/external.rs | 2 +- crates/numcodecs-wasm-guest/src/lib.rs | 2 +- crates/numcodecs-wasm-host/src/registry.rs | 226 +++++++++++++++++++- wit/codecs.wit | 2 +- 5 files changed, 222 insertions(+), 12 deletions(-) diff --git a/crates/numcodecs-python/tests/schema.rs b/crates/numcodecs-python/tests/schema.rs index 5bc4a7493..4e7abd391 100644 --- a/crates/numcodecs-python/tests/schema.rs +++ b/crates/numcodecs-python/tests/schema.rs @@ -23,7 +23,7 @@ fn collect_schemas() -> Result<(), PyErr> { println!( "{codec_id}: {:#}", - codec_ty.codec_config_schema().as_value() + codec_ty.codec_config_schema().to_value() ); } diff --git a/crates/numcodecs-wasm-guest/src/external.rs b/crates/numcodecs-wasm-guest/src/external.rs index 9b3bdb5f0..033c9ef6d 100644 --- a/crates/numcodecs-wasm-guest/src/external.rs +++ b/crates/numcodecs-wasm-guest/src/external.rs @@ -123,7 +123,7 @@ impl DynCodecType for ExternalCodecType { let codec = self .ty - .from_config(&config) + .codec_from_config(&config) .map_err(ExternalError::new) .map_err(serde::de::Error::custom)?; diff --git a/crates/numcodecs-wasm-guest/src/lib.rs b/crates/numcodecs-wasm-guest/src/lib.rs index a6bbc7bba..89f1f9fbe 100644 --- a/crates/numcodecs-wasm-guest/src/lib.rs +++ b/crates/numcodecs-wasm-guest/src/lib.rs @@ -112,7 +112,7 @@ impl wit::codec::Guest for T { fn codec_config_schema() -> wit::types::JsonSchema { schema_for!(::Config<'static>) - .as_value() + .to_value() .to_string() } } diff --git a/crates/numcodecs-wasm-host/src/registry.rs b/crates/numcodecs-wasm-host/src/registry.rs index ed79ff36b..3334b426c 100644 --- a/crates/numcodecs-wasm-host/src/registry.rs +++ b/crates/numcodecs-wasm-host/src/registry.rs @@ -1,6 +1,6 @@ use std::{error::Error, sync::Arc}; -use numcodecs::{ErasedDynCodec, ErasedDynCodecType}; +use numcodecs::{DynCodec, DynCodecType, ErasedDynCodec, ErasedDynCodecType}; use numcodecs_registry::Registry; use wasm_component_layer::{ AsContext, AsContextMut, Func, FuncType, Linker, List, ListType, Record, RecordType, @@ -74,7 +74,7 @@ pub fn add_registry_to_linker( numcodecs_registry_instance.define_resource( "erased-dyn-codec-type", - numcodecs_registry_codec_type_resource, + numcodecs_registry_codec_type_resource.clone(), )?; let my_numcodecs_registry_codec_resource = numcodecs_registry_codec_resource.clone(); @@ -98,8 +98,8 @@ pub fn add_registry_to_linker( }; let codec = { - let codec_ctx = ctx.as_context(); - let codec: &ErasedDynCodec = codec.rep(&codec_ctx)?; + let ctx = ctx.as_context(); + let codec: &ErasedDynCodec = codec.rep(&ctx)?; codec.clone() }; @@ -114,17 +114,227 @@ pub fn add_registry_to_linker( ); numcodecs_registry_instance.define_func("[method]erased-dyn-codec.clone", codec_clone)?; - let get_codec_result = ResultType::new( + let string_result = ResultType::new( + Some(ValueType::String), + Some(ValueType::Record(numcodecs_types_error_record.clone())), + ); + + let my_numcodecs_types_error_record = numcodecs_types_error_record.clone(); + let codec_get_config = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ValueType::Borrow(numcodecs_registry_codec_resource.clone())], + [ValueType::Result(string_result.clone())], + ), + move |ctx, args, results| { + let [Value::Borrow(codec)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.get-config arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.get-config results" + ); + }; + + let config = { + let ctx = ctx.as_context(); + let codec: &ErasedDynCodec = codec.rep(&ctx)?; + + let mut config_bytes = Vec::new(); + match codec.get_config(&mut serde_json::Serializer::new(&mut config_bytes)) { + Ok(()) => match String::from_utf8(config_bytes) { + Ok(config) => Ok(config), + Err(err) => Err(into_wit_error(err, &my_numcodecs_types_error_record)?), + }, + Err(err) => Err(into_wit_error(err, &my_numcodecs_types_error_record)?), + } + }; + + let res = match config { + Ok(config) => Ok(Some(Value::String(Arc::from(config)))), + Err(err) => Err(Some(Value::Record(err))), + }; + + *result = Value::Result(ResultValue::new(string_result.clone(), res)?); + + Ok(()) + }, + ); + numcodecs_registry_instance + .define_func("[method]erased-dyn-codec.get-config", codec_get_config)?; + + let my_numcodecs_registry_codec_type_resource = numcodecs_registry_codec_type_resource.clone(); + let codec_ty = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ValueType::Borrow(numcodecs_registry_codec_resource.clone())], + [ValueType::Own( + numcodecs_registry_codec_type_resource.clone(), + )], + ), + move |ctx, args, results| { + let [Value::Borrow(codec)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.ty arguments" + ); + }; + + let [result] = results else { + anyhow::bail!("invalid numcodecs:abc/registry#[method]erased-dyn-codec.ty results"); + }; + + let ty = { + let ctx = ctx.as_context(); + let codec: &ErasedDynCodec = codec.rep(&ctx)?; + codec.ty() + }; + + *result = Value::Own(ResourceOwn::new( + ctx, + ty, + my_numcodecs_registry_codec_type_resource.clone(), + )?); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func("[method]erased-dyn-codec.ty", codec_ty)?; + + let codec_type_id = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ValueType::Borrow( + numcodecs_registry_codec_type_resource.clone(), + )], + [ValueType::String], + ), + move |ctx, args, results| { + let [Value::Borrow(ty)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec-type.codec-id arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codectype.codec-id results" + ); + }; + + let ctx = ctx.as_context(); + let ty: &ErasedDynCodecType = ty.rep(&ctx)?; + + *result = Value::String(Arc::from(ty.codec_id())); + + Ok(()) + }, + ); + numcodecs_registry_instance + .define_func("[method]erased-dyn-codec-type.codec-id", codec_type_id)?; + + let codec_type_schema = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ValueType::Borrow( + numcodecs_registry_codec_type_resource.clone(), + )], + [ValueType::String], + ), + move |ctx, args, results| { + let [Value::Borrow(ty)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec-type.codec-config-schema arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codectype.codec-config-schema results" + ); + }; + + let ctx = ctx.as_context(); + let ty: &ErasedDynCodecType = ty.rep(&ctx)?; + + *result = Value::String(Arc::from(ty.codec_config_schema().to_value().to_string())); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func( + "[method]erased-dyn-codec-type.codec-config-schema", + codec_type_schema, + )?; + + let codec_result = ResultType::new( Some(ValueType::Own(numcodecs_registry_codec_resource.clone())), Some(ValueType::Record(numcodecs_types_error_record.clone())), ); + let my_numcodecs_registry_codec_resource = numcodecs_registry_codec_resource.clone(); + let my_numcodecs_types_error_record = numcodecs_types_error_record.clone(); + let my_codec_result = codec_result.clone(); + let codec_from_config = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ + ValueType::Borrow(numcodecs_registry_codec_type_resource), + ValueType::String, + ], + [ValueType::Result(my_codec_result.clone())], + ), + move |ctx, args, results| { + let [Value::Borrow(ty), Value::String(config)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec-type.codec-from-config arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codectype.codec-from-config results" + ); + }; + + let res = { + let ctx = ctx.as_context(); + let ty: &ErasedDynCodecType = ty.rep(&ctx)?; + ty.codec_from_config(&mut serde_json::Deserializer::from_str(config)) + }; + + let res = match res { + Ok(codec) => Ok(Some(Value::Own(ResourceOwn::new( + ctx, + codec, + my_numcodecs_registry_codec_resource.clone(), + )?))), + Err(err) => Err(Some(Value::Record(into_wit_error( + err, + &my_numcodecs_types_error_record, + )?))), + }; + + *result = Value::Result(ResultValue::new(my_codec_result.clone(), res)?); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func( + "[method]erased-dyn-codec-type.codec-from-config", + codec_from_config, + )?; + let my_numcodecs_registry_codec_resource = numcodecs_registry_codec_resource; + let my_numcodecs_types_error_record = numcodecs_types_error_record; + let my_codec_result = codec_result; let get_codec = Func::new( ctx, FuncType::new( [ValueType::String], - [ValueType::Result(get_codec_result.clone())], + [ValueType::Result(my_codec_result.clone())], ), move |ctx, args, results| { let [Value::String(config)] = args else { @@ -143,11 +353,11 @@ pub fn add_registry_to_linker( )?))), Err(err) => Err(Some(Value::Record(into_wit_error( err, - &numcodecs_types_error_record, + &my_numcodecs_types_error_record, )?))), }; - *result = Value::Result(ResultValue::new(get_codec_result.clone(), res)?); + *result = Value::Result(ResultValue::new(my_codec_result.clone(), res)?); Ok(()) }, diff --git a/wit/codecs.wit b/wit/codecs.wit index 38cfeaf7a..0e60b92d9 100644 --- a/wit/codecs.wit +++ b/wit/codecs.wit @@ -101,7 +101,7 @@ interface registry { codec-config-schema: func() -> json-schema; - from-config: func(config: json) -> result; + codec-from-config: func(config: json) -> result; } get-codec: func(config: json) -> result; From f203ab86b6e72fd11056025014dc3868f2985349 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 29 May 2026 14:40:28 +0300 Subject: [PATCH 09/10] add host codec encode/decode functions --- crates/numcodecs-wasm-host/src/codec.rs | 62 +++++++- crates/numcodecs-wasm-host/src/registry.rs | 171 ++++++++++++++++++++- 2 files changed, 224 insertions(+), 9 deletions(-) diff --git a/crates/numcodecs-wasm-host/src/codec.rs b/crates/numcodecs-wasm-host/src/codec.rs index cf7e4443e..ebeb9bae1 100644 --- a/crates/numcodecs-wasm-host/src/codec.rs +++ b/crates/numcodecs-wasm-host/src/codec.rs @@ -311,7 +311,7 @@ impl WasmCodec { } } - fn any_array_data_ty() -> &'static VariantType { + pub(crate) fn any_array_data_ty() -> &'static VariantType { static ANY_ARRAY_DATA_TY: OnceLock = OnceLock::new(); #[expect(clippy::expect_used)] @@ -337,7 +337,7 @@ impl WasmCodec { }) } - fn any_array_ty() -> &'static RecordType { + pub(crate) fn any_array_ty() -> &'static RecordType { static ANY_ARRAY_TY: OnceLock = OnceLock::new(); #[expect(clippy::expect_used)] @@ -359,7 +359,7 @@ impl WasmCodec { } #[expect(clippy::needless_pass_by_value)] - fn array_into_wasm(array: AnyArrayView) -> Result { + pub(crate) fn array_into_wasm(array: AnyArrayView) -> Result { fn list_from_standard_layout<'a, T: 'static + Copy, S: Data, D: Dimension>( array: &'a ArrayBase, ) -> List @@ -451,7 +451,7 @@ impl WasmCodec { .map_err(RuntimeError::from) } - fn any_array_dtype_ty() -> &'static EnumType { + pub(crate) fn any_array_dtype_ty() -> &'static EnumType { static ANY_ARRAY_DTYPE_TY: OnceLock = OnceLock::new(); #[expect(clippy::expect_used)] @@ -468,7 +468,7 @@ impl WasmCodec { }) } - fn any_array_prototype_ty() -> &'static RecordType { + pub(crate) fn any_array_prototype_ty() -> &'static RecordType { static ANY_ARRAY_PROTOTYPE_TY: OnceLock = OnceLock::new(); #[expect(clippy::expect_used)] @@ -486,7 +486,7 @@ impl WasmCodec { }) } - fn array_prototype_into_wasm( + pub(crate) fn array_prototype_into_wasm( dtype: AnyArrayDType, shape: &[usize], ) -> Result { @@ -522,7 +522,7 @@ impl WasmCodec { .map_err(RuntimeError::from) } - fn with_array_view_from_wasm_record( + pub(crate) fn with_array_view_from_wasm_record( record: &Record, with: impl for<'a> FnOnce(AnyArrayView<'a>) -> Result, ) -> Result { @@ -602,4 +602,52 @@ impl WasmCodec { with(array) } + + pub(crate) fn array_prototype_from_wasm_record( + record: &Record, + ) -> Result { + let Some(Value::Variant(dtype)) = record.field("dtype") else { + return Err(RuntimeError::from(anyhow::Error::msg(format!( + "{record:?} is missing dtype field" + )))); + }; + if let Some(ty) = dtype.value() { + return Err(RuntimeError::from(anyhow::Error::msg(format!( + "{record:?} has an invalid dtype variant type {ty:?}" + )))); + } + + let dtype = match dtype.discriminant() { + 0 => AnyArrayDType::U8, + 1 => AnyArrayDType::U16, + 2 => AnyArrayDType::U32, + 3 => AnyArrayDType::U64, + 4 => AnyArrayDType::I8, + 5 => AnyArrayDType::I16, + 6 => AnyArrayDType::I32, + 7 => AnyArrayDType::I64, + 8 => AnyArrayDType::F32, + 9 => AnyArrayDType::F64, + discriminant => { + return Err(RuntimeError::from(anyhow::Error::msg(format!( + "{record:?} has an invalid dtype variant [{discriminant}]" + )))); + } + }; + + let Some(Value::List(shape)) = record.field("shape") else { + return Err(RuntimeError::from(anyhow::Error::msg(format!( + "process result record {record:?} is missing shape field" + )))); + }; + let shape = shape + .typed::()? + .iter() + .copied() + .map(usize::try_from) + .collect::, _>>() + .map_err(anyhow::Error::new)?; + + Ok(AnyArray::zeros(dtype, &shape)) + } } diff --git a/crates/numcodecs-wasm-host/src/registry.rs b/crates/numcodecs-wasm-host/src/registry.rs index 3334b426c..96d73893a 100644 --- a/crates/numcodecs-wasm-host/src/registry.rs +++ b/crates/numcodecs-wasm-host/src/registry.rs @@ -1,13 +1,13 @@ use std::{error::Error, sync::Arc}; -use numcodecs::{DynCodec, DynCodecType, ErasedDynCodec, ErasedDynCodecType}; +use numcodecs::{Codec, DynCodec, DynCodecType, ErasedDynCodec, ErasedDynCodecType}; use numcodecs_registry::Registry; use wasm_component_layer::{ AsContext, AsContextMut, Func, FuncType, Linker, List, ListType, Record, RecordType, ResourceOwn, ResourceType, ResultType, ResultValue, TypeIdentifier, Value, ValueType, }; -use crate::wit::NumcodecsWitInterfaces; +use crate::{WasmCodec, wit::NumcodecsWitInterfaces}; /// Adds the `registry` to the `linker` to define the `numcodecs:abc/registry` /// interface. @@ -77,6 +77,173 @@ pub fn add_registry_to_linker( numcodecs_registry_codec_type_resource.clone(), )?; + let any_array_record = WasmCodec::any_array_ty().clone(); + + let any_array_result = ResultType::new( + Some(ValueType::Record(any_array_record.clone())), + Some(ValueType::Record(numcodecs_types_error_record.clone())), + ); + + let my_any_array_result = any_array_result.clone(); + let my_numcodecs_types_error_record = numcodecs_types_error_record.clone(); + let codec_encode = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ + ValueType::Borrow(numcodecs_registry_codec_resource.clone()), + ValueType::Record(any_array_record.clone()), + ], + [ValueType::Result(any_array_result.clone())], + ), + move |ctx, args, results| { + let [Value::Borrow(codec), Value::Record(data)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.encode arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.encode results" + ); + }; + + let encoded = WasmCodec::with_array_view_from_wasm_record(data, |data| { + let ctx = ctx.as_context(); + let codec: &ErasedDynCodec = codec.rep(&ctx)?; + + let encoded = codec.encode(data.cow()).map_err(anyhow::Error::new)?; + Ok(encoded) + }); + + let encoded = match encoded { + Ok(encoded) => Ok(WasmCodec::array_into_wasm(encoded.view())?), + Err(err) => Err(into_wit_error(err, &my_numcodecs_types_error_record)?), + }; + + let res = match encoded { + Ok(encoded) => Ok(Some(Value::Record(encoded))), + Err(err) => Err(Some(Value::Record(err))), + }; + + *result = Value::Result(ResultValue::new(my_any_array_result.clone(), res)?); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func("[method]erased-dyn-codec.encode", codec_encode)?; + + let my_any_array_result = any_array_result.clone(); + let my_numcodecs_types_error_record = numcodecs_types_error_record.clone(); + let codec_decode = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ + ValueType::Borrow(numcodecs_registry_codec_resource.clone()), + ValueType::Record(any_array_record.clone()), + ], + [ValueType::Result(any_array_result.clone())], + ), + move |ctx, args, results| { + let [Value::Borrow(codec), Value::Record(encoded)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.decode arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.decode results" + ); + }; + + let decoded = WasmCodec::with_array_view_from_wasm_record(encoded, |encoded| { + let ctx = ctx.as_context(); + let codec: &ErasedDynCodec = codec.rep(&ctx)?; + + let decoded = codec.decode(encoded.cow()).map_err(anyhow::Error::new)?; + Ok(decoded) + }); + + let decoded = match decoded { + Ok(decoded) => Ok(WasmCodec::array_into_wasm(decoded.view())?), + Err(err) => Err(into_wit_error(err, &my_numcodecs_types_error_record)?), + }; + + let res = match decoded { + Ok(decoded) => Ok(Some(Value::Record(decoded))), + Err(err) => Err(Some(Value::Record(err))), + }; + + *result = Value::Result(ResultValue::new(my_any_array_result.clone(), res)?); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func("[method]erased-dyn-codec.decode", codec_decode)?; + + let any_array_prototype_record = WasmCodec::any_array_prototype_ty().clone(); + + let my_any_array_result = any_array_result.clone(); + let my_numcodecs_types_error_record = numcodecs_types_error_record.clone(); + let codec_decode_into = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ + ValueType::Borrow(numcodecs_registry_codec_resource.clone()), + ValueType::Record(any_array_record), + ValueType::Record(any_array_prototype_record), + ], + [ValueType::Result(any_array_result)], + ), + move |ctx, args, results| { + let [ + Value::Borrow(codec), + Value::Record(encoded), + Value::Record(decoded), + ] = args + else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.decode-into arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.decode-into results" + ); + }; + + let mut decoded = WasmCodec::array_prototype_from_wasm_record(decoded)?; + + let res = WasmCodec::with_array_view_from_wasm_record(encoded, |encoded| { + let ctx = ctx.as_context(); + let codec: &ErasedDynCodec = codec.rep(&ctx)?; + + codec + .decode_into(encoded, decoded.view_mut()) + .map_err(anyhow::Error::new)?; + Ok(()) + }); + + let decoded = match res { + Ok(()) => Ok(WasmCodec::array_into_wasm(decoded.view())?), + Err(err) => Err(into_wit_error(err, &my_numcodecs_types_error_record)?), + }; + + let res = match decoded { + Ok(decoded) => Ok(Some(Value::Record(decoded))), + Err(err) => Err(Some(Value::Record(err))), + }; + + *result = Value::Result(ResultValue::new(my_any_array_result.clone(), res)?); + + Ok(()) + }, + ); + numcodecs_registry_instance + .define_func("[method]erased-dyn-codec.decode-into", codec_decode_into)?; + let my_numcodecs_registry_codec_resource = numcodecs_registry_codec_resource.clone(); let codec_clone = Func::new( ctx.as_context_mut(), From 3e85e1e998cd5c202c31cd3b39730cd874bd14c9 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sat, 30 May 2026 07:54:19 +0300 Subject: [PATCH 10/10] Add simple onion identity meta-codec --- Cargo.toml | 2 + codecs/onion/Cargo.toml | 25 ++++++++ codecs/onion/LICENSE | 1 + codecs/onion/README.md | 38 ++++++++++++ codecs/onion/src/lib.rs | 86 +++++++++++++++++++++++++++ codecs/onion/tests/schema.json | 34 +++++++++++ codecs/onion/tests/schema.rs | 20 +++++++ crates/numcodecs-registry/Cargo.toml | 1 + crates/numcodecs-registry/src/lib.rs | 89 +++++++++++++++++++++++++++- 9 files changed, 294 insertions(+), 2 deletions(-) create mode 100644 codecs/onion/Cargo.toml create mode 120000 codecs/onion/LICENSE create mode 100644 codecs/onion/README.md create mode 100644 codecs/onion/src/lib.rs create mode 100644 codecs/onion/tests/schema.json create mode 100644 codecs/onion/tests/schema.rs diff --git a/Cargo.toml b/Cargo.toml index ee9246b00..d873c7bd5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ members = [ "codecs/lc", "codecs/linear-quantize", "codecs/log", + "codecs/onion", "codecs/pco", "codecs/qpet-sperr", "codecs/random-projection", @@ -70,6 +71,7 @@ numcodecs-jpeg2000 = { version = "0.3", path = "codecs/jpeg2000", default-featur numcodecs-lc = { version = "0.1", path = "codecs/lc", default-features = false } numcodecs-linear-quantize = { version = "0.5", path = "codecs/linear-quantize", default-features = false } numcodecs-log = { version = "0.5", path = "codecs/log", default-features = false } +numcodecs-onion = { version = "0.1", path = "codecs/onion", default-features = false } numcodecs-pco = { version = "0.4", path = "codecs/pco", default-features = false } numcodecs-qpet-sperr = { version = "0.2.2", path = "codecs/qpet-sperr", default-features = false } numcodecs-random-projection = { version = "0.4", path = "codecs/random-projection", default-features = false } diff --git a/codecs/onion/Cargo.toml b/codecs/onion/Cargo.toml new file mode 100644 index 000000000..b905f1897 --- /dev/null +++ b/codecs/onion/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "numcodecs-onion" +version = "0.1.0" +edition = { workspace = true } +authors = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +rust-version = { workspace = true } + +description = "Onion identity meta-codec implementation for the numcodecs API" +readme = "README.md" +categories = ["compression", "encoding"] +keywords = ["identity", "numcodecs", "compression", "encoding", "meta"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +numcodecs = { workspace = true } +numcodecs-registry = { workspace = true } +schemars = { workspace = true, features = ["derive", "preserve_order"] } +serde = { workspace = true, features = ["std", "derive"] } +thiserror = { workspace = true } + +[lints] +workspace = true diff --git a/codecs/onion/LICENSE b/codecs/onion/LICENSE new file mode 120000 index 000000000..30cff7403 --- /dev/null +++ b/codecs/onion/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/codecs/onion/README.md b/codecs/onion/README.md new file mode 100644 index 000000000..23a7a2a7e --- /dev/null +++ b/codecs/onion/README.md @@ -0,0 +1,38 @@ +[![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![PyPi Release]][pypi] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs] [![Read the Docs]][rtdocs] + +[CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main +[workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain + +[MSRV]: https://img.shields.io/badge/MSRV-1.87.0-blue +[repo]: https://github.com/juntyr/numcodecs-rs + +[Latest Version]: https://img.shields.io/crates/v/numcodecs-onion +[crates.io]: https://crates.io/crates/numcodecs-onion + +[PyPi Release]: https://img.shields.io/pypi/v/numcodecs-wasm-onion.svg +[pypi]: https://pypi.python.org/pypi/numcodecs-wasm-onion + +[Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-onion +[docs.rs]: https://docs.rs/numcodecs-onion/ + +[Rust Doc Main]: https://img.shields.io/badge/docs-main-blue +[docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_onion + +[Read the Docs]: https://img.shields.io/readthedocs/numcodecs-wasm?label=readthedocs +[rtdocs]: https://numcodecs-wasm.readthedocs.io/en/stable/api/numcodecs_wasm_onion/ + +# numcodecs-onion + +Onion identity meta-codec implementation for the [`numcodecs`] API. + +[`numcodecs`]: https://docs.rs/numcodecs/0.2/numcodecs/ + +## License + +Licensed under the Mozilla Public License, Version 2.0 ([LICENSE](LICENSE) or https://www.mozilla.org/en-US/MPL/2.0/). + +## Funding + +The `numcodecs-onion` crate has been developed as part of [ESiWACE3](https://www.esiwace.eu), the third phase of the Centre of Excellence in Simulation of Weather and Climate in Europe. + +Funded by the European Union. This work has received funding from the European High Performance Computing Joint Undertaking (JU) under grant agreement No 101093054. diff --git a/codecs/onion/src/lib.rs b/codecs/onion/src/lib.rs new file mode 100644 index 000000000..0dbb0ce0b --- /dev/null +++ b/codecs/onion/src/lib.rs @@ -0,0 +1,86 @@ +//! [![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs] +//! +//! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main +//! [workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain +//! +//! [MSRV]: https://img.shields.io/badge/MSRV-1.87.0-blue +//! [repo]: https://github.com/juntyr/numcodecs-rs +//! +//! [Latest Version]: https://img.shields.io/crates/v/numcodecs-onion +//! [crates.io]: https://crates.io/crates/numcodecs-onion +//! +//! [Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-onion +//! [docs.rs]: https://docs.rs/numcodecs-onion/ +//! +//! [Rust Doc Main]: https://img.shields.io/badge/docs-main-blue +//! [docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_onion +//! +//! Onion identity meta-codec implementation for the [`numcodecs`] API. + +use numcodecs::{ + AnyArray, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, ErasedError, StaticCodec, + StaticCodecConfig, StaticCodecVersion, +}; +use numcodecs_registry::GlobalErasedDynCodec; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +#[derive(Clone, Serialize, Deserialize, JsonSchema)] +#[serde(deny_unknown_fields)] +/// Onion identity meta-codec which wraps an existing codec and passes the +/// inputs to and outputs from it unchanged during encoding and decoding. +pub struct OnionCodec { + /// The configuration of the wrapped codec. + pub codec: GlobalErasedDynCodec, + /// The codec's encoding format version. Do not provide this parameter explicitly. + #[serde(default, rename = "_version")] + pub version: StaticCodecVersion<1, 0, 0>, +} + +impl Codec for OnionCodec { + type Error = OnionCodecError; + + fn encode(&self, data: AnyCowArray) -> Result { + self.codec + .encode(data) + .map_err(|err| OnionCodecError { error: err }) + } + + fn decode(&self, encoded: AnyCowArray) -> Result { + self.codec + .decode(encoded) + .map_err(|err| OnionCodecError { error: err }) + } + + fn decode_into( + &self, + encoded: AnyArrayView, + decoded: AnyArrayViewMut, + ) -> Result<(), Self::Error> { + self.codec + .decode_into(encoded, decoded) + .map_err(|err| OnionCodecError { error: err }) + } +} + +impl StaticCodec for OnionCodec { + const CODEC_ID: &'static str = "onion.rs"; + + type Config<'de> = Self; + + fn from_config(config: Self::Config<'_>) -> Self { + config + } + + fn get_config(&self) -> StaticCodecConfig<'_, Self> { + StaticCodecConfig::from(self) + } +} + +#[derive(Debug, Error)] +/// Errors that may occur when applying the [`OnionCodec`]. +#[error(transparent)] +pub struct OnionCodecError { + error: ErasedError, +} diff --git a/codecs/onion/tests/schema.json b/codecs/onion/tests/schema.json new file mode 100644 index 000000000..40cad458a --- /dev/null +++ b/codecs/onion/tests/schema.json @@ -0,0 +1,34 @@ +{ + "type": "object", + "additionalProperties": false, + "properties": { + "codec": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The `codec_id` of the codec, which is looked up in the global\nregistry." + } + }, + "required": [ + "id" + ], + "description": "The configuration of the wrapped codec.", + "additionalProperties": { + "type": "object" + } + }, + "_version": { + "type": "string", + "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$", + "description": "The codec's encoding format version. Do not provide this parameter explicitly.", + "default": "1.0.0" + } + }, + "required": [ + "codec" + ], + "description": "Onion identity meta-codec which wraps an existing codec and passes the\ninputs to and outputs from it unchanged during encoding and decoding.", + "title": "OnionCodec", + "$schema": "https://json-schema.org/draft/2020-12/schema" +} \ No newline at end of file diff --git a/codecs/onion/tests/schema.rs b/codecs/onion/tests/schema.rs new file mode 100644 index 000000000..02d6c09e2 --- /dev/null +++ b/codecs/onion/tests/schema.rs @@ -0,0 +1,20 @@ +#![expect(missing_docs)] + +use ::{numcodecs_registry as _, schemars as _, serde as _, thiserror as _}; + +use numcodecs::{DynCodecType, StaticCodecType}; +use numcodecs_onion::OnionCodec; + +#[test] +fn schema() { + let schema = format!( + "{:#}", + StaticCodecType::::of() + .codec_config_schema() + .to_value() + ); + + if schema != include_str!("schema.json") { + panic!("Onion schema has changed\n===\n{schema}\n==="); + } +} diff --git a/crates/numcodecs-registry/Cargo.toml b/crates/numcodecs-registry/Cargo.toml index 4797c456a..c55c037bb 100644 --- a/crates/numcodecs-registry/Cargo.toml +++ b/crates/numcodecs-registry/Cargo.toml @@ -17,6 +17,7 @@ keywords = ["numcodecs", "registry", "compression", "encoding"] [dependencies] erased-serde = { workspace = true, features = ["std"] } numcodecs = { workspace = true } +schemars = { workspace = true, features = ["derive"] } serde = { workspace = true } thiserror = { workspace = true } diff --git a/crates/numcodecs-registry/src/lib.rs b/crates/numcodecs-registry/src/lib.rs index 9bbb274be..40d5b8f27 100644 --- a/crates/numcodecs-registry/src/lib.rs +++ b/crates/numcodecs-registry/src/lib.rs @@ -19,10 +19,16 @@ //! //! [`numcodecs`]: https://numcodecs.readthedocs.io/en/stable/ -use std::{error::Error, sync::Arc}; +use std::{ + borrow::Cow, + error::Error, + ops::{Deref, DerefMut}, + sync::Arc, +}; use numcodecs::{DynCodec, ErasedDynCodec, ErasedError}; -use serde::Deserializer; +use schemars::{JsonSchema, Schema, SchemaGenerator}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; /// Registry of codec types. pub trait Registry: 'static + Send + Sync { @@ -233,3 +239,82 @@ impl Registry for EmptyRegistry { Err(CodecNotFoundError) } } + +#[derive(Clone)] +/// Wrapper around an [`ErasedDynCodec`] that can be used inside a meta-codec +/// configuration to (de)serialize a wrapped inner codec. +pub struct GlobalErasedDynCodec { + codec: ErasedDynCodec, +} + +impl GlobalErasedDynCodec { + #[must_use] + /// Wrap an existing `codec`. + pub const fn new(codec: ErasedDynCodec) -> Self { + Self { codec } + } + + #[must_use] + /// Extract the inner codec. + pub fn into_inner(this: Self) -> ErasedDynCodec { + this.codec + } +} + +impl Deref for GlobalErasedDynCodec { + type Target = ErasedDynCodec; + + fn deref(&self) -> &Self::Target { + &self.codec + } +} + +impl DerefMut for GlobalErasedDynCodec { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.codec + } +} + +impl Serialize for GlobalErasedDynCodec { + fn serialize(&self, serializer: S) -> Result { + self.codec.get_config(serializer) + } +} + +impl<'de> Deserialize<'de> for GlobalErasedDynCodec { + fn deserialize>(deserializer: D) -> Result { + Ok(Self { + codec: GlobalRegistry + .get_codec(deserializer) + .map_err(serde::de::Error::custom)?, + }) + } +} + +impl JsonSchema for GlobalErasedDynCodec { + fn inline_schema() -> bool { + false + } + + fn schema_name() -> Cow<'static, str> { + Cow::Borrowed("NumcodecsCodecConfig") + } + + fn schema_id() -> Cow<'static, str> { + Cow::Borrowed(concat!(module_path!(), "::", "GlobalErasedDynCodec")) + } + + fn json_schema(generator: &mut SchemaGenerator) -> Schema { + #[derive(JsonSchema)] + #[schemars(extend("additionalProperties" = {"type": "object"}))] + /// The configuration for a codec. + struct NumcodecsCodecConfig { + /// The `codec_id` of the codec, which is looked up in the global + /// registry. + #[expect(dead_code)] + id: String, + } + + NumcodecsCodecConfig::json_schema(generator) + } +}