From 9b1dc4ead9e7529b181f5cdc48f0fb7c7b94db11 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 31 May 2026 07:20:11 +0300 Subject: [PATCH 1/4] Implement numcodecs-registry from #72 on host --- crates/numcodecs-python/Cargo.toml | 1 + crates/numcodecs-python/src/registry.rs | 46 +- .../Cargo.toml | 1 + .../src/codec.rs | 9 + .../src/tests.rs | 7 +- .../src/transform/mod.rs | 31 +- crates/numcodecs-wasm-host/Cargo.toml | 1 + crates/numcodecs-wasm-host/src/codec.rs | 62 +- crates/numcodecs-wasm-host/src/lib.rs | 2 + crates/numcodecs-wasm-host/src/registry.rs | 562 ++++++++++++++++++ crates/numcodecs-wasm-host/src/wit.rs | 25 +- py/numcodecs-wasm/Cargo.toml | 1 + py/numcodecs-wasm/src/lib.rs | 7 +- 13 files changed, 722 insertions(+), 33 deletions(-) create mode 100644 crates/numcodecs-wasm-host/src/registry.rs diff --git a/crates/numcodecs-python/Cargo.toml b/crates/numcodecs-python/Cargo.toml index c8ed8daf5..a2d431ee1 100644 --- a/crates/numcodecs-python/Cargo.toml +++ b/crates/numcodecs-python/Cargo.toml @@ -18,6 +18,7 @@ keywords = ["numcodecs", "compression", "encoding", "python", "pyo3"] convert_case = { workspace = true } ndarray = { workspace = true } numcodecs = { workspace = true } +numcodecs-registry = { workspace = true } numpy = { workspace = true } pyo3 = { workspace = true } pyo3-error = { workspace = true } diff --git a/crates/numcodecs-python/src/registry.rs b/crates/numcodecs-python/src/registry.rs index e3f121ad3..9adbaa6c3 100644 --- a/crates/numcodecs-python/src/registry.rs +++ b/crates/numcodecs-python/src/registry.rs @@ -1,13 +1,16 @@ +use numcodecs::{DynCodec, ErasedDynCodec}; +use numcodecs_registry::Registry; use pyo3::{prelude::*, sync::PyOnceLock, types::PyDict}; +use pythonize::Pythonizer; +use serde::Deserializer; +use serde_transcode::transcode; #[expect(unused_imports)] // FIXME: use expect, only used in docs use crate::PyCodecClassMethods; -use crate::{PyCodec, PyCodecClass}; +use crate::{PyCodec, PyCodecAdapter, PyCodecClass}; /// Dynamic registry of codec classes. -pub struct PyCodecRegistry { - _private: (), -} +pub struct PyCodecRegistry; impl PyCodecRegistry { /// Instantiate a codec from a configuration dictionary. @@ -56,3 +59,38 @@ impl PyCodecRegistry { Ok(()) } } + +impl Registry for PyCodecRegistry { + type Error = PyErr; + + fn get_codec<'de, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result { + Python::attach(|py| { + let config = transcode(config, Pythonizer::new(py))?; + let config: Bound = config.extract()?; + + let codec = Self::get_codec(config.as_borrowed())?; + let codec = PyCodecAdapter::from_codec(codec)?; + + Ok(ErasedDynCodec::new(codec)) + }) + } + + fn get_codec_typed<'de, T: DynCodec, D: Deserializer<'de>>( + &self, + config: D, + ) -> Result, Self::Error> { + Python::attach(|py| { + let config = transcode(config, Pythonizer::new(py))?; + let config: Bound = config.extract()?; + + let codec = Self::get_codec(config.as_borrowed())?; + // clone is necessary since we cannot move out of a PyCodec + let codec = PyCodecAdapter::with_downcast(py, &codec, |codec: &T| codec.clone()); + + Ok(codec) + }) + } +} diff --git a/crates/numcodecs-wasm-host-reproducible/Cargo.toml b/crates/numcodecs-wasm-host-reproducible/Cargo.toml index 75af6bd19..830e0f59e 100644 --- a/crates/numcodecs-wasm-host-reproducible/Cargo.toml +++ b/crates/numcodecs-wasm-host-reproducible/Cargo.toml @@ -26,6 +26,7 @@ numcodecs-wasm-host = { workspace = true } indexmap = { workspace = true, features = ["std"] } log = { workspace = true } numcodecs = { workspace = true } +numcodecs-registry = { workspace = true } polonius-the-crab = { workspace = true } schemars = { workspace = true } semver = { workspace = true } diff --git a/crates/numcodecs-wasm-host-reproducible/src/codec.rs b/crates/numcodecs-wasm-host-reproducible/src/codec.rs index 1328c22bd..9b4747a28 100644 --- a/crates/numcodecs-wasm-host-reproducible/src/codec.rs +++ b/crates/numcodecs-wasm-host-reproducible/src/codec.rs @@ -4,6 +4,7 @@ use std::sync::{Arc, Mutex}; use numcodecs::{ AnyArray, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, DynCodec, DynCodecType, }; +use numcodecs_registry::Registry; use numcodecs_wasm_host::{CodecError, RuntimeError, WasmCodec, WasmCodecComponent}; use schemars::Schema; use serde::Serializer; @@ -323,6 +324,7 @@ where pub fn new( engine: E, wasm_component: impl Into>, + registry: impl Registry, ) -> Result where E: Send + Sync, @@ -343,6 +345,8 @@ where } })?; + let registry = Arc::new(registry); + let component_instantiater = Arc::new(move |component: &Component, codec_id: &str| { let mut store = Store::new(&engine, ()); @@ -359,6 +363,11 @@ where source: RuntimeError::from(err), } })?; + numcodecs_wasm_host::add_registry_to_linker(&mut linker, &mut store, registry.clone()) + .map_err(|err| ReproducibleWasmCodecError::Runtime { + codec_id: Arc::from(codec_id), + source: RuntimeError::from(err), + })?; let instance = linker.instantiate(&mut store, component).map_err(|err| { ReproducibleWasmCodecError::Runtime { diff --git a/crates/numcodecs-wasm-host-reproducible/src/tests.rs b/crates/numcodecs-wasm-host-reproducible/src/tests.rs index d22978864..8addebfb9 100644 --- a/crates/numcodecs-wasm-host-reproducible/src/tests.rs +++ b/crates/numcodecs-wasm-host-reproducible/src/tests.rs @@ -2,6 +2,7 @@ use ndarray::Array; use ndarray_rand::RandomExt; use ndarray_rand::rand_distr::Normal; use numcodecs::{Codec, DynCodecType}; +use numcodecs_registry::EmptyRegistry; use crate::ReproducibleWasmCodecType; @@ -53,7 +54,11 @@ fn codec_roundtrip() { let engine = wasmtime_runtime_layer::Engine::new(wasmtime::Engine::new(&config).unwrap()); - let ty = match ReproducibleWasmCodecType::new(engine, include_bytes!("../tests/round.wasm")) { + let ty = match ReproducibleWasmCodecType::new( + engine, + include_bytes!("../tests/round.wasm"), + EmptyRegistry, + ) { Ok(ty) => ty, Err(err) => panic!( "ReproducibleWasmCodecType::new:\n===\n{err}\n===\n{err:?}\n===\n{err:#}\n===\n{err:#?}\n===\n" diff --git a/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs b/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs index bcf0a663d..73e8f3c85 100644 --- a/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs +++ b/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs @@ -12,7 +12,10 @@ pub mod nan; #[expect(clippy::too_many_lines)] // FIXME pub fn transform_wasm_component(wasm_component: impl Into>) -> Result, Error> { let NumcodecsWitInterfaces { + package, codec: codec_interface, + registry: registry_interface, + types: types_interface, .. } = NumcodecsWitInterfaces::get(); @@ -24,33 +27,37 @@ pub fn transform_wasm_component(wasm_component: impl Into>) -> Result>) -> Result>(); @@ -150,7 +157,7 @@ pub fn transform_wasm_component(wasm_component: impl Into>) -> Result &'static VariantType { + pub(crate) fn any_array_data_ty() -> &'static VariantType { static ANY_ARRAY_DATA_TY: OnceLock = OnceLock::new(); #[expect(clippy::expect_used)] @@ -337,7 +337,7 @@ impl WasmCodec { }) } - fn any_array_ty() -> &'static RecordType { + pub(crate) fn any_array_ty() -> &'static RecordType { static ANY_ARRAY_TY: OnceLock = OnceLock::new(); #[expect(clippy::expect_used)] @@ -359,7 +359,7 @@ impl WasmCodec { } #[expect(clippy::needless_pass_by_value)] - fn array_into_wasm(array: AnyArrayView) -> Result { + pub(crate) fn array_into_wasm(array: AnyArrayView) -> Result { fn list_from_standard_layout<'a, T: 'static + Copy, S: Data, D: Dimension>( array: &'a ArrayBase, ) -> List @@ -451,7 +451,7 @@ impl WasmCodec { .map_err(RuntimeError::from) } - fn any_array_dtype_ty() -> &'static EnumType { + pub(crate) fn any_array_dtype_ty() -> &'static EnumType { static ANY_ARRAY_DTYPE_TY: OnceLock = OnceLock::new(); #[expect(clippy::expect_used)] @@ -468,7 +468,7 @@ impl WasmCodec { }) } - fn any_array_prototype_ty() -> &'static RecordType { + pub(crate) fn any_array_prototype_ty() -> &'static RecordType { static ANY_ARRAY_PROTOTYPE_TY: OnceLock = OnceLock::new(); #[expect(clippy::expect_used)] @@ -486,7 +486,7 @@ impl WasmCodec { }) } - fn array_prototype_into_wasm( + pub(crate) fn array_prototype_into_wasm( dtype: AnyArrayDType, shape: &[usize], ) -> Result { @@ -522,7 +522,7 @@ impl WasmCodec { .map_err(RuntimeError::from) } - fn with_array_view_from_wasm_record( + pub(crate) fn with_array_view_from_wasm_record( record: &Record, with: impl for<'a> FnOnce(AnyArrayView<'a>) -> Result, ) -> Result { @@ -602,4 +602,52 @@ impl WasmCodec { with(array) } + + pub(crate) fn array_prototype_from_wasm_record( + record: &Record, + ) -> Result { + let Some(Value::Variant(dtype)) = record.field("dtype") else { + return Err(RuntimeError::from(anyhow::Error::msg(format!( + "{record:?} is missing dtype field" + )))); + }; + if let Some(ty) = dtype.value() { + return Err(RuntimeError::from(anyhow::Error::msg(format!( + "{record:?} has an invalid dtype variant type {ty:?}" + )))); + } + + let dtype = match dtype.discriminant() { + 0 => AnyArrayDType::U8, + 1 => AnyArrayDType::U16, + 2 => AnyArrayDType::U32, + 3 => AnyArrayDType::U64, + 4 => AnyArrayDType::I8, + 5 => AnyArrayDType::I16, + 6 => AnyArrayDType::I32, + 7 => AnyArrayDType::I64, + 8 => AnyArrayDType::F32, + 9 => AnyArrayDType::F64, + discriminant => { + return Err(RuntimeError::from(anyhow::Error::msg(format!( + "{record:?} has an invalid dtype variant [{discriminant}]" + )))); + } + }; + + let Some(Value::List(shape)) = record.field("shape") else { + return Err(RuntimeError::from(anyhow::Error::msg(format!( + "process result record {record:?} is missing shape field" + )))); + }; + let shape = shape + .typed::()? + .iter() + .copied() + .map(usize::try_from) + .collect::, _>>() + .map_err(anyhow::Error::new)?; + + Ok(AnyArray::zeros(dtype, &shape)) + } } diff --git a/crates/numcodecs-wasm-host/src/lib.rs b/crates/numcodecs-wasm-host/src/lib.rs index fa57892c2..bb0620450 100644 --- a/crates/numcodecs-wasm-host/src/lib.rs +++ b/crates/numcodecs-wasm-host/src/lib.rs @@ -24,9 +24,11 @@ mod codec; mod component; mod error; +mod registry; mod wit; pub use codec::WasmCodec; pub use component::WasmCodecComponent; pub use error::{CodecError, RuntimeError}; +pub use registry::add_registry_to_linker; pub use wit::NumcodecsWitInterfaces; diff --git a/crates/numcodecs-wasm-host/src/registry.rs b/crates/numcodecs-wasm-host/src/registry.rs new file mode 100644 index 000000000..96d73893a --- /dev/null +++ b/crates/numcodecs-wasm-host/src/registry.rs @@ -0,0 +1,562 @@ +use std::{error::Error, sync::Arc}; + +use numcodecs::{Codec, DynCodec, DynCodecType, ErasedDynCodec, ErasedDynCodecType}; +use numcodecs_registry::Registry; +use wasm_component_layer::{ + AsContext, AsContextMut, Func, FuncType, Linker, List, ListType, Record, RecordType, + ResourceOwn, ResourceType, ResultType, ResultValue, TypeIdentifier, Value, ValueType, +}; + +use crate::{WasmCodec, wit::NumcodecsWitInterfaces}; + +/// Adds the `registry` to the `linker` to define the `numcodecs:abc/registry` +/// interface. +/// +/// # Errors +/// +/// Errors if adding the `registry` to the `linker` fails. +#[expect(clippy::too_many_lines)] // FIXME +pub fn add_registry_to_linker( + linker: &mut Linker, + mut ctx: impl AsContextMut, + registry: impl Registry, +) -> Result<(), anyhow::Error> { + let NumcodecsWitInterfaces { + registry: numcodecs_registry_interface, + types: numcodecs_types_interface, + .. + } = NumcodecsWitInterfaces::get(); + + let registry = Arc::new(registry); + + let numcodecs_types_error_record = RecordType::new( + Some(TypeIdentifier::new( + "error", + Some(numcodecs_types_interface.clone()), + )), + [ + ("message", ValueType::String), + ("chain", ValueType::List(ListType::new(ValueType::String))), + ], + )?; + + let numcodecs_registry_instance = + linker.define_instance(numcodecs_registry_interface.clone())?; + + let numcodecs_registry_codec_resource = ResourceType::with_destructor( + ctx.as_context_mut(), + Some(TypeIdentifier::new( + "erased-dyn-codec", + Some(numcodecs_registry_interface.clone()), + )), + |_ctx, codec: ErasedDynCodec| { + std::mem::drop(codec); + Ok(()) + }, + )?; + + numcodecs_registry_instance.define_resource( + "erased-dyn-codec", + numcodecs_registry_codec_resource.clone(), + )?; + + let numcodecs_registry_codec_type_resource = ResourceType::with_destructor( + ctx.as_context_mut(), + Some(TypeIdentifier::new( + "erased-dyn-codec-type", + Some(numcodecs_registry_interface.clone()), + )), + |_ctx, codec_ty: ErasedDynCodecType| { + std::mem::drop(codec_ty); + Ok(()) + }, + )?; + + numcodecs_registry_instance.define_resource( + "erased-dyn-codec-type", + numcodecs_registry_codec_type_resource.clone(), + )?; + + let any_array_record = WasmCodec::any_array_ty().clone(); + + let any_array_result = ResultType::new( + Some(ValueType::Record(any_array_record.clone())), + Some(ValueType::Record(numcodecs_types_error_record.clone())), + ); + + let my_any_array_result = any_array_result.clone(); + let my_numcodecs_types_error_record = numcodecs_types_error_record.clone(); + let codec_encode = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ + ValueType::Borrow(numcodecs_registry_codec_resource.clone()), + ValueType::Record(any_array_record.clone()), + ], + [ValueType::Result(any_array_result.clone())], + ), + move |ctx, args, results| { + let [Value::Borrow(codec), Value::Record(data)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.encode arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.encode results" + ); + }; + + let encoded = WasmCodec::with_array_view_from_wasm_record(data, |data| { + let ctx = ctx.as_context(); + let codec: &ErasedDynCodec = codec.rep(&ctx)?; + + let encoded = codec.encode(data.cow()).map_err(anyhow::Error::new)?; + Ok(encoded) + }); + + let encoded = match encoded { + Ok(encoded) => Ok(WasmCodec::array_into_wasm(encoded.view())?), + Err(err) => Err(into_wit_error(err, &my_numcodecs_types_error_record)?), + }; + + let res = match encoded { + Ok(encoded) => Ok(Some(Value::Record(encoded))), + Err(err) => Err(Some(Value::Record(err))), + }; + + *result = Value::Result(ResultValue::new(my_any_array_result.clone(), res)?); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func("[method]erased-dyn-codec.encode", codec_encode)?; + + let my_any_array_result = any_array_result.clone(); + let my_numcodecs_types_error_record = numcodecs_types_error_record.clone(); + let codec_decode = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ + ValueType::Borrow(numcodecs_registry_codec_resource.clone()), + ValueType::Record(any_array_record.clone()), + ], + [ValueType::Result(any_array_result.clone())], + ), + move |ctx, args, results| { + let [Value::Borrow(codec), Value::Record(encoded)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.decode arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.decode results" + ); + }; + + let decoded = WasmCodec::with_array_view_from_wasm_record(encoded, |encoded| { + let ctx = ctx.as_context(); + let codec: &ErasedDynCodec = codec.rep(&ctx)?; + + let decoded = codec.decode(encoded.cow()).map_err(anyhow::Error::new)?; + Ok(decoded) + }); + + let decoded = match decoded { + Ok(decoded) => Ok(WasmCodec::array_into_wasm(decoded.view())?), + Err(err) => Err(into_wit_error(err, &my_numcodecs_types_error_record)?), + }; + + let res = match decoded { + Ok(decoded) => Ok(Some(Value::Record(decoded))), + Err(err) => Err(Some(Value::Record(err))), + }; + + *result = Value::Result(ResultValue::new(my_any_array_result.clone(), res)?); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func("[method]erased-dyn-codec.decode", codec_decode)?; + + let any_array_prototype_record = WasmCodec::any_array_prototype_ty().clone(); + + let my_any_array_result = any_array_result.clone(); + let my_numcodecs_types_error_record = numcodecs_types_error_record.clone(); + let codec_decode_into = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ + ValueType::Borrow(numcodecs_registry_codec_resource.clone()), + ValueType::Record(any_array_record), + ValueType::Record(any_array_prototype_record), + ], + [ValueType::Result(any_array_result)], + ), + move |ctx, args, results| { + let [ + Value::Borrow(codec), + Value::Record(encoded), + Value::Record(decoded), + ] = args + else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.decode-into arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.decode-into results" + ); + }; + + let mut decoded = WasmCodec::array_prototype_from_wasm_record(decoded)?; + + let res = WasmCodec::with_array_view_from_wasm_record(encoded, |encoded| { + let ctx = ctx.as_context(); + let codec: &ErasedDynCodec = codec.rep(&ctx)?; + + codec + .decode_into(encoded, decoded.view_mut()) + .map_err(anyhow::Error::new)?; + Ok(()) + }); + + let decoded = match res { + Ok(()) => Ok(WasmCodec::array_into_wasm(decoded.view())?), + Err(err) => Err(into_wit_error(err, &my_numcodecs_types_error_record)?), + }; + + let res = match decoded { + Ok(decoded) => Ok(Some(Value::Record(decoded))), + Err(err) => Err(Some(Value::Record(err))), + }; + + *result = Value::Result(ResultValue::new(my_any_array_result.clone(), res)?); + + Ok(()) + }, + ); + numcodecs_registry_instance + .define_func("[method]erased-dyn-codec.decode-into", codec_decode_into)?; + + let my_numcodecs_registry_codec_resource = numcodecs_registry_codec_resource.clone(); + let codec_clone = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ValueType::Borrow(numcodecs_registry_codec_resource.clone())], + [ValueType::Own(numcodecs_registry_codec_resource.clone())], + ), + move |ctx, args, results| { + let [Value::Borrow(codec)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.clone arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.clone results" + ); + }; + + let codec = { + let ctx = ctx.as_context(); + let codec: &ErasedDynCodec = codec.rep(&ctx)?; + codec.clone() + }; + + *result = Value::Own(ResourceOwn::new( + ctx, + codec, + my_numcodecs_registry_codec_resource.clone(), + )?); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func("[method]erased-dyn-codec.clone", codec_clone)?; + + let string_result = ResultType::new( + Some(ValueType::String), + Some(ValueType::Record(numcodecs_types_error_record.clone())), + ); + + let my_numcodecs_types_error_record = numcodecs_types_error_record.clone(); + let codec_get_config = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ValueType::Borrow(numcodecs_registry_codec_resource.clone())], + [ValueType::Result(string_result.clone())], + ), + move |ctx, args, results| { + let [Value::Borrow(codec)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.get-config arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.get-config results" + ); + }; + + let config = { + let ctx = ctx.as_context(); + let codec: &ErasedDynCodec = codec.rep(&ctx)?; + + let mut config_bytes = Vec::new(); + match codec.get_config(&mut serde_json::Serializer::new(&mut config_bytes)) { + Ok(()) => match String::from_utf8(config_bytes) { + Ok(config) => Ok(config), + Err(err) => Err(into_wit_error(err, &my_numcodecs_types_error_record)?), + }, + Err(err) => Err(into_wit_error(err, &my_numcodecs_types_error_record)?), + } + }; + + let res = match config { + Ok(config) => Ok(Some(Value::String(Arc::from(config)))), + Err(err) => Err(Some(Value::Record(err))), + }; + + *result = Value::Result(ResultValue::new(string_result.clone(), res)?); + + Ok(()) + }, + ); + numcodecs_registry_instance + .define_func("[method]erased-dyn-codec.get-config", codec_get_config)?; + + let my_numcodecs_registry_codec_type_resource = numcodecs_registry_codec_type_resource.clone(); + let codec_ty = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ValueType::Borrow(numcodecs_registry_codec_resource.clone())], + [ValueType::Own( + numcodecs_registry_codec_type_resource.clone(), + )], + ), + move |ctx, args, results| { + let [Value::Borrow(codec)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec.ty arguments" + ); + }; + + let [result] = results else { + anyhow::bail!("invalid numcodecs:abc/registry#[method]erased-dyn-codec.ty results"); + }; + + let ty = { + let ctx = ctx.as_context(); + let codec: &ErasedDynCodec = codec.rep(&ctx)?; + codec.ty() + }; + + *result = Value::Own(ResourceOwn::new( + ctx, + ty, + my_numcodecs_registry_codec_type_resource.clone(), + )?); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func("[method]erased-dyn-codec.ty", codec_ty)?; + + let codec_type_id = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ValueType::Borrow( + numcodecs_registry_codec_type_resource.clone(), + )], + [ValueType::String], + ), + move |ctx, args, results| { + let [Value::Borrow(ty)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec-type.codec-id arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codectype.codec-id results" + ); + }; + + let ctx = ctx.as_context(); + let ty: &ErasedDynCodecType = ty.rep(&ctx)?; + + *result = Value::String(Arc::from(ty.codec_id())); + + Ok(()) + }, + ); + numcodecs_registry_instance + .define_func("[method]erased-dyn-codec-type.codec-id", codec_type_id)?; + + let codec_type_schema = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ValueType::Borrow( + numcodecs_registry_codec_type_resource.clone(), + )], + [ValueType::String], + ), + move |ctx, args, results| { + let [Value::Borrow(ty)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec-type.codec-config-schema arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codectype.codec-config-schema results" + ); + }; + + let ctx = ctx.as_context(); + let ty: &ErasedDynCodecType = ty.rep(&ctx)?; + + *result = Value::String(Arc::from(ty.codec_config_schema().to_value().to_string())); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func( + "[method]erased-dyn-codec-type.codec-config-schema", + codec_type_schema, + )?; + + let codec_result = ResultType::new( + Some(ValueType::Own(numcodecs_registry_codec_resource.clone())), + Some(ValueType::Record(numcodecs_types_error_record.clone())), + ); + + let my_numcodecs_registry_codec_resource = numcodecs_registry_codec_resource.clone(); + let my_numcodecs_types_error_record = numcodecs_types_error_record.clone(); + let my_codec_result = codec_result.clone(); + let codec_from_config = Func::new( + ctx.as_context_mut(), + FuncType::new( + [ + ValueType::Borrow(numcodecs_registry_codec_type_resource), + ValueType::String, + ], + [ValueType::Result(my_codec_result.clone())], + ), + move |ctx, args, results| { + let [Value::Borrow(ty), Value::String(config)] = args else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codec-type.codec-from-config arguments" + ); + }; + + let [result] = results else { + anyhow::bail!( + "invalid numcodecs:abc/registry#[method]erased-dyn-codectype.codec-from-config results" + ); + }; + + let res = { + let ctx = ctx.as_context(); + let ty: &ErasedDynCodecType = ty.rep(&ctx)?; + ty.codec_from_config(&mut serde_json::Deserializer::from_str(config)) + }; + + let res = match res { + Ok(codec) => Ok(Some(Value::Own(ResourceOwn::new( + ctx, + codec, + my_numcodecs_registry_codec_resource.clone(), + )?))), + Err(err) => Err(Some(Value::Record(into_wit_error( + err, + &my_numcodecs_types_error_record, + )?))), + }; + + *result = Value::Result(ResultValue::new(my_codec_result.clone(), res)?); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func( + "[method]erased-dyn-codec-type.codec-from-config", + codec_from_config, + )?; + + let my_numcodecs_registry_codec_resource = numcodecs_registry_codec_resource; + let my_numcodecs_types_error_record = numcodecs_types_error_record; + let my_codec_result = codec_result; + let get_codec = Func::new( + ctx, + FuncType::new( + [ValueType::String], + [ValueType::Result(my_codec_result.clone())], + ), + move |ctx, args, results| { + let [Value::String(config)] = args else { + anyhow::bail!("invalid numcodecs:abc/registry#get-codec arguments"); + }; + + let [result] = results else { + anyhow::bail!("invalid numcodecs:abc/registry#get-codec results"); + }; + + let res = match registry.get_codec(&mut serde_json::Deserializer::from_str(config)) { + Ok(codec) => Ok(Some(Value::Own(ResourceOwn::new( + ctx, + codec, + my_numcodecs_registry_codec_resource.clone(), + )?))), + Err(err) => Err(Some(Value::Record(into_wit_error( + err, + &my_numcodecs_types_error_record, + )?))), + }; + + *result = Value::Result(ResultValue::new(my_codec_result.clone(), res)?); + + Ok(()) + }, + ); + numcodecs_registry_instance.define_func("get-codec", get_codec)?; + + Ok(()) +} + +fn into_wit_error(err: T, ty: &RecordType) -> Result { + let mut source: Option<&dyn Error> = err.source(); + + let message = Value::String(Arc::from(format!("{err}"))); + let mut chain = if source.is_some() { + Vec::with_capacity(4) + } else { + Vec::new() + }; + + while let Some(err) = source.take() { + chain.push(Value::String(Arc::from(format!("{err}")))); + source = err.source(); + } + + Record::new( + ty.clone(), + [ + ("message", message), + ( + "chain", + Value::List(List::new(ListType::new(ValueType::String), chain)?), + ), + ], + ) +} diff --git a/crates/numcodecs-wasm-host/src/wit.rs b/crates/numcodecs-wasm-host/src/wit.rs index d8fdc0a18..1204ad78d 100644 --- a/crates/numcodecs-wasm-host/src/wit.rs +++ b/crates/numcodecs-wasm-host/src/wit.rs @@ -8,8 +8,14 @@ use crate::error::{CodecError, RuntimeError}; /// WebAssembly Interface Type (WIT) interfaces for `numcodecs` #[non_exhaustive] pub struct NumcodecsWitInterfaces { + /// The `numcodecs:abc` package + pub package: PackageIdentifier, /// The `numcodecs:abc/codec` interface pub codec: InterfaceIdentifier, + /// The `numcodecs:abc/registry` interface + pub registry: InterfaceIdentifier, + /// The `numcodecs:abc/types` interface + pub types: InterfaceIdentifier, } impl NumcodecsWitInterfaces { @@ -18,14 +24,17 @@ impl NumcodecsWitInterfaces { pub fn get() -> &'static Self { static NUMCODECS_WIT_INTERFACES: OnceLock = OnceLock::new(); - NUMCODECS_WIT_INTERFACES.get_or_init(|| Self { - codec: InterfaceIdentifier::new( - PackageIdentifier::new( - PackageName::new("numcodecs", "abc"), - Some(Version::new(0, 1, 1)), - ), - "codec", - ), + NUMCODECS_WIT_INTERFACES.get_or_init(|| { + let package = PackageIdentifier::new( + PackageName::new("numcodecs", "abc"), + Some(Version::new(0, 1, 1)), + ); + Self { + package: package.clone(), + codec: InterfaceIdentifier::new(package.clone(), "codec"), + registry: InterfaceIdentifier::new(package.clone(), "registry"), + types: InterfaceIdentifier::new(package, "types"), + } }) } } diff --git a/py/numcodecs-wasm/Cargo.toml b/py/numcodecs-wasm/Cargo.toml index 2f3a65124..4d8ea9f21 100644 --- a/py/numcodecs-wasm/Cargo.toml +++ b/py/numcodecs-wasm/Cargo.toml @@ -29,6 +29,7 @@ anyhow = { workspace = true } # FIXME: https://github.com/bytecodealliance/rustix/issues/1620 memfd = { version = "0.6.5", default-features = false } numcodecs-python = { workspace = true } +numcodecs-registry = { workspace = true } numcodecs-wasm-host-reproducible = { workspace = true } pyo3 = { workspace = true, features = ["macros", "abi3-py310"] } pyo3-error = { workspace = true } diff --git a/py/numcodecs-wasm/src/lib.rs b/py/numcodecs-wasm/src/lib.rs index 66010b32e..163739ca6 100644 --- a/py/numcodecs-wasm/src/lib.rs +++ b/py/numcodecs-wasm/src/lib.rs @@ -32,7 +32,8 @@ fn create_codec_class<'py>( ) -> Result, PyErr> { let engine = default_engine(py)?; - let codec_ty = ReproducibleWasmCodecType::new(engine, wasm) + // TODO: we should allow restricting the codecs that the reproducible codec can 'see' + let codec_ty = ReproducibleWasmCodecType::new(engine, wasm, numcodecs_python::PyCodecRegistry) .map_err(|err| pyo3_error::PyErrChain::new(py, err))?; let codec_class = numcodecs_python::export_codec_class(py, codec_ty, module.as_borrowed())?; @@ -60,3 +61,7 @@ fn read_codec_instruction_counter<'py>( Ok(instruction_counter.0) } + +numcodecs_registry::export_global! { + static REGISTRY: numcodecs_python::PyCodecRegistry = numcodecs_python::PyCodecRegistry; +} From 347b505ad6837f089172e34322ed0df8a35838ff Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 31 May 2026 08:22:20 +0300 Subject: [PATCH 2/4] check wit bump failures --- .../src/transform/mod.rs | 19 ++++++++------- crates/numcodecs-wasm-host/src/component.rs | 24 ++++++++++--------- crates/numcodecs-wasm-host/src/wit.rs | 2 +- 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs b/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs index 73e8f3c85..aeabbe6c8 100644 --- a/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs +++ b/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs @@ -12,10 +12,10 @@ pub mod nan; #[expect(clippy::too_many_lines)] // FIXME pub fn transform_wasm_component(wasm_component: impl Into>) -> Result, Error> { let NumcodecsWitInterfaces { - package, - codec: codec_interface, - registry: registry_interface, - types: types_interface, + package: numcodecs_package, + codec: numcodecs_codec_interface, + registry: numcodecs_registry_interface, + types: numcodecs_types_interface, .. } = NumcodecsWitInterfaces::get(); @@ -31,8 +31,8 @@ pub fn transform_wasm_component(wasm_component: impl Into>) -> Result>) -> Result>) -> Result = codec_config_schema.call(&mut ctx, ())?; let codec_config_schema: Schema = serde_json::from_str(&codec_config_schema).map_err(anyhow::Error::new)?; @@ -91,11 +93,11 @@ impl WasmCodecComponent { Ok(Self { codec_id, codec_config_schema: Arc::new(codec_config_schema), - from_config: load_func(codecs_interface, "[static]codec.from-config")?, - encode: load_func(codecs_interface, "[method]codec.encode")?, - decode: load_func(codecs_interface, "[method]codec.decode")?, - decode_into: load_func(codecs_interface, "[method]codec.decode-into")?, - get_config: load_func(codecs_interface, "[method]codec.get-config")?, + from_config: load_func(codec_interface, "[static]codec.from-config")?, + encode: load_func(codec_interface, "[method]codec.encode")?, + decode: load_func(codec_interface, "[method]codec.decode")?, + decode_into: load_func(codec_interface, "[method]codec.decode-into")?, + get_config: load_func(codec_interface, "[method]codec.get-config")?, instance, }) } diff --git a/crates/numcodecs-wasm-host/src/wit.rs b/crates/numcodecs-wasm-host/src/wit.rs index 1204ad78d..c8e547ca9 100644 --- a/crates/numcodecs-wasm-host/src/wit.rs +++ b/crates/numcodecs-wasm-host/src/wit.rs @@ -27,7 +27,7 @@ impl NumcodecsWitInterfaces { NUMCODECS_WIT_INTERFACES.get_or_init(|| { let package = PackageIdentifier::new( PackageName::new("numcodecs", "abc"), - Some(Version::new(0, 1, 1)), + Some(Version::new(0, 1, 2)), ); Self { package: package.clone(), From 695c8b42760c42423398d03085249d3111c95b42 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 31 May 2026 09:04:40 +0300 Subject: [PATCH 3/4] try to re-export old codecs as new codecs --- .../src/transform/mod.rs | 11 ++++++++++- crates/numcodecs-wasm-host/src/wit.rs | 7 +++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs b/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs index aeabbe6c8..6b97973d8 100644 --- a/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs +++ b/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs @@ -3,6 +3,7 @@ use std::sync::OnceLock; use anyhow::{Context, Error, anyhow}; use instcnt::PerfWitInterfaces; use numcodecs_wasm_host::NumcodecsWitInterfaces; +use wac_graph::AliasError; use crate::{logging::WasiLoggingInterface, stdio::WasiSandboxedStdioInterface}; @@ -14,6 +15,7 @@ pub fn transform_wasm_component(wasm_component: impl Into>) -> Result>) -> Result numcodecs_codecs_export, + Err(AliasError::InstanceMissingExport { .. }) => wac.alias_instance_export( + numcodecs_instance, + &format!("{numcodecs_v0_1_1_codec_interface}"), + )?, + Err(err) => Err(err)?, + }; wac.export(numcodecs_codecs_export, numcodecs_codecs_str)?; // encode the WAC composition graph into a WASM component and validate it diff --git a/crates/numcodecs-wasm-host/src/wit.rs b/crates/numcodecs-wasm-host/src/wit.rs index c8e547ca9..b8b4f0a1d 100644 --- a/crates/numcodecs-wasm-host/src/wit.rs +++ b/crates/numcodecs-wasm-host/src/wit.rs @@ -12,6 +12,8 @@ pub struct NumcodecsWitInterfaces { pub package: PackageIdentifier, /// The `numcodecs:abc/codec` interface pub codec: InterfaceIdentifier, + /// The `numcodecs:abc@0.1.1/codec` interface + pub codec_v0_1_1: InterfaceIdentifier, /// The `numcodecs:abc/registry` interface pub registry: InterfaceIdentifier, /// The `numcodecs:abc/types` interface @@ -29,9 +31,14 @@ impl NumcodecsWitInterfaces { PackageName::new("numcodecs", "abc"), Some(Version::new(0, 1, 2)), ); + let package_v0_1_1 = PackageIdentifier::new( + PackageName::new("numcodecs", "abc"), + Some(Version::new(0, 1, 1)), + ); Self { package: package.clone(), codec: InterfaceIdentifier::new(package.clone(), "codec"), + codec_v0_1_1: InterfaceIdentifier::new(package_v0_1_1, "codec"), registry: InterfaceIdentifier::new(package.clone(), "registry"), types: InterfaceIdentifier::new(package, "types"), } From 948240dcfd72ffb3b36dbc7e68ba254905001c8e Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 31 May 2026 09:31:26 +0300 Subject: [PATCH 4/4] also add fallback for numcodecs-wasm-host --- crates/numcodecs-wasm-host/src/component.rs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/crates/numcodecs-wasm-host/src/component.rs b/crates/numcodecs-wasm-host/src/component.rs index 7dbf0df51..7bd081887 100644 --- a/crates/numcodecs-wasm-host/src/component.rs +++ b/crates/numcodecs-wasm-host/src/component.rs @@ -73,13 +73,22 @@ impl WasmCodecComponent { let NumcodecsWitInterfaces { codec: codec_interface, + codec_v0_1_1: codec_v0_1_1_interface, .. } = NumcodecsWitInterfaces::get(); - let Some(codec_interface) = instance.exports().instance(codec_interface) else { - return Err(RuntimeError::from(anyhow::Error::msg(format!( - "WASM component does not contain an interface named `{codec_interface}`" - )))); + // prefer to import the numcodecs:abc@0.1.2/codec interface + let codec_interface = match instance.exports().instance(codec_interface) { + Some(codec_interface) => codec_interface, + None => match instance.exports().instance(codec_v0_1_1_interface) { + // fall back to import the numcodecs:abc@0.1.1/codec interface + Some(codec_interface) => codec_interface, + None => { + return Err(RuntimeError::from(anyhow::Error::msg(format!( + "WASM component does not export an interface named `{codec_interface}`" + )))); + } + }, }; let codec_id = load_typed_func(codec_interface, "codec-id")?;