diff --git a/ordvec-python/src/lib.rs b/ordvec-python/src/lib.rs index a770cb8..e051af6 100644 --- a/ordvec-python/src/lib.rs +++ b/ordvec-python/src/lib.rs @@ -15,7 +15,10 @@ //! `assert!`/`assert_all_finite` panics surface as typed Python exceptions, not //! an opaque `PanicException`: constructors and `swap_remove` check their //! arguments, `check_width` rejects shape mismatches, `ensure_finite` rejects -//! NaN/±Inf, and the inline guard rejects non-C-contiguous arrays. +//! NaN/±Inf, and most array inputs reject non-C-contiguous layouts. Candidate +//! and doc-id arrays are the exception: contiguous `uint32` arrays are borrowed +//! zero-copy, non-contiguous `uint32` arrays are copied directly, and other +//! integer dtypes are copied through the checked `u32` conversion path. //! //! File paths passed to `write` / `load` are forwarded to the filesystem //! unmodified — there is no `..` / traversal sanitisation — so callers must @@ -137,13 +140,15 @@ impl CandidateIds<'_> { /// are small relative to the scan; large-M FFI is tracked in issue #11). The /// in-range (`< n`) check stays with the caller, which knows the corpus size. fn as_u32_ids_1d<'py>(arr: &Bound<'py, PyAny>, what: &str) -> PyResult> { - // Fast path: already uint32 and C-contiguous -> borrow, zero-copy. + // Fast path: already uint32. Borrow if contiguous; otherwise copy without + // unnecessary bounds checks because every u32 value already fits. if let Ok(a) = arr.cast::>() { let ro = a.readonly(); if ro.as_slice().is_ok() { return Ok(CandidateIds::Borrowed(ro)); } - // Non-contiguous uint32 falls through to the copying path below. + let out = ro.as_array().to_vec(); + return Ok(CandidateIds::Owned(out)); } macro_rules! try_int_dtype { @@ -164,10 +169,7 @@ fn as_u32_ids_1d<'py>(arr: &Bound<'py, PyAny>, what: &str) -> PyResult