From df08e99796c3ad69818faf5cbb83d21928899d71 Mon Sep 17 00:00:00 2001 From: Nelson Spence Date: Thu, 28 May 2026 10:07:55 -0500 Subject: [PATCH 1/2] fix python id coercion follow-ups Signed-off-by: Nelson Spence --- ordvec-python/src/lib.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/ordvec-python/src/lib.rs b/ordvec-python/src/lib.rs index a770cb8..80ef154 100644 --- a/ordvec-python/src/lib.rs +++ b/ordvec-python/src/lib.rs @@ -15,7 +15,10 @@ //! `assert!`/`assert_all_finite` panics surface as typed Python exceptions, not //! an opaque `PanicException`: constructors and `swap_remove` check their //! arguments, `check_width` rejects shape mismatches, `ensure_finite` rejects -//! NaN/±Inf, and the inline guard rejects non-C-contiguous arrays. +//! NaN/±Inf, and most array inputs reject non-C-contiguous layouts. Candidate +//! and doc-id arrays are the exception: non-contiguous integer arrays are +//! copied through the checked `u32` conversion path unless they hit the +//! contiguous `uint32` zero-copy fast path. //! //! File paths passed to `write` / `load` are forwarded to the filesystem //! unmodified — there is no `..` / traversal sanitisation — so callers must @@ -137,13 +140,16 @@ impl CandidateIds<'_> { /// are small relative to the scan; large-M FFI is tracked in issue #11). The /// in-range (`< n`) check stays with the caller, which knows the corpus size. fn as_u32_ids_1d<'py>(arr: &Bound<'py, PyAny>, what: &str) -> PyResult> { - // Fast path: already uint32 and C-contiguous -> borrow, zero-copy. + // Fast path: already uint32. Borrow if contiguous; otherwise copy without + // unnecessary bounds checks because every u32 value already fits. if let Ok(a) = arr.cast::>() { let ro = a.readonly(); if ro.as_slice().is_ok() { return Ok(CandidateIds::Borrowed(ro)); } - // Non-contiguous uint32 falls through to the copying path below. + let view = ro.as_array(); + let out = view.iter().copied().collect(); + return Ok(CandidateIds::Owned(out)); } macro_rules! try_int_dtype { @@ -164,10 +170,7 @@ fn as_u32_ids_1d<'py>(arr: &Bound<'py, PyAny>, what: &str) -> PyResult Date: Thu, 28 May 2026 11:08:59 -0500 Subject: [PATCH 2/2] fix python id coercion review notes Signed-off-by: Nelson Spence --- ordvec-python/src/lib.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ordvec-python/src/lib.rs b/ordvec-python/src/lib.rs index 80ef154..e051af6 100644 --- a/ordvec-python/src/lib.rs +++ b/ordvec-python/src/lib.rs @@ -16,9 +16,9 @@ //! an opaque `PanicException`: constructors and `swap_remove` check their //! arguments, `check_width` rejects shape mismatches, `ensure_finite` rejects //! NaN/±Inf, and most array inputs reject non-C-contiguous layouts. Candidate -//! and doc-id arrays are the exception: non-contiguous integer arrays are -//! copied through the checked `u32` conversion path unless they hit the -//! contiguous `uint32` zero-copy fast path. +//! and doc-id arrays are the exception: contiguous `uint32` arrays are borrowed +//! zero-copy, non-contiguous `uint32` arrays are copied directly, and other +//! integer dtypes are copied through the checked `u32` conversion path. //! //! File paths passed to `write` / `load` are forwarded to the filesystem //! unmodified — there is no `..` / traversal sanitisation — so callers must @@ -147,8 +147,7 @@ fn as_u32_ids_1d<'py>(arr: &Bound<'py, PyAny>, what: &str) -> PyResult