From ba3e117815de305bc9f9664e3911be3f75c971bc Mon Sep 17 00:00:00 2001 From: Nelson Spence Date: Thu, 28 May 2026 10:51:54 -0500 Subject: [PATCH 1/2] remove ordered-float from core rank transform Signed-off-by: Nelson Spence --- Cargo.lock | 10 ---------- Cargo.toml | 1 - deny.toml | 2 +- fuzz/Cargo.lock | 25 ------------------------- src/rank.rs | 33 ++++++++++++++++++++++++++++----- src/util.rs | 15 +++++++++++++++ 6 files changed, 44 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index adb3c1b..63ecee7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -264,20 +264,10 @@ version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" -[[package]] -name = "ordered-float" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7d950ca161dc355eaf28f82b11345ed76c6e1f6eb1f4f4479e0323b9e2fbd0e" -dependencies = [ - "num-traits", -] - [[package]] name = "ordvec" version = "0.2.0" dependencies = [ - "ordered-float", "rand", "rand_chacha", "rayon", diff --git a/Cargo.toml b/Cargo.toml index bfe5b57..7407c9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,6 @@ all-features = false [dependencies] rayon = "1.10" -ordered-float = "5" [dev-dependencies] # Test/bench corpora are seeded with these; not needed by the library. diff --git a/deny.toml b/deny.toml index 720ac7c..7009117 100644 --- a/deny.toml +++ b/deny.toml @@ -23,7 +23,7 @@ ignore = [] # # Derived from `cargo metadata` over the full graph (lib + dev + build): # * MIT / Apache-2.0 — ordvec itself + the bulk of the tree -# (rayon, ordered-float, num-traits, crossbeam-*, rand*, libc, …). +# (rayon, crossbeam-*, rand*, libc, …). # * Unicode-3.0 — required by unicode-ident, whose expression is # `(MIT OR Apache-2.0) AND Unicode-3.0`; the AND clause means Unicode-3.0 # must be explicitly allowed even though MIT/Apache cover the other half. 
diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 14cf0d4..d9e3ff5 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -14,12 +14,6 @@ version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" -[[package]] -name = "autocfg" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" - [[package]] name = "bitflags" version = "2.11.1" @@ -229,35 +223,16 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - [[package]] name = "once_cell" version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" -[[package]] -name = "ordered-float" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7d950ca161dc355eaf28f82b11345ed76c6e1f6eb1f4f4479e0323b9e2fbd0e" -dependencies = [ - "num-traits", -] - [[package]] name = "ordvec" version = "0.2.0" dependencies = [ - "ordered-float", "rayon", ] diff --git a/src/rank.rs b/src/rank.rs index 507fc40..27005bf 100644 --- a/src/rank.rs +++ b/src/rank.rs @@ -21,16 +21,17 @@ //! //! See the `tests` module below for the round-trip and norm-invariant tests. 
-use ordered_float::OrderedFloat; use rayon::prelude::*; -use crate::util::{assert_all_finite, l2_normalise, result_buffer_len, TopK}; +use crate::util::{ + assert_all_finite, cmp_finite_f32_then_index, l2_normalise, result_buffer_len, TopK, +}; use crate::SearchResults; /// Compute the dimension-wise rank transform of a single vector. /// /// `out[k]` is the rank of `v[k]` among `v[0..d]`, with ties broken by -/// the index (stable sort). Output values are in `[0, d)`. Equivalent +/// ascending index. Output values are in `[0, d)`. Equivalent /// to NumPy's `np.argsort(np.argsort(v))` for a vector of length `d`. /// /// `d` must fit in `u16` (`d <= 65_535`); panics otherwise. @@ -39,7 +40,11 @@ pub fn rank_transform(v: &[f32]) -> Vec<u16> { assert!(d <= u16::MAX as usize, "dim must fit in u16"); assert_all_finite(v); let mut order: Vec<u16> = (0..d as u16).collect(); - order.sort_by_key(|&i| OrderedFloat(v[i as usize])); + order.sort_unstable_by(|&lhs, &rhs| { + let lhs = lhs as usize; + let rhs = rhs as usize; + cmp_finite_f32_then_index(v[lhs], lhs, v[rhs], rhs) + }); let mut ranks = vec![0u16; d]; for (rank, &orig_idx) in order.iter().enumerate() { ranks[orig_idx as usize] = rank as u16; @@ -56,7 +61,11 @@ pub fn rank_transform_into(v: &[f32], out: &mut [u16]) { assert!(d <= u16::MAX as usize, "dim must fit in u16"); assert_all_finite(v); let mut order: Vec<u16> = (0..d as u16).collect(); - order.sort_by_key(|&i| OrderedFloat(v[i as usize])); + order.sort_unstable_by(|&lhs, &rhs| { + let lhs = lhs as usize; + let rhs = rhs as usize; + cmp_finite_f32_then_index(v[lhs], lhs, v[rhs], rhs) + }); for (rank, &orig_idx) in order.iter().enumerate() { out[orig_idx as usize] = rank as u16; } @@ -570,6 +579,20 @@ mod tests { assert_eq!(r, vec![0, 1, 2, 3]); } + #[test] + fn duplicate_values_tie_by_original_index() { + let v = [3.0_f32, 1.0, 3.0, 2.0, 1.0]; + let r = rank_transform(&v); + assert_eq!(r, vec![3, 0, 4, 2, 1]); + } + + #[test] + fn signed_zeroes_tie_by_original_index() { +
let v = [0.0_f32, -0.0, 1.0, -0.0, 0.0]; + let r = rank_transform(&v); + assert_eq!(r, vec![0, 1, 4, 2, 3]); + } + #[test] fn rank_to_bucket_partitions_uniformly() { let d = 1024; diff --git a/src/util.rs b/src/util.rs index d93b49d..ae0ae87 100644 --- a/src/util.rs +++ b/src/util.rs @@ -13,6 +13,21 @@ //! index modules (`rank`, `quant`, `bitmap`, `multi_bucket`, `fastscan`) //! but not from outside the crate. +/// Compare finite `f32` values, using the coordinate index as a deterministic +/// tiebreaker. +#[inline] +pub(crate) fn cmp_finite_f32_then_index( + lhs_value: f32, + lhs_index: usize, + rhs_value: f32, + rhs_index: usize, +) -> std::cmp::Ordering { + lhs_value + .partial_cmp(&rhs_value) + .expect("ordvec: finite f32 comparator received non-finite value") + .then_with(|| lhs_index.cmp(&rhs_index)) +} + /// Result-buffer length `nq * k`, panicking loudly on usize overflow /// instead of silently wrapping to a too-small allocation. /// From a84c2a76b47912897a3f3bc207683a2691c7850d Mon Sep 17 00:00:00 2001 From: Nelson Spence Date: Thu, 28 May 2026 11:00:59 -0500 Subject: [PATCH 2/2] docs: update rank dependency summary Signed-off-by: Nelson Spence --- docs/RANK_MODES.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/RANK_MODES.md b/docs/RANK_MODES.md index 8eecf3d..8381e17 100644 --- a/docs/RANK_MODES.md +++ b/docs/RANK_MODES.md @@ -496,8 +496,9 @@ multi-seed stability is your call. 1. **Additive index family.** `Rank`, `RankQuant`, `Bitmap`, and `SignBitmap` are independent types, compiled and tested alongside one another. -2. **No heavy dependencies.** The rank primitives use `ordered_float` - and `rayon`. No BLAS, no codebook training, no rotation matrix. +2. **No heavy dependencies.** The rank primitives use `rayon` plus + internal finite-`f32` ordering helpers. No BLAS, no codebook + training, no rotation matrix. 3. 
**Build-speed advantage.** Encode is fast and data-independent because there is no rotation matmul and no codebook fit — the per-vector cost is the `argsort`.