Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ all-features = false

[dependencies]
rayon = "1.10"
ordered-float = "5"

[dev-dependencies]
# Test/bench corpora are seeded with these; not needed by the library.
Expand Down
2 changes: 1 addition & 1 deletion deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ ignore = []
#
# Derived from `cargo metadata` over the full graph (lib + dev + build):
# * MIT / Apache-2.0 — ordvec itself + the bulk of the tree
# (rayon, ordered-float, num-traits, crossbeam-*, rand*, libc, …).
# (rayon, crossbeam-*, rand*, libc, …).
# * Unicode-3.0 — required by unicode-ident, whose expression is
# `(MIT OR Apache-2.0) AND Unicode-3.0`; the AND clause means Unicode-3.0
# must be explicitly allowed even though MIT/Apache cover the other half.
Expand Down
5 changes: 3 additions & 2 deletions docs/RANK_MODES.md
Original file line number Diff line number Diff line change
Expand Up @@ -496,8 +496,9 @@ multi-seed stability is your call.
1. **Additive index family.** `Rank`, `RankQuant`,
`Bitmap`, and `SignBitmap` are independent types,
compiled and tested alongside one another.
2. **No heavy dependencies.** The rank primitives use `ordered_float`
and `rayon`. No BLAS, no codebook training, no rotation matrix.
2. **No heavy dependencies.** The rank primitives use `rayon` plus
internal finite-`f32` ordering helpers. No BLAS, no codebook
training, no rotation matrix.
3. **Build-speed advantage.** Encode is fast and data-independent
because there is no rotation matmul and no codebook fit — the
per-vector cost is the `argsort`.
Expand Down
25 changes: 0 additions & 25 deletions fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 28 additions & 5 deletions src/rank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,17 @@
//!
//! See the `tests` module below for the round-trip and norm-invariant tests.

use ordered_float::OrderedFloat;
use rayon::prelude::*;

use crate::util::{assert_all_finite, l2_normalise, result_buffer_len, TopK};
use crate::util::{
assert_all_finite, cmp_finite_f32_then_index, l2_normalise, result_buffer_len, TopK,
};
use crate::SearchResults;

/// Compute the dimension-wise rank transform of a single vector.
///
/// `out[k]` is the rank of `v[k]` among `v[0..d]`, with ties broken by
/// the index (stable sort). Output values are in `[0, d)`. Equivalent
/// ascending index. Output values are in `[0, d)`. Equivalent
/// to NumPy's `np.argsort(np.argsort(v))` for a vector of length `d`.
///
/// `d` must fit in `u16` (`d <= 65_535`); panics otherwise.
Expand All @@ -39,7 +40,11 @@ pub fn rank_transform(v: &[f32]) -> Vec<u16> {
assert!(d <= u16::MAX as usize, "dim must fit in u16");
assert_all_finite(v);
let mut order: Vec<u16> = (0..d as u16).collect();
order.sort_by_key(|&i| OrderedFloat(v[i as usize]));
order.sort_unstable_by(|&lhs, &rhs| {
let lhs = lhs as usize;
let rhs = rhs as usize;
cmp_finite_f32_then_index(v[lhs], lhs, v[rhs], rhs)
});
let mut ranks = vec![0u16; d];
for (rank, &orig_idx) in order.iter().enumerate() {
ranks[orig_idx as usize] = rank as u16;
Expand All @@ -56,7 +61,11 @@ pub fn rank_transform_into(v: &[f32], out: &mut [u16]) {
assert!(d <= u16::MAX as usize, "dim must fit in u16");
assert_all_finite(v);
let mut order: Vec<u16> = (0..d as u16).collect();
order.sort_by_key(|&i| OrderedFloat(v[i as usize]));
order.sort_unstable_by(|&lhs, &rhs| {
let lhs = lhs as usize;
let rhs = rhs as usize;
cmp_finite_f32_then_index(v[lhs], lhs, v[rhs], rhs)
});
for (rank, &orig_idx) in order.iter().enumerate() {
out[orig_idx as usize] = rank as u16;
}
Expand Down Expand Up @@ -570,6 +579,20 @@ mod tests {
assert_eq!(r, vec![0, 1, 2, 3]);
}

#[test]
fn duplicate_values_tie_by_original_index() {
    // Equal values (the two 1.0s and the two 3.0s) must receive ranks in
    // ascending order of their original position in the input.
    let input = [3.0_f32, 1.0, 3.0, 2.0, 1.0];
    let expected: Vec<u16> = vec![3, 0, 4, 2, 1];
    assert_eq!(rank_transform(&input), expected);
}

#[test]
fn signed_zeroes_tie_by_original_index() {
    // `0.0` and `-0.0` compare equal, so all four zeroes are ranked purely
    // by position and the single 1.0 takes the highest rank.
    let input = [0.0_f32, -0.0, 1.0, -0.0, 0.0];
    let expected: Vec<u16> = vec![0, 1, 4, 2, 3];
    assert_eq!(rank_transform(&input), expected);
}

#[test]
fn rank_to_bucket_partitions_uniformly() {
let d = 1024;
Expand Down
15 changes: 15 additions & 0 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,21 @@
//! index modules (`rank`, `quant`, `bitmap`, `multi_bucket`, `fastscan`)
//! but not from outside the crate.

/// Compare two finite `f32` values, using the coordinate index as a
/// deterministic tiebreaker.
///
/// The values are ordered with `partial_cmp` first, so `0.0` and `-0.0`
/// compare equal. When the values are equal the result falls back to
/// comparing `lhs_index` with `rhs_index`, which means two entries with
/// distinct indices never compare as `Equal`.
///
/// # Panics
///
/// Panics if `partial_cmp` returns `None`, i.e. if either value is NaN.
#[inline]
pub(crate) fn cmp_finite_f32_then_index(
    lhs_value: f32,
    lhs_index: usize,
    rhs_value: f32,
    rhs_index: usize,
) -> std::cmp::Ordering {
    lhs_value
        .partial_cmp(&rhs_value)
        .expect("ordvec: finite f32 comparator received non-finite value")
        // Only consulted when the values tie; keeps the ordering total.
        .then_with(|| lhs_index.cmp(&rhs_index))
}

/// Result-buffer length `nq * k`, panicking loudly on usize overflow
/// instead of silently wrapping to a too-small allocation.
///
Expand Down
Loading