diff --git a/docs/RANK_MODES.md b/docs/RANK_MODES.md index f36fed1..f95b4ac 100644 --- a/docs/RANK_MODES.md +++ b/docs/RANK_MODES.md @@ -427,6 +427,9 @@ serialisers living in [`src/rank_io.rs`](../src/rank_io.rs) and [`src/sign_bitmap.rs`](../src/sign_bitmap.rs). `RankQuant` additionally exposes `search_asymmetric_subset` for scoring a precomputed candidate set — the rerank half of the two-stage pattern. +Candidate IDs are global row ordinals; duplicate candidates are scored as +separate entries and can produce duplicate hits, so callers that need +unique output rows should deduplicate candidate lists before reranking. `RankQuantFastscan` (re-exported `#[doc(hidden)]`) is an optional single-pass b=2 fast path; it supports `add`/`search` but not diff --git a/fuzz/fuzz_targets/search_rankquant.rs b/fuzz/fuzz_targets/search_rankquant.rs index 6c7b386..3854a83 100644 --- a/fuzz/fuzz_targets/search_rankquant.rs +++ b/fuzz/fuzz_targets/search_rankquant.rs @@ -9,8 +9,8 @@ //! huge value. Invalid dimensions, non-finite floats, and ragged vector lengths //! are caller contract violations, so this target avoids them and treats any //! panic as a compute-path bug. Assertions stay structural: shape, finite -//! scores, valid doc IDs, score-descending rows, and repeat determinism in one -//! process. +//! scores, valid doc IDs, score-descending/doc-ID-ascending rows, and repeat +//! determinism in one process. 
#![no_main] use libfuzzer_sys::{ @@ -105,15 +105,19 @@ fn assert_results(label: &str, res: &SearchResults, nq: usize, k_eff: usize, n: "{label}: doc id {id} out of range for n={n} at query {qi} slot {slot}", ); } - for slot in 1..k_eff { - let prev = (scores[slot - 1], ids[slot - 1]); - let cur = (scores[slot], ids[slot]); - assert!( - cur.0 <= prev.0, - "{label}: row {qi} not sorted at slots {} and {slot}", - slot - 1, - ); - } + assert_score_then_id_order(label, qi, scores, ids); + } +} + +fn assert_score_then_id_order(label: &str, qi: usize, scores: &[f32], ids: &[i64]) { + for slot in 1..scores.len() { + let prev = (scores[slot - 1], ids[slot - 1]); + let cur = (scores[slot], ids[slot]); + assert!( + cur.0 < prev.0 || (cur.0 == prev.0 && cur.1 > prev.1), + "{label}: row {qi} violates score-desc/doc-id-asc order at slots {} and {slot}", + slot - 1, + ); } } diff --git a/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs b/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs index 3a45c4b..776e57d 100644 --- a/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs +++ b/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs @@ -11,7 +11,9 @@ //! reranking agrees with a full RankQuant search. //! //! Contract: no panic, abort, or out-of-bounds access on any in-range candidate -//! input, and full-corpus candidate reranking must match full RankQuant search. +//! input, subset reranking must preserve score-descending/doc-ID-ascending +//! ordering, and full-corpus candidate reranking must match full RankQuant +//! search. 
#![no_main] use libfuzzer_sys::{ @@ -31,6 +33,18 @@ struct TwoStageInput { payload: Vec<u8>, } +fn assert_score_then_id_order(scores: &[f32], ids: &[i64]) { + for slot in 1..scores.len() { + let prev = (scores[slot - 1], ids[slot - 1]); + let cur = (scores[slot], ids[slot]); + assert!( + cur.0 < prev.0 || (cur.0 == prev.0 && cur.1 >= prev.1), + "subset rerank violates score-desc/doc-id-asc order at slots {} and {slot}", + slot - 1, + ); + } +} + impl<'a> Arbitrary<'a> for TwoStageInput { fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> { let dim = *u.choose(&[64usize, 128, 256, 512])?; @@ -108,7 +122,7 @@ fuzz_target!(|input: TwoStageInput| { assert_eq!(scores.len(), k_eff); assert_eq!(ids.len(), k_eff); assert!(scores.iter().all(|score| score.is_finite())); - assert!(scores.windows(2).all(|pair| pair[0] >= pair[1])); + assert_score_then_id_order(&scores, &ids); for &id in &ids { assert!(id >= 0); assert!(subset_candidates.contains(&(id as u32))); diff --git a/ordvec-ffi/include/ordvec.h b/ordvec-ffi/include/ordvec.h index 36f34cb..b02bc6f 100644 --- a/ordvec-ffi/include/ordvec.h +++ b/ordvec-ffi/include/ordvec.h @@ -52,6 +52,10 @@ typedef struct { const float *query; uint64_t dim; uint64_t k; + /** + * Optional subset row IDs. These are entry lists, not sets: duplicate + * candidates are scored independently and can produce duplicate hits. + */ const uint32_t *candidate_rows; uint64_t candidate_count; uint64_t flags; @@ -224,6 +228,11 @@ void ordvec_index_free(ordvec_index_t *index); /** * Run a synchronous single-query search. * + * When `params.candidate_rows` is supplied, those IDs are global row ordinals + * and may be unsorted or duplicated. Duplicate candidates are scored as + * separate entries and can produce duplicate hits; callers that need unique + * output rows must deduplicate before calling. + * * # Safety * * `index` must be a live handle returned by `ordvec_index_load`. 
All non-null diff --git a/ordvec-ffi/src/lib.rs b/ordvec-ffi/src/lib.rs index 6b35d48..d1f0097 100644 --- a/ordvec-ffi/src/lib.rs +++ b/ordvec-ffi/src/lib.rs @@ -73,6 +73,8 @@ pub struct ordvec_search_params_t { pub query: *const f32, pub dim: u64, pub k: u64, + /// Optional subset row IDs. These are entry lists, not sets: duplicate + /// candidates are scored independently and can produce duplicate hits. pub candidate_rows: *const u32, pub candidate_count: u64, pub flags: u64, @@ -871,6 +873,11 @@ pub unsafe extern "C" fn ordvec_index_free(index: *mut ordvec_index_t) { #[no_mangle] /// Run a synchronous single-query search. /// +/// When `params.candidate_rows` is supplied, those IDs are global row ordinals +/// and may be unsorted or duplicated. Duplicate candidates are scored as +/// separate entries and can produce duplicate hits; callers that need unique +/// output rows must deduplicate before calling. +/// /// # Safety /// /// `index` must be a live handle returned by `ordvec_index_load`. All non-null diff --git a/ordvec-go/README.md b/ordvec-go/README.md index 8fde2ef..15cdb4c 100644 --- a/ordvec-go/README.md +++ b/ordvec-go/README.md @@ -19,3 +19,7 @@ Search with `nil` options or `nil` `SearchOptions.Candidates` performs a full search. An empty, non-nil `Candidates` slice is treated as an explicit empty subset and returns a typed `StatusBadArgument`, matching the C ABI v1 pointer/count contract. + +`SearchOptions.Candidates` is an entry list of global row ordinals, not a set. +Duplicate candidates are scored independently and can produce duplicate hits; +deduplicate before searching when unique row IDs are required. diff --git a/ordvec-go/doc.go b/ordvec-go/doc.go index fb79eff..eaf3458 100644 --- a/ordvec-go/doc.go +++ b/ordvec-go/doc.go @@ -6,4 +6,8 @@ // Search pins and passes caller-owned query and candidate slices to the C ABI // without copying them. Callers must not mutate those slices until Search // returns. 
+// +// Candidate slices are entry lists, not sets. Duplicate candidate IDs are scored +// independently and can produce duplicate hits; callers that require unique row +// IDs should deduplicate before Search. package ordvec diff --git a/ordvec-go/ordvec.go b/ordvec-go/ordvec.go index 1566cb9..820b678 100644 --- a/ordvec-go/ordvec.go +++ b/ordvec-go/ordvec.go @@ -145,6 +145,10 @@ type Stats struct { } type SearchOptions struct { + // Candidates restricts the search to these global row ordinals. It is an + // entry list, not a set: duplicate candidates are scored independently and + // can produce duplicate hits. Deduplicate before calling if unique rows are + // required. Candidates []uint32 UserTag uint64 } diff --git a/ordvec-python/src/lib.rs b/ordvec-python/src/lib.rs index ce03c29..e6d7365 100644 --- a/ordvec-python/src/lib.rs +++ b/ordvec-python/src/lib.rs @@ -731,9 +731,14 @@ impl RankQuant { /// Asymmetric scoring restricted to a candidate subset (e.g. the top-M /// shortlist from a [`Bitmap`] or [`SignBitmap`] probe). Returns /// ``(scores, global_ids)`` where ``global_ids`` are the original doc - /// indices (mapped from the local candidate slot); slots that could not be - /// filled are returned as ``-1``. Uses the same AVX-512 → AVX2 → scalar - /// dispatch as ``search_asymmetric``. + /// indices (mapped from the local candidate slot). ``k`` is capped to the + /// candidate-list length; the subset path does not add sentinel padding. + /// Uses the same AVX-512 → AVX2 → scalar dispatch as ``search_asymmetric``. + /// + /// ``candidates`` may be unsorted and may contain duplicates. Duplicate + /// candidate IDs are scored as separate entries and can produce duplicate + /// hits; callers that require unique row IDs should deduplicate before + /// calling. 
/// /// If the shortlist came from [`Bitmap`], this is the exact RankQuant /// rerank stage over that survivor set; it does not itself apply or diff --git a/ordvec-python/tests/test_rank_quant.py b/ordvec-python/tests/test_rank_quant.py index 21bf830..4e97ca8 100644 --- a/ordvec-python/tests/test_rank_quant.py +++ b/ordvec-python/tests/test_rank_quant.py @@ -310,8 +310,9 @@ def test_search_asymmetric_subset_returns_global_ids(): assert ids.dtype == np.int64 # Self-query against a candidate set containing self → top-1 is self. assert int(ids[0]) == 0 - # All returned ids are from the candidate set (or sentinel -1). - candidate_set = set(candidates.tolist()) | {-1} + # All returned ids are from the candidate set; k is capped instead of + # sentinel-padding unfilled slots. + candidate_set = set(candidates.tolist()) for i in ids: assert int(i) in candidate_set @@ -347,6 +348,20 @@ def test_search_asymmetric_subset_ties_use_global_row_ids(): np.testing.assert_array_equal(scores, np.array([0.0, 0.0], dtype=np.float32)) +def test_search_asymmetric_subset_duplicates_remain_duplicate_entries(): + vectors = np.ones((12, 64), dtype=np.float32) + idx = RankQuant(dim=64, bits=2) + idx.add(vectors) + + candidates = np.array([7, 8, 7], dtype=np.uint32) + scores, ids = idx.search_asymmetric_subset( + np.zeros(64, dtype=np.float32), candidates, k=2 + ) + + np.testing.assert_array_equal(ids, np.array([7, 7], dtype=np.int64)) + np.testing.assert_array_equal(scores, np.array([0.0, 0.0], dtype=np.float32)) + + def test_search_asymmetric_subset_k_caps_at_candidate_count(): # k > len(candidates) should silently cap — no panic, no sentinel # padding beyond the candidate-set size. 
diff --git a/src/quant.rs b/src/quant.rs index f770043..aef5038 100644 --- a/src/quant.rs +++ b/src/quant.rs @@ -338,10 +338,10 @@ impl RankQuant { #[cfg_attr(not(target_arch = "x86_64"), allow(unused_variables))] let simd_tier = select_simd_tier(dim, bits); - // For the AVX2 path we drop the per-lane centre subtract from - // the hot loop and add it back as a per-query constant offset - // to the top-k scores at finalize time. Ranking is invariant - // to this constant; absolute scores stay exact. + // SIMD asymmetric kernels drop the per-lane centre subtract from the + // hot loop. Apply the query-constant offset before TopK insertion so + // retention and final ordering use the same public visible score key. + #[cfg(target_arch = "x86_64")] let centre = ((1u32 << bits) as f32 - 1.0) / 2.0; queries @@ -351,27 +351,27 @@ impl RankQuant { .for_each(|((q, out_scores), out_indices)| { let q_unit = l2_normalise(q); let mut top = TopK::new(k_eff); - #[cfg_attr(not(target_arch = "x86_64"), allow(unused_mut))] - let mut centre_drop_used = false; + #[cfg(target_arch = "x86_64")] + let centre_offset = -centre * q_unit.iter().sum::<f32>() * inv_norm; #[cfg(target_arch = "x86_64")] unsafe { match (simd_tier, bits) { (SimdTier::Avx512, 2) => { + top.set_score_offset(centre_offset); scan_b2_asym_avx512(&self.packed, n, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } (SimdTier::Avx512, 4) => { + top.set_score_offset(centre_offset); scan_b4_asym_avx512(&self.packed, n, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } (SimdTier::Avx2, 2) => { + top.set_score_offset(centre_offset); scan_b2_asym_avx2(&self.packed, n, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } (SimdTier::Avx2, 4) => { + top.set_score_offset(centre_offset); scan_b4_asym_avx2(&self.packed, n, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } _ => scan_via_lut_scalar( &self.packed, @@ -399,25 +399,6 @@ impl RankQuant { top.finalize_into(out_scores, 
out_indices); - if centre_drop_used { - // The asym kernels drop the per-lane `- centre` term from - // the hot loop; it is a query-constant shift, re-applied - // here. Guarded by `is_finite` so it lands only on filled - // slots: when fewer than `k` docs were scored the trailing - // top-k positions stay at the `f32::NEG_INFINITY` sentinel, - // and `NEG_INFINITY + offset` would wrongly turn a sentinel - // into a finite score. (Real scores are always finite — the - // finite-input policy guarantees it — so the guard only ever - // skips sentinels, never a genuine result.) - let q_sum: f32 = q_unit.iter().sum(); - let offset = -centre * q_sum * inv_norm; - for s in out_scores.iter_mut() { - if s.is_finite() { - *s += offset; - } - } - } - let _ = bytes_per_vec; // shape clarity }); @@ -539,6 +520,9 @@ impl RankQuant { /// to global IDs before returning). Results are ordered by score /// descending, then global row ID ascending, matching the full-index /// search tie policy even when `candidates` is unsorted. + /// Duplicate candidate IDs are scored as separate entries and can + /// produce duplicate hits; callers that require unique row IDs should + /// deduplicate before calling. /// /// Uses the same AVX-512 → AVX2 → scalar dispatch as /// [`Self::search_asymmetric`] and the same centre-drop math, just @@ -585,12 +569,13 @@ impl RankQuant { let norm = rankquant_norm(dim, bits); let inv_norm = 1.0_f32 / norm; + #[cfg(target_arch = "x86_64")] let centre = ((1u32 << bits) as f32 - 1.0) / 2.0; // L2-normalise the query and gather centre-correction. 
let q_unit = l2_normalise(query); - let q_sum: f32 = q_unit.iter().sum(); - let centre_offset = -centre * q_sum * inv_norm; + #[cfg(target_arch = "x86_64")] + let centre_offset = -centre * q_unit.iter().sum::<f32>() * inv_norm; // Pack the candidate docs' bytes into a contiguous buffer so // the SIMD kernels can scan them as if they were a small dense @@ -609,26 +594,24 @@ impl RankQuant { #[cfg_attr(not(target_arch = "x86_64"), allow(unused_variables))] let simd_tier = select_simd_tier(dim, bits); let mut top = TopK::new_with_tie_keys(k_eff, candidates); - #[cfg_attr(not(target_arch = "x86_64"), allow(unused_mut))] - let mut centre_drop_used = false; #[cfg(target_arch = "x86_64")] unsafe { match (simd_tier, bits) { (SimdTier::Avx512, 2) => { + top.set_score_offset(centre_offset); scan_b2_asym_avx512(&sub_packed, m, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } (SimdTier::Avx512, 4) => { + top.set_score_offset(centre_offset); scan_b4_asym_avx512(&sub_packed, m, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } (SimdTier::Avx2, 2) => { + top.set_score_offset(centre_offset); scan_b2_asym_avx2(&sub_packed, m, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } (SimdTier::Avx2, 4) => { + top.set_score_offset(centre_offset); scan_b4_asym_avx2(&sub_packed, m, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } _ => scan_via_lut_scalar( &sub_packed, @@ -657,17 +640,6 @@ impl RankQuant { let mut scores = vec![f32::NEG_INFINITY; k_eff]; let mut local_indices = vec![-1i64; k_eff]; top.finalize_into(&mut scores, &mut local_indices); - if centre_drop_used { - // Re-apply the per-query centre shift dropped from the kernel hot - // loop; the `is_finite` guard skips unfilled top-k slots (still at - // the `f32::NEG_INFINITY` sentinel) so a sentinel never becomes a - // finite score. See the matching note in `search_asymmetric`. 
- for s in scores.iter_mut() { - if s.is_finite() { - *s += centre_offset; - } - } - } // Map local → global doc IDs. let global_indices: Vec<i64> = local_indices .iter() diff --git a/src/util.rs b/src/util.rs index 0229f72..d0dae5c 100644 --- a/src/util.rs +++ b/src/util.rs @@ -368,6 +368,12 @@ pub(crate) struct TopK { indices: Vec<i64>, tie_keys: Vec<i64>, tie_key_by_index: Option<Vec<i64>>, + /// Query-constant score offset applied before insertion/eviction. + /// + /// RankQuant SIMD asymmetric kernels can drop a per-query centre term from + /// the hot loop. Applying it here keeps TopK's retention key identical to + /// the public visible score key, including f32 rounding-collapse ties. + score_offset: f32, filled: usize, /// Slot holding the worst kept entry under `(score asc, tie_key /// desc)` — the next to be evicted. @@ -387,6 +393,7 @@ impl TopK { indices: vec![-1; k], tie_keys: vec![i64::MAX; k], tie_key_by_index: None, + score_offset: 0.0, filled: 0, worst_pos: 0, worst_val: f32::INFINITY, @@ -406,8 +413,14 @@ impl TopK { top } + #[cfg_attr(not(target_arch = "x86_64"), allow(dead_code))] + pub(crate) fn set_score_offset(&mut self, score_offset: f32) { + self.score_offset = score_offset; + } + #[inline] pub(crate) fn maybe_insert(&mut self, score: f32, idx: usize) { + let score = score + self.score_offset; // Convert the doc_id to its i64 storage form once, up front. 
doc_ids // are `< n_vectors ≤ MAX_VECTORS` (2^26) by the `add` cap, so this // never fails in practice; the checked conversion makes the "a doc_id @@ -578,6 +591,21 @@ mod tests { assert_eq!(indices, [0, 1]); } + #[test] + fn topk_score_offset_is_part_of_eviction_key() { + let mut top = TopK::new_with_tie_keys(1, &[10, 3]); + top.set_score_offset(16_777_216.0); + top.maybe_insert(1.0, 0); + top.maybe_insert(0.0, 1); + + let mut scores = [f32::NEG_INFINITY; 1]; + let mut indices = [-1; 1]; + top.finalize_into(&mut scores, &mut indices); + + assert_eq!(scores, [16_777_216.0]); + assert_eq!(indices, [1]); + } + #[test] fn checked_new_len_accepts_up_to_max() { use crate::rank_io::MAX_VECTORS; diff --git a/tests/index/two_stage.rs b/tests/index/two_stage.rs index d434234..613ea56 100644 --- a/tests/index/two_stage.rs +++ b/tests/index/two_stage.rs @@ -19,6 +19,18 @@ fn assert_two_stage_invariants(sign: &SignBitmap, rankquant: &RankQuant) { assert_eq!(sign.len(), N); } +fn assert_score_then_id_order(scores: &[f32], ids: &[i64]) { + for slot in 1..scores.len() { + let prev = (scores[slot - 1], ids[slot - 1]); + let cur = (scores[slot], ids[slot]); + assert!( + cur.0 < prev.0 || (cur.0 == prev.0 && cur.1 >= prev.1), + "results violate score-desc/doc-id-asc order at slots {} and {slot}", + slot - 1, + ); + } +} + #[test] fn sign_rankquant_pipeline_handles_edge_candidate_and_k_shapes() { let (sign, rankquant, _corpus) = build_two_stage(2); @@ -51,6 +63,7 @@ fn sign_rankquant_pipeline_handles_edge_candidate_and_k_shapes() { assert_eq!(scores.len(), shortlist.len()); assert_eq!(ids.len(), shortlist.len()); assert!(ids.iter().all(|&id| shortlist.contains(&(id as u32)))); + assert_score_then_id_order(&scores, &ids); } #[test] @@ -64,18 +77,42 @@ fn sign_rankquant_full_candidate_set_matches_full_rankquant_search() { let full = rankquant.search_asymmetric(query, 16); let (subset_scores, subset_ids) = rankquant.search_asymmetric_subset(query, &candidates, 16); - assert!(subset_ids 
- .iter() - .all(|&id| candidates.contains(&(id as u32)))); + assert_eq!(subset_ids, full.indices_for_query(0)); assert_eq!(subset_scores.len(), full.scores_for_query(0).len()); - let mut subset_scores_sorted = subset_scores; - let mut full_scores_sorted = full.scores_for_query(0).to_vec(); - subset_scores_sorted.sort_by(|left, right| left.total_cmp(right)); - full_scores_sorted.sort_by(|left, right| left.total_cmp(right)); - for (subset, full) in subset_scores_sorted.iter().zip(&full_scores_sorted) { + assert_score_then_id_order(&subset_scores, &subset_ids); + for (subset, full) in subset_scores.iter().zip(full.scores_for_query(0)) { assert!( (subset - full).abs() <= 1e-6, "subset score {subset} diverged from full score {full}" ); } } + +#[test] +fn sign_rankquant_subset_orders_visible_ties_after_centre_offset() { + let dim = 128usize; + let n_vectors = 5usize; + let bits = 4u8; + let payload = [ + 158u8, 158, 158, 158, 158, 158, 158, 158, 158, 158, 137, 10, 10, + ]; + let floats: Vec<f32> = (0..((n_vectors + 1) * dim)) + .map(|i| payload[i % payload.len()] as f32 - 128.0) + .collect(); + let (corpus, query) = floats.split_at(n_vectors * dim); + + let mut sign = SignBitmap::new(dim); + let mut rankquant = RankQuant::new(dim, bits); + sign.add(corpus); + rankquant.add(corpus); + + let candidates = sign.top_m_candidates(query, n_vectors); + assert_eq!(candidates.len(), n_vectors); + + let (scores, ids) = rankquant.search_asymmetric_subset(query, &candidates, n_vectors + 1); + + assert_eq!(scores.len(), n_vectors); + assert_eq!(ids.len(), n_vectors); + assert!(scores.iter().all(|score| score.is_finite())); + assert_score_then_id_order(&scores, &ids); +}