Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/RANK_MODES.md
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,9 @@ serialisers living in [`src/rank_io.rs`](../src/rank_io.rs) and
[`src/sign_bitmap.rs`](../src/sign_bitmap.rs). `RankQuant`
additionally exposes `search_asymmetric_subset` for scoring a
precomputed candidate set — the rerank half of the two-stage pattern.
Candidate IDs are global row ordinals; duplicate candidates are scored as
separate entries and can produce duplicate hits, so callers that need
unique output rows should deduplicate candidate lists before reranking.

`RankQuantFastscan` (re-exported `#[doc(hidden)]`) is an optional
single-pass b=2 fast path; it supports `add`/`search` but not
Expand Down
26 changes: 15 additions & 11 deletions fuzz/fuzz_targets/search_rankquant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
//! huge value. Invalid dimensions, non-finite floats, and ragged vector lengths
//! are caller contract violations, so this target avoids them and treats any
//! panic as a compute-path bug. Assertions stay structural: shape, finite
//! scores, valid doc IDs, score-descending rows, and repeat determinism in one
//! process.
//! scores, valid doc IDs, score-descending/doc-ID-ascending rows, and repeat
//! determinism in one process.
#![no_main]

use libfuzzer_sys::{
Expand Down Expand Up @@ -105,15 +105,19 @@ fn assert_results(label: &str, res: &SearchResults, nq: usize, k_eff: usize, n:
"{label}: doc id {id} out of range for n={n} at query {qi} slot {slot}",
);
}
for slot in 1..k_eff {
let prev = (scores[slot - 1], ids[slot - 1]);
let cur = (scores[slot], ids[slot]);
assert!(
cur.0 <= prev.0,
"{label}: row {qi} not sorted at slots {} and {slot}",
slot - 1,
);
}
assert_score_then_id_order(label, qi, scores, ids);
}
}

/// Checks that one result row is ordered by score descending, with ties
/// broken by strictly ascending doc ID.
///
/// `label` and `qi` only feed the failure message. Panics on the first
/// adjacent pair of slots that breaks the ordering.
fn assert_score_then_id_order(label: &str, qi: usize, scores: &[f32], ids: &[i64]) {
    for (prev_slot, pair) in scores.windows(2).enumerate() {
        let slot = prev_slot + 1;
        let (prev_score, prev_id) = (pair[0], ids[prev_slot]);
        let (score, id) = (pair[1], ids[slot]);
        // Either the score dropped, or it tied and the doc ID moved forward.
        let score_dropped = score < prev_score;
        let tie_with_later_id = score == prev_score && id > prev_id;
        assert!(
            score_dropped || tie_with_later_id,
            "{label}: row {qi} violates score-desc/doc-id-asc order at slots {} and {slot}",
            prev_slot,
        );
    }
}

Expand Down
18 changes: 16 additions & 2 deletions fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
//! reranking agrees with a full RankQuant search.
//!
//! Contract: no panic, abort, or out-of-bounds access on any in-range candidate
//! input, and full-corpus candidate reranking must match full RankQuant search.
//! input, subset reranking must preserve score-descending/doc-ID-ascending
//! ordering, and full-corpus candidate reranking must match full RankQuant
//! search.
#![no_main]

use libfuzzer_sys::{
Expand All @@ -31,6 +33,18 @@ struct TwoStageInput {
payload: Vec<u8>,
}

/// Checks that subset-rerank output is ordered by score descending, with ties
/// broken by non-decreasing doc ID.
///
/// Equal IDs on a tie are accepted, so repeated hits for the same row do not
/// trip the check. Panics on the first adjacent pair that breaks the order.
fn assert_score_then_id_order(scores: &[f32], ids: &[i64]) {
    // On a tie the doc ID must not go backwards; otherwise the score must drop.
    let in_order = |(prev_score, prev_id): (f32, i64), (score, id): (f32, i64)| {
        if score == prev_score {
            id >= prev_id
        } else {
            score < prev_score
        }
    };

    for slot in 1..scores.len() {
        let before = slot - 1;
        assert!(
            in_order((scores[before], ids[before]), (scores[slot], ids[slot])),
            "subset rerank violates score-desc/doc-id-asc order at slots {} and {slot}",
            before,
        );
    }
}

impl<'a> Arbitrary<'a> for TwoStageInput {
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
let dim = *u.choose(&[64usize, 128, 256, 512])?;
Expand Down Expand Up @@ -108,7 +122,7 @@ fuzz_target!(|input: TwoStageInput| {
assert_eq!(scores.len(), k_eff);
assert_eq!(ids.len(), k_eff);
assert!(scores.iter().all(|score| score.is_finite()));
assert!(scores.windows(2).all(|pair| pair[0] >= pair[1]));
assert_score_then_id_order(&scores, &ids);
for &id in &ids {
assert!(id >= 0);
assert!(subset_candidates.contains(&(id as u32)));
Expand Down
9 changes: 9 additions & 0 deletions ordvec-ffi/include/ordvec.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ typedef struct {
const float *query;
uint64_t dim;
uint64_t k;
/**
* Optional subset row IDs. These are entry lists, not sets: duplicate
* candidates are scored independently and can produce duplicate hits.
*/
const uint32_t *candidate_rows;
uint64_t candidate_count;
uint64_t flags;
Expand Down Expand Up @@ -224,6 +228,11 @@ void ordvec_index_free(ordvec_index_t *index);
/**
* Run a synchronous single-query search.
*
* When `params.candidate_rows` is supplied, those IDs are global row ordinals
* and may be unsorted or duplicated. Duplicate candidates are scored as
* separate entries and can produce duplicate hits; callers that need unique
* output rows must deduplicate before calling.
*
* # Safety
*
* `index` must be a live handle returned by `ordvec_index_load`. All non-null
Expand Down
7 changes: 7 additions & 0 deletions ordvec-ffi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ pub struct ordvec_search_params_t {
pub query: *const f32,
pub dim: u64,
pub k: u64,
/// Optional subset row IDs. These are entry lists, not sets: duplicate
/// candidates are scored independently and can produce duplicate hits.
pub candidate_rows: *const u32,
pub candidate_count: u64,
pub flags: u64,
Expand Down Expand Up @@ -871,6 +873,11 @@ pub unsafe extern "C" fn ordvec_index_free(index: *mut ordvec_index_t) {
#[no_mangle]
/// Run a synchronous single-query search.
///
/// When `params.candidate_rows` is supplied, those IDs are global row ordinals
/// and may be unsorted or duplicated. Duplicate candidates are scored as
/// separate entries and can produce duplicate hits; callers that need unique
/// output rows must deduplicate before calling.
///
/// # Safety
///
/// `index` must be a live handle returned by `ordvec_index_load`. All non-null
Expand Down
4 changes: 4 additions & 0 deletions ordvec-go/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ Search with `nil` options or `nil` `SearchOptions.Candidates` performs a full
search. An empty, non-nil `Candidates` slice is treated as an explicit empty
subset and returns a typed `StatusBadArgument`, matching the C ABI v1
pointer/count contract.

`SearchOptions.Candidates` is an entry list of global row ordinals, not a set.
Duplicate candidates are scored independently and can produce duplicate hits;
deduplicate before searching when unique row IDs are required.
4 changes: 4 additions & 0 deletions ordvec-go/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,8 @@
// Search pins and passes caller-owned query and candidate slices to the C ABI
// without copying them. Callers must not mutate those slices until Search
// returns.
//
// Candidate slices are entry lists, not sets. Duplicate candidate IDs are scored
// independently and can produce duplicate hits; callers that require unique row
// IDs should deduplicate before Search.
package ordvec
4 changes: 4 additions & 0 deletions ordvec-go/ordvec.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ type Stats struct {
}

// SearchOptions carries the optional, per-call settings accepted by Search.
type SearchOptions struct {
// Candidates restricts the search to these global row ordinals. It is an
// entry list, not a set: duplicate candidates are scored independently and
// can produce duplicate hits. Deduplicate before calling if unique rows are
// required.
Candidates []uint32
// NOTE(review): UserTag's meaning is not visible in this hunk — confirm
// against the C ABI before documenting it further.
UserTag uint64
}
Expand Down
11 changes: 8 additions & 3 deletions ordvec-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -731,9 +731,14 @@ impl RankQuant {
/// Asymmetric scoring restricted to a candidate subset (e.g. the top-M
/// shortlist from a [`Bitmap`] or [`SignBitmap`] probe). Returns
/// ``(scores, global_ids)`` where ``global_ids`` are the original doc
/// indices (mapped from the local candidate slot); slots that could not be
/// filled are returned as ``-1``. Uses the same AVX-512 → AVX2 → scalar
/// dispatch as ``search_asymmetric``.
/// indices (mapped from the local candidate slot). ``k`` is capped to the
/// candidate-list length; the subset path does not add sentinel padding.
/// Uses the same AVX-512 → AVX2 → scalar dispatch as ``search_asymmetric``.
///
/// ``candidates`` may be unsorted and may contain duplicates. Duplicate
/// candidate IDs are scored as separate entries and can produce duplicate
/// hits; callers that require unique row IDs should deduplicate before
/// calling.
///
/// If the shortlist came from [`Bitmap`], this is the exact RankQuant
/// rerank stage over that survivor set; it does not itself apply or
Expand Down
19 changes: 17 additions & 2 deletions ordvec-python/tests/test_rank_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,9 @@ def test_search_asymmetric_subset_returns_global_ids():
assert ids.dtype == np.int64
# Self-query against a candidate set containing self → top-1 is self.
assert int(ids[0]) == 0
# All returned ids are from the candidate set (or sentinel -1).
candidate_set = set(candidates.tolist()) | {-1}
# All returned ids are from the candidate set; k is capped instead of
# sentinel-padding unfilled slots.
candidate_set = set(candidates.tolist())
for i in ids:
assert int(i) in candidate_set

Expand Down Expand Up @@ -347,6 +348,20 @@ def test_search_asymmetric_subset_ties_use_global_row_ids():
np.testing.assert_array_equal(scores, np.array([0.0, 0.0], dtype=np.float32))


def test_search_asymmetric_subset_duplicates_remain_duplicate_entries():
    """A repeated candidate ID is scored per entry and may fill several hits."""
    dim = 64
    index = RankQuant(dim=dim, bits=2)
    index.add(np.ones((12, dim), dtype=np.float32))

    # Row 7 is listed twice; with k=2 both hits are expected to be row 7.
    query = np.zeros(dim, dtype=np.float32)
    shortlist = np.array([7, 8, 7], dtype=np.uint32)
    scores, ids = index.search_asymmetric_subset(query, shortlist, k=2)

    expected_ids = np.array([7, 7], dtype=np.int64)
    expected_scores = np.array([0.0, 0.0], dtype=np.float32)
    np.testing.assert_array_equal(ids, expected_ids)
    np.testing.assert_array_equal(scores, expected_scores)


def test_search_asymmetric_subset_k_caps_at_candidate_count():
# k > len(candidates) should silently cap — no panic, no sentinel
# padding beyond the candidate-set size.
Expand Down
68 changes: 20 additions & 48 deletions src/quant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,10 +338,10 @@ impl RankQuant {
#[cfg_attr(not(target_arch = "x86_64"), allow(unused_variables))]
let simd_tier = select_simd_tier(dim, bits);

// For the AVX2 path we drop the per-lane centre subtract from
// the hot loop and add it back as a per-query constant offset
// to the top-k scores at finalize time. Ranking is invariant
// to this constant; absolute scores stay exact.
// SIMD asymmetric kernels drop the per-lane centre subtract from the
// hot loop. Apply the query-constant offset before TopK insertion so
// retention and final ordering use the same publicly visible score key.
#[cfg(target_arch = "x86_64")]
let centre = ((1u32 << bits) as f32 - 1.0) / 2.0;

queries
Expand All @@ -351,27 +351,27 @@ impl RankQuant {
.for_each(|((q, out_scores), out_indices)| {
let q_unit = l2_normalise(q);
let mut top = TopK::new(k_eff);
#[cfg_attr(not(target_arch = "x86_64"), allow(unused_mut))]
let mut centre_drop_used = false;
#[cfg(target_arch = "x86_64")]
let centre_offset = -centre * q_unit.iter().sum::<f32>() * inv_norm;

#[cfg(target_arch = "x86_64")]
unsafe {
match (simd_tier, bits) {
(SimdTier::Avx512, 2) => {
top.set_score_offset(centre_offset);
scan_b2_asym_avx512(&self.packed, n, dim, &q_unit, inv_norm, &mut top);
centre_drop_used = true;
}
(SimdTier::Avx512, 4) => {
top.set_score_offset(centre_offset);
scan_b4_asym_avx512(&self.packed, n, dim, &q_unit, inv_norm, &mut top);
centre_drop_used = true;
}
(SimdTier::Avx2, 2) => {
top.set_score_offset(centre_offset);
scan_b2_asym_avx2(&self.packed, n, dim, &q_unit, inv_norm, &mut top);
centre_drop_used = true;
}
(SimdTier::Avx2, 4) => {
top.set_score_offset(centre_offset);
scan_b4_asym_avx2(&self.packed, n, dim, &q_unit, inv_norm, &mut top);
centre_drop_used = true;
}
_ => scan_via_lut_scalar(
&self.packed,
Expand Down Expand Up @@ -399,25 +399,6 @@ impl RankQuant {

top.finalize_into(out_scores, out_indices);

if centre_drop_used {
// The asym kernels drop the per-lane `- centre` term from
// the hot loop; it is a query-constant shift, re-applied
// here. Guarded by `is_finite` so it lands only on filled
// slots: when fewer than `k` docs were scored the trailing
// top-k positions stay at the `f32::NEG_INFINITY` sentinel,
// and `NEG_INFINITY + offset` would wrongly turn a sentinel
// into a finite score. (Real scores are always finite — the
// finite-input policy guarantees it — so the guard only ever
// skips sentinels, never a genuine result.)
let q_sum: f32 = q_unit.iter().sum();
let offset = -centre * q_sum * inv_norm;
for s in out_scores.iter_mut() {
if s.is_finite() {
*s += offset;
}
}
}

let _ = bytes_per_vec; // shape clarity
});

Expand Down Expand Up @@ -539,6 +520,9 @@ impl RankQuant {
/// to global IDs before returning). Results are ordered by score
/// descending, then global row ID ascending, matching the full-index
/// search tie policy even when `candidates` is unsorted.
/// Duplicate candidate IDs are scored as separate entries and can
/// produce duplicate hits; callers that require unique row IDs should
/// deduplicate before calling.
///
/// Uses the same AVX-512 → AVX2 → scalar dispatch as
/// [`Self::search_asymmetric`] and the same centre-drop math, just
Expand Down Expand Up @@ -585,12 +569,13 @@ impl RankQuant {

let norm = rankquant_norm(dim, bits);
let inv_norm = 1.0_f32 / norm;
#[cfg(target_arch = "x86_64")]
let centre = ((1u32 << bits) as f32 - 1.0) / 2.0;

// L2-normalise the query and gather centre-correction.
let q_unit = l2_normalise(query);
let q_sum: f32 = q_unit.iter().sum();
let centre_offset = -centre * q_sum * inv_norm;
#[cfg(target_arch = "x86_64")]
let centre_offset = -centre * q_unit.iter().sum::<f32>() * inv_norm;

// Pack the candidate docs' bytes into a contiguous buffer so
// the SIMD kernels can scan them as if they were a small dense
Expand All @@ -609,26 +594,24 @@ impl RankQuant {
#[cfg_attr(not(target_arch = "x86_64"), allow(unused_variables))]
let simd_tier = select_simd_tier(dim, bits);
let mut top = TopK::new_with_tie_keys(k_eff, candidates);
#[cfg_attr(not(target_arch = "x86_64"), allow(unused_mut))]
let mut centre_drop_used = false;
#[cfg(target_arch = "x86_64")]
unsafe {
match (simd_tier, bits) {
(SimdTier::Avx512, 2) => {
top.set_score_offset(centre_offset);
scan_b2_asym_avx512(&sub_packed, m, dim, &q_unit, inv_norm, &mut top);
centre_drop_used = true;
}
(SimdTier::Avx512, 4) => {
top.set_score_offset(centre_offset);
scan_b4_asym_avx512(&sub_packed, m, dim, &q_unit, inv_norm, &mut top);
centre_drop_used = true;
}
(SimdTier::Avx2, 2) => {
top.set_score_offset(centre_offset);
scan_b2_asym_avx2(&sub_packed, m, dim, &q_unit, inv_norm, &mut top);
centre_drop_used = true;
}
(SimdTier::Avx2, 4) => {
top.set_score_offset(centre_offset);
scan_b4_asym_avx2(&sub_packed, m, dim, &q_unit, inv_norm, &mut top);
centre_drop_used = true;
}
_ => scan_via_lut_scalar(
&sub_packed,
Expand Down Expand Up @@ -657,17 +640,6 @@ impl RankQuant {
let mut scores = vec![f32::NEG_INFINITY; k_eff];
let mut local_indices = vec![-1i64; k_eff];
top.finalize_into(&mut scores, &mut local_indices);
if centre_drop_used {
// Re-apply the per-query centre shift dropped from the kernel hot
// loop; the `is_finite` guard skips unfilled top-k slots (still at
// the `f32::NEG_INFINITY` sentinel) so a sentinel never becomes a
// finite score. See the matching note in `search_asymmetric`.
for s in scores.iter_mut() {
if s.is_finite() {
*s += centre_offset;
}
}
}
// Map local → global doc IDs.
let global_indices: Vec<i64> = local_indices
.iter()
Expand Down
Loading
Loading