From 79333506b23bcd402de9519b6a2c61c1220a752d Mon Sep 17 00:00:00 2001 From: Ahmad Date: Fri, 5 Jun 2026 11:58:07 +0300 Subject: [PATCH] MDEV-39858: Reloading COSINE metric index from disk degrades search recall due to abs2 quantization noise When a vector is created in memory using FVector::create() during normal inserts, its abs2 value (half the squared magnitude) under the COSINE metric is hardcoded to 0.5f. However, when the index is reloaded from disk (after a server restart, FLUSH TABLES, or ALTER TABLE), the index uses FVectorNode::load_from_record(). This method reads the stored scale and quantized int16 coordinates from the database record, and runs postprocess(). Inside postprocess(), abs2 is dynamically recomputed using floating-point math: abs2 = subabs2 + scale * scale * dot_product(d, d, vec_len) / 2; Because the coordinates stored on disk are quantized int16 values, this recalculation introduces rounding noise, so the reloaded abs2 deviates slightly from 0.5f. The resulting recall loss is most visible on high-dimensional datasets and grows as M increases. Fix: in FVectorNode::load_from_record(), reset abs2 to the hardcoded 0.5f after postprocess() when the metric is COSINE, matching what FVector::create() does. --- sql/vector_mhnsw.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/vector_mhnsw.cc b/sql/vector_mhnsw.cc index 14942735e887f..655a27acc2a5e 100644 --- a/sql/vector_mhnsw.cc +++ b/sql/vector_mhnsw.cc @@ -883,6 +883,8 @@ int FVectorNode::load_from_record(TABLE *graph) FVector *vec_ptr= FVector::align_ptr(tref() + tref_len()); memcpy(vec_ptr->data(), v->ptr(), v->length()); vec_ptr->postprocess(ctx->vec_len); + if (ctx->metric == COSINE) + vec_ptr->abs2= 0.5f; longlong layer= graph->field[FIELD_LAYER]->val_int(); if (layer > 100) // 10e30 nodes at M=2, more at larger M's