Skip to content

Commit 71bd103

Browse files
authored
[0.6] [MOD-11237] Block Size Boundary Oscillation Benchmark (#768) (#773)
* [MOD-11237] Block Size Boundary Oscillation Benchmark (#768) * improve benchmark, fix counters * fix (cherry picked from commit fd08b55) * add indexCapacity API * fix
1 parent 520531a commit 71bd103

File tree

10 files changed

+135
-41
lines changed

10 files changed

+135
-41
lines changed

src/VecSim/algorithms/brute_force/brute_force.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class BruteForceIndex : public VecSimIndexAbstract<DistType> {
3636
BruteForceIndex(const BFParams *params, std::shared_ptr<VecSimAllocator> allocator);
3737

3838
virtual size_t indexSize() const override;
39+
size_t indexCapacity() const override;
3940
vecsim_stl::vector<DistType> computeBlockScores(VectorBlock *block, const void *queryBlob,
4041
void *timeoutCtx,
4142
VecSimQueryResult_Code *rc) const;
@@ -212,6 +213,11 @@ size_t BruteForceIndex<DataType, DistType>::indexSize() const {
212213
return this->count;
213214
}
214215

216+
template <typename DataType, typename DistType>
217+
size_t BruteForceIndex<DataType, DistType>::indexCapacity() const {
218+
return this->idToLabelMapping.size();
219+
}
220+
215221
// Compute the score for every vector in the block by using the given distance function.
216222
template <typename DataType, typename DistType>
217223
vecsim_stl::vector<DistType> BruteForceIndex<DataType, DistType>::computeBlockScores(

src/VecSim/algorithms/hnsw/hnsw.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ class HNSWIndex : public VecSimIndexAbstract<DistType>
184184
inline void setEpsilon(double epsilon);
185185
inline double getEpsilon() const;
186186
inline size_t indexSize() const override;
187-
inline size_t getIndexCapacity() const;
187+
inline size_t indexCapacity() const override;
188188
inline size_t getEfConstruction() const;
189189
inline size_t getM() const;
190190
inline size_t getMaxLevel() const;
@@ -248,7 +248,7 @@ size_t HNSWIndex<DataType, DistType>::indexSize() const {
248248
}
249249

250250
template <typename DataType, typename DistType>
251-
size_t HNSWIndex<DataType, DistType>::getIndexCapacity() const {
251+
size_t HNSWIndex<DataType, DistType>::indexCapacity() const {
252252
return max_elements_;
253253
}
254254

src/VecSim/vec_sim_interface.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,13 @@ struct VecSimIndexInterface : public VecsimBaseObject {
6969
*/
7070
virtual size_t indexSize() const = 0;
7171

72+
/**
73+
* @brief Return the index capacity, so we know if resize is required for adding new vectors.
74+
*
75+
* @return index capacity.
76+
*/
77+
virtual size_t indexCapacity() const = 0;
78+
7279
/**
7380
* @brief Return the number of unique labels in the index using its SizeFn.
7481
*

tests/benchmark/bm_common.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,17 +54,19 @@ void BM_VecSimCommon<index_type_t>::Memory_FLAT(benchmark::State &st, unsigned s
5454
for (auto _ : st) {
5555
// Do nothing...
5656
}
57-
st.counters["memory"] =
58-
(double)VecSimIndex_StatsInfo(INDICES[VecSimAlgo_BF + index_offset]).memory;
57+
st.counters["memory"] = benchmark::Counter(
58+
(double)VecSimIndex_StatsInfo(INDICES[VecSimAlgo_BF + index_offset]).memory,
59+
benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024);
5960
}
6061
template <typename index_type_t>
6162
void BM_VecSimCommon<index_type_t>::Memory_HNSW(benchmark::State &st, unsigned short index_offset) {
6263

6364
for (auto _ : st) {
6465
// Do nothing...
6566
}
66-
st.counters["memory"] =
67-
(double)VecSimIndex_StatsInfo(INDICES[VecSimAlgo_HNSWLIB + index_offset]).memory;
67+
st.counters["memory"] = benchmark::Counter(
68+
(double)VecSimIndex_StatsInfo(INDICES[VecSimAlgo_HNSWLIB + index_offset]).memory,
69+
benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024);
6870
}
6971

7072
// TopK search BM

tests/benchmark/bm_vecsim_basics.h

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,14 @@ class BM_VecSimBasics : public BM_VecSimCommon<index_type_t> {
2323
static void Range_BF(benchmark::State &st);
2424
static void Range_HNSW(benchmark::State &st);
2525

26+
// Reproduces allocation/deallocation oscillation issue at block size boundaries.
27+
// Sets up index at blockSize+1 capacity, then repeatedly deletes and re-adds the same vector,
28+
// triggering constant grow-shrink cycles.
29+
// This behavior was fixed by PR #753 with a conservative resize strategy that only
30+
// shrinks containers when there are 2+ free blocks, preventing oscillation cycles.
31+
// Expected: High allocation overhead before fix, stable performance after fix.
32+
static void UpdateAtBlockSize(benchmark::State &st);
33+
2634
private:
2735
// Vectors of vector to store deleted labels' data.
2836
using LabelData = std::vector<std::vector<data_t>>;
@@ -53,7 +61,9 @@ void BM_VecSimBasics<index_type_t>::AddLabel(benchmark::State &st) {
5361
label++;
5462
}
5563

56-
st.counters["memory_per_vector"] = (double)memory_delta / (double)added_vec_count;
64+
st.counters["memory_per_vector"] =
65+
benchmark::Counter((double)memory_delta / (double)added_vec_count,
66+
benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024);
5767
st.counters["vectors_per_label"] = vec_per_label;
5868

5969
assert(VecSimIndex_IndexSize(INDICES[st.range(0)]) == N_VECTORS + added_vec_count);
@@ -95,7 +105,9 @@ void BM_VecSimBasics<index_type_t>::DeleteLabel(algo_t *index, benchmark::State
95105

96106
// Avg. memory delta per vector equals the total memory delta divided by the number
97107
// of deleted vectors.
98-
st.counters["memory_per_vector"] = memory_delta / (double)removed_vectors_count;
108+
st.counters["memory_per_vector"] =
109+
benchmark::Counter((double)memory_delta / (double)removed_vectors_count,
110+
benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024);
99111

100112
// Restore index state.
101113
// For each label in removed_labels_data
@@ -154,6 +166,56 @@ void BM_VecSimBasics<index_type_t>::Range_HNSW(benchmark::State &st) {
154166
st.counters["Recall"] = (float)total_res / total_res_bf;
155167
}
156168

169+
template <typename index_type_t>
170+
void BM_VecSimBasics<index_type_t>::UpdateAtBlockSize(benchmark::State &st) {
171+
auto index = INDICES[st.range(0)];
172+
size_t initial_index_size = VecSimIndex_IndexSize(index);
173+
// Calculate vectors needed to reach next block boundary
174+
size_t vecs_to_blocksize =
175+
BM_VecSimGeneral::block_size - (initial_index_size % BM_VecSimGeneral::block_size);
176+
assert(vecs_to_blocksize < BM_VecSimGeneral::block_size);
177+
labelType initial_label_count = index->indexLabelCount();
178+
labelType curr_label = initial_label_count;
179+
// Set up index at blockSize+1 to trigger oscillation issue
180+
// Make sure we have enough queries to add a new label.
181+
assert(N_QUERIES > BM_VecSimGeneral::block_size);
182+
size_t overhead = 1;
183+
size_t added_vec_count = vecs_to_blocksize + overhead;
184+
for (size_t i = 0; i < added_vec_count; ++i) {
185+
VecSimIndex_AddVector(index, QUERIES[added_vec_count % N_QUERIES].data(), curr_label++);
186+
}
187+
assert(VecSimIndex_IndexSize(index) % BM_VecSimGeneral::block_size == overhead);
188+
assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count);
189+
std::cout << "Added " << added_vec_count << " vectors to reach block size boundary."
190+
<< std::endl;
191+
std::cout << "Index size is now " << VecSimIndex_IndexSize(index) << std::endl;
192+
std::cout << "Last label is " << curr_label - 1 << std::endl;
193+
// Benchmark loop: repeatedly delete/add same vector to trigger grow-shrink cycles
194+
labelType label_to_update = curr_label - 1;
195+
size_t index_cap = index->indexCapacity();
196+
for (auto _ : st) {
197+
// Remove the vector directly from hnsw
198+
size_t ret = VecSimIndex_DeleteVector(index, label_to_update);
199+
assert(ret == 1);
200+
assert(index->indexCapacity() == index_cap - BM_VecSimGeneral::block_size);
201+
// Capacity should shrink by one block after deletion
202+
ret = VecSimIndex_AddVector(index, QUERIES[(added_vec_count - 1) % N_QUERIES].data(),
203+
label_to_update);
204+
assert(ret == 1);
205+
assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count);
206+
// Capacity should grow back to original size after addition
207+
assert(index->indexCapacity() == index_cap);
208+
}
209+
assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count);
210+
// Clean-up all the new vectors to restore the index size to its original value.
211+
size_t new_label_count = index->indexLabelCount();
212+
for (size_t label = initial_label_count; label < new_label_count; label++) {
213+
// If index is tiered HNSW, remove directly from the underline HNSW.
214+
VecSimIndex_DeleteVector(index, label);
215+
}
216+
assert(VecSimIndex_IndexSize(index) == N_VECTORS);
217+
}
218+
157219
#define UNIT_AND_ITERATIONS \
158220
Unit(benchmark::kMillisecond)->Iterations((long)BM_VecSimGeneral::block_size)
159221

@@ -200,3 +262,8 @@ void BM_VecSimBasics<index_type_t>::Range_HNSW(benchmark::State &st) {
200262
}
201263
#define REGISTER_DeleteLabel(BM_FUNC) \
202264
BENCHMARK_REGISTER_F(BM_VecSimBasics, BM_FUNC)->UNIT_AND_ITERATIONS
265+
266+
// Registers the fixture benchmark BM_FUNC with the shared UNIT_AND_ITERATIONS settings, passing
// VecSimAlgo as its single argument and the stringized VecSimAlgo token to ArgName.
#define REGISTER_UpdateAtBlockSize(BM_FUNC, VecSimAlgo) \
267+
BENCHMARK_REGISTER_F(BM_VecSimBasics, BM_FUNC) \
268+
->UNIT_AND_ITERATIONS->Arg(VecSimAlgo) \
269+
->ArgName(#VecSimAlgo)

tests/benchmark/run_files/bm_basics_multi_fp32.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,9 @@ DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, HNSW), fp32_index_t, HNSWIndex_Mul
2828
VecSimAlgo_HNSWLIB)
2929
#include "benchmark/bm_initialization/bm_basics_initialize_fp32.h"
3030

31+
// Test oscillations at block size boundaries.
32+
// Define the fixture benchmark for fp32_index_t; its body only forwards to UpdateAtBlockSize.
// It is then registered twice: once with VecSimAlgo_BF and once with VecSimAlgo_HNSWLIB.
BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimBasics, UpdateAtBlockSize_Multi, fp32_index_t)
33+
(benchmark::State &st) { UpdateAtBlockSize(st); }
34+
REGISTER_UpdateAtBlockSize(UpdateAtBlockSize_Multi, VecSimAlgo_BF);
35+
REGISTER_UpdateAtBlockSize(UpdateAtBlockSize_Multi, VecSimAlgo_HNSWLIB);
3136
BENCHMARK_MAIN();

tests/benchmark/run_files/bm_basics_single_fp32.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,12 @@ DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, BF), fp32_index_t, BruteForceIndex
2626
float, VecSimAlgo_BF)
2727
DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, HNSW), fp32_index_t, HNSWIndex_Single, float, float,
2828
VecSimAlgo_HNSWLIB)
29+
2930
#include "benchmark/bm_initialization/bm_basics_initialize_fp32.h"
31+
32+
// Test oscillations at block size boundaries.
33+
// Define the fixture benchmark for fp32_index_t; its body only forwards to UpdateAtBlockSize.
// It is then registered twice: once with VecSimAlgo_BF and once with VecSimAlgo_HNSWLIB.
BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimBasics, UpdateAtBlockSize_Single, fp32_index_t)
34+
(benchmark::State &st) { UpdateAtBlockSize(st); }
35+
REGISTER_UpdateAtBlockSize(UpdateAtBlockSize_Single, VecSimAlgo_BF);
36+
REGISTER_UpdateAtBlockSize(UpdateAtBlockSize_Single, VecSimAlgo_HNSWLIB);
3037
BENCHMARK_MAIN();

tests/unit/test_allocator.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) {
343343
auto *hnswIndex =
344344
new (allocator) HNSWIndex_Single<TEST_DATA_T, TEST_DIST_T>(&params, allocator);
345345

346-
ASSERT_EQ(hnswIndex->getIndexCapacity(), 0);
346+
ASSERT_EQ(hnswIndex->indexCapacity(), 0);
347347
size_t initial_memory_size = allocator->getAllocationSize();
348348
// labels_lookup and element_levels containers are not allocated at all in some platforms,
349349
// when initial capacity is zero, while in other platforms labels_lookup is allocated with a
@@ -362,7 +362,7 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) {
362362
}
363363
// Validate that a single block exists.
364364
ASSERT_EQ(hnswIndex->indexSize(), block_size);
365-
ASSERT_EQ(hnswIndex->getIndexCapacity(), block_size);
365+
ASSERT_EQ(hnswIndex->indexCapacity(), block_size);
366366
ASSERT_EQ(allocator->getAllocationSize(), initial_memory_size + accumulated_mem_delta);
367367
// Also validate that there are no unidirectional connections (these add memory to the incoming
368368
// edges sets).
@@ -373,7 +373,7 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) {
373373
size_t mem_delta = GenerateAndAddVector<TEST_DATA_T>(hnswIndex, d, block_size, block_size);
374374

375375
ASSERT_EQ(hnswIndex->indexSize(), block_size + 1);
376-
ASSERT_EQ(hnswIndex->getIndexCapacity(), 2 * block_size);
376+
ASSERT_EQ(hnswIndex->indexCapacity(), 2 * block_size);
377377
ASSERT_EQ(hnswIndex->checkIntegrity().unidirectional_connections, 0);
378378

379379
// Compute the expected memory allocation due to the last vector insertion.
@@ -400,7 +400,7 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) {
400400
// memory consumption.
401401
VecSimIndex_DeleteVector(hnswIndex, block_size);
402402
ASSERT_EQ(hnswIndex->indexSize(), block_size);
403-
ASSERT_EQ(hnswIndex->getIndexCapacity(), block_size);
403+
ASSERT_EQ(hnswIndex->indexCapacity(), block_size);
404404
ASSERT_EQ(hnswIndex->checkIntegrity().unidirectional_connections, 0);
405405
ASSERT_EQ(allocator->getAllocationSize(), initial_memory_size + accumulated_mem_delta);
406406

@@ -410,7 +410,7 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) {
410410
}
411411

412412
ASSERT_EQ(hnswIndex->indexSize(), 0);
413-
ASSERT_EQ(hnswIndex->getIndexCapacity(), 0);
413+
ASSERT_EQ(hnswIndex->indexCapacity(), 0);
414414
// All data structures' memory returns to what it was, with the exception of the labels_lookup
415415
// (STL unordered_map with hash table implementation), that leaves some empty buckets.
416416
size_t hash_table_memory = hnswIndex->label_lookup_.bucket_count() * sizeof(size_t);

tests/unit/test_hnsw.cpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -130,17 +130,17 @@ TYPED_TEST(HNSWTest, resizeNAlignIndex) {
130130
}
131131
// The size and the capacity should be equal.
132132
HNSWIndex<TEST_DATA_T, TEST_DIST_T> *hnswIndex = this->CastToHNSW(index);
133-
ASSERT_EQ(hnswIndex->getIndexCapacity(), VecSimIndex_IndexSize(index));
133+
ASSERT_EQ(hnswIndex->indexCapacity(), VecSimIndex_IndexSize(index));
134134
// The capacity shouldn't be changed.
135-
ASSERT_EQ(hnswIndex->getIndexCapacity(), n);
135+
ASSERT_EQ(hnswIndex->indexCapacity(), n);
136136

137137
// Add another vector to exceed the initial capacity.
138138
GenerateAndAddVector<TEST_DATA_T>(index, dim, n);
139139

140140
// The capacity should be now aligned with the block size.
141141
// bs = 3, size = 11 -> capacity = 12
142142
// New capacity = initial capacity + blockSize - initial capacity % blockSize.
143-
ASSERT_EQ(hnswIndex->getIndexCapacity(), n + bs - n % bs);
143+
ASSERT_EQ(hnswIndex->indexCapacity(), n + bs - n % bs);
144144
VecSimIndex_Free(index);
145145
}
146146

@@ -164,7 +164,7 @@ TYPED_TEST(HNSWTest, resizeNAlignIndex_largeInitialCapacity) {
164164

165165
// The capacity shouldn't change, should remain n.
166166
HNSWIndex<TEST_DATA_T, TEST_DIST_T> *hnswIndex = this->CastToHNSW(index);
167-
ASSERT_EQ(hnswIndex->getIndexCapacity(), n);
167+
ASSERT_EQ(hnswIndex->indexCapacity(), n);
168168

169169
// Delete last vector, to get size % block_size == 0. size = 3
170170
VecSimIndex_DeleteVector(index, bs);
@@ -174,7 +174,7 @@ TYPED_TEST(HNSWTest, resizeNAlignIndex_largeInitialCapacity) {
174174

175175
// New capacity = initial capacity - block_size - number_of_vectors_to_align =
176176
// 10 - 3 - 10 % 3 (1) = 6
177-
size_t curr_capacity = hnswIndex->getIndexCapacity();
177+
size_t curr_capacity = hnswIndex->indexCapacity();
178178
ASSERT_EQ(curr_capacity, n - bs - n % bs);
179179

180180
// Delete all the vectors to decrease capacity by another bs.
@@ -183,20 +183,20 @@ TYPED_TEST(HNSWTest, resizeNAlignIndex_largeInitialCapacity) {
183183
VecSimIndex_DeleteVector(index, i);
184184
++i;
185185
}
186-
ASSERT_EQ(hnswIndex->getIndexCapacity(), bs);
186+
ASSERT_EQ(hnswIndex->indexCapacity(), bs);
187187
// Add and delete a vector to achieve:
188188
// size % block_size == 0 && size + bs <= capacity(3).
189189
// the capacity should be resized to zero
190190
GenerateAndAddVector<TEST_DATA_T>(index, dim, 0);
191191
VecSimIndex_DeleteVector(index, 0);
192-
ASSERT_EQ(hnswIndex->getIndexCapacity(), 0);
192+
ASSERT_EQ(hnswIndex->indexCapacity(), 0);
193193

194194
// Do it again. This time after adding a vector the capacity is increased by bs.
195195
// Upon deletion it will be resized to zero again.
196196
GenerateAndAddVector<TEST_DATA_T>(index, dim, 0);
197-
ASSERT_EQ(hnswIndex->getIndexCapacity(), bs);
197+
ASSERT_EQ(hnswIndex->indexCapacity(), bs);
198198
VecSimIndex_DeleteVector(index, 0);
199-
ASSERT_EQ(hnswIndex->getIndexCapacity(), 0);
199+
ASSERT_EQ(hnswIndex->indexCapacity(), 0);
200200

201201
VecSimIndex_Free(index);
202202
}
@@ -221,14 +221,14 @@ TYPED_TEST(HNSWTest, resizeNAlignIndex_largerBlockSize) {
221221

222222
HNSWIndex<TEST_DATA_T, TEST_DIST_T> *hnswIndex = this->CastToHNSW(index);
223223
// The capacity shouldn't change.
224-
ASSERT_EQ(hnswIndex->getIndexCapacity(), n);
224+
ASSERT_EQ(hnswIndex->indexCapacity(), n);
225225

226226
// Size equals capacity.
227227
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
228228

229229
// Add another vector -> the capacity is increased to a multiple of block_size.
230230
GenerateAndAddVector<TEST_DATA_T>(index, dim, n);
231-
ASSERT_EQ(hnswIndex->getIndexCapacity(), bs);
231+
ASSERT_EQ(hnswIndex->indexCapacity(), bs);
232232

233233
// Size increased by 1.
234234
ASSERT_EQ(VecSimIndex_IndexSize(index), n + 1);
@@ -237,7 +237,7 @@ TYPED_TEST(HNSWTest, resizeNAlignIndex_largerBlockSize) {
237237
VecSimIndex_DeleteVector(index, 1);
238238

239239
// The capacity should remain the same.
240-
ASSERT_EQ(hnswIndex->getIndexCapacity(), bs);
240+
ASSERT_EQ(hnswIndex->indexCapacity(), bs);
241241

242242
VecSimIndex_Free(index);
243243
}
@@ -266,7 +266,7 @@ TYPED_TEST(HNSWTest, emptyIndex) {
266266
// The capacity should change to be aligned with the block size.
267267

268268
HNSWIndex<TEST_DATA_T, TEST_DIST_T> *hnswIndex = this->CastToHNSW(index);
269-
size_t new_capacity = hnswIndex->getIndexCapacity();
269+
size_t new_capacity = hnswIndex->indexCapacity();
270270
ASSERT_EQ(new_capacity, n - n % bs - bs);
271271

272272
// Size equals 0.
@@ -275,7 +275,7 @@ TYPED_TEST(HNSWTest, emptyIndex) {
275275
// Try to remove it again.
276276
// The capacity should remain unchanged, as we are trying to delete a label that doesn't exist.
277277
VecSimIndex_DeleteVector(index, 1);
278-
ASSERT_EQ(hnswIndex->getIndexCapacity(), new_capacity);
278+
ASSERT_EQ(hnswIndex->indexCapacity(), new_capacity);
279279
// Nor the size.
280280
ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
281281

0 commit comments

Comments
 (0)