Skip to content

Commit 47ab7a6

Browse files
alonre24 authored and GuyAv46 committed
Tiered index benchmark MOD-5174 (#370)
1 parent b17ba77 commit 47ab7a6

32 files changed

+917 -617 lines changed

CMakeLists.txt

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,6 @@ include(cmake/san.cmake)
2424
# ----------------------------------------------------------------------------------------------
2525
project(VectorSimilarity)
2626

27-
set(CMAKE_CXX_STANDARD 20)
28-
2927
# Only do these if this is the main project, and not if it is included through add_subdirectory
3028
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
3129

@@ -44,7 +42,7 @@ if(VECSIM_BUILD_TESTS)
4442
include(FetchContent)
4543
enable_testing()
4644

47-
set(CMAKE_CXX_FLAGS "-Wno-unused-parameter")
45+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
4846

4947
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -fPIC ${CLANG_SAN_FLAGS} ${LLVM_CXX_FLAGS} ${COV_CXX_FLAGS}")
5048
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} ${LLVM_LD_FLAGS}")

src/VecSim/algorithms/hnsw/hnsw_tiered.h

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -209,6 +209,9 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {
209209
// Run no more than pendingSwapJobsThreshold value jobs.
210210
this->executeReadySwapJobs(this->pendingSwapJobsThreshold);
211211
}
212+
#ifdef BUILD_TESTS
213+
void getDataByLabel(labelType label, std::vector<std::vector<DataType>> &vectors_output) const;
214+
#endif
212215
};
213216

214217
/**
@@ -1098,4 +1101,12 @@ VecSimIndexBasicInfo TieredHNSWIndex<DataType, DistType>::basicInfo() const {
10981101
info.isTiered = true;
10991102
info.algo = VecSimAlgo_HNSWLIB;
11001103
return info;
1101-
};
1104+
}
1105+
1106+
#ifdef BUILD_TESTS
1107+
template <typename DataType, typename DistType>
1108+
void TieredHNSWIndex<DataType, DistType>::getDataByLabel(
1109+
labelType label, std::vector<std::vector<DataType>> &vectors_output) const {
1110+
this->getHNSWIndex()->getDataByLabel(label, vectors_output);
1111+
}
1112+
#endif

src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,3 +48,5 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_overwriteVectorBasic_Test)
4848
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_overwriteVectorAsync_Test)
4949
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_preferAdHocOptimization_Test)
5050
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_runGCAPI_Test)
51+
52+
INDEX_TEST_FRIEND_CLASS(BM_VecSimBasics)

src/VecSim/index_factories/brute_force_factory.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ NewIndex_ChooseMultiOrSingle(const BFParams *params,
2626

2727
static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params) {
2828

29-
const BFParams *bfParams = &params->bfParams;
29+
const BFParams *bfParams = &params->algoParams.bfParams;
3030
AbstractIndexInitParams abstractInitParams = {.allocator =
3131
VecSimAllocator::newVecsimAllocator(),
3232
.dim = bfParams->dim,
@@ -39,7 +39,7 @@ static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params)
3939
}
4040

4141
VecSimIndex *NewIndex(const VecSimParams *params) {
42-
const BFParams *bfParams = &params->bfParams;
42+
const BFParams *bfParams = &params->algoParams.bfParams;
4343
AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(params);
4444
return NewIndex(bfParams, NewAbstractInitParams(params));
4545
}
@@ -56,7 +56,7 @@ VecSimIndex *NewIndex(const BFParams *bfparams, const AbstractIndexInitParams &a
5656
}
5757

5858
VecSimIndex *NewIndex(const BFParams *bfparams) {
59-
VecSimParams params = {.bfParams = *bfparams};
59+
VecSimParams params = {.algoParams{.bfParams = BFParams{*bfparams}}};
6060
return NewIndex(&params);
6161
}
6262

src/VecSim/index_factories/hnsw_factory.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ NewIndex_ChooseMultiOrSingle(const HNSWParams *params,
2525
}
2626

2727
static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params) {
28-
const HNSWParams *hnswParams = &params->hnswParams;
28+
const HNSWParams *hnswParams = &params->algoParams.hnswParams;
2929
AbstractIndexInitParams abstractInitParams = {.allocator =
3030
VecSimAllocator::newVecsimAllocator(),
3131
.dim = hnswParams->dim,
@@ -38,7 +38,7 @@ static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params)
3838
}
3939

4040
VecSimIndex *NewIndex(const VecSimParams *params) {
41-
const HNSWParams *hnswParams = &params->hnswParams;
41+
const HNSWParams *hnswParams = &params->algoParams.hnswParams;
4242
AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(params);
4343
if (hnswParams->type == VecSimType_FLOAT32) {
4444
return NewIndex_ChooseMultiOrSingle<float>(hnswParams, abstractInitParams);
@@ -51,7 +51,7 @@ VecSimIndex *NewIndex(const VecSimParams *params) {
5151
}
5252

5353
VecSimIndex *NewIndex(const HNSWParams *params) {
54-
VecSimParams vecSimParams = {.hnswParams = *params};
54+
VecSimParams vecSimParams = {.algoParams = {.hnswParams = HNSWParams{*params}}};
5555
return NewIndex(&vecSimParams);
5656
}
5757

@@ -237,7 +237,8 @@ VecSimIndex *NewIndex(const std::string &location, const HNSWParams *v1_params)
237237
}
238238
Serializer::readBinaryPOD(input, params.initialCapacity);
239239

240-
VecSimParams vecsimParams = {.algo = VecSimAlgo_HNSWLIB, .hnswParams = params};
240+
VecSimParams vecsimParams = {.algo = VecSimAlgo_HNSWLIB,
241+
.algoParams = {.hnswParams = HNSWParams{params}}};
241242
AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(&vecsimParams);
242243
if (params.type == VecSimType_FLOAT32) {
243244
return NewIndex_ChooseMultiOrSingle<float>(input, &params, abstractInitParams, version);

src/VecSim/index_factories/index_factory.cpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ VecSimIndex *NewIndex(const VecSimParams *params) {
2626
break;
2727
}
2828
case VecSimAlgo_TIERED: {
29-
index = TieredFactory::NewIndex(&params->tieredParams);
29+
index = TieredFactory::NewIndex(&params->algoParams.tieredParams);
3030
break;
3131
}
3232
}
@@ -39,23 +39,23 @@ VecSimIndex *NewIndex(const VecSimParams *params) {
3939
size_t EstimateInitialSize(const VecSimParams *params) {
4040
switch (params->algo) {
4141
case VecSimAlgo_HNSWLIB:
42-
return HNSWFactory::EstimateInitialSize(&params->hnswParams);
42+
return HNSWFactory::EstimateInitialSize(&params->algoParams.hnswParams);
4343
case VecSimAlgo_BF:
44-
return BruteForceFactory::EstimateInitialSize(&params->bfParams);
44+
return BruteForceFactory::EstimateInitialSize(&params->algoParams.bfParams);
4545
case VecSimAlgo_TIERED:
46-
return TieredFactory::EstimateInitialSize(&params->tieredParams);
46+
return TieredFactory::EstimateInitialSize(&params->algoParams.tieredParams);
4747
}
4848
return -1;
4949
}
5050

5151
size_t EstimateElementSize(const VecSimParams *params) {
5252
switch (params->algo) {
5353
case VecSimAlgo_HNSWLIB:
54-
return HNSWFactory::EstimateElementSize(&params->hnswParams);
54+
return HNSWFactory::EstimateElementSize(&params->algoParams.hnswParams);
5555
case VecSimAlgo_BF:
56-
return BruteForceFactory::EstimateElementSize(&params->bfParams);
56+
return BruteForceFactory::EstimateElementSize(&params->algoParams.bfParams);
5757
case VecSimAlgo_TIERED:
58-
return TieredFactory::EstimateElementSize(&params->tieredParams);
58+
return TieredFactory::EstimateElementSize(&params->algoParams.tieredParams);
5959
}
6060
return -1;
6161
}

src/VecSim/index_factories/tiered_factory.cpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -21,11 +21,11 @@ inline VecSimIndex *NewIndex(const TieredIndexParams *params) {
2121
HNSWFactory::NewIndex(params->primaryIndexParams));
2222
// initialize brute force index
2323

24-
BFParams bf_params = {.type = params->primaryIndexParams->hnswParams.type,
25-
.dim = params->primaryIndexParams->hnswParams.dim,
26-
.metric = params->primaryIndexParams->hnswParams.metric,
27-
.multi = params->primaryIndexParams->hnswParams.multi,
28-
.blockSize = params->primaryIndexParams->hnswParams.blockSize};
24+
BFParams bf_params = {.type = params->primaryIndexParams->algoParams.hnswParams.type,
25+
.dim = params->primaryIndexParams->algoParams.hnswParams.dim,
26+
.metric = params->primaryIndexParams->algoParams.hnswParams.metric,
27+
.multi = params->primaryIndexParams->algoParams.hnswParams.multi,
28+
.blockSize = params->primaryIndexParams->algoParams.hnswParams.blockSize};
2929

3030
std::shared_ptr<VecSimAllocator> flat_allocator = VecSimAllocator::newVecsimAllocator();
3131
AbstractIndexInitParams abstractInitParams = {.allocator = flat_allocator,
@@ -47,7 +47,7 @@ inline VecSimIndex *NewIndex(const TieredIndexParams *params) {
4747
}
4848

4949
inline size_t EstimateInitialSize(const TieredIndexParams *params, BFParams &bf_params_output) {
50-
HNSWParams hnsw_params = params->primaryIndexParams->hnswParams;
50+
HNSWParams hnsw_params = params->primaryIndexParams->algoParams.hnswParams;
5151

5252
// Add size estimation of VecSimTieredIndex sub indexes.
5353
size_t est = HNSWFactory::EstimateInitialSize(&hnsw_params);
@@ -70,7 +70,7 @@ inline size_t EstimateInitialSize(const TieredIndexParams *params, BFParams &bf_
7070

7171
VecSimIndex *NewIndex(const TieredIndexParams *params) {
7272
// Tiered index that contains HNSW index as primary index
73-
VecSimType type = params->primaryIndexParams->hnswParams.type;
73+
VecSimType type = params->primaryIndexParams->algoParams.hnswParams.type;
7474
if (type == VecSimType_FLOAT32) {
7575
return TieredHNSWFactory::NewIndex<float>(params);
7676
} else if (type == VecSimType_FLOAT64) {
@@ -83,7 +83,7 @@ VecSimIndex *NewIndex(const TieredIndexParams *params) {
8383
VecSimIndex *NewIndex(const TieredIndexParams *params) {
8484
// Tiered index that contains HNSW index as primary index
8585
if (params->primaryIndexParams->algo == VecSimAlgo_HNSWLIB) {
86-
VecSimType type = params->primaryIndexParams->hnswParams.type;
86+
VecSimType type = params->primaryIndexParams->algoParams.hnswParams.type;
8787
if (type == VecSimType_FLOAT32) {
8888
return TieredHNSWFactory::NewIndex<float>(params);
8989
} else if (type == VecSimType_FLOAT64) {
@@ -108,7 +108,7 @@ size_t EstimateInitialSize(const TieredIndexParams *params) {
108108
size_t EstimateElementSize(const TieredIndexParams *params) {
109109
size_t est = 0;
110110
if (params->primaryIndexParams->algo == VecSimAlgo_HNSWLIB) {
111-
est = HNSWFactory::EstimateElementSize(&params->primaryIndexParams->hnswParams);
111+
est = HNSWFactory::EstimateElementSize(&params->primaryIndexParams->algoParams.hnswParams);
112112
}
113113
return est;
114114
}

src/VecSim/index_factories/tiered_factory.h

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@
99
#include "VecSim/vec_sim_common.h"
1010
#include "VecSim/memory/vecsim_malloc.h"
1111
#include "VecSim/vec_sim_index.h"
12+
#include "VecSim/algorithms/hnsw/hnsw_tiered.h"
13+
#include "VecSim/algorithms/brute_force/brute_force.h"
1214

1315
namespace TieredFactory {
1416

@@ -20,4 +22,35 @@ VecSimIndex *NewIndex(const TieredIndexParams *params);
2022
size_t EstimateInitialSize(const TieredIndexParams *params);
2123
size_t EstimateElementSize(const TieredIndexParams *params);
2224

25+
#ifdef BUILD_TESTS
26+
namespace TieredHNSWFactory {
27+
// Build tiered index from existing HNSW index - for internal benchmarks purposes
28+
template <typename DataType, typename DistType>
29+
VecSimIndex *NewIndex(const TieredIndexParams *params, HNSWIndex<DataType, DistType> *hnsw_index) {
30+
// Initialize brute force index.
31+
BFParams bf_params = {.type = params->primaryIndexParams->algoParams.hnswParams.type,
32+
.dim = params->primaryIndexParams->algoParams.hnswParams.dim,
33+
.metric = params->primaryIndexParams->algoParams.hnswParams.metric,
34+
.multi = params->primaryIndexParams->algoParams.hnswParams.multi,
35+
.blockSize = params->primaryIndexParams->algoParams.hnswParams.blockSize};
36+
37+
AbstractIndexInitParams abstractInitParams = {.allocator = hnsw_index->getAllocator(),
38+
.dim = bf_params.dim,
39+
.vecType = bf_params.type,
40+
.metric = bf_params.metric,
41+
.blockSize = bf_params.blockSize,
42+
.multi = bf_params.multi,
43+
.logCtx = params->primaryIndexParams->logCtx};
44+
auto frontendIndex = static_cast<BruteForceIndex<DataType, DistType> *>(
45+
BruteForceFactory::NewIndex(&bf_params, abstractInitParams));
46+
47+
// Create new tiered hnsw index
48+
std::shared_ptr<VecSimAllocator> management_layer_allocator =
49+
VecSimAllocator::newVecsimAllocator();
50+
return new (management_layer_allocator) TieredHNSWIndex<DataType, DistType>(
51+
hnsw_index, frontendIndex, *params, management_layer_allocator);
52+
}
53+
} // namespace TieredHNSWFactory
54+
#endif
55+
2356
}; // namespace TieredFactory

src/VecSim/vec_sim_common.h

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -130,13 +130,15 @@ typedef struct {
130130
} specificParams;
131131
} TieredIndexParams;
132132

133+
typedef union {
134+
HNSWParams hnswParams;
135+
BFParams bfParams;
136+
TieredIndexParams tieredParams;
137+
} AlgoParams;
138+
133139
struct VecSimParams {
134140
VecSimAlgo algo; // Algorithm to use.
135-
union {
136-
HNSWParams hnswParams;
137-
BFParams bfParams;
138-
TieredIndexParams tieredParams;
139-
};
141+
AlgoParams algoParams;
140142
void *logCtx; // External context that stores the index log.
141143
};
142144

src/python_bindings/bindings.cpp

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,8 @@ class PyHNSWLibIndex : public PyVecSimIndex {
240240

241241
public:
242242
explicit PyHNSWLibIndex(const HNSWParams &hnsw_params) {
243-
VecSimParams params = {.algo = VecSimAlgo_HNSWLIB, .hnswParams = hnsw_params};
243+
VecSimParams params = {.algo = VecSimAlgo_HNSWLIB,
244+
.algoParams = {.hnswParams = HNSWParams{hnsw_params}}};
244245
this->index = std::shared_ptr<VecSimIndex>(VecSimIndex_New(&params), VecSimIndex_Free);
245246
}
246247

@@ -434,7 +435,8 @@ class PyTiered_HNSWIndex : public PyTieredIndex {
434435
const TieredHNSWParams &tiered_hnsw_params) {
435436

436437
// Create primaryIndexParams and specific params for hnsw tiered index.
437-
VecSimParams primary_index_params = {.algo = VecSimAlgo_HNSWLIB, .hnswParams = hnsw_params};
438+
VecSimParams primary_index_params = {.algo = VecSimAlgo_HNSWLIB,
439+
.algoParams = {.hnswParams = HNSWParams{hnsw_params}}};
438440

439441
// create TieredIndexParams
440442
TieredIndexParams tiered_params = TieredIndexParams_Init();
@@ -443,7 +445,8 @@ class PyTiered_HNSWIndex : public PyTieredIndex {
443445
tiered_params.specificParams.tieredHnswParams = tiered_hnsw_params;
444446

445447
// create VecSimParams for TieredIndexParams
446-
VecSimParams params = {.algo = VecSimAlgo_TIERED, .tieredParams = tiered_params};
448+
VecSimParams params = {.algo = VecSimAlgo_TIERED,
449+
.algoParams = {.tieredParams = TieredIndexParams{tiered_params}}};
447450

448451
this->index = std::shared_ptr<VecSimIndex>(VecSimIndex_New(&params), VecSimIndex_Free);
449452
// Set the created tiered index in the index external context.
@@ -457,7 +460,8 @@ class PyTiered_HNSWIndex : public PyTieredIndex {
457460
class PyBFIndex : public PyVecSimIndex {
458461
public:
459462
explicit PyBFIndex(const BFParams &bf_params) {
460-
VecSimParams params = {.algo = VecSimAlgo_BF, .bfParams = bf_params};
463+
VecSimParams params = {.algo = VecSimAlgo_BF,
464+
.algoParams = {.bfParams = BFParams{bf_params}}};
461465
this->index = std::shared_ptr<VecSimIndex>(VecSimIndex_New(&params), VecSimIndex_Free);
462466
}
463467
};
@@ -511,11 +515,15 @@ PYBIND11_MODULE(VecSim, m) {
511515
.def(py::init())
512516
.def_readwrite("swapJobThreshold", &TieredHNSWParams::swapJobThreshold);
513517

518+
py::class_<AlgoParams>(m, "AlgoParams")
519+
.def(py::init())
520+
.def_readwrite("hnswParams", &AlgoParams::hnswParams)
521+
.def_readwrite("bfParams", &AlgoParams::bfParams);
522+
514523
py::class_<VecSimParams>(m, "VecSimParams")
515524
.def(py::init())
516525
.def_readwrite("algo", &VecSimParams::algo)
517-
.def_readwrite("hnswParams", &VecSimParams::hnswParams)
518-
.def_readwrite("bfParams", &VecSimParams::bfParams);
526+
.def_readwrite("algoParams", &VecSimParams::algoParams);
519527

520528
py::class_<VecSimQueryParams> queryParams(m, "VecSimQueryParams");
521529

0 commit comments

Comments
 (0)