Skip to content

Commit 2b1580f

Browse files
authored
[0.7] Refactor Info Report - [MOD-9321] (#656)
* Refactor Info Report - [MOD-9321] (#650)
* refactor `info()` API to indicate it should be used for debug, and prepare structs and new API for statistics info
* test fixes
* implement new API
* format
* improve tests
* move tiered statisticInfo to base class
* refactor
* minor simplification
* review fixes
(cherry picked from commit 5f7dbdf)
* fix compilation for 0.7
1 parent a3cacc5 commit 2b1580f

25 files changed

+398
-316
lines changed

src/VecSim/algorithms/brute_force/brute_force.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ class BruteForceIndex : public VecSimIndexAbstract<DistType> {
4747
VecSimQueryParams *queryParams) const override;
4848
virtual VecSimQueryReply *rangeQuery(const void *queryBlob, double radius,
4949
VecSimQueryParams *queryParams) const override;
50-
virtual VecSimIndexInfo info() const override;
51-
virtual VecSimInfoIterator *infoIterator() const override;
50+
VecSimIndexDebugInfo debugInfo() const override;
51+
VecSimDebugInfoIterator *debugInfoIterator() const override;
5252
VecSimIndexBasicInfo basicInfo() const override;
5353
virtual VecSimBatchIterator *newBatchIterator(const void *queryBlob,
5454
VecSimQueryParams *queryParams) const override;
@@ -339,9 +339,9 @@ BruteForceIndex<DataType, DistType>::rangeQuery(const void *queryBlob, double ra
339339
}
340340

341341
template <typename DataType, typename DistType>
342-
VecSimIndexInfo BruteForceIndex<DataType, DistType>::info() const {
342+
VecSimIndexDebugInfo BruteForceIndex<DataType, DistType>::debugInfo() const {
343343

344-
VecSimIndexInfo info;
344+
VecSimIndexDebugInfo info;
345345
info.commonInfo = this->getCommonInfo();
346346
info.commonInfo.basicInfo.algo = VecSimAlgo_BF;
347347

@@ -358,11 +358,11 @@ VecSimIndexBasicInfo BruteForceIndex<DataType, DistType>::basicInfo() const {
358358
}
359359

360360
template <typename DataType, typename DistType>
361-
VecSimInfoIterator *BruteForceIndex<DataType, DistType>::infoIterator() const {
362-
VecSimIndexInfo info = this->info();
361+
VecSimDebugInfoIterator *BruteForceIndex<DataType, DistType>::debugInfoIterator() const {
362+
VecSimIndexDebugInfo info = this->debugInfo();
363363
// For readability. Update this number when needed.
364364
size_t numberOfInfoFields = 10;
365-
VecSimInfoIterator *infoIterator = new VecSimInfoIterator(numberOfInfoFields, this->allocator);
365+
auto *infoIterator = new VecSimDebugInfoIterator(numberOfInfoFields, this->allocator);
366366

367367
infoIterator->addInfoField(
368368
VecSim_InfoField{.fieldName = VecSimCommonStrings::ALGORITHM_STRING,

src/VecSim/algorithms/hnsw/hnsw.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -255,9 +255,9 @@ class HNSWIndex : public VecSimIndexAbstract<DistType>,
255255
void unlockNodeLinks(ElementGraphData *node_data) const;
256256
VisitedNodesHandler *getVisitedList() const;
257257
void returnVisitedList(VisitedNodesHandler *visited_nodes_handler) const;
258-
VecSimIndexInfo info() const override;
258+
VecSimIndexDebugInfo debugInfo() const override;
259259
VecSimIndexBasicInfo basicInfo() const override;
260-
VecSimInfoIterator *infoIterator() const override;
260+
VecSimDebugInfoIterator *debugInfoIterator() const override;
261261
bool preferAdHocSearch(size_t subsetSize, size_t k, bool initial_check) const override;
262262
const char *getDataByInternalId(idType internal_id) const;
263263
ElementGraphData *getGraphDataByInternalId(idType internal_id) const;
@@ -2086,18 +2086,19 @@ VecSimQueryReply *HNSWIndex<DataType, DistType>::rangeQuery(const void *query_da
20862086
}
20872087

20882088
template <typename DataType, typename DistType>
2089-
VecSimIndexInfo HNSWIndex<DataType, DistType>::info() const {
2089+
VecSimIndexDebugInfo HNSWIndex<DataType, DistType>::debugInfo() const {
20902090

2091-
VecSimIndexInfo info;
2091+
VecSimIndexDebugInfo info;
20922092
info.commonInfo = this->getCommonInfo();
2093+
auto [ep_id, max_level] = this->safeGetEntryPointState();
20932094

20942095
info.commonInfo.basicInfo.algo = VecSimAlgo_HNSWLIB;
20952096
info.hnswInfo.M = this->getM();
20962097
info.hnswInfo.efConstruction = this->getEfConstruction();
20972098
info.hnswInfo.efRuntime = this->getEf();
20982099
info.hnswInfo.epsilon = this->epsilon;
2099-
info.hnswInfo.max_level = this->getMaxLevel();
2100-
info.hnswInfo.entrypoint = this->getEntryPointLabel();
2100+
info.hnswInfo.max_level = max_level;
2101+
info.hnswInfo.entrypoint = ep_id != INVALID_ID ? getExternalLabel(ep_id) : INVALID_LABEL;
21012102
info.hnswInfo.visitedNodesPoolSize = this->visitedNodesHandlerPool.getPoolSize();
21022103
info.hnswInfo.numberOfMarkedDeletedNodes = this->getNumMarkedDeleted();
21032104
return info;
@@ -2112,11 +2113,11 @@ VecSimIndexBasicInfo HNSWIndex<DataType, DistType>::basicInfo() const {
21122113
}
21132114

21142115
template <typename DataType, typename DistType>
2115-
VecSimInfoIterator *HNSWIndex<DataType, DistType>::infoIterator() const {
2116-
VecSimIndexInfo info = this->info();
2116+
VecSimDebugInfoIterator *HNSWIndex<DataType, DistType>::debugInfoIterator() const {
2117+
VecSimIndexDebugInfo info = this->debugInfo();
21172118
// For readability. Update this number when needed.
21182119
size_t numberOfInfoFields = 17;
2119-
VecSimInfoIterator *infoIterator = new VecSimInfoIterator(numberOfInfoFields, this->allocator);
2120+
auto *infoIterator = new VecSimDebugInfoIterator(numberOfInfoFields, this->allocator);
21202121

21212122
infoIterator->addInfoField(
21222123
VecSim_InfoField{.fieldName = VecSimCommonStrings::ALGORITHM_STRING,

src/VecSim/algorithms/hnsw/hnsw_tiered.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,9 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {
193193
double getDistanceFrom_Unsafe(labelType label, const void *blob) const override;
194194
// Do nothing here, each tier (flat buffer and HNSW) should increase capacity for itself when
195195
// needed.
196-
VecSimIndexInfo info() const override;
196+
VecSimIndexDebugInfo debugInfo() const override;
197197
VecSimIndexBasicInfo basicInfo() const override;
198-
VecSimInfoIterator *infoIterator() const override;
198+
VecSimDebugInfoIterator *debugInfoIterator() const override;
199199
VecSimBatchIterator *newBatchIterator(const void *queryBlob,
200200
VecSimQueryParams *queryParams) const override {
201201
size_t blobSize = this->backendIndex->getDim() * sizeof(DataType);
@@ -1118,8 +1118,8 @@ void TieredHNSWIndex<DataType, DistType>::TieredHNSW_BatchIterator::filter_irrel
11181118
}
11191119

11201120
template <typename DataType, typename DistType>
1121-
VecSimIndexInfo TieredHNSWIndex<DataType, DistType>::info() const {
1122-
auto info = VecSimTieredIndex<DataType, DistType>::info();
1121+
VecSimIndexDebugInfo TieredHNSWIndex<DataType, DistType>::debugInfo() const {
1122+
auto info = VecSimTieredIndex<DataType, DistType>::debugInfo();
11231123

11241124
HnswTieredInfo hnswTieredInfo = {.pendingSwapJobsThreshold = this->pendingSwapJobsThreshold};
11251125
info.tieredInfo.specificTieredBackendInfo.hnswTieredInfo = hnswTieredInfo;
@@ -1128,10 +1128,10 @@ VecSimIndexInfo TieredHNSWIndex<DataType, DistType>::info() const {
11281128
}
11291129

11301130
template <typename DataType, typename DistType>
1131-
VecSimInfoIterator *TieredHNSWIndex<DataType, DistType>::infoIterator() const {
1132-
VecSimIndexInfo info = this->info();
1131+
VecSimDebugInfoIterator *TieredHNSWIndex<DataType, DistType>::debugInfoIterator() const {
1132+
VecSimIndexDebugInfo info = this->debugInfo();
11331133
// Get the base tiered fields.
1134-
auto *infoIterator = VecSimTieredIndex<DataType, DistType>::infoIterator();
1134+
auto *infoIterator = VecSimTieredIndex<DataType, DistType>::debugInfoIterator();
11351135

11361136
// Tiered HNSW specific param.
11371137
infoIterator->addInfoField(VecSim_InfoField{

src/VecSim/info_iterator.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,23 @@
66

77
#include "info_iterator_struct.h"
88

9-
extern "C" size_t VecSimInfoIterator_NumberOfFields(VecSimInfoIterator *infoIterator) {
9+
extern "C" size_t VecSimDebugInfoIterator_NumberOfFields(VecSimDebugInfoIterator *infoIterator) {
1010
return infoIterator->numberOfFields();
1111
}
1212

13-
extern "C" bool VecSimInfoIterator_HasNextField(VecSimInfoIterator *infoIterator) {
13+
extern "C" bool VecSimDebugInfoIterator_HasNextField(VecSimDebugInfoIterator *infoIterator) {
1414
return infoIterator->hasNext();
1515
}
1616

17-
extern "C" VecSim_InfoField *VecSimInfoIterator_NextField(VecSimInfoIterator *infoIterator) {
17+
extern "C" VecSim_InfoField *
18+
VecSimDebugInfoIterator_NextField(VecSimDebugInfoIterator *infoIterator) {
1819
if (infoIterator->hasNext()) {
1920
return infoIterator->next();
2021
}
2122
return NULL;
2223
}
2324

24-
extern "C" void VecSimInfoIterator_Free(VecSimInfoIterator *infoIterator) {
25+
extern "C" void VecSimDebugInfoIterator_Free(VecSimDebugInfoIterator *infoIterator) {
2526
if (infoIterator != NULL) {
2627
delete infoIterator;
2728
}

src/VecSim/info_iterator.h

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
#pragma once
88
#include <stdlib.h>
9+
#include <stdint.h>
910
#ifdef __cplusplus
1011
extern "C" {
1112
#endif
@@ -15,7 +16,7 @@ extern "C" {
1516
* the type VecSim_InfoFieldType. This struct exposes an iterator-like API to iterate over the
1617
* information fields.
1718
*/
18-
typedef struct VecSimInfoIterator VecSimInfoIterator;
19+
typedef struct VecSimDebugInfoIterator VecSimDebugInfoIterator;
1920

2021
typedef enum {
2122
INFOFIELD_STRING,
@@ -26,11 +27,11 @@ typedef enum {
2627
} VecSim_InfoFieldType;
2728

2829
typedef union {
29-
double floatingPointValue; // Floating point value. 64 bits float.
30-
int64_t integerValue; // Integer value. Signed 64 bits integer.
31-
u_int64_t uintegerValue; // Unsigned value. Unsigned 64 buts integer.
32-
const char *stringValue; // String value.
33-
VecSimInfoIterator *iteratorValue; // Iterator value.
30+
double floatingPointValue; // Floating point value. 64 bits float.
31+
int64_t integerValue; // Integer value. Signed 64 bits integer.
32+
uint64_t uintegerValue; // Unsigned value. Unsigned 64 bits integer.
33+
const char *stringValue; // String value.
34+
VecSimDebugInfoIterator *iteratorValue; // Iterator value.
3435
} FieldValue;
3536

3637
/**
@@ -51,7 +52,7 @@ typedef struct {
5152
* @param infoIterator Given info iterator.
5253
* @return size_t Number of fields.
5354
*/
54-
size_t VecSimInfoIterator_NumberOfFields(VecSimInfoIterator *infoIterator);
55+
size_t VecSimDebugInfoIterator_NumberOfFields(VecSimDebugInfoIterator *infoIterator);
5556

5657
/**
5758
* @brief Returns if the fields iterator is depleted.
@@ -60,22 +61,22 @@ size_t VecSimInfoIterator_NumberOfFields(VecSimInfoIterator *infoIterator);
6061
* @return true Iterator is not depleted.
6162
* @return false Otherwise.
6263
*/
63-
bool VecSimInfoIterator_HasNextField(VecSimInfoIterator *infoIterator);
64+
bool VecSimDebugInfoIterator_HasNextField(VecSimDebugInfoIterator *infoIterator);
6465

6566
/**
6667
* @brief Returns a pointer to the next info field.
6768
*
6869
* @param infoIterator Given info iterator.
6970
* @return VecSim_InfoField* A pointer to the next info field.
7071
*/
71-
VecSim_InfoField *VecSimInfoIterator_NextField(VecSimInfoIterator *infoIterator);
72+
VecSim_InfoField *VecSimDebugInfoIterator_NextField(VecSimDebugInfoIterator *infoIterator);
7273

7374
/**
7475
* @brief Free an info iterator.
7576
*
7677
* @param infoIterator Given info iterator.
7778
*/
78-
void VecSimInfoIterator_Free(VecSimInfoIterator *infoIterator);
79+
void VecSimDebugInfoIterator_Free(VecSimDebugInfoIterator *infoIterator);
7980

8081
#ifdef __cplusplus
8182
}

src/VecSim/info_iterator_struct.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
#include "info_iterator.h"
1010
#include "VecSim/utils/vecsim_stl.h"
1111

12-
struct VecSimInfoIterator {
12+
struct VecSimDebugInfoIterator {
1313
private:
1414
vecsim_stl::vector<VecSim_InfoField> fields;
1515
size_t currentIndex;
1616

1717
public:
18-
VecSimInfoIterator(size_t len, const std::shared_ptr<VecSimAllocator> &alloc)
18+
VecSimDebugInfoIterator(size_t len, const std::shared_ptr<VecSimAllocator> &alloc)
1919
: fields(alloc), currentIndex(0) {
2020
this->fields.reserve(len);
2121
}
@@ -28,7 +28,7 @@ struct VecSimInfoIterator {
2828

2929
inline size_t numberOfFields() { return this->fields.size(); }
3030

31-
virtual ~VecSimInfoIterator() {
31+
virtual ~VecSimDebugInfoIterator() {
3232
for (size_t i = 0; i < this->fields.size(); i++) {
3333
if (this->fields[i].fieldType == INFOFIELD_ITERATOR) {
3434
delete this->fields[i].fieldValue.iteratorValue;

src/VecSim/vec_sim.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -221,16 +221,22 @@ extern "C" void VecSimIndex_Free(VecSimIndex *index) {
221221
delete index;
222222
}
223223

224-
extern "C" VecSimIndexInfo VecSimIndex_Info(VecSimIndex *index) { return index->info(); }
224+
extern "C" VecSimIndexDebugInfo VecSimIndex_DebugInfo(VecSimIndex *index) {
225+
return index->debugInfo();
226+
}
225227

226-
extern "C" VecSimInfoIterator *VecSimIndex_InfoIterator(VecSimIndex *index) {
227-
return index->infoIterator();
228+
extern "C" VecSimDebugInfoIterator *VecSimIndex_DebugInfoIterator(VecSimIndex *index) {
229+
return index->debugInfoIterator();
228230
}
229231

230232
extern "C" VecSimIndexBasicInfo VecSimIndex_BasicInfo(VecSimIndex *index) {
231233
return index->basicInfo();
232234
}
233235

236+
extern "C" VecSimIndexStatsInfo VecSimIndex_StatsInfo(VecSimIndex *index) {
237+
return index->statisticInfo();
238+
}
239+
234240
extern "C" VecSimBatchIterator *VecSimBatchIterator_New(VecSimIndex *index, const void *queryBlob,
235241
VecSimQueryParams *queryParams) {
236242
return index->newBatchIteratorWrapper(queryBlob, queryParams);

src/VecSim/vec_sim.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ VecSimQueryReply *VecSimIndex_RangeQuery(VecSimIndex *index, const void *queryBl
152152
* @param index the index to return its info.
153153
* @return Index general and specific meta-data.
154154
*/
155-
VecSimIndexInfo VecSimIndex_Info(VecSimIndex *index);
155+
VecSimIndexDebugInfo VecSimIndex_DebugInfo(VecSimIndex *index);
156156

157157
/**
158158
* @brief Return basic immutable index information.
@@ -161,13 +161,20 @@ VecSimIndexInfo VecSimIndex_Info(VecSimIndex *index);
161161
*/
162162
VecSimIndexBasicInfo VecSimIndex_BasicInfo(VecSimIndex *index);
163163

164+
/**
165+
* @brief Return statistics information.
166+
* @param index the index to return its info.
167+
* @return Index statistic data.
168+
*/
169+
VecSimIndexStatsInfo VecSimIndex_StatsInfo(VecSimIndex *index);
170+
164171
/**
165172
* @brief Returns an info iterator for generic reply purposes.
166173
*
167174
* @param index this index to return its info.
168-
* @return VecSimInfoIterator* An iterable containing the index general and specific meta-data.
175+
* @return VecSimDebugInfoIterator* An iterable containing the index general and specific meta-data.
169176
*/
170-
VecSimInfoIterator *VecSimIndex_InfoIterator(VecSimIndex *index);
177+
VecSimDebugInfoIterator *VecSimIndex_DebugInfoIterator(VecSimIndex *index);
171178

172179
/**
173180
* @brief Create a new batch iterator for a specific index, for a specific query vector,

src/VecSim/vec_sim_common.h

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -200,16 +200,25 @@ typedef struct {
200200
* Index info that is static and immutable (cannot be changed over time)
201201
*/
202202
typedef struct {
203-
VecSimAlgo algo; // Algorithm being used.
204-
size_t blockSize; // Brute force algorithm vector block (mini matrix) size
203+
VecSimAlgo algo; // Algorithm being used (if index is tiered, this is the backend index).
205204
VecSimMetric metric; // Index distance metric
206205
VecSimType type; // Datatype the index holds.
207206
bool isMulti; // Determines if the index should multi-index or not.
207+
bool isTiered; // Whether the index is tiered or not.
208+
size_t blockSize; // Brute force algorithm vector block (mini matrix) size
208209
size_t dim; // Vector size (dimension).
209-
210-
bool isTiered; // The algorithm for the tiered index (if algo is tiered).
211210
} VecSimIndexBasicInfo;
212211

212+
/**
213+
* Index info for statistics - a thin and efficient (no locks, no calculations) info. Can be used in
214+
* production without worrying about performance
215+
*/
216+
typedef struct {
217+
size_t memory;
218+
size_t numberOfMarkedDeleted; // The number of vectors that are marked as deleted (HNSW/tiered
219+
// only).
220+
} VecSimIndexStatsInfo;
221+
213222
typedef struct {
214223
VecSimIndexBasicInfo basicInfo; // Index immutable meta-data.
215224
size_t indexSize; // Current count of vectors.
@@ -257,7 +266,7 @@ typedef struct {
257266
} tieredInfoStruct;
258267

259268
/**
260-
* @brief Index information. Mainly used for debug/testing.
269+
* @brief Index information. Should only be used for debug/testing.
261270
*
262271
*/
263272
typedef struct {
@@ -267,7 +276,7 @@ typedef struct {
267276
hnswInfoStruct hnswInfo;
268277
tieredInfoStruct tieredInfo;
269278
};
270-
} VecSimIndexInfo;
279+
} VecSimIndexDebugInfo;
271280

272281
// Memory function declarations.
273282
typedef void *(*allocFn)(size_t n);

src/VecSim/vec_sim_index.h

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,12 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
106106
inline VecSimMetric getMetric() const { return metric; }
107107
inline size_t getDataSize() const { return dataSize; }
108108
inline size_t getBlockSize() const { return blockSize; }
109+
virtual inline VecSimIndexStatsInfo statisticInfo() const override {
110+
return VecSimIndexStatsInfo{
111+
.memory = this->getAllocationSize(),
112+
.numberOfMarkedDeleted = 0,
113+
};
114+
}
109115

110116
virtual VecSimQueryReply *rangeQuery(const void *queryBlob, double radius,
111117
VecSimQueryParams *queryParams) const = 0;
@@ -134,7 +140,8 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
134140
}
135141

136142
// Adds all common info to the info iterator, besides the block size (currently 8 fields).
137-
void addCommonInfoToIterator(VecSimInfoIterator *infoIterator, const CommonInfo &info) const {
143+
void addCommonInfoToIterator(VecSimDebugInfoIterator *infoIterator,
144+
const CommonInfo &info) const {
138145
infoIterator->addInfoField(VecSim_InfoField{
139146
.fieldName = VecSimCommonStrings::TYPE_STRING,
140147
.fieldType = INFOFIELD_STRING,
@@ -195,11 +202,13 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
195202
* @return basicInfo
196203
*/
197204
VecSimIndexBasicInfo getBasicInfo() const {
198-
VecSimIndexBasicInfo info{.blockSize = this->blockSize,
199-
.metric = this->metric,
200-
.type = this->vecType,
201-
.isMulti = this->isMulti,
202-
.dim = this->dim};
205+
VecSimIndexBasicInfo info{
206+
.metric = this->metric,
207+
.type = this->vecType,
208+
.isMulti = this->isMulti,
209+
.blockSize = this->blockSize,
210+
.dim = this->dim,
211+
};
203212
return info;
204213
}
205214

0 commit comments

Comments
 (0)