Skip to content

Commit deb498c

Browse files
committed
Refactor query result - [MOD-5530] (#412)
* remove `arr_cpp.h` and replace all its use cases with vectors * remove `uninitialized` from an error worthy warning in spaces * after rebase fixes * build fixes * another attempt * more build fixes * format * fix bindings * fix leak in test * some review fixes * remove `VecSimQueryResult` setters * renaming `query_result_struct.h` * generalize VecSimQueryResult_Iterator iterator * improved and aligned result collecting loop in bf_batch_iterator * use std::tie * remove error suppressing (seems to be fixed) * renaming `VecSimQueryResult` structures, introducing `VecSimQueryReply` * reordering comment * renaming * tidy up sorting functions * support for gcc-9 * rename VecSim_QueryResult_* codes
1 parent 638397b commit deb498c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+957
-1177
lines changed

src/VecSim/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ add_library(VectorSimilarity ${VECSIM_LIBTYPE}
2424
vec_sim_interface.cpp
2525
query_results.cpp
2626
info_iterator.cpp
27-
query_result_struct.cpp
2827
utils/vec_utils.cpp
2928
utils/data_block.cpp
3029
memory/vecsim_malloc.cpp

src/VecSim/algorithms/brute_force/bf_batch_iterator.h

Lines changed: 32 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,19 @@ class BF_BatchIterator : public VecSimBatchIterator {
2929
size_t scores_valid_start_pos; // the first index in the scores vector that contains a vector
3030
// that hasn't been returned already.
3131

32-
VecSimQueryResult_List searchByHeuristics(size_t n_res, VecSimQueryResult_Order order);
33-
VecSimQueryResult_List selectBasedSearch(size_t n_res);
34-
VecSimQueryResult_List heapBasedSearch(size_t n_res);
32+
VecSimQueryReply *searchByHeuristics(size_t n_res, VecSimQueryReply_Order order);
33+
VecSimQueryReply *selectBasedSearch(size_t n_res);
34+
VecSimQueryReply *heapBasedSearch(size_t n_res);
3535
void swapScores(const vecsim_stl::unordered_map<labelType, size_t> &TopCandidatesIndices,
3636
size_t res_num);
3737

38-
virtual inline VecSimQueryResult_Code calculateScores() = 0;
38+
virtual inline VecSimQueryReply_Code calculateScores() = 0;
3939

4040
public:
4141
BF_BatchIterator(void *query_vector, const BruteForceIndex<DataType, DistType> *bf_index,
4242
VecSimQueryParams *queryParams, std::shared_ptr<VecSimAllocator> allocator);
4343

44-
VecSimQueryResult_List getNextResults(size_t n_res, VecSimQueryResult_Order order) override;
44+
VecSimQueryReply *getNextResults(size_t n_res, VecSimQueryReply_Order order) override;
4545

4646
bool isDepleted() override;
4747

@@ -55,20 +55,20 @@ class BF_BatchIterator : public VecSimBatchIterator {
5555
// heuristics: decide if using heap or select search, based on the ratio between the
5656
// number of remaining results and the index size.
5757
template <typename DataType, typename DistType>
58-
VecSimQueryResult_List
58+
VecSimQueryReply *
5959
BF_BatchIterator<DataType, DistType>::searchByHeuristics(size_t n_res,
60-
VecSimQueryResult_Order order) {
60+
VecSimQueryReply_Order order) {
6161
if ((this->index_label_count - this->getResultsCount()) / 1000 > n_res) {
6262
// Heap based search always returns the results ordered by score
6363
return this->heapBasedSearch(n_res);
6464
}
65-
VecSimQueryResult_List rl = this->selectBasedSearch(n_res);
65+
VecSimQueryReply *rep = this->selectBasedSearch(n_res);
6666
if (order == BY_SCORE) {
67-
sort_results_by_score(rl);
67+
sort_results_by_score(rep);
6868
} else if (order == BY_SCORE_THEN_ID) {
69-
sort_results_by_score_then_id(rl);
69+
sort_results_by_score_then_id(rep);
7070
}
71-
return rl;
71+
return rep;
7272
}
7373

7474
template <typename DataType, typename DistType>
@@ -103,8 +103,8 @@ void BF_BatchIterator<DataType, DistType>::swapScores(
103103
}
104104

105105
template <typename DataType, typename DistType>
106-
VecSimQueryResult_List BF_BatchIterator<DataType, DistType>::heapBasedSearch(size_t n_res) {
107-
VecSimQueryResult_List rl = {0};
106+
VecSimQueryReply *BF_BatchIterator<DataType, DistType>::heapBasedSearch(size_t n_res) {
107+
auto rep = new VecSimQueryReply(this->allocator);
108108
DistType upperBound = std::numeric_limits<DistType>::lowest();
109109
vecsim_stl::max_priority_queue<DistType, labelType> TopCandidates(this->allocator);
110110
// map vector's label to its index in the scores vector.
@@ -128,19 +128,18 @@ VecSimQueryResult_List BF_BatchIterator<DataType, DistType>::heapBasedSearch(siz
128128
}
129129

130130
// Save the top results to return.
131-
rl.results = array_new_len<VecSimQueryResult>(TopCandidates.size(), TopCandidates.size());
132-
for (int i = (int)TopCandidates.size() - 1; i >= 0; --i) {
133-
VecSimQueryResult_SetId(rl.results[i], TopCandidates.top().second);
134-
VecSimQueryResult_SetScore(rl.results[i], TopCandidates.top().first);
131+
rep->results.resize(TopCandidates.size());
132+
for (auto result = rep->results.rbegin(); result != rep->results.rend(); result++) {
133+
std::tie(result->score, result->id) = TopCandidates.top();
135134
TopCandidates.pop();
136135
}
137-
swapScores(TopCandidatesIndices, array_len(rl.results));
138-
return rl;
136+
swapScores(TopCandidatesIndices, rep->results.size());
137+
return rep;
139138
}
140139

141140
template <typename DataType, typename DistType>
142-
VecSimQueryResult_List BF_BatchIterator<DataType, DistType>::selectBasedSearch(size_t n_res) {
143-
VecSimQueryResult_List rl = {0};
141+
VecSimQueryReply *BF_BatchIterator<DataType, DistType>::selectBasedSearch(size_t n_res) {
142+
auto rep = new VecSimQueryReply(this->allocator);
144143
size_t remaining_vectors_count = this->scores.size() - this->scores_valid_start_pos;
145144
// Get an iterator to the effective first element in the scores array, which is the first
146145
// element that hasn't been returned in previous iterations.
@@ -155,15 +154,13 @@ VecSimQueryResult_List BF_BatchIterator<DataType, DistType>::selectBasedSearch(s
155154
// will be placed before it, and all the rest will be placed after.
156155
std::nth_element(valid_begin_it, n_th_element_pos, this->scores.end());
157156

158-
rl.results = array_new<VecSimQueryResult>(n_res);
157+
rep->results.reserve(n_res);
159158
for (size_t i = this->scores_valid_start_pos; i < this->scores_valid_start_pos + n_res; i++) {
160-
rl.results = array_append(rl.results, VecSimQueryResult{});
161-
VecSimQueryResult_SetId(rl.results[array_len(rl.results) - 1], this->scores[i].second);
162-
VecSimQueryResult_SetScore(rl.results[array_len(rl.results) - 1], this->scores[i].first);
159+
rep->results.push_back(VecSimQueryResult{this->scores[i].second, this->scores[i].first});
163160
}
164161
// Update the valid results start position after returning the results.
165-
this->scores_valid_start_pos += array_len(rl.results);
166-
return rl;
162+
this->scores_valid_start_pos += rep->results.size();
163+
return rep;
167164
}
168165

169166
template <typename DataType, typename DistType>
@@ -175,8 +172,8 @@ BF_BatchIterator<DataType, DistType>::BF_BatchIterator(
175172
scores_valid_start_pos(0) {}
176173

177174
template <typename DataType, typename DistType>
178-
VecSimQueryResult_List
179-
BF_BatchIterator<DataType, DistType>::getNextResults(size_t n_res, VecSimQueryResult_Order order) {
175+
VecSimQueryReply *
176+
BF_BatchIterator<DataType, DistType>::getNextResults(size_t n_res, VecSimQueryReply_Order order) {
180177
// Only in the first iteration we need to compute all the scores
181178
if (this->scores.empty()) {
182179
assert(getResultsCount() == 0);
@@ -185,19 +182,19 @@ BF_BatchIterator<DataType, DistType>::getNextResults(size_t n_res, VecSimQueryRe
185182
auto rc = calculateScores();
186183

187184
if (VecSim_OK != rc) {
188-
return {NULL, rc};
185+
return new VecSimQueryReply(this->allocator, rc);
189186
}
190187
}
191188
if (VECSIM_TIMEOUT(this->getTimeoutCtx())) {
192-
return {NULL, VecSim_QueryResult_TimedOut};
189+
return new VecSimQueryReply(this->allocator, VecSim_QueryReply_TimedOut);
193190
}
194-
VecSimQueryResult_List rl = searchByHeuristics(n_res, order);
191+
VecSimQueryReply *rep = searchByHeuristics(n_res, order);
195192

196-
this->updateResultsCount(array_len(rl.results));
193+
this->updateResultsCount(VecSimQueryReply_Len(rep));
197194
if (order == BY_ID) {
198-
sort_results_by_id(rl);
195+
sort_results_by_id(rep);
199196
}
200-
return rl;
197+
return rep;
201198
}
202199

203200
template <typename DataType, typename DistType>

src/VecSim/algorithms/brute_force/bfm_batch_iterator.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@ class BFM_BatchIterator : public BF_BatchIterator<DataType, DistType> {
1919
~BFM_BatchIterator() override = default;
2020

2121
private:
22-
inline VecSimQueryResult_Code calculateScores() override {
22+
inline VecSimQueryReply_Code calculateScores() override {
2323
this->index_label_count = this->index->indexLabelCount();
2424
this->scores.reserve(this->index_label_count);
2525
vecsim_stl::unordered_map<labelType, DistType> tmp_scores(this->index_label_count,
2626
this->allocator);
2727
auto &blocks = this->index->getVectorBlocks();
28-
VecSimQueryResult_Code rc;
28+
VecSimQueryReply_Code rc;
2929

3030
idType curr_id = 0;
3131
for (auto &block : blocks) {
@@ -51,6 +51,6 @@ class BFM_BatchIterator : public BF_BatchIterator<DataType, DistType> {
5151
for (auto p : tmp_scores) {
5252
this->scores.emplace_back(p.second, p.first);
5353
}
54-
return VecSim_QueryResult_OK;
54+
return VecSim_QueryReply_OK;
5555
}
5656
};

src/VecSim/algorithms/brute_force/bfs_batch_iterator.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@ class BFS_BatchIterator : public BF_BatchIterator<DataType, DistType> {
1919
~BFS_BatchIterator() override = default;
2020

2121
private:
22-
inline VecSimQueryResult_Code calculateScores() override {
22+
inline VecSimQueryReply_Code calculateScores() override {
2323
this->index_label_count = this->index->indexLabelCount();
2424
this->scores.reserve(this->index_label_count);
2525
auto &blocks = this->index->getVectorBlocks();
26-
VecSimQueryResult_Code rc;
26+
VecSimQueryReply_Code rc;
2727

2828
idType curr_id = 0;
2929
for (auto &block : blocks) {
@@ -39,6 +39,6 @@ class BFS_BatchIterator : public BF_BatchIterator<DataType, DistType> {
3939
}
4040
}
4141
assert(curr_id == this->index->indexSize());
42-
return VecSim_QueryResult_OK;
42+
return VecSim_QueryReply_OK;
4343
}
4444
};

src/VecSim/algorithms/brute_force/brute_force.h

Lines changed: 29 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#include "VecSim/utils/vecsim_results_container.h"
1414
#include "VecSim/index_factories/brute_force_factory.h"
1515
#include "VecSim/spaces/spaces.h"
16-
#include "VecSim/query_result_struct.h"
16+
#include "VecSim/query_result_definitions.h"
1717
#include "VecSim/utils/vec_utils.h"
1818

1919
#include <cstring>
@@ -39,14 +39,14 @@ class BruteForceIndex : public VecSimIndexAbstract<DistType> {
3939
size_t indexCapacity() const override;
4040
vecsim_stl::vector<DistType> computeBlockScores(const DataBlock &block, const void *queryBlob,
4141
void *timeoutCtx,
42-
VecSimQueryResult_Code *rc) const;
42+
VecSimQueryReply_Code *rc) const;
4343
inline DataType *getDataByInternalId(idType id) const {
4444
return (DataType *)vectorBlocks.at(id / this->blockSize).getElement(id % this->blockSize);
4545
}
46-
virtual VecSimQueryResult_List topKQuery(const void *queryBlob, size_t k,
47-
VecSimQueryParams *queryParams) const override;
48-
virtual VecSimQueryResult_List rangeQuery(const void *queryBlob, double radius,
49-
VecSimQueryParams *queryParams) const override;
46+
virtual VecSimQueryReply *topKQuery(const void *queryBlob, size_t k,
47+
VecSimQueryParams *queryParams) const override;
48+
virtual VecSimQueryReply *rangeQuery(const void *queryBlob, double radius,
49+
VecSimQueryParams *queryParams) const override;
5050
virtual VecSimIndexInfo info() const override;
5151
virtual VecSimInfoIterator *infoIterator() const override;
5252
VecSimIndexBasicInfo basicInfo() const override;
@@ -243,32 +243,31 @@ template <typename DataType, typename DistType>
243243
vecsim_stl::vector<DistType>
244244
BruteForceIndex<DataType, DistType>::computeBlockScores(const DataBlock &block,
245245
const void *queryBlob, void *timeoutCtx,
246-
VecSimQueryResult_Code *rc) const {
246+
VecSimQueryReply_Code *rc) const {
247247
size_t len = block.getLength();
248248
vecsim_stl::vector<DistType> scores(len, this->allocator);
249249
for (size_t i = 0; i < len; i++) {
250250
if (VECSIM_TIMEOUT(timeoutCtx)) {
251-
*rc = VecSim_QueryResult_TimedOut;
251+
*rc = VecSim_QueryReply_TimedOut;
252252
return scores;
253253
}
254254
scores[i] = this->distFunc(block.getElement(i), queryBlob, this->dim);
255255
}
256-
*rc = VecSim_QueryResult_OK;
256+
*rc = VecSim_QueryReply_OK;
257257
return scores;
258258
}
259259

260260
template <typename DataType, typename DistType>
261-
VecSimQueryResult_List
261+
VecSimQueryReply *
262262
BruteForceIndex<DataType, DistType>::topKQuery(const void *queryBlob, size_t k,
263263
VecSimQueryParams *queryParams) const {
264264

265-
VecSimQueryResult_List rl = {0};
265+
auto rep = new VecSimQueryReply(this->allocator);
266266
void *timeoutCtx = queryParams ? queryParams->timeoutCtx : NULL;
267267
this->lastMode = STANDARD_KNN;
268268

269269
if (0 == k) {
270-
rl.results = array_new<VecSimQueryResult>(0);
271-
return rl;
270+
return rep;
272271
}
273272

274273
DistType upperBound = std::numeric_limits<DistType>::lowest();
@@ -277,10 +276,10 @@ BruteForceIndex<DataType, DistType>::topKQuery(const void *queryBlob, size_t k,
277276
// For every block, compute its vectors scores and update the Top candidates max heap
278277
idType curr_id = 0;
279278
for (auto &vectorBlock : this->vectorBlocks) {
280-
auto scores = computeBlockScores(vectorBlock, queryBlob, timeoutCtx, &rl.code);
281-
if (VecSim_OK != rl.code) {
279+
auto scores = computeBlockScores(vectorBlock, queryBlob, timeoutCtx, &rep->code);
280+
if (VecSim_OK != rep->code) {
282281
delete TopCandidates;
283-
return rl;
282+
return rep;
284283
}
285284
for (size_t i = 0; i < scores.size(); i++) {
286285
// If we have less than k or a better score, insert it.
@@ -297,22 +296,20 @@ BruteForceIndex<DataType, DistType>::topKQuery(const void *queryBlob, size_t k,
297296
}
298297
assert(curr_id == this->count);
299298

300-
rl.results = array_new_len<VecSimQueryResult>(TopCandidates->size(), TopCandidates->size());
301-
for (int i = (int)TopCandidates->size() - 1; i >= 0; --i) {
302-
VecSimQueryResult_SetId(rl.results[i], TopCandidates->top().second);
303-
VecSimQueryResult_SetScore(rl.results[i], TopCandidates->top().first);
299+
rep->results.resize(TopCandidates->size());
300+
for (auto result = rep->results.rbegin(); result != rep->results.rend(); ++result) {
301+
std::tie(result->score, result->id) = TopCandidates->top();
304302
TopCandidates->pop();
305303
}
306304
delete TopCandidates;
307-
rl.code = VecSim_QueryResult_OK;
308-
return rl;
305+
return rep;
309306
}
310307

311308
template <typename DataType, typename DistType>
312-
VecSimQueryResult_List
309+
VecSimQueryReply *
313310
BruteForceIndex<DataType, DistType>::rangeQuery(const void *queryBlob, double radius,
314311
VecSimQueryParams *queryParams) const {
315-
auto rl = (VecSimQueryResult_List){0};
312+
auto rep = new VecSimQueryReply(this->allocator);
316313
void *timeoutCtx = queryParams ? queryParams->timeoutCtx : nullptr;
317314
this->lastMode = RANGE_QUERY;
318315

@@ -322,10 +319,9 @@ BruteForceIndex<DataType, DistType>::rangeQuery(const void *queryBlob, double ra
322319

323320
DistType radius_ = DistType(radius);
324321
idType curr_id = 0;
325-
rl.code = VecSim_QueryResult_OK;
326322
for (auto &vectorBlock : this->vectorBlocks) {
327-
auto scores = computeBlockScores(vectorBlock, queryBlob, timeoutCtx, &rl.code);
328-
if (VecSim_OK != rl.code) {
323+
auto scores = computeBlockScores(vectorBlock, queryBlob, timeoutCtx, &rep->code);
324+
if (VecSim_OK != rep->code) {
329325
break;
330326
}
331327
for (size_t i = 0; i < scores.size(); i++) {
@@ -335,10 +331,11 @@ BruteForceIndex<DataType, DistType>::rangeQuery(const void *queryBlob, double ra
335331
++curr_id;
336332
}
337333
}
338-
// assert only if the loop finished iterating all the ids (we didn't get rl.code != VecSim_OK).
339-
assert((rl.code != VecSim_OK || curr_id == this->count));
340-
rl.results = res_container->get_results();
341-
return rl;
334+
// assert only if the loop finished iterating all the ids (we didn't get rep->code !=
335+
// VecSim_OK).
336+
assert((rep->code != VecSim_OK || curr_id == this->count));
337+
rep->results = res_container->get_results();
338+
return rep;
342339
}
343340

344341
template <typename DataType, typename DistType>
@@ -365,7 +362,7 @@ VecSimInfoIterator *BruteForceIndex<DataType, DistType>::infoIterator() const {
365362
VecSimIndexInfo info = this->info();
366363
// For readability. Update this number when needed.
367364
size_t numberOfInfoFields = 10;
368-
VecSimInfoIterator *infoIterator = new VecSimInfoIterator(numberOfInfoFields);
365+
VecSimInfoIterator *infoIterator = new VecSimInfoIterator(numberOfInfoFields, this->allocator);
369366

370367
infoIterator->addInfoField(
371368
VecSim_InfoField{.fieldName = VecSimCommonStrings::ALGORITHM_STRING,

0 commit comments

Comments
 (0)