Skip to content

Commit 520531a

Browse files
authored
[0.6] [MOD-7297] Replace Variable Length Array on stack with heap allocation (#762)
* avoid vla * install cmake only if doesnt exist * macos-14 * update array size * bump cpu_features to support macos15 detectiond * revert macos version
1 parent e07f1ae commit 520531a

File tree

12 files changed

+70
-37
lines changed

12 files changed

+70
-37
lines changed

.install/install_cmake.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@ MODE=$1 # whether to install using sudo or not
66

77
if [[ $OS_TYPE = 'Darwin' ]]
88
then
9-
brew install cmake
9+
if ! command -v cmake &> /dev/null; then
10+
brew install cmake
11+
else
12+
echo "cmake already installed"
13+
fi
1014
else
1115
if [[ $processor = 'x86_64' ]]
1216
then

cmake/cpu_features.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@ option(CMAKE_POSITION_INDEPENDENT_CODE "" ON)
44
FetchContent_Declare(
55
cpu_features
66
GIT_REPOSITORY https://github.com/google/cpu_features.git
7-
GIT_TAG v0.9.0
7+
GIT_TAG v0.10.1
88
)
99
FetchContent_MakeAvailable(cpu_features)

src/VecSim/algorithms/brute_force/brute_force.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -243,12 +243,11 @@ BruteForceIndex<DataType, DistType>::topKQuery(const void *queryBlob, size_t k,
243243
return rl;
244244
}
245245

246-
DataType normalized_blob[this->dim]; // This will be use only if metric == VecSimMetric_Cosine.
246+
auto normalized_blob = this->getAllocator()->allocate_unique(this->dim * sizeof(DataType));
247247
if (this->metric == VecSimMetric_Cosine) {
248-
memcpy(normalized_blob, queryBlob, this->dim * sizeof(DataType));
249-
normalizeVector(normalized_blob, this->dim);
250-
251-
queryBlob = normalized_blob;
248+
memcpy(normalized_blob.get(), queryBlob, this->dim * sizeof(DataType));
249+
normalizeVector(static_cast<DataType *>(normalized_blob.get()), this->dim);
250+
queryBlob = normalized_blob.get();
252251
}
253252

254253
DistType upperBound = std::numeric_limits<DistType>::lowest();
@@ -296,11 +295,11 @@ BruteForceIndex<DataType, DistType>::rangeQuery(const void *queryBlob, double ra
296295
void *timeoutCtx = queryParams ? queryParams->timeoutCtx : nullptr;
297296
this->last_mode = RANGE_QUERY;
298297

299-
DataType normalized_blob[this->dim]; // This will be use only if metric == VecSimMetric_Cosine.
298+
auto normalized_blob = this->getAllocator()->allocate_unique(this->dim * sizeof(DataType));
300299
if (this->metric == VecSimMetric_Cosine) {
301-
memcpy(normalized_blob, queryBlob, this->dim * sizeof(DataType));
302-
normalizeVector(normalized_blob, this->dim);
303-
queryBlob = normalized_blob;
300+
memcpy(normalized_blob.get(), queryBlob, this->dim * sizeof(DataType));
301+
normalizeVector(static_cast<DataType *>(normalized_blob.get()), this->dim);
302+
queryBlob = normalized_blob.get();
304303
}
305304

306305
// Compute scores in every block and save results that are within the range.

src/VecSim/algorithms/brute_force/brute_force_multi.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,11 @@ class BruteForceIndex_Multi : public BruteForceIndex<DataType, DistType> {
7373
template <typename DataType, typename DistType>
7474
int BruteForceIndex_Multi<DataType, DistType>::addVector(const void *vector_data, labelType label) {
7575

76-
DataType normalized_blob[this->dim]; // This will be use only if metric == VecSimMetric_Cosine.
76+
auto normalized_blob = this->getAllocator()->allocate_unique(this->dim * sizeof(DataType));
7777
if (this->metric == VecSimMetric_Cosine) {
78-
memcpy(normalized_blob, vector_data, this->dim * sizeof(DataType));
79-
normalizeVector(normalized_blob, this->dim);
80-
vector_data = normalized_blob;
78+
memcpy(normalized_blob.get(), vector_data, this->dim * sizeof(DataType));
79+
normalizeVector(static_cast<DataType *>(normalized_blob.get()), this->dim);
80+
vector_data = normalized_blob.get();
8181
}
8282

8383
return this->appendVector(vector_data, label);

src/VecSim/algorithms/brute_force/brute_force_single.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,11 @@ template <typename DataType, typename DistType>
9494
int BruteForceIndex_Single<DataType, DistType>::addVector(const void *vector_data,
9595
labelType label) {
9696

97-
DataType normalized_blob[this->dim]; // This will be use only if metric == VecSimMetric_Cosine
97+
auto normalized_blob = this->getAllocator()->allocate_unique(this->dim * sizeof(DataType));
9898
if (this->metric == VecSimMetric_Cosine) {
99-
memcpy(normalized_blob, vector_data, this->dim * sizeof(DataType));
100-
normalizeVector(normalized_blob, this->dim);
101-
vector_data = normalized_blob;
99+
memcpy(normalized_blob.get(), vector_data, this->dim * sizeof(DataType));
100+
normalizeVector(static_cast<DataType *>(normalized_blob.get()), this->dim);
101+
vector_data = normalized_blob.get();
102102
}
103103

104104
auto optionalID = this->labelToIdLookup.find(label);

src/VecSim/algorithms/hnsw/hnsw.h

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -680,7 +680,9 @@ idType HNSWIndex<DataType, DistType>::mutuallyConnectNewElement(
680680
neighbors_bitmap[neighbor_neighbors[j]] = true;
681681
}
682682

683-
idType removed_links[sz_link_list_other + 1];
683+
auto removed_links_alloc =
684+
this->getAllocator()->allocate_unique((sz_link_list_other + 1) * sizeof(idType));
685+
auto removed_links = static_cast<idType *>(removed_links_alloc.get());
684686
size_t removed_links_num;
685687
removeExtraLinks(ll_other, candidates, Mcurmax, neighbor_neighbors, neighbors_bitmap,
686688
removed_links, &removed_links_num);
@@ -755,7 +757,9 @@ void HNSWIndex<DataType, DistType>::repairConnectionsForDeletion(
755757

756758
size_t Mcurmax = level ? maxM_ : maxM0_;
757759
size_t removed_links_num;
758-
idType removed_links[neighbour_neighbours_count];
760+
auto removed_links_alloc =
761+
this->getAllocator()->allocate_unique(neighbour_neighbours_count * sizeof(idType));
762+
idType *removed_links = static_cast<idType *>(removed_links_alloc.get());
759763
removeExtraLinks(neighbour_neighbours_list, candidates, Mcurmax, neighbour_neighbours,
760764
neighbour_orig_neighbours_set, removed_links, &removed_links_num);
761765

@@ -1166,11 +1170,11 @@ int HNSWIndex<DataType, DistType>::appendVector(const void *vector_data, const l
11661170

11671171
idType cur_c;
11681172

1169-
DataType normalized_blob[this->dim]; // This will be use only if metric == VecSimMetric_Cosine
1173+
auto normalized_blob = this->getAllocator()->allocate_unique(this->dim * sizeof(DataType));
11701174
if (this->metric == VecSimMetric_Cosine) {
1171-
memcpy(normalized_blob, vector_data, this->dim * sizeof(DataType));
1172-
normalizeVector(normalized_blob, this->dim);
1173-
vector_data = normalized_blob;
1175+
memcpy(normalized_blob.get(), vector_data, this->dim * sizeof(DataType));
1176+
normalizeVector(static_cast<DataType *>(normalized_blob.get()), this->dim);
1177+
vector_data = normalized_blob.get();
11741178
}
11751179

11761180
{
@@ -1392,11 +1396,11 @@ VecSimQueryResult_List HNSWIndex<DataType, DistType>::topKQuery(const void *quer
13921396

13931397
void *timeoutCtx = nullptr;
13941398

1395-
DataType normalized_blob[this->dim]; // This will be use only if metric == VecSimMetric_Cosine.
1399+
auto normalized_blob = this->getAllocator()->allocate_unique(this->dim * sizeof(DataType));
13961400
if (this->metric == VecSimMetric_Cosine) {
1397-
memcpy(normalized_blob, query_data, this->dim * sizeof(DataType));
1398-
normalizeVector(normalized_blob, this->dim);
1399-
query_data = normalized_blob;
1401+
memcpy(normalized_blob.get(), query_data, this->dim * sizeof(DataType));
1402+
normalizeVector(static_cast<DataType *>(normalized_blob.get()), this->dim);
1403+
query_data = normalized_blob.get();
14001404
}
14011405
// Get original efRuntime and store it.
14021406
size_t ef = ef_;
@@ -1509,11 +1513,11 @@ VecSimQueryResult_List HNSWIndex<DataType, DistType>::rangeQuery(const void *que
15091513
}
15101514
void *timeoutCtx = nullptr;
15111515

1512-
DataType normalized_blob[this->dim]; // This will be use only if metric == VecSimMetric_Cosine
1516+
auto normalized_blob = this->getAllocator()->allocate_unique(this->dim * sizeof(DataType));
15131517
if (this->metric == VecSimMetric_Cosine) {
1514-
memcpy(normalized_blob, query_data, this->dim * sizeof(DataType));
1515-
normalizeVector(normalized_blob, this->dim);
1516-
query_data = normalized_blob;
1518+
memcpy(normalized_blob.get(), query_data, this->dim * sizeof(DataType));
1519+
normalizeVector(static_cast<DataType *>(normalized_blob.get()), this->dim);
1520+
query_data = normalized_blob.get();
15171521
}
15181522

15191523
double epsilon = epsilon_;

src/VecSim/memory/vecsim_malloc.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,11 @@ void *VecSimAllocator::callocate(size_t size) {
7171
return NULL;
7272
}
7373

74+
std::unique_ptr<void, VecSimAllocator::Deleter> VecSimAllocator::allocate_unique(size_t size) {
75+
void *ptr = this->allocate(size);
76+
return {ptr, Deleter(*this)};
77+
}
78+
7479
void *VecSimAllocator::operator new(size_t size) { return vecsim_malloc(size); }
7580

7681
void *VecSimAllocator::operator new[](size_t size) { return vecsim_malloc(size); }

src/VecSim/memory/vecsim_malloc.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ struct VecSimAllocator {
2424
static size_t allocation_header_size;
2525
static VecSimMemoryFunctions memFunctions;
2626

27+
// Forward declaration of the deleter for the unique_ptr.
28+
struct Deleter;
2729
VecSimAllocator() : allocated(std::make_shared<uint64_t>(sizeof(VecSimAllocator))) {}
2830

2931
public:
@@ -34,6 +36,9 @@ struct VecSimAllocator {
3436
void *reallocate(void *p, size_t size);
3537
void free_allocation(void *p);
3638

39+
// Allocations for scope-life-time memory.
40+
std::unique_ptr<void, Deleter> allocate_unique(size_t size);
41+
3742
void *operator new(size_t size);
3843
void *operator new[](size_t size);
3944
void operator delete(void *p, size_t size);
@@ -51,8 +56,14 @@ struct VecSimAllocator {
5156
static void setMemoryFunctions(VecSimMemoryFunctions memFunctions);
5257

5358
private:
54-
// Retrive the original requested allocation size. Required for remalloc.
59+
// Retrieve the original requested allocation size. Required for remalloc.
5560
inline size_t getPointerAllocationSize(void *p) { return *(((size_t *)p) - 1); }
61+
62+
struct Deleter {
63+
VecSimAllocator &allocator;
64+
explicit constexpr Deleter(VecSimAllocator &allocator) : allocator(allocator) {}
65+
void operator()(void *ptr) const { allocator.free_allocation(ptr); }
66+
};
5667
};
5768

5869
/**

src/VecSim/spaces/IP_space.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#include "VecSim/spaces/functions/NEON.h"
1414
#include "VecSim/spaces/functions/SVE.h"
1515
#include "VecSim/spaces/functions/SVE2.h"
16-
16+
#include <cassert>
1717
namespace spaces {
1818
dist_func_t<float> IP_FP32_GetDistFunc(size_t dim, const Arch_Optimization arch_opt) {
1919

@@ -78,6 +78,8 @@ dist_func_t<float> IP_FP32_GetDistFunc(size_t dim, const Arch_Optimization arch_
7878
ret_dist_func = Choose_FP32_IP_implementation_NEON(dim);
7979
break;
8080
#endif
81+
case ARCH_OPT_SIZE:
82+
assert(false && "ARCH_OPT_SIZE is not a valid optimization type");
8183
#endif // CPU_FEATURES_ARCH_X86_64
8284
case ARCH_OPT_NONE:
8385
break;
@@ -161,6 +163,8 @@ dist_func_t<double> IP_FP64_GetDistFunc(size_t dim, const Arch_Optimization arch
161163
ret_dist_func = Choose_FP64_IP_implementation_NEON(dim);
162164
break;
163165
#endif
166+
case ARCH_OPT_SIZE:
167+
assert(false && "ARCH_OPT_SIZE is not a valid optimization type");
164168
#endif // CPU_FEATURES_ARCH_AARCH64
165169
case ARCH_OPT_NONE:
166170
break;

src/VecSim/spaces/L2_space.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "VecSim/spaces/functions/NEON.h"
1414
#include "VecSim/spaces/functions/SVE.h"
1515
#include "VecSim/spaces/functions/SVE2.h"
16+
#include <cassert>
1617

1718
namespace spaces {
1819

@@ -81,6 +82,8 @@ dist_func_t<float> L2_FP32_GetDistFunc(size_t dim, const Arch_Optimization arch_
8182
ret_dist_func = Choose_FP32_L2_implementation_NEON(dim);
8283
break;
8384
#endif
85+
case ARCH_OPT_SIZE:
86+
assert(false && "ARCH_OPT_SIZE is not a valid optimization type");
8487
#endif
8588
case ARCH_OPT_NONE:
8689
break;
@@ -162,6 +165,8 @@ dist_func_t<double> L2_FP64_GetDistFunc(size_t dim, const Arch_Optimization arch
162165
ret_dist_func = Choose_FP64_L2_implementation_NEON(dim);
163166
break;
164167
#endif
168+
case ARCH_OPT_SIZE:
169+
assert(false && "ARCH_OPT_SIZE is not a valid optimization type");
165170
#endif // __aarch64__
166171
case ARCH_OPT_NONE:
167172
break;

0 commit comments

Comments
 (0)