Skip to content

Commit a51585e

Browse files
lerman25 and rfsaliev authored
[8.2] [MOD-10236] Add serialization to SVS index (#782)
[MOD-10236] Add serialization to SVS index (#716) * generalize * remove serializer.cpp from cmake * prepare merge with rafik commit * [SVS] Implement Save/Load + test * separate hnsw_serializer to h and cpp * remove get version impl * save impl * add load * change camelcase * format * generalize saveIndexFields * format * compare metadata on load * Add checkIntegrity with error * checkIntegrity * remove duplicate verification in compare meta data * format * svs serialization version testing * Revert "svs serialization version testing" This reverts commit 9ed7730. * common serializer test * remove changes_num from metadata * Add location c'tor * Add location ctor and to test * Remove outdated comment from serializer header * Enhance documentation for loadIndex function in SVSIndex * Add comments * format + remove test * enable tests * serializer test * format * reset SVS to master * add logging to test_svs * format * remove duplicate NewIndexImpl * expose loadIndex in VecSimIndex, add BUILD_TEST guard * remove string ctor from SVSIndex * format * fix BUILD_TEST in svs_factory * document loadIndex * move loadIndex to serializer * remove excess declarations * remove extra ; * compatable -> compatible * remove redundant params from test * remove comments from threadpool_handle * remove error context comments * add checkIntegrity * update checkIntegrity and format * move loadIndex to SVSSerializer * update bindings * format * add test * add single * adjust labels * Refactor save_load test to simplify vector generation logic * add HAVE_SVS guard * Add missing include for <sstream> in svs_serializer.h * free faulty index * Free index * Improve error message for index loading failure in NewIndex function * format --------- Co-authored-by: Rafik Saliev <rafik.f.saliev@intel.com>
1 parent 2ee1e15 commit a51585e

24 files changed

+1216
-378
lines changed

src/VecSim/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,9 @@ if (TARGET svs::svs)
5252
endif()
5353

5454
if(VECSIM_BUILD_TESTS)
55-
add_library(VectorSimilaritySerializer utils/serializer.cpp)
55+
add_library(VectorSimilaritySerializer
56+
algorithms/hnsw/hnsw_serializer.cpp
57+
algorithms/svs/svs_serializer.cpp
58+
)
5659
target_link_libraries(VectorSimilarity VectorSimilaritySerializer)
5760
endif()

src/VecSim/algorithms/hnsw/hnsw.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
#ifdef BUILD_TESTS
2626
#include "hnsw_serialization_utils.h"
2727
#include "VecSim/utils/serializer.h"
28+
#include "hnsw_serializer.h"
2829
#endif
2930

3031
#include <deque>
@@ -85,7 +86,7 @@ class HNSWIndex : public VecSimIndexAbstract<DataType, DistType>,
8586
public VecSimIndexTombstone
8687
#ifdef BUILD_TESTS
8788
,
88-
public Serializer
89+
public HNSWSerializer
8990
#endif
9091
{
9192
protected:
@@ -2355,5 +2356,5 @@ HNSWIndex<DataType, DistType>::getHNSWElementNeighbors(size_t label, int ***neig
23552356
}
23562357

23572358
#ifdef BUILD_TESTS
2358-
#include "hnsw_serializer.h"
2359+
#include "hnsw_serializer_impl.h"
23592360
#endif

src/VecSim/algorithms/hnsw/hnsw_multi.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ class HNSWIndex_Multi : public HNSWIndex<DataType, DistType> {
6464
HNSWIndex_Multi(std::ifstream &input, const HNSWParams *params,
6565
const AbstractIndexInitParams &abstractInitParams,
6666
const IndexComponents<DataType, DistType> &components,
67-
Serializer::EncodingVersion version)
67+
HNSWSerializer::EncodingVersion version)
6868
: HNSWIndex<DataType, DistType>(input, params, abstractInitParams, components, version),
6969
labelLookup(this->maxElements, this->allocator) {}
7070

src/VecSim/utils/serializer.cpp renamed to src/VecSim/algorithms/hnsw/hnsw_serializer.cpp

Lines changed: 19 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -7,38 +7,34 @@
77
* GNU Affero General Public License v3 (AGPLv3).
88
*/
99

10-
#include <fstream>
11-
#include <string>
10+
#include "hnsw_serializer.h"
1211

13-
#include "VecSim/utils/serializer.h"
14-
15-
// Persist index into a file in the specified location.
16-
void Serializer::saveIndex(const std::string &location) {
17-
18-
// Serializing with the latest version.
19-
EncodingVersion version = EncodingVersion_V4;
20-
21-
std::ofstream output(location, std::ios::binary);
22-
writeBinaryPOD(output, version);
23-
saveIndexIMP(output);
24-
output.close();
25-
}
26-
27-
Serializer::EncodingVersion Serializer::ReadVersion(std::ifstream &input) {
12+
HNSWSerializer::HNSWSerializer(EncodingVersion version) : m_version(version) {}
2813

14+
HNSWSerializer::EncodingVersion HNSWSerializer::ReadVersion(std::ifstream &input) {
2915
input.seekg(0, std::ifstream::beg);
3016

31-
// The version number is the first field that is serialized.
32-
EncodingVersion version = EncodingVersion_INVALID;
17+
EncodingVersion version = EncodingVersion::INVALID;
3318
readBinaryPOD(input, version);
34-
if (version <= EncodingVersion_DEPRECATED) {
19+
20+
if (version <= EncodingVersion::DEPRECATED) {
3521
input.close();
3622
throw std::runtime_error("Cannot load index: deprecated encoding version: " +
37-
std::to_string(version));
38-
} else if (version >= EncodingVersion_INVALID) {
23+
std::to_string(static_cast<int>(version)));
24+
} else if (version >= EncodingVersion::INVALID) {
3925
input.close();
4026
throw std::runtime_error("Cannot load index: bad encoding version: " +
41-
std::to_string(version));
27+
std::to_string(static_cast<int>(version)));
4228
}
4329
return version;
4430
}
31+
32+
void HNSWSerializer::saveIndex(const std::string &location) {
33+
EncodingVersion version = EncodingVersion::V4;
34+
std::ofstream output(location, std::ios::binary);
35+
writeBinaryPOD(output, version);
36+
saveIndexIMP(output);
37+
output.close();
38+
}
39+
40+
HNSWSerializer::EncodingVersion HNSWSerializer::getVersion() const { return m_version; }

0 commit comments

Comments
 (0)