Skip to content

Commit 921e66c

Browse files
authored
[MOD-7643] Refactor graph data + handle in-place deletion tiered [0.8] (#537)
* CP refactor HNSW graph data + indegree support * [MOD-7643] Handle pending jobs after delete in-place in tiered index [Bug] (#534) * wip * update repair and swap job in tiered index after removing vector inplace * update ready swap jobs properly + test * touch ups in test * fix for inplace override * fix deadlock inplace override * Fix delete inplace for multi + test, remove always assume that hnsw index may contain marked deleted elements, cleanups * cleanups + bring back assert * Addressing Guy's CR * Revert in-degree HNSW (#536) * revert in-degree in HNSW * remove empty data dir * revert clang * fix unit test flow for OSs
1 parent 798dd30 commit 921e66c

27 files changed

+787
-590
lines changed

.github/workflows/task-unit-test.yml

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,53 @@ jobs:
2929
steps:
3030
- name: pre-checkout script
3131
if: ${{ inputs.pre-checkout-script }}
32+
shell: sh -l -eo pipefail {0}
3233
run: ${{ inputs.pre-checkout-script }}
34+
- name: Check for node20 support
35+
id: node20 # TODO: Remove this when node20 is supported on all platforms, or when we drop support for theses platforms
36+
run: |
37+
for os in amazonlinux:2 ubuntu:bionic; do
38+
if [ "${{ inputs.container }}" = "$os" ]; then
39+
# https://github.com/actions/checkout/issues/1809
40+
# https://github.blog/changelog/2024-03-07-github-actions-all-actions-will-run-on-node20-instead-of-node16-by-default/
41+
echo "ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION=true" >> $GITHUB_ENV
42+
echo "supported=false" >> $GITHUB_OUTPUT
43+
exit 0
44+
fi
45+
done
46+
echo "supported=true" >> $GITHUB_OUTPUT
3347
- name: checkout
48+
if: steps.node20.outputs.supported == 'true'
49+
uses: actions/checkout@v4
50+
with:
51+
ref: ${{ github.event.number && format('refs/pull/{0}/merge', github.event.number) || github.head_ref }}
52+
- name: checkout (fallback)
53+
if: steps.node20.outputs.supported == 'false'
3454
uses: actions/checkout@v3
55+
with:
56+
ref: ${{ github.event.number && format('refs/pull/{0}/merge', github.event.number) || github.head_ref }}
3557
- name: install dependencies
3658
run: .install/install_script.sh ${{ !inputs.container && 'sudo' || '' }}
3759
- name: unit tests
3860
run: make unit_test
3961
- name: valgrind
4062
if: ${{ inputs.run-valgrind }}
4163
run: make valgrind
64+
- name: Set Artifact Names
65+
# Artifact names have to be unique, so we base them on the environment.
66+
# We also remove invalid characters from the name.
67+
id: artifact-names
68+
run: | # Invalid characters include: Double quote ", Colon :, Less than <, Greater than >, Vertical bar |, Asterisk *, Question mark ?
69+
echo "name=$(echo "${{ inputs.container || inputs.env }} ${{ runner.arch }}" | sed -e 's/[":\/\\<>\|*?]/_/g')" >> $GITHUB_OUTPUT
4270
- name: Archive valgrind tests reports
43-
if: ${{ inputs.run-valgrind }} && failure()
71+
if: ${{ inputs.run-valgrind && failure() && steps.node20.outputs.supported == 'true' }}
72+
uses: actions/upload-artifact@v4
73+
with:
74+
name: valgrind tests reports ${{ steps.artifact-names.outputs.name }}
75+
path: bin/Linux-x86_64-debug/unit_tests/Testing/Temporary/
76+
- name: Archive valgrind tests reports (fallback)
77+
if: ${{ inputs.run-valgrind && failure() && steps.node20.outputs.supported == 'false' }}
4478
uses: actions/upload-artifact@v3
4579
with:
46-
name: valgrind tests reports
80+
name: valgrind tests reports ${{ steps.artifact-names.outputs.name }}
4781
path: bin/Linux-x86_64-debug/unit_tests/Testing/Temporary/
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
2+
#pragma once
3+
4+
#include <cassert>
5+
#include <algorithm>
6+
#include <mutex>
7+
#include "VecSim/utils/vec_utils.h"
8+
9+
template <typename DistType>
10+
using candidatesList = vecsim_stl::vector<std::pair<DistType, idType>>;
11+
12+
typedef uint16_t linkListSize;
13+
14+
struct ElementLevelData {
15+
// A list of ids that are pointing to the node where each edge is *unidirectional*
16+
vecsim_stl::vector<idType> *incomingUnidirectionalEdges;
17+
linkListSize numLinks;
18+
// Flexible array member - https://en.wikipedia.org/wiki/Flexible_array_member
19+
// Using this trick, we can have the links list as part of the ElementLevelData struct, and
20+
// avoid the need to dereference a pointer to get to the links list. We have to calculate the
21+
// size of the struct manually, as `sizeof(ElementLevelData)` will not include this member. We
22+
// do so in the constructor of the index, under the name `levelDataSize` (and
23+
// `elementGraphDataSize`). Notice that this member must be the last member of the struct and
24+
// all nesting structs.
25+
idType links[];
26+
27+
explicit ElementLevelData(std::shared_ptr<VecSimAllocator> allocator)
28+
: incomingUnidirectionalEdges(new (allocator) vecsim_stl::vector<idType>(allocator)),
29+
numLinks(0) {}
30+
31+
linkListSize getNumLinks() const { return this->numLinks; }
32+
idType getLinkAtPos(size_t pos) const {
33+
assert(pos < numLinks);
34+
return this->links[pos];
35+
}
36+
const vecsim_stl::vector<idType> &getIncomingEdges() const {
37+
return *incomingUnidirectionalEdges;
38+
}
39+
std::vector<idType> copyLinks() {
40+
std::vector<idType> links_copy;
41+
links_copy.assign(links, links + numLinks);
42+
return links_copy;
43+
}
44+
// Sets the outgoing links of the current element.
45+
// Assumes that the object has the capacity to hold all the links.
46+
void setLinks(vecsim_stl::vector<idType> &links) {
47+
numLinks = links.size();
48+
memcpy(this->links, links.data(), numLinks * sizeof(idType));
49+
}
50+
template <typename DistType>
51+
void setLinks(candidatesList<DistType> &links) {
52+
numLinks = 0;
53+
for (auto &link : links) {
54+
this->links[numLinks++] = link.second;
55+
}
56+
}
57+
void popLink() { this->numLinks--; }
58+
void setNumLinks(linkListSize num) { this->numLinks = num; }
59+
void setLinkAtPos(size_t pos, idType node_id) { this->links[pos] = node_id; }
60+
void appendLink(idType node_id) { this->links[this->numLinks++] = node_id; }
61+
void newIncomingUnidirectionalEdge(idType node_id) {
62+
this->incomingUnidirectionalEdges->push_back(node_id);
63+
}
64+
bool removeIncomingUnidirectionalEdgeIfExists(idType node_id) {
65+
return this->incomingUnidirectionalEdges->remove(node_id);
66+
}
67+
void swapNodeIdInIncomingEdges(idType id_before, idType id_after) {
68+
auto it = std::find(this->incomingUnidirectionalEdges->begin(),
69+
this->incomingUnidirectionalEdges->end(), id_before);
70+
// This should always succeed
71+
assert(it != this->incomingUnidirectionalEdges->end());
72+
*it = id_after;
73+
}
74+
};
75+
76+
struct ElementGraphData {
77+
size_t toplevel;
78+
std::mutex neighborsGuard;
79+
ElementLevelData *others;
80+
ElementLevelData level0;
81+
82+
ElementGraphData(size_t maxLevel, size_t high_level_size,
83+
std::shared_ptr<VecSimAllocator> allocator)
84+
: toplevel(maxLevel), others(nullptr), level0(allocator) {
85+
if (toplevel > 0) {
86+
others = (ElementLevelData *)allocator->callocate(high_level_size * toplevel);
87+
if (others == nullptr) {
88+
throw std::runtime_error("VecSim index low memory error");
89+
}
90+
for (size_t i = 0; i < maxLevel; i++) {
91+
new ((char *)others + i * high_level_size) ElementLevelData(allocator);
92+
}
93+
}
94+
}
95+
~ElementGraphData() = delete; // should be destroyed using `destroy'
96+
97+
void destroy(size_t levelDataSize, std::shared_ptr<VecSimAllocator> allocator) {
98+
delete this->level0.incomingUnidirectionalEdges;
99+
ElementLevelData *cur_ld = this->others;
100+
for (size_t i = 0; i < this->toplevel; i++) {
101+
delete cur_ld->incomingUnidirectionalEdges;
102+
cur_ld = reinterpret_cast<ElementLevelData *>(reinterpret_cast<char *>(cur_ld) +
103+
levelDataSize);
104+
}
105+
allocator->free_allocation(this->others);
106+
}
107+
ElementLevelData &getElementLevelData(size_t level, size_t levelDataSize) {
108+
assert(level <= this->toplevel);
109+
if (level == 0) {
110+
return this->level0;
111+
}
112+
return *reinterpret_cast<ElementLevelData *>(reinterpret_cast<char *>(this->others) +
113+
(level - 1) * levelDataSize);
114+
}
115+
};

0 commit comments

Comments
 (0)