@@ -91,6 +91,20 @@ struct LevelData {
9191
9292 LevelData (std::shared_ptr<VecSimAllocator> allocator)
9393 : incomingEdges(new (allocator) vecsim_stl::vector<idType>(allocator)), numLinks(0 ) {}
94+
95+ // Sets the outgoing links of the current element.
96+ // Assumes that the object has the capacity to hold all the links.
97+ inline void setLinks (vecsim_stl::vector<idType> &links) {
98+ numLinks = links.size ();
99+ memcpy (this ->links , links.data (), numLinks * sizeof (idType));
100+ }
101+ template <typename DistType>
102+ inline void setLinks (candidatesList<DistType> &links) {
103+ numLinks = 0 ;
104+ for (auto &link : links) {
105+ this ->links [numLinks++] = link.second ;
106+ }
107+ }
94108};
95109
96110struct ElementGraphData {
@@ -171,9 +185,6 @@ class HNSWIndex : public VecSimIndexAbstract<DistType>,
171185 HNSWIndex () = delete ; // default constructor is disabled.
172186 HNSWIndex (const HNSWIndex &) = delete ; // default (shallow) copy constructor is disabled.
173187 inline size_t getRandomLevel (double reverse_size);
174- inline void removeExtraLinks (candidatesList<DistType> candidates, size_t Mcurmax,
175- LevelData &node_level, const vecsim_stl::vector<bool > &bitmap,
176- idType *removed_links, size_t *removed_links_num);
177188 template <bool has_marked_deleted, typename Identifier> // Either idType or labelType
178189 inline void
179190 processCandidate (idType curNodeId, const void *data_point, size_t layer, size_t ef,
@@ -532,32 +543,6 @@ void HNSWIndex<DataType, DistType>::unlockNodeLinks(idType node_id) const {
532543/* *
533544 * helper functions
534545 */
535- template <typename DataType, typename DistType>
536- void HNSWIndex<DataType, DistType>::removeExtraLinks(
537- candidatesList<DistType> candidates, size_t Mcurmax, LevelData &node_level,
538- const vecsim_stl::vector<bool > &neighbors_bitmap, idType *removed_links,
539- size_t *removed_links_num) {
540-
541- // candidates will store the newly selected neighbours (for the relevant node).
542- vecsim_stl::vector<idType> not_selected_candidates (this ->allocator );
543- getNeighborsByHeuristic2 (candidates, Mcurmax, not_selected_candidates);
544-
545- // update the node level data.
546- node_level.numLinks = 0 ;
547- for (auto &selected_neighbor : candidates) {
548- node_level.links [node_level.numLinks ++] = selected_neighbor.second ;
549- }
550-
551- // save the neighbours that were chosen to be removed.
552- size_t removed_idx = 0 ;
553- for (auto ¬_selected_neighbor : not_selected_candidates) {
554- if (neighbors_bitmap[not_selected_neighbor]) {
555- removed_links[removed_idx++] = not_selected_neighbor;
556- }
557- }
558- *removed_links_num = removed_idx;
559- }
560-
561546template <typename DataType, typename DistType>
562547void HNSWIndex<DataType, DistType>::emplaceToHeap(
563548 vecsim_stl::abstract_priority_queue<DistType, idType> &heap, DistType dist, idType id) const {
@@ -1024,56 +1009,61 @@ void HNSWIndex<DataType, DistType>::repairConnectionsForDeletion(
10241009 LevelData &neighbor_level, size_t level, vecsim_stl::vector<bool > &neighbours_bitmap) {
10251010
10261011 // put the deleted element's neighbours in the candidates.
1027- candidatesList<DistType> candidates (this ->allocator );
1028- candidates.reserve (node_level.numLinks + neighbor_level.numLinks );
1029- auto neighbours_data = getDataByInternalId (neighbour_id);
1012+ vecsim_stl::vector<idType> candidate_ids (this ->allocator );
1013+ candidate_ids.reserve (node_level.numLinks + neighbor_level.numLinks );
10301014 for (size_t j = 0 ; j < node_level.numLinks ; j++) {
10311015 // Don't put the neighbor itself in his own candidates
1032- if (node_level.links [j] = = neighbour_id) {
1033- continue ;
1016+ if (node_level.links [j] ! = neighbour_id) {
1017+ candidate_ids. push_back (node_level. links [j]) ;
10341018 }
1035- candidates.emplace_back (
1036- this ->distFunc (getDataByInternalId (node_level.links [j]), neighbours_data, this ->dim ),
1037- node_level.links [j]);
10381019 }
1039-
10401020 // add the deleted element's neighbour's original neighbors in the candidates.
10411021 vecsim_stl::vector<bool > neighbour_orig_neighbours_set (curElementCount, false , this ->allocator );
1042-
10431022 for (size_t j = 0 ; j < neighbor_level.numLinks ; j++) {
10441023 neighbour_orig_neighbours_set[neighbor_level.links [j]] = true ;
10451024 // Don't add the removed element to the candidates, nor nodes that are already in the
10461025 // candidates set.
1047- if (neighbours_bitmap[ neighbor_level.links [j]] ||
1048- neighbor_level.links [j] == element_internal_id ) {
1049- continue ;
1026+ if (neighbor_level.links [j] != element_internal_id &&
1027+ !neighbours_bitmap[ neighbor_level.links [j]] ) {
1028+ candidate_ids. push_back (neighbor_level. links [j]) ;
10501029 }
1051- candidates.emplace_back (this ->distFunc (neighbours_data,
1052- getDataByInternalId (neighbor_level.links [j]),
1053- this ->dim ),
1054- neighbor_level.links [j]);
10551030 }
10561031
10571032 size_t Mcurmax = level ? M : M0;
1058- size_t removed_links_num;
1059- idType removed_links[neighbor_level. numLinks ];
1060- removeExtraLinks (candidates, Mcurmax, neighbor_level, neighbour_orig_neighbours_set,
1061- removed_links, &removed_links_num );
1062-
1063- // remove neighbour id from the incoming list of nodes for his
1064- // neighbours that were chosen to remove
1065- for ( size_t i = 0 ; i < removed_links_num; i++) {
1066- idType node_id = removed_links[i] ;
1067- LevelData &node_level = getLevelData (node_id, level);
1033+ if (candidate_ids. size () > Mcurmax) {
1034+ // We need to filter the candidates by the heuristic.
1035+ candidatesList<DistType> candidates ( this -> allocator );
1036+ candidates. reserve (candidate_ids. size () );
1037+ auto neighbours_data = getDataByInternalId (neighbour_id);
1038+ for ( auto candidate_id : candidate_ids) {
1039+ candidates. emplace_back (
1040+ this -> distFunc ( getDataByInternalId (candidate_id), neighbours_data, this -> dim ),
1041+ candidate_id) ;
1042+ }
10681043
1069- // if the node id (the neighbour's neighbour to be removed)
1070- // wasn't pointing to the neighbour (edge was one directional),
1071- // we should remove it from the node's incoming edges.
1072- // otherwise, edge turned from bidirectional to one directional,
1073- // and it should be saved in the neighbor's incoming edges.
1074- if (!removeIdFromList (*node_level.incomingEdges , neighbour_id)) {
1075- neighbor_level.incomingEdges ->push_back (node_id);
1044+ candidate_ids.clear ();
1045+ auto ¬_chosen_candidates = candidate_ids; // rename and reuse the vector
1046+ getNeighborsByHeuristic2 (candidates, Mcurmax, not_chosen_candidates);
1047+
1048+ neighbor_level.setLinks (candidates);
1049+
1050+ // remove neighbour id from the incoming list of nodes for his
1051+ // neighbours that were chosen to remove
1052+ for (auto node_id : not_chosen_candidates) {
1053+ if (neighbour_orig_neighbours_set[node_id]) {
1054+ // if the node id (the neighbour's neighbour to be removed)
1055+ // wasn't pointing to the neighbour (edge was one directional),
1056+ // we should remove it from the node's incoming edges.
1057+ // otherwise, edge turned from bidirectional to one directional,
1058+ // and it should be saved in the neighbor's incoming edges.
1059+ if (!removeIdFromList (*getLevelData (node_id, level).incomingEdges , neighbour_id)) {
1060+ neighbor_level.incomingEdges ->push_back (node_id);
1061+ }
1062+ }
10761063 }
1064+ } else {
1065+ // We don't need to filter the candidates - just update the edges.
1066+ neighbor_level.setLinks (candidate_ids);
10771067 }
10781068
10791069 // updates for the new edges created
@@ -1499,7 +1489,7 @@ void HNSWIndex<DataType, DistType>::mutuallyUpdateForRepairedNode(
14991489template <typename DataType, typename DistType>
15001490void HNSWIndex<DataType, DistType>::repairNodeConnections(idType node_id, size_t level) {
15011491
1502- candidatesList<DistType> neighbors_candidates (this ->allocator );
1492+ vecsim_stl::vector<idType> neighbors_candidate_ids (this ->allocator );
15031493 // Use bitmaps for fast accesses:
15041494 // node_orig_neighbours_set is used to differentiate between the neighbors that will *not* be
15051495 // selected by the heuristics - only the ones that were originally neighbors should be removed.
@@ -1512,7 +1502,6 @@ void HNSWIndex<DataType, DistType>::repairNodeConnections(idType node_id, size_t
15121502
15131503 // Go over the repaired node neighbors, collect the non-deleted ones to be neighbors candidates
15141504 // after the repair as well.
1515- const void *node_data = getDataByInternalId (node_id);
15161505 auto *element = getGraphDataByInternalId (node_id);
15171506 lockNodeLinks (element);
15181507 LevelData &node_level_data = getLevelData (element, level);
@@ -1524,9 +1513,7 @@ void HNSWIndex<DataType, DistType>::repairNodeConnections(idType node_id, size_t
15241513 continue ;
15251514 }
15261515 neighbors_candidates_set[node_level_data.links [j]] = true ;
1527- neighbors_candidates.emplace_back (
1528- this ->distFunc (node_data, getDataByInternalId (node_level_data.links [j]), this ->dim ),
1529- node_level_data.links [j]);
1516+ neighbors_candidate_ids.push_back (node_level_data.links [j]);
15301517 }
15311518 unlockNodeLinks (element);
15321519
@@ -1561,28 +1548,43 @@ void HNSWIndex<DataType, DistType>::repairNodeConnections(idType node_id, size_t
15611548 continue ;
15621549 }
15631550 neighbors_candidates_set[neighbor_level_data.links [j]] = true ;
1564- neighbors_candidates.emplace_back (
1565- this ->distFunc (node_data, getDataByInternalId (neighbor_level_data.links [j]),
1566- this ->dim ),
1567- neighbor_level_data.links [j]);
1551+ neighbors_candidate_ids.push_back (neighbor_level_data.links [j]);
15681552 }
15691553 unlockNodeLinks (neighbor);
15701554 }
15711555
15721556 size_t max_M_cur = level ? M : M0;
1573- vecsim_stl::vector<idType> not_chosen_neighbors (this ->allocator );
1574- getNeighborsByHeuristic2 (neighbors_candidates, max_M_cur, not_chosen_neighbors);
1557+ if (neighbors_candidate_ids.size () > max_M_cur) {
1558+ // We have more candidates than the maximum number of neighbors, so we need to select which
1559+ // ones to keep. We use the heuristic to select the neighbors, and then remove the ones that
1560+ // were not originally neighbors.
1561+ candidatesList<DistType> neighbors_candidates (this ->allocator );
1562+ neighbors_candidates.reserve (neighbors_candidate_ids.size ());
1563+ const void *node_data = getDataByInternalId (node_id);
1564+ for (idType candidate : neighbors_candidate_ids) {
1565+ neighbors_candidates.emplace_back (
1566+ this ->distFunc (getDataByInternalId (candidate), node_data, this ->dim ), candidate);
1567+ }
1568+ vecsim_stl::vector<idType> not_chosen_neighbors (this ->allocator );
1569+ getNeighborsByHeuristic2 (neighbors_candidates, max_M_cur, not_chosen_neighbors);
15751570
1576- for (idType not_chosen_neighbor : not_chosen_neighbors) {
1577- if (node_orig_neighbours_set[not_chosen_neighbor]) {
1578- neighbors_to_remove.push_back (not_chosen_neighbor);
1579- nodes_to_update.push_back (not_chosen_neighbor);
1571+ for (idType not_chosen_neighbor : not_chosen_neighbors) {
1572+ if (node_orig_neighbours_set[not_chosen_neighbor]) {
1573+ neighbors_to_remove.push_back (not_chosen_neighbor);
1574+ nodes_to_update.push_back (not_chosen_neighbor);
1575+ }
15801576 }
1581- }
15821577
1583- for (auto &neighbor : neighbors_candidates) {
1584- chosen_neighbors.push_back (neighbor.second );
1585- nodes_to_update.push_back (neighbor.second );
1578+ for (auto &neighbor : neighbors_candidates) {
1579+ chosen_neighbors.push_back (neighbor.second );
1580+ nodes_to_update.push_back (neighbor.second );
1581+ }
1582+ } else {
1583+ // We have less candidates than the maximum number of neighbors, so we choose them all, and
1584+ // extend the nodes to update with them.
1585+ chosen_neighbors.swap (neighbors_candidate_ids);
1586+ nodes_to_update.insert (nodes_to_update.end (), chosen_neighbors.begin (),
1587+ chosen_neighbors.end ());
15861588 }
15871589
15881590 // Perform the actual updates for the node and the impacted neighbors while holding the nodes'
0 commit comments