@@ -45,6 +45,8 @@ typedef uint8_t elementFlags;
4545template <typename DistType>
4646using candidatesMaxHeap = vecsim_stl::max_priority_queue<DistType, idType>;
4747template <typename DistType>
48+ using candidatesList = vecsim_stl::vector<pair<DistType, idType>>;
49+ template <typename DistType>
4850using candidatesLabelsMaxHeap = vecsim_stl::abstract_priority_queue<DistType, labelType>;
4951using graphNodeType = pair<idType, ushort>; // represented as: (element_id, level)
5052
@@ -170,7 +172,7 @@ class HNSWIndex : public VecSimIndexAbstract<DistType>,
170172 HNSWIndex () = delete ; // default constructor is disabled.
171173 HNSWIndex (const HNSWIndex &) = delete ; // default (shallow) copy constructor is disabled.
172174 inline size_t getRandomLevel (double reverse_size);
173- inline void removeExtraLinks (candidatesMaxHeap <DistType> candidates, size_t Mcurmax,
175+ inline void removeExtraLinks (candidatesList <DistType> candidates, size_t Mcurmax,
174176 LevelData &node_level, const vecsim_stl::vector<bool > &bitmap,
175177 idType *removed_links, size_t *removed_links_num);
176178 template <bool has_marked_deleted, typename Identifier> // Either idType or labelType
@@ -197,7 +199,13 @@ class HNSWIndex : public VecSimIndexAbstract<DistType>,
197199 double epsilon, DistType radius,
198200 void *timeoutCtx,
199201 VecSimQueryResult_Code *rc) const ;
200- void getNeighborsByHeuristic2 (candidatesMaxHeap<DistType> &top_candidates, size_t M);
202+ idType getNeighborsByHeuristic2 (candidatesList<DistType> &top_candidates, size_t M) const ;
203+ void getNeighborsByHeuristic2 (candidatesList<DistType> &top_candidates, size_t M,
204+ vecsim_stl::vector<idType> ¬_chosen_candidates) const ;
205+ template <bool record_removed>
206+ void getNeighborsByHeuristic2_internal (
207+ candidatesList<DistType> &top_candidates, size_t M,
208+ vecsim_stl::vector<idType> *removed_candidates = nullptr ) const ;
201209 // Helper function for re-selecting node's neighbors which was selected as a neighbor for
202210 // a newly inserted node. Also, responsible for mutually connect the new node and the neighbor
203211 // (unidirectional or bidirectional connection).
@@ -527,32 +535,27 @@ void HNSWIndex<DataType, DistType>::unlockNodeLinks(idType node_id) const {
527535 */
528536template <typename DataType, typename DistType>
529537void HNSWIndex<DataType, DistType>::removeExtraLinks(
530- candidatesMaxHeap <DistType> candidates, size_t Mcurmax, LevelData &node_level,
538+ candidatesList <DistType> candidates, size_t Mcurmax, LevelData &node_level,
531539 const vecsim_stl::vector<bool > &neighbors_bitmap, idType *removed_links,
532540 size_t *removed_links_num) {
533541
534- auto orig_candidates = candidates;
535542 // candidates will store the newly selected neighbours (for the relevant node).
536- getNeighborsByHeuristic2 (candidates, Mcurmax);
543+ vecsim_stl::vector<idType> not_selected_candidates (this ->allocator );
544+ getNeighborsByHeuristic2 (candidates, Mcurmax, not_selected_candidates);
537545
538- // check the diff in the link list, save the neighbours
539- // that were chosen to be removed, and update the new neighbours
540- size_t removed_idx = 0 ;
541- size_t link_idx = 0 ;
546+ // update the node level data.
547+ node_level.numLinks = 0 ;
548+ for (auto &selected_neighbor : candidates) {
549+ node_level.links [node_level.numLinks ++] = selected_neighbor.second ;
550+ }
542551
543- while (orig_candidates.size () > 0 ) {
544- if (orig_candidates.top ().second != candidates.top ().second ) {
545- if (neighbors_bitmap[orig_candidates.top ().second ]) {
546- removed_links[removed_idx++] = orig_candidates.top ().second ;
547- }
548- orig_candidates.pop ();
549- } else {
550- node_level.links [link_idx++] = candidates.top ().second ;
551- candidates.pop ();
552- orig_candidates.pop ();
552+ // save the neighbours that were chosen to be removed.
553+ size_t removed_idx = 0 ;
554+ for (auto ¬_selected_neighbor : not_selected_candidates) {
555+ if (neighbors_bitmap[not_selected_neighbor]) {
556+ removed_links[removed_idx++] = not_selected_neighbor;
553557 }
554558 }
555- node_level.numLinks = link_idx;
556559 *removed_links_num = removed_idx;
557560}
558561
@@ -764,31 +767,53 @@ HNSWIndex<DataType, DistType>::searchLayer(idType ep_id, const void *data_point,
764767 return top_candidates;
765768}
766769
770+ template <typename DataType, typename DistType>
771+ idType
772+ HNSWIndex<DataType, DistType>::getNeighborsByHeuristic2(candidatesList<DistType> &top_candidates,
773+ const size_t M) const {
774+ if (top_candidates.size () < M) {
775+ return std::min_element (top_candidates.begin (), top_candidates.end (),
776+ [](const auto &a, const auto &b) { return a.first < b.first ; })
777+ ->second ;
778+ } else {
779+ getNeighborsByHeuristic2_internal<false >(top_candidates, M, nullptr );
780+ return top_candidates.front ().second ;
781+ }
782+ }
783+
767784template <typename DataType, typename DistType>
768785void HNSWIndex<DataType, DistType>::getNeighborsByHeuristic2(
769- candidatesMaxHeap<DistType> &top_candidates, const size_t M) {
786+ candidatesList<DistType> &top_candidates, const size_t M,
787+ vecsim_stl::vector<idType> &removed_candidates) const {
788+ getNeighborsByHeuristic2_internal<true >(top_candidates, M, &removed_candidates);
789+ }
790+
791+ template <typename DataType, typename DistType>
792+ template <bool record_removed>
793+ void HNSWIndex<DataType, DistType>::getNeighborsByHeuristic2_internal(
794+ candidatesList<DistType> &top_candidates, const size_t M,
795+ vecsim_stl::vector<idType> *removed_candidates) const {
770796 if (top_candidates.size () < M) {
771797 return ;
772798 }
773799
774- candidatesMaxHeap<DistType> queue_closest (this ->allocator );
775- vecsim_stl::vector<pair<DistType, idType>> return_list (this ->allocator );
800+ candidatesList<DistType> return_list (this ->allocator );
776801 vecsim_stl::vector<const void *> cached_vectors (this ->allocator );
777802 return_list.reserve (M);
778803 cached_vectors.reserve (M);
779- while (top_candidates.size () > 0 ) {
780- // the distance is saved negatively to have the queue ordered such that first is closer
781- // (higher).
782- queue_closest.emplace (-top_candidates.top ().first , top_candidates.top ().second );
783- top_candidates.pop ();
804+ if constexpr (record_removed) {
805+ removed_candidates->reserve (top_candidates.size ());
784806 }
785807
786- while (queue_closest.size () && return_list.size () < M) {
787- pair<DistType, idType> current_pair = queue_closest.top ();
788- DistType candidate_to_query_dist = -current_pair.first ;
789- queue_closest.pop ();
808+ // Sort the candidates by their distance (we don't mind the secondary order (the internal id))
809+ std::sort (top_candidates.begin (), top_candidates.end (),
810+ [](const auto &a, const auto &b) { return a.first < b.first ; });
811+
812+ auto current_pair = top_candidates.begin ();
813+ for (; current_pair != top_candidates.end () && return_list.size () < M; ++current_pair) {
814+ DistType candidate_to_query_dist = current_pair->first ;
790815 bool good = true ;
791- const void *curr_vector = getDataByInternalId (current_pair. second );
816+ const void *curr_vector = getDataByInternalId (current_pair-> second );
792817
793818 // a candidate is "good" to become a neighbour, unless we find
794819 // another item that was already selected to the neighbours set which is closer
@@ -797,19 +822,26 @@ void HNSWIndex<DataType, DistType>::getNeighborsByHeuristic2(
797822 DistType candidate_to_selected_dist =
798823 this ->distFunc (cached_vectors[i], curr_vector, this ->dim );
799824 if (candidate_to_selected_dist < candidate_to_query_dist) {
825+ if constexpr (record_removed) {
826+ removed_candidates->push_back (current_pair->second );
827+ }
800828 good = false ;
801829 break ;
802830 }
803831 }
804832 if (good) {
805833 cached_vectors.push_back (curr_vector);
806- return_list.push_back (current_pair);
834+ return_list.push_back (* current_pair);
807835 }
808836 }
809837
810- for (pair<DistType, idType> current_pair : return_list) {
811- top_candidates.emplace (-current_pair.first , current_pair.second );
838+ if constexpr (record_removed) {
839+ for (; current_pair != top_candidates.end (); ++current_pair) {
840+ removed_candidates->push_back (current_pair->second );
841+ }
812842 }
843+
844+ top_candidates.swap (return_list);
813845}
814846
815847template <typename DataType, typename DistType>
@@ -819,57 +851,41 @@ void HNSWIndex<DataType, DistType>::revisitNeighborConnections(
819851 // Note - expect that node_lock and neighbor_lock are locked at that point.
820852
821853 // Collect the existing neighbors and the new node as the neighbor's neighbors candidates.
822- candidatesMaxHeap<DistType> candidates (this ->allocator );
854+ candidatesList<DistType> candidates (this ->allocator );
855+ candidates.reserve (neighbor_level.numLinks + 1 );
823856 // Add the new node along with the pre-calculated distance to the current neighbor,
824- candidates.emplace (neighbor_data.first , new_node_id);
857+ candidates.emplace_back (neighbor_data.first , new_node_id);
825858
826859 idType selected_neighbor = neighbor_data.second ;
827860 const void *selected_neighbor_data = getDataByInternalId (selected_neighbor);
828861 for (size_t j = 0 ; j < neighbor_level.numLinks ; j++) {
829- candidates.emplace (this ->distFunc (getDataByInternalId (neighbor_level.links [j]),
830- selected_neighbor_data, this ->dim ),
831- neighbor_level.links [j]);
862+ candidates.emplace_back (this ->distFunc (getDataByInternalId (neighbor_level.links [j]),
863+ selected_neighbor_data, this ->dim ),
864+ neighbor_level.links [j]);
832865 }
833866
834- std::vector<idType> nodes_to_update;
835- auto orig_candidates = candidates;
836-
837867 // Candidates will store the newly selected neighbours (for the neighbor).
838868 size_t max_M_cur = level ? M : M0;
839- getNeighborsByHeuristic2 (candidates, max_M_cur);
840-
841- // Go over the original candidates set, and save the ones chosen to be removed to update later
842- // on.
843- bool cur_node_chosen = false ;
844- while (orig_candidates.size () > 0 ) {
845- idType orig_candidate = orig_candidates.top ().second ;
846- // If the current original candidate was not selected as neighbor by the heuristics, it
847- // should be updated and removed from the neighbor's neighbors.
848- if (candidates.empty () || orig_candidate != candidates.top ().second ) {
849- // Don't add the new_node_id to nodes_to_update, it will be inserted either way later.
850- if (orig_candidate != new_node_id) {
851- nodes_to_update.push_back (orig_candidate);
852- }
853- orig_candidates.pop ();
854- // Otherwise, the original candidate was selected to remain a neighbor - no need to
855- // update.
856- } else {
857- candidates.pop ();
858- orig_candidates.pop ();
859- if (orig_candidate == new_node_id) {
860- cur_node_chosen = true ;
861- }
862- }
863- }
869+ vecsim_stl::vector<idType> nodes_to_update (this ->allocator );
870+ getNeighborsByHeuristic2 (candidates, max_M_cur, nodes_to_update);
864871
865872 // Acquire all relevant locks for making the updates for the selected neighbor - all its removed
866873 // neighbors, along with the neighbors itself and the cur node.
867874 // but first, we release the node and neighbors lock to avoid deadlocks.
868875 unlockNodeLinks (new_node_id);
869876 unlockNodeLinks (selected_neighbor);
870877
878+ // Check if the new node was selected as a neighbor for the current neighbor.
879+ // Make sure to add the cur node to the list of nodes to update if it was selected.
880+ bool cur_node_chosen;
881+ auto new_node_iter = std::find (nodes_to_update.begin (), nodes_to_update.end (), new_node_id);
882+ if (new_node_iter != nodes_to_update.end ()) {
883+ cur_node_chosen = false ;
884+ } else {
885+ cur_node_chosen = true ;
886+ nodes_to_update.push_back (new_node_id);
887+ }
871888 nodes_to_update.push_back (selected_neighbor);
872- nodes_to_update.push_back (new_node_id);
873889
874890 std::sort (nodes_to_update.begin (), nodes_to_update.end ());
875891 size_t nodes_to_update_count = nodes_to_update.size ();
@@ -935,27 +951,21 @@ idType HNSWIndex<DataType, DistType>::mutuallyConnectNewElement(
935951 size_t max_M_cur = level ? M : M0;
936952
937953 // Filter the top candidates to the selected neighbors by the algorithm heuristics.
938- getNeighborsByHeuristic2 (top_candidates, M);
939- assert (top_candidates.size () <= M &&
954+ // First, we need to copy the top candidates to a vector.
955+ candidatesList<DistType> top_candidates_list (this ->allocator );
956+ top_candidates_list.insert (top_candidates_list.end (), top_candidates.begin (),
957+ top_candidates.end ());
958+ // Use the heuristic to filter the top candidates, and get the next closest entry point.
959+ idType next_closest_entry_point = getNeighborsByHeuristic2 (top_candidates_list, M);
960+ assert (top_candidates_list.size () <= M &&
940961 " Should be not be more than M candidates returned by the heuristic" );
941962
942- // Hold (distance_from_new_node_id, neighbor_id) pair for every selected neighbor.
943- vecsim_stl::vector<std::pair<DistType, idType>> selected_neighbors (this ->allocator );
944- selected_neighbors.reserve (M);
945- while (!top_candidates.empty ()) {
946- selected_neighbors.push_back (top_candidates.top ());
947- top_candidates.pop ();
948- }
949-
950- // The closest vector that has found to be returned (and start the scan from it in the next
951- // level).
952- idType next_closest_entry_point = selected_neighbors.back ().second ;
953963 auto *new_node_level = getGraphDataByInternalId (new_node_id);
954964 LevelData &new_node_level_data = getLevelData (new_node_level, level);
955965 assert (new_node_level_data.numLinks == 0 &&
956966 " The newly inserted element should have blank link list" );
957967
958- for (auto &neighbor_data : selected_neighbors ) {
968+ for (auto &neighbor_data : top_candidates_list ) {
959969 idType selected_neighbor = neighbor_data.second ; // neighbor's id
960970 auto *neighbor_graph_data = getGraphDataByInternalId (selected_neighbor);
961971 if (new_node_id < selected_neighbor) {
@@ -1015,14 +1025,15 @@ void HNSWIndex<DataType, DistType>::repairConnectionsForDeletion(
10151025 LevelData &neighbor_level, size_t level, vecsim_stl::vector<bool > &neighbours_bitmap) {
10161026
10171027 // put the deleted element's neighbours in the candidates.
1018- candidatesMaxHeap<DistType> candidates (this ->allocator );
1028+ candidatesList<DistType> candidates (this ->allocator );
1029+ candidates.reserve (node_level.numLinks + neighbor_level.numLinks );
10191030 auto neighbours_data = getDataByInternalId (neighbour_id);
10201031 for (size_t j = 0 ; j < node_level.numLinks ; j++) {
10211032 // Don't put the neighbor itself in his own candidates
10221033 if (node_level.links [j] == neighbour_id) {
10231034 continue ;
10241035 }
1025- candidates.emplace (
1036+ candidates.emplace_back (
10261037 this ->distFunc (getDataByInternalId (node_level.links [j]), neighbours_data, this ->dim ),
10271038 node_level.links [j]);
10281039 }
@@ -1038,9 +1049,10 @@ void HNSWIndex<DataType, DistType>::repairConnectionsForDeletion(
10381049 neighbor_level.links [j] == element_internal_id) {
10391050 continue ;
10401051 }
1041- candidates.emplace (this ->distFunc (neighbours_data,
1042- getDataByInternalId (neighbor_level.links [j]), this ->dim ),
1043- neighbor_level.links [j]);
1052+ candidates.emplace_back (this ->distFunc (neighbours_data,
1053+ getDataByInternalId (neighbor_level.links [j]),
1054+ this ->dim ),
1055+ neighbor_level.links [j]);
10441056 }
10451057
10461058 size_t Mcurmax = level ? M : M0;
@@ -1488,7 +1500,7 @@ void HNSWIndex<DataType, DistType>::mutuallyUpdateForRepairedNode(
14881500template <typename DataType, typename DistType>
14891501void HNSWIndex<DataType, DistType>::repairNodeConnections(idType node_id, size_t level) {
14901502
1491- candidatesMaxHeap <DistType> neighbors_candidates (this ->allocator );
1503+ candidatesList <DistType> neighbors_candidates (this ->allocator );
14921504 // Use bitmaps for fast accesses:
14931505 // node_orig_neighbours_set is used to differentiate between the neighbors that will *not* be
14941506 // selected by the heuristics - only the ones that were originally neighbors should be removed.
@@ -1513,7 +1525,7 @@ void HNSWIndex<DataType, DistType>::repairNodeConnections(idType node_id, size_t
15131525 continue ;
15141526 }
15151527 neighbors_candidates_set[node_level_data.links [j]] = true ;
1516- neighbors_candidates.emplace (
1528+ neighbors_candidates.emplace_back (
15171529 this ->distFunc (node_data, getDataByInternalId (node_level_data.links [j]), this ->dim ),
15181530 node_level_data.links [j]);
15191531 }
@@ -1550,37 +1562,30 @@ void HNSWIndex<DataType, DistType>::repairNodeConnections(idType node_id, size_t
15501562 continue ;
15511563 }
15521564 neighbors_candidates_set[neighbor_level_data.links [j]] = true ;
1553- neighbors_candidates.emplace (
1565+ neighbors_candidates.emplace_back (
15541566 this ->distFunc (node_data, getDataByInternalId (neighbor_level_data.links [j]),
15551567 this ->dim ),
15561568 neighbor_level_data.links [j]);
15571569 }
15581570 unlockNodeLinks (neighbor);
15591571 }
15601572
1561- // Copy the original candidates, and run the heuristics. Afterwards, neighbors_candidates will
1562- // store the newly selected neighbours (for the node), while candidates which were originally
1563- // neighbors and are not going to be selected, are going to be removed.
1564- auto orig_candidates = neighbors_candidates;
15651573 size_t max_M_cur = level ? M : M0;
1566- getNeighborsByHeuristic2 (neighbors_candidates, max_M_cur);
1567-
1568- while (!orig_candidates.empty ()) {
1569- idType orig_candidate = orig_candidates.top ().second ;
1570- if (neighbors_candidates.empty () || orig_candidate != neighbors_candidates.top ().second ) {
1571- if (node_orig_neighbours_set[orig_candidate]) {
1572- neighbors_to_remove.push_back (orig_candidate);
1573- nodes_to_update.push_back (orig_candidate);
1574- }
1575- orig_candidates.pop ();
1576- } else {
1577- chosen_neighbors.push_back (orig_candidate);
1578- nodes_to_update.push_back (orig_candidate);
1579- neighbors_candidates.pop ();
1580- orig_candidates.pop ();
1574+ vecsim_stl::vector<idType> not_chosen_neighbors (this ->allocator );
1575+ getNeighborsByHeuristic2 (neighbors_candidates, max_M_cur, not_chosen_neighbors);
1576+
1577+ for (idType not_chosen_neighbor : not_chosen_neighbors) {
1578+ if (node_orig_neighbours_set[not_chosen_neighbor]) {
1579+ neighbors_to_remove.push_back (not_chosen_neighbor);
1580+ nodes_to_update.push_back (not_chosen_neighbor);
15811581 }
15821582 }
15831583
1584+ for (auto &neighbor : neighbors_candidates) {
1585+ chosen_neighbors.push_back (neighbor.second );
1586+ nodes_to_update.push_back (neighbor.second );
1587+ }
1588+
15841589 // Perform the actual updates for the node and the impacted neighbors while holding the nodes'
15851590 // locks.
15861591 mutuallyUpdateForRepairedNode (node_id, level, neighbors_to_remove, nodes_to_update,
0 commit comments