From 8f7ffc15507aca55289c706d107d53c001c37bc6 Mon Sep 17 00:00:00 2001 From: Matt Scicluna Date: Sat, 25 Oct 2025 12:54:16 -0400 Subject: [PATCH 1/2] changed hardcoded metric to self.distance --- graphtools/graphs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 85aeae7..c53a1dc 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -1205,11 +1205,11 @@ def build_landmark_op(self): self.data if not hasattr(self, "data_nu") else self.data_nu ) # because of the scaling to review if ( - n_samples > 5000 + n_samples > 5000 and self.distance == "euclidean" ): # sklearn.euclidean_distances is faster than cdist for big dataset distances = euclidean_distances(data, data[landmark_indices]) else: - distances = cdist(data, data[landmark_indices], metric="euclidean") + distances = cdist(data, data[landmark_indices], metric=self.distance) self._clusters = np.argmin(distances, axis=1) else: From 9573cb141fb89dd115c44a8b0580df928a1d582c Mon Sep 17 00:00:00 2001 From: Matt Scicluna Date: Sat, 25 Oct 2025 13:07:01 -0400 Subject: [PATCH 2/2] fixed test_landmark to be more sensitive to differences. 
Was a false negative (FN) before --- test/test_landmark.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/test/test_landmark.py b/test/test_landmark.py index aaeb929..8badf87 100644 --- a/test/test_landmark.py +++ b/test/test_landmark.py @@ -284,21 +284,26 @@ def test_landmark_with_non_euclidean_distances(): # "but all distance metrics gave identical results" # ) - # Test that the landmark operators are different shapes/values when different distances - # are used (this is a more sensitive test than just cluster assignments) - euclidean_landmark_sum = np.sum(euclidean_G.landmark_op) - manhattan_landmark_sum = np.sum(G_manhattan.landmark_op) - cosine_landmark_sum = np.sum(G_cosine.landmark_op) + # Compare landmark operators using Frobenius norm + euclidean_landmark_op = euclidean_G.landmark_op + manhattan_landmark_op = G_manhattan.landmark_op + cosine_landmark_op = G_cosine.landmark_op + + diff_euclidean_manhattan = np.linalg.norm( + euclidean_landmark_op - manhattan_landmark_op, "fro" + ) + diff_euclidean_cosine = np.linalg.norm( + euclidean_landmark_op - cosine_landmark_op, "fro" + ) print( - f"Landmark operator sums: euclidean={euclidean_landmark_sum:.6f}, " - f"manhattan={manhattan_landmark_sum:.6f}, cosine={cosine_landmark_sum:.6f}" + f"Landmark operator differences: " + f"euclidean vs manhattan={diff_euclidean_manhattan:.6f}, " + f"euclidean vs cosine={diff_euclidean_cosine:.6f}" ) - # The landmark operators should be different when using different distance metrics operators_different = ( - abs(euclidean_landmark_sum - manhattan_landmark_sum) > 1e-10 - or abs(euclidean_landmark_sum - cosine_landmark_sum) > 1e-10 + diff_euclidean_manhattan > 1e-6 or diff_euclidean_cosine > 1e-6 ) if not operators_different: @@ -306,7 +311,7 @@ def test_landmark_with_non_euclidean_distances(): warnings.warn( "Landmark operators are identical across different distance metrics. 
" - "This strongly suggests the distance parameter is being ignored in build_landmark_op.", + "This suggests the distance parameter is being ignored in build_landmark_op.", UserWarning, )