Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions graphtools/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1205,11 +1205,11 @@ def build_landmark_op(self):
self.data if not hasattr(self, "data_nu") else self.data_nu
) # because of the scaling to review
if (
n_samples > 5000
n_samples > 5000 and self.distance == "euclidean"
): # sklearn.euclidean_distances is faster than cdist for big dataset
distances = euclidean_distances(data, data[landmark_indices])
else:
distances = cdist(data, data[landmark_indices], metric="euclidean")
distances = cdist(data, data[landmark_indices], metric=self.distance)
self._clusters = np.argmin(distances, axis=1)

else:
Expand Down
27 changes: 16 additions & 11 deletions test/test_landmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,29 +284,34 @@ def test_landmark_with_non_euclidean_distances():
# "but all distance metrics gave identical results"
# )

# Test that the landmark operators are different shapes/values when different distances
# are used (this is a more sensitive test than just cluster assignments)
euclidean_landmark_sum = np.sum(euclidean_G.landmark_op)
manhattan_landmark_sum = np.sum(G_manhattan.landmark_op)
cosine_landmark_sum = np.sum(G_cosine.landmark_op)
# Compare landmark operators using Frobenius norm
euclidean_landmark_op = euclidean_G.landmark_op
manhattan_landmark_op = G_manhattan.landmark_op
cosine_landmark_op = G_cosine.landmark_op

diff_euclidean_manhattan = np.linalg.norm(
euclidean_landmark_op - manhattan_landmark_op, "fro"
)
diff_euclidean_cosine = np.linalg.norm(
euclidean_landmark_op - cosine_landmark_op, "fro"
)

print(
f"Landmark operator sums: euclidean={euclidean_landmark_sum:.6f}, "
f"manhattan={manhattan_landmark_sum:.6f}, cosine={cosine_landmark_sum:.6f}"
f"Landmark operator differences: "
f"euclidean vs manhattan={diff_euclidean_manhattan:.6f}, "
f"euclidean vs cosine={diff_euclidean_cosine:.6f}"
)

# The landmark operators should be different when using different distance metrics
operators_different = (
abs(euclidean_landmark_sum - manhattan_landmark_sum) > 1e-10
or abs(euclidean_landmark_sum - cosine_landmark_sum) > 1e-10
diff_euclidean_manhattan > 1e-6 or diff_euclidean_cosine > 1e-6
)

if not operators_different:
import warnings

warnings.warn(
"Landmark operators are identical across different distance metrics. "
"This strongly suggests the distance parameter is being ignored in build_landmark_op.",
"This suggests the distance parameter is being ignored in build_landmark_op.",
UserWarning,
)

Expand Down