From 0138a99a8c170828b0d90b54a7de65a7b6835b66 Mon Sep 17 00:00:00 2001
From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com>
Date: Wed, 24 Feb 2021 18:11:26 +0000
Subject: [PATCH 001/111] Create Nystrom.py

---
 pykeops/numpy/nystrom/Nystrom.py | 278 +++++++++++++++++++++++++++++++
 1 file changed, 278 insertions(+)
 create mode 100644 pykeops/numpy/nystrom/Nystrom.py

diff --git a/pykeops/numpy/nystrom/Nystrom.py b/pykeops/numpy/nystrom/Nystrom.py
new file mode 100644
index 000000000..e4adb1f6e
--- /dev/null
+++ b/pykeops/numpy/nystrom/Nystrom.py
@@ -0,0 +1,278 @@
+import numpy as np
+import pykeops
+
+from pykeops.numpy import LazyTensor as LazyTensor_n
+from pykeops.numpy.cluster import grid_cluster
+from pykeops.numpy.cluster import from_matrix
+from pykeops.numpy.cluster import cluster_ranges_centroids, cluster_ranges
+from pykeops.numpy.cluster import sort_clusters
+
+from sklearn.utils import check_random_state
+from pykeops.torch import LazyTensor
+
+# For LinearOperator math
+from scipy.sparse.linalg import aslinearoperator, eigsh
+from scipy.sparse.linalg.interface import IdentityOperator
+
+
+#################################################################################
+
+class Nystrom_NK:
+    '''
+    Class to implement the Nystrom approximation using NumPy and PyKeOps.
+    * The fit method computes K^{-1}_q.
+    * The transform method maps the data into the feature space underlying
+    the Nystrom-approximated kernel.
+    * The method K_approx directly computes the Nystrom approximation.
+    Parameters:
+    n_components [int] = how many samples to select from the data.
+    kernel [str] = type of kernel to use. Current options = {linear, rbf, exp}.
+    sigma [float] = bandwidth constant for the RBF kernel.
+    exp_sigma [float] = bandwidth constant for the exponential kernel.
+    eps [float] = size of the square bins in block-sparse preprocessing.
+    k_means [int] = number of centroids for the K-means algorithm in block-sparse
+                    preprocessing.
+    n_iter [int] = number of iterations for K-means.
+    dtype [type] = type of the data: np.float32 or np.float64.
+    inv_eps [float] = additive invertibility constant for the matrix decomposition.
+    backend [string] = "GPU" or "CPU" mode.
+    verbose [boolean] = set True to print details.
+    random_state [None, int] = seed for the random sampling of the samples.
+                               To be used when reproducibility is needed.
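+
+    Example (an illustrative sketch only, mirroring the unit tests; assumes
+    a 2D float32 array and the API described above):
+        x = np.random.randn(1000, 3).astype(np.float32)
+        NK = Nystrom_NK(n_components=100, kernel='rbf', random_state=0).fit(x)
+        x_new = NK.transform(x)    # (1000, 100) feature map
+        K_approx = NK.K_approx(x)  # Nystrom approximation of the kernel matrix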
+ ''' + + def __init__(self, n_components=100, kernel='rbf', sigma:float = 1., + exp_sigma:float = 1.0, eps:float = 0.05, mask_radius:float = None, + k_means = 10, n_iter:int = 10, inv_eps:float = None, dtype = np.float32, + backend = None, verbose = False, random_state=None): + + self.n_components = n_components + self.kernel = kernel + self.random_state = random_state + self.sigma = sigma + self.exp_sigma = exp_sigma + self.eps = eps + self.mask_radius = mask_radius + self.k_means = k_means + self.n_iter = n_iter + self.dtype = dtype + self.verbose = verbose + + if not backend: + self.backend = 'GPU' if pykeops.config.gpu_available else 'CPU' + else: + self.backend = backend + + if inv_eps: + self.inv_eps = inv_eps + else: + if kernel == 'linear': + self.inv_eps = 1e-4 + else: + self.inv_eps = 1e-8 + + if not mask_radius: + if kernel == 'rbf': + self.mask_radius = 2* np.sqrt(2) * self.sigma + elif kernel == 'exp': + self.mask_radius = 8 * self.exp_sigma + + + def fit(self, x:np.ndarray): + ''' + Args: x = numpy array of shape (n_samples, n_features) + Returns: Fitted instance of the class + ''' + if self.verbose: + print(f'Working with backend = {self.backend}') + + # Basic checks + assert type(x) == np.ndarray, 'Input to fit(.) must be an array.' + assert x.shape[0] >= self.n_components, 'The application needs X.shape[0] >= n_components.' + assert self.exp_sigma > 0, 'Should be working with decaying exponential.' + + # Update dtype + self._update_dtype(x) + # Number of samples + n_samples = x.shape[0] + # Define basis + rnd = check_random_state(self.random_state) + inds = rnd.permutation(n_samples) + basis_inds = inds[:self.n_components] + basis = x[basis_inds] + # Build smaller kernel + basis_kernel = self._pairwise_kernels(basis) + # Spectral decomposition + S, U = self._spectral(basis_kernel) + S = np.maximum(S, 1e-12) + self.normalization_ = np.dot(U / np.sqrt(S), U.T) + self.components_ = basis + self.component_indices_ = inds + + return self + + + def _spectral(self, X_i:LazyTensor): + ''' + Helper function to compute eigendecomposition of K_q. + Written using LinearOperators which are lazy + representations of sparse and/or structured data. + Args: X_i[numpy LazyTensor] + Returns S[np.array] eigenvalues, + U[np.array] eigenvectors + ''' + K_linear = aslinearoperator(X_i) + # K <- K + eps + K_linear = K_linear + IdentityOperator(K_linear.shape, dtype=self.dtype) * self.inv_eps + k = K_linear.shape[0] - 1 + S, U = eigsh(K_linear, k=k, which='LM') + + return S, U + + + def transform(self, x:np.ndarray) -> np.array: + ''' Applies transform on the data. + + Args: + X [np.array] = data to transform + Returns + X [np.array] = data after transformation + ''' + + K_nq = self._pairwise_kernels(x, self.components_) + x_new = K_nq @ self.normalization_.T + + return x_new + + + def K_approx(self, x:np.array) -> np.array: + ''' Function to return Nystrom approximation to the kernel. + + Args: + X[np.array] = data used in fit(.) function. + Returns + K[np.array] = Nystrom approximation to kernel''' + + K_nq = self._pairwise_kernels(x, self.components_) + # For arrays: K_approx = K_nq @ K_q_inv @ K_nq.T + # But to use @ with lazy tensors we have: + K_q_inv = self.normalization_.T @ self.normalization_ + K_approx = K_nq @ (K_nq @ K_q_inv ).T + + return K_approx.T + + + def _pairwise_kernels(self, x:np.array, y:np.array = None) -> LazyTensor: + '''Helper function to build kernel + + Args: X = torch tensor of dimension 2, + K_type = type of Kernel to return. 
+        Returns:
+            K_ij[LazyTensor]
+        '''
+        if y is None:
+            y = x
+        if self.kernel == 'linear':
+            K_ij = x @ y.T
+        elif self.kernel == 'rbf':
+            # scale out of place so that the caller's arrays are not modified
+            x = x / self.sigma
+            y = y / self.sigma
+            x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :])
+            K_ij = (-((x_i - x_j) ** 2).sum(dim=2)).exp()
+            # block-sparse reduction preprocess
+            K_ij = self._Gauss_block_sparse_pre(x, y, K_ij)
+        elif self.kernel == 'exp':
+            x = x / self.exp_sigma
+            y = y / self.exp_sigma
+            x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :])
+            # exponential kernel of the Euclidean distance: sum the squared
+            # coordinate differences first, then take the square root
+            K_ij = (-(((x_i - x_j) ** 2).sum(dim=2).sqrt())).exp()
+            # block-sparse reduction preprocess
+            K_ij = self._Gauss_block_sparse_pre(x, y, K_ij)  # TODO
+
+        K_ij.backend = self.backend
+
+        return K_ij
+
+
+    def _Gauss_block_sparse_pre(self, x:np.array, y:np.array, K_ij:LazyTensor):
+        '''
+        Helper function to preprocess data for block-sparse reduction
+        of the Gaussian kernel.
+
+        Args:
+            x[np.array], y[np.array] = arrays giving rise to Gaussian kernel K(x,y)
+            K_ij[LazyTensor_n] = symbolic representation of K(x,y)
+            eps[float] = size for square bins
+        Returns:
+            K_ij[LazyTensor_n] = symbolic representation of K(x,y) with
+                                 set sparse ranges
+        '''
+        # labels for low dimensions
+        if x.shape[1] < 4 or y.shape[1] < 4:
+            x_labels = grid_cluster(x, self.eps)
+            y_labels = grid_cluster(y, self.eps)
+            # range and centroid per class
+            x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels)
+            y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels)
+        else:
+            # labels for higher dimensions
+            x_labels, x_centroids = self._KMeans(x)
+            y_labels, y_centroids = self._KMeans(y)
+            # compute ranges
+            x_ranges = cluster_ranges(x_labels)
+            y_ranges = cluster_ranges(y_labels)
+
+        # sort points
+        x, x_labels = sort_clusters(x, x_labels)
+        y, y_labels = sort_clusters(y, y_labels)
+        # Compute a coarse Boolean mask. For both kernels we compare squared
+        # centroid distances against the squared mask radius; this keeps the
+        # same pairs as a plain distance test and avoids calling .sqrt() on a
+        # NumPy array, which has no such method.
+        D = np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2)
+        keep = D < self.mask_radius ** 2
+        # mask -> set of integer tensors
+        ranges_ij = from_matrix(x_ranges, y_ranges, keep)
+        K_ij.ranges = ranges_ij  # block-sparsity pattern
+
+        return K_ij
+
+
+    def _KMeans(self,x:np.array):
+        ''' K-means with PyKeOps to do binning of the original data.
+        Args:
+            x[np.array] = data
+            k_means[int] = number of bins to build
+            n_iter[int] = number of iterations of the K-means loop
+        Returns:
+            labels[np.array] = class labels for each point in x
+            clusters[np.array] = coordinates for each centroid
+        '''
+        N, D = x.shape
+        clusters = np.copy(x[:self.k_means, :])  # initialization of clusters
+        x_i = LazyTensor_n(x[:, None, :])
+
+        for i in range(self.n_iter):
+            clusters_j = LazyTensor_n(clusters[None, :, :])
+            D_ij = ((x_i - clusters_j) ** 2).sum(-1)  # points-clusters kernel
+            labels = D_ij.argmin(axis=1).astype(int).reshape(N)  # Points -> Nearest cluster
+            Ncl = np.bincount(labels).astype(self.dtype)  # Class weights
+            for d in range(D):  # Compute the cluster centroids with np.bincount:
+                clusters[:, d] = np.bincount(labels, weights=x[:, d]) / Ncl
+
+        return labels, clusters
+
+
+    def _update_dtype(self,x):
+        ''' Helper function that sets dtype to that of
+            the given data in the fitting step.
+ + Args: + x [np.array] = raw data to remap + Returns: + nothing + ''' + self.dtype = x.dtype + self.inv_eps = np.array([self.inv_eps]).astype(np.float32)[0] \ No newline at end of file From d73a55bf14c6c2a4f0e42d4c529f7435eb50d730 Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Wed, 24 Feb 2021 18:58:28 +0000 Subject: [PATCH 002/111] adding code and unit test for nystrom --- pykeops/test/unit_tests_numpy.py | 38 +++++ pykeops/test/unit_tests_pytorch.py | 34 ++++ pykeops/torch/nystrom/nystrom.py | 257 +++++++++++++++++++++++++++++ 3 files changed, 329 insertions(+) create mode 100644 pykeops/torch/nystrom/nystrom.py diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index 601ad9ae0..87ce7cf91 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -26,6 +26,8 @@ class NumpyUnitTestCase(unittest.TestCase): N = int(6) D = int(3) E = int(3) + + nbatchdims = int(2) x = np.random.rand(M, D) @@ -436,7 +438,43 @@ def test_LazyTensor_sum(self): for (res_keops, res_numpy) in zip(full_results[0], full_results[1]): self.assertTrue(res_keops.shape == res_numpy.shape) self.assertTrue(np.allclose(res_keops, res_numpy, atol=1e-3)) + + ############################################################ + def Nystrom_K_approx_test(self): + ############################################################ + + from pykeops.numpy.nystrom import Nystrom_NK + inp = np.random.randint(1,10,(100,3)).astype(np.float32) + + kernels = ['rbf', 'exp'] + + for kernel in kernels: + N_NK = Nystrom_NK(n_components=20, kernel = kernel, + random_state=0).fit(inp) + + K = N_NK.K_approx(inp) + x_new = N_NK.transform(inp) + + ML2_error = np.linalg.norm(x_new @ x_new.T - K) / K.size + + self.assertTrue(ML2_error < 1e-2) + + ############################################################ + def Nystrom_K_shape_test(self): + ############################################################ + + from pykeops.numpy.nystrom import Nystrom_NK + inp = np.random.randint(1,10,size = (100,3)).astype(np.float32) + + kernels = ['rbf', 'exp'] + + for kernel in kernels: + N_NK = Nystrom_NK(n_components=20, kernel = 'rbf', + random_state=0).fit(inp) + + self.assertTrue(N_NK.normalization_.shape == (20,20)) + self.assertTrue(N_NK.transform(inp).shape == (100, 20)) if __name__ == "__main__": unittest.main() diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index a3bc09af0..81d9522dd 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -673,6 +673,40 @@ def invert_permutation_numpy(permutation): torch.allclose(grad_keops.flatten(), grad_torch.flatten(), rtol=1e-4) ) + ############################################################ + def Nystrom_K_approx_test(self): + ############################################################ + + from pykeops.torch.nystrom import LazyNystrom_TK as Nystrom_TK + import torch + + inp = torch.rand(100,3)*100 + kernels = ['rbf', 'exp'] + + for kernel in kernels: + N_TK = Nystrom_TK(n_components=20, kernel = kernel, random_state=0).fit(inp) + K = N_TK.K_approx(inp) + x_new = N_TK.transform(inp) + + ML2_error = np.linalg.norm(x_new @ x_new.T - K) / K.shape[0] + + self.assertTrue(ML2_error < 0.01) + + ############################################################ + def Nystrom_K_shape_test(self): + ############################################################ + + from pykeops.torch.nystrom import LazyNystrom_TK as Nystrom_TK + import torch + + inp = torch.rand(100,3)*100 + kernels = ['rbf', 'exp'] + + for 
kernel in kernels: + N_NT = Nystrom_TK(n_components=20, kernel = 'rbf', random_state=0).fit(inp) + + self.assertTrue(N_NT.normalization_.shape == (20,20)) + self.assertTrue(N_NT.transform(inp).shape == (100,20)) if __name__ == "__main__": """ diff --git a/pykeops/torch/nystrom/nystrom.py b/pykeops/torch/nystrom/nystrom.py new file mode 100644 index 000000000..ee7d6458f --- /dev/null +++ b/pykeops/torch/nystrom/nystrom.py @@ -0,0 +1,257 @@ +# !pip install pykeops[full] > install.log +# colab for this code +# https://colab.research.google.com/drive/1vF2cOSddbRFM5PLqxkIzyZ9XkuzO5DKN?usp=sharing +import numpy as np +import torch +import pykeops + +from pykeops.torch.cluster import grid_cluster +from pykeops.torch.cluster import from_matrix +from pykeops.torch.cluster import cluster_ranges_centroids, cluster_ranges +from pykeops.torch.cluster import sort_clusters +from pykeops.torch import LazyTensor + +from sklearn.utils import check_random_state + +from scipy.sparse.linalg import aslinearoperator, eigsh +from scipy.sparse.linalg.interface import IdentityOperator + + +################################################################################ +# Same as LazyNystrom_T but written with pyKeOps +import numpy as np +import torch +import pykeops + +from pykeops.numpy import LazyTensor as LazyTensor_n +from pykeops.torch.cluster import grid_cluster +from pykeops.torch.cluster import from_matrix +from pykeops.torch.cluster import cluster_ranges_centroids, cluster_ranges +from pykeops.torch.cluster import sort_clusters +from pykeops.torch import LazyTensor + +from sklearn.utils import check_random_state, as_float_array +from scipy.linalg import svd + +from scipy.sparse.linalg import aslinearoperator, eigsh +from scipy.sparse.linalg.interface import IdentityOperator +from pykeops.torch import Genred + +import matplotlib.pyplot as plt +import time + + +################################################################################ +# Same as LazyNystrom_T but written with pyKeOps + +class LazyNystrom_TK: + ''' + Class to implement Nystrom on torch LazyTensors. + This class works as an interface between lazy tensors and + the Nystrom algorithm in NumPy. + * The fit method computes K^{-1}_q. + * The transform method maps the data into the feature space underlying + the Nystrom-approximated kernel. + * The method K_approx directly computes the Nystrom approximation. + Parameters: + n_components [int] = how many samples to select from data. + kernel [str] = type of kernel to use. Current options = {linear, rbf}. + gamma [float] = exponential constant for the RBF kernel. + random_state=[None, float] = to set a random seed for the random + sampling of the samples. To be used when + reproducibility is needed. 
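+
+    Example (an illustrative sketch only, mirroring the unit tests; assumes
+    a 2D float tensor and the API described above):
+        x = torch.rand(100, 3) * 100
+        TK = LazyNystrom_TK(n_components=20, kernel='rbf', random_state=0).fit(x)
+        x_new = TK.transform(x)    # (100, 20) feature map
+        K_approx = TK.K_approx(x)  # Nystrom approximation of the kernel matrix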
+ ''' + + def __init__(self, n_components=100, kernel='rbf', sigma: float = 1., + exp_sigma: float = 1.0, eps: float = 0.05, mask_radius: float = None, + k_means=10, n_iter: int = 10, inv_eps: float = None, dtype=np.float32, + backend='CPU', random_state=None): + + self.n_components = n_components + self.kernel = kernel + self.random_state = random_state + self.sigma = sigma + self.exp_sigma = exp_sigma + self.eps = eps + self.mask_radius = mask_radius + self.k_means = k_means + self.n_iter = n_iter + self.dtype = dtype + self.backend = backend # conditional here + if inv_eps: + self.inv_eps = inv_eps + else: + if kernel == 'linear': + self.inv_eps = 1e-4 + else: + self.inv_eps = 1e-8 + if not mask_radius: + if kernel == 'rbf': + self.mask_radius = 2 * np.sqrt(2) * self.sigma + if kernel == 'exp': + self.mask_radius = 8 * self.exp_sigma + + def fit(self, X: torch.tensor): + ''' + Args: X = torch tensor with features of shape + (1, n_samples, n_features) + Returns: Fitted instance of the class + ''' + + # Basic checks: we have a lazy tensor and n_components isn't too large + assert type(X) == torch.Tensor, 'Input to fit(.) must be a Tensor.' + assert X.size(0) >= self.n_components, f'The application needs X.shape[1] >= n_components.' + # self._update_dtype(X) + # Number of samples + n_samples = X.size(0) + # Define basis + rnd = check_random_state(self.random_state) + inds = rnd.permutation(n_samples) + basis_inds = inds[:self.n_components] + basis = X[basis_inds] + # Build smaller kernel + basis_kernel = self._pairwise_kernels(basis, kernel=self.kernel) + # Get SVD + U, S, V = torch.svd(basis_kernel) + S = torch.maximum(S, torch.ones(S.size()) * 1e-12) + self.normalization_ = torch.mm(U / np.sqrt(S), V.t()) + self.components_ = basis + self.component_indices_ = inds + + return self + + def transform(self, X: torch.tensor) -> torch.tensor: + ''' Applies transform on the data. + Args: + X [LazyTensor] = data to transform + Returns + X [LazyTensor] = data after transformation + ''' + K_nq = self._pairwise_kernels(X, self.components_, self.kernel) + return K_nq @ self.normalization_.t() + + def K_approx(self, X: torch.tensor) -> torch.tensor: + ''' Function to return Nystrom approximation to the kernel. + Args: + X[torch.tensor] = data used in fit(.) function. + Returns + K[torch.tensor] = Nystrom approximation to kernel''' + + K_nq = self._pairwise_kernels(X, self.components_, self.kernel) + K_approx = K_nq @ self.normalization_ @ K_nq.t() + return K_approx + + def _pairwise_kernels(self, x: torch.tensor, y: torch.tensor = None, kernel='rbf', + sigma: float = 1.) -> LazyTensor: + '''Helper function to build kernel + Args: X = torch tensor of dimension 2. 
+ K_type = type of Kernel to return + Returns: + K_ij[LazyTensor] + ''' + if y is None: + y = x + if kernel == 'linear': + K_ij = x @ y.T + elif kernel == 'rbf': + x /= sigma + y /= sigma + + x_i, x_j = LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :]) + K_ij = (-1 * ((x_i - x_j) ** 2).sum(-1)).exp() + + # block-sparse reduction preprocess + K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) + + + elif kernel == 'exp': + x_i, x_j = LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :]) + K_ij = (-1 * ((x_i - x_j) ** 2).sum().sqrt()).exp() + # block-sparse reduction preprocess + K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) # TODO + + K_ij = K_ij @ torch.diag(torch.ones(K_ij.shape[1])) # make 1 on diag only + + K_ij.backend = self.backend + return K_ij + + def _Gauss_block_sparse_pre(self, x: torch.tensor, y: torch.tensor, K_ij: LazyTensor): + ''' + Helper function to preprocess data for block-sparse reduction + of the Gaussian kernel + + Args: + x[np.array], y[np.array] = arrays giving rise to Gaussian kernel K(x,y) + K_ij[LazyTensor_n] = symbolic representation of K(x,y) + eps[float] = size for square bins + Returns: + K_ij[LazyTensor_n] = symbolic representation of K(x,y) with + set sparse ranges + ''' + # labels for low dimensions + + if x.shape[1] < 4 or y.shape[1] < 4: + + x_labels = grid_cluster(x, self.eps) + y_labels = grid_cluster(y, self.eps) + # range and centroid per class + x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels) + y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels) + else: + # labels for higher dimensions + + x_labels, x_centroids = self._KMeans(x) + y_labels, y_centroids = self._KMeans(y) + # compute ranges + x_ranges = cluster_ranges(x_labels) + y_ranges = cluster_ranges(y_labels) + + # sort points + x, x_labels = sort_clusters(x, x_labels) + y, y_labels = sort_clusters(y, y_labels) + # Compute a coarse Boolean mask: + D = torch.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2) + keep = D < (self.mask_radius) ** 2 + # mask -> set of integer tensors + ranges_ij = from_matrix(x_ranges, y_ranges, keep) + K_ij.ranges = ranges_ij # block-sparsity pattern + + return K_ij + + def _KMeans(self, x: torch.tensor): + ''' KMeans with Pykeops to do binning of original data. + Args: + x[np.array] = data + k_means[int] = number of bins to build + n_iter[int] = number iterations of KMeans loop + Returns: + labels[np.array] = class labels for each point in x + clusters[np.array] = coordinates for each centroid + ''' + + N, D = x.shape + clusters = torch.clone(x[:self.k_means, :]) # initialization of clusters + x_i = LazyTensor(x[:, None, :]) + + for i in range(self.n_iter): + + clusters_j = LazyTensor(clusters[None, :, :]) + D_ij = ((x_i - clusters_j) ** 2).sum(-1) # points-clusters kernel + labels = D_ij.argmin(axis=1).reshape(N) # Points -> Nearest cluster + Ncl = torch.bincount(labels) # Class weights + for d in range(D): # Compute the cluster centroids with np.bincount: + clusters[:, d] = torch.bincount(labels, weights=x[:, d]) / Ncl + + return labels, clusters + + def _update_dtype(self, x): + ''' Helper function that sets inv_eps to dtype to that of + the given data in the fitting step. 
+ + Args: + x [np.array] = raw data to remap + Returns: + nothing + ''' + self.dtype = x.dtype + self.inv_eps = np.array([self.inv_eps]).astype(self.dtype)[0] \ No newline at end of file From ef95f4cef8230d2e2ff66bbc7b40e22b607a7480 Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Wed, 24 Feb 2021 19:18:30 +0000 Subject: [PATCH 003/111] added ivf_np tests --- pykeops/test/unit_tests_numpy.py | 33 ++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index 601ad9ae0..5ef5e4b28 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -436,6 +436,39 @@ def test_LazyTensor_sum(self): for (res_keops, res_numpy) in zip(full_results[0], full_results[1]): self.assertTrue(res_keops.shape == res_numpy.shape) self.assertTrue(np.allclose(res_keops, res_numpy, atol=1e-3)) + + ############################################################ + def test_IVF(self): + ########################################################### + from pykeops.numpy.nn.ivf_np import ivf + import numpy as np + + np.random.seed(0) + N, D, K, k, a = 10**3, 3, 50, 5, 5 + + # Generate random datapoints x, y + x = 0.7 * np.random.normal(size=(N, D)) + 0.3 + y = 0.7 * np.random.normal(size=(N, D)) + 0.3 + + # Ground truth K nearest neighbours + truth = np.argsort(((np.expand_dims(y,1)-np.expand_dims(x,0))**2).sum(-1),axis=1) + truth = truth[:,:k] + + # IVF K nearest neighbours + IVF = ivf() + IVF.fit(x,a=a) + ivf_fit = IVF.kneighbors(y) + + # Calculate accuracy + accuracy = 0 + for i in range(k): + accuracy += float(np.sum(ivf_fit == truth))/N + truth = np.roll(truth, 1, -1) # Create a rolling window (index positions may not match) + # Record accuracies + accuracy = float(accuracy/k) + + print(a,accuracy) + self.assertTrue(accuracy >= 0.8, f'Failed at {a}, {accuracy}') if __name__ == "__main__": From 4c4cf472a6b680c807ccb04e4d14d05939b64a42 Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Wed, 24 Feb 2021 19:20:42 +0000 Subject: [PATCH 004/111] added tests for ivf --- pykeops/test/unit_tests_pytorch.py | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index a3bc09af0..053149b40 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -672,6 +672,39 @@ def invert_permutation_numpy(permutation): self.assertTrue( torch.allclose(grad_keops.flatten(), grad_torch.flatten(), rtol=1e-4) ) + + ############################################################ + def test_IVF(self): + ############################################################ + from pykeops.torch.nn.ivf_torch import ivf + import torch + + torch.manual_seed(0) + N, D, K, k, a = 10**3, 3, 50, 5, 5 + + # Generate random datapoints x, y + x = 0.7 * torch.randn(N, D) + 0.3 + y = 0.7 * torch.randn(N, D) + 0.3 + + # Ground truth K nearest neighbours + truth = torch.argsort(((y.unsqueeze(1)-x.unsqueeze(0))**2).sum(-1),dim=1) + truth = truth[:,:k] + + # IVF K nearest neighbours + IVF = ivf() + IVF.fit(x,a=a) + ivf_fit = IVF.kneighbors(y) + + # Calculate accuracy + accuracy = 0 + for i in range(k): + accuracy += torch.sum(ivf_fit == truth).float()/N + truth = torch.roll(truth, 1, -1) # Create a rolling window (index positions may not match) + # Record accuracies + accuracy = float(accuracy/k) + + print(a,accuracy) + 
self.assertTrue(accuracy >= 0.8, f'Failed at {a}, {accuracy}') if __name__ == "__main__": From dad67591aa873a336b8bee26e567c8d2df65e409 Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Wed, 24 Feb 2021 19:30:31 +0000 Subject: [PATCH 005/111] added ivf numpy tests --- pykeops/test/unit_tests_numpy.py | 63 +++++++++++++++----------------- 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index 87ce7cf91..5ef5e4b28 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -26,8 +26,6 @@ class NumpyUnitTestCase(unittest.TestCase): N = int(6) D = int(3) E = int(3) - - nbatchdims = int(2) x = np.random.rand(M, D) @@ -438,43 +436,40 @@ def test_LazyTensor_sum(self): for (res_keops, res_numpy) in zip(full_results[0], full_results[1]): self.assertTrue(res_keops.shape == res_numpy.shape) self.assertTrue(np.allclose(res_keops, res_numpy, atol=1e-3)) - - ############################################################ - def Nystrom_K_approx_test(self): - ############################################################ - - from pykeops.numpy.nystrom import Nystrom_NK - inp = np.random.randint(1,10,(100,3)).astype(np.float32) - - kernels = ['rbf', 'exp'] - - for kernel in kernels: - N_NK = Nystrom_NK(n_components=20, kernel = kernel, - random_state=0).fit(inp) - - K = N_NK.K_approx(inp) - x_new = N_NK.transform(inp) - ML2_error = np.linalg.norm(x_new @ x_new.T - K) / K.size - - self.assertTrue(ML2_error < 1e-2) - - ############################################################ - def Nystrom_K_shape_test(self): - ############################################################ - - from pykeops.numpy.nystrom import Nystrom_NK - inp = np.random.randint(1,10,size = (100,3)).astype(np.float32) + ############################################################ + def test_IVF(self): + ########################################################### + from pykeops.numpy.nn.ivf_np import ivf + import numpy as np - kernels = ['rbf', 'exp'] + np.random.seed(0) + N, D, K, k, a = 10**3, 3, 50, 5, 5 - for kernel in kernels: - N_NK = Nystrom_NK(n_components=20, kernel = 'rbf', - random_state=0).fit(inp) + # Generate random datapoints x, y + x = 0.7 * np.random.normal(size=(N, D)) + 0.3 + y = 0.7 * np.random.normal(size=(N, D)) + 0.3 + # Ground truth K nearest neighbours + truth = np.argsort(((np.expand_dims(y,1)-np.expand_dims(x,0))**2).sum(-1),axis=1) + truth = truth[:,:k] + + # IVF K nearest neighbours + IVF = ivf() + IVF.fit(x,a=a) + ivf_fit = IVF.kneighbors(y) + + # Calculate accuracy + accuracy = 0 + for i in range(k): + accuracy += float(np.sum(ivf_fit == truth))/N + truth = np.roll(truth, 1, -1) # Create a rolling window (index positions may not match) + # Record accuracies + accuracy = float(accuracy/k) + + print(a,accuracy) + self.assertTrue(accuracy >= 0.8, f'Failed at {a}, {accuracy}') - self.assertTrue(N_NK.normalization_.shape == (20,20)) - self.assertTrue(N_NK.transform(inp).shape == (100, 20)) if __name__ == "__main__": unittest.main() From de88326ce60aadff1bf31d3d24b4688a0726d749 Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Wed, 24 Feb 2021 19:31:12 +0000 Subject: [PATCH 006/111] added tests for ivf_pytorch --- pykeops/test/unit_tests_pytorch.py | 61 +++++++++++++++--------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/pykeops/test/unit_tests_pytorch.py 
b/pykeops/test/unit_tests_pytorch.py index 81d9522dd..053149b40 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -672,41 +672,40 @@ def invert_permutation_numpy(permutation): self.assertTrue( torch.allclose(grad_keops.flatten(), grad_torch.flatten(), rtol=1e-4) ) - - ############################################################ - def Nystrom_K_approx_test(self): - ############################################################ - - from pykeops.torch.nystrom import LazyNystrom_TK as Nystrom_TK - import torch - - inp = torch.rand(100,3)*100 - kernels = ['rbf', 'exp'] - for kernel in kernels: - N_TK = Nystrom_TK(n_components=20, kernel = kernel, random_state=0).fit(inp) - K = N_TK.K_approx(inp) - x_new = N_TK.transform(inp) - - ML2_error = np.linalg.norm(x_new @ x_new.T - K) / K.shape[0] - - self.assertTrue(ML2_error < 0.01) - - ############################################################ - def Nystrom_K_shape_test(self): - ############################################################ - - from pykeops.torch.nystrom import LazyNystrom_TK as Nystrom_TK + ############################################################ + def test_IVF(self): + ############################################################ + from pykeops.torch.nn.ivf_torch import ivf import torch + + torch.manual_seed(0) + N, D, K, k, a = 10**3, 3, 50, 5, 5 - inp = torch.rand(100,3)*100 - kernels = ['rbf', 'exp'] - - for kernel in kernels: - N_NT = Nystrom_TK(n_components=20, kernel = 'rbf', random_state=0).fit(inp) + # Generate random datapoints x, y + x = 0.7 * torch.randn(N, D) + 0.3 + y = 0.7 * torch.randn(N, D) + 0.3 + + # Ground truth K nearest neighbours + truth = torch.argsort(((y.unsqueeze(1)-x.unsqueeze(0))**2).sum(-1),dim=1) + truth = truth[:,:k] + + # IVF K nearest neighbours + IVF = ivf() + IVF.fit(x,a=a) + ivf_fit = IVF.kneighbors(y) + + # Calculate accuracy + accuracy = 0 + for i in range(k): + accuracy += torch.sum(ivf_fit == truth).float()/N + truth = torch.roll(truth, 1, -1) # Create a rolling window (index positions may not match) + # Record accuracies + accuracy = float(accuracy/k) + + print(a,accuracy) + self.assertTrue(accuracy >= 0.8, f'Failed at {a}, {accuracy}') - self.assertTrue(N_NT.normalization_.shape == (20,20)) - self.assertTrue(N_NT.transform(inp).shape == (100,20)) if __name__ == "__main__": """ From 21207c5e629cf0c6eb0b6ce098b59adc61c13840 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Wed, 24 Feb 2021 19:37:09 +0000 Subject: [PATCH 007/111] final edit --- pykeops/numpy/nn/__init__.py | 0 pykeops/numpy/nn/ivf.py | 134 ++++++++++++++++++++++++++++ pykeops/test/unit_tests_numpy.py | 3 +- pykeops/test/unit_tests_pytorch.py | 3 +- pykeops/torch/nn/__init__.py | 0 pykeops/torch/nn/ivf.py | 137 +++++++++++++++++++++++++++++ 6 files changed, 273 insertions(+), 4 deletions(-) create mode 100644 pykeops/numpy/nn/__init__.py create mode 100644 pykeops/numpy/nn/ivf.py create mode 100644 pykeops/torch/nn/__init__.py create mode 100644 pykeops/torch/nn/ivf.py diff --git a/pykeops/numpy/nn/__init__.py b/pykeops/numpy/nn/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py new file mode 100644 index 000000000..d4685d3d2 --- /dev/null +++ b/pykeops/numpy/nn/ivf.py @@ -0,0 +1,134 @@ +from pykeops.numpy import LazyTensor +from pykeops.numpy.cluster import cluster_ranges_centroids +from pykeops.numpy.cluster import from_matrix +import pykeops.config +import numpy as np 
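+
+# Usage sketch (illustrative only, mirroring the unit tests; x is the fitted
+# dataset and y the queries, both 2D NumPy arrays):
+#     IVF = ivf(k=5)
+#     IVF.fit(x, clusters=50, a=5)
+#     nn = IVF.kneighbors(y)  # (len(y), 5) indices of neighbours in x
+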
+class ivf(): + def __init__(self,k=5): + self.__c=None + self.__k=k + self.__x=None + self.__keep=None + self.__x_ranges=None + self.__x_perm=None + self.__y_perm=None + self.__use_gpu=None + + def __KMeans(self,x, K=10, Niter=15): + N, D = x.shape + c = np.copy(x[:K, :]) + x_i = LazyTensor(x[:, None, :]) + for i in range(Niter): + c_j = LazyTensor(c[None, :, :]) + D_ij = ((x_i - c_j) ** 2).sum(-1) + if self.__use_gpu: + D_ij.backend='GPU' + else: + D_ij.backend='CPU' + cl = D_ij.argmin(axis=1).astype(int).reshape(N) + + Ncl = np.bincount(cl).astype(dtype = "float32") + for d in range(D): + c[:, d] = np.bincount(cl, weights=x[:, d]) / Ncl + return cl, c + + def __k_argmin(self,x,y,k=1): + + x_LT=LazyTensor(np.expand_dims(x, 1)) + y_LT=LazyTensor(np.expand_dims(y, 0)) + d=((x_LT-y_LT)**2).sum(-1) + if self.__use_gpu: + d.backend='GPU' + else: + d.backend='CPU' + if k==1: + return d.argmin(dim=1).flatten() + else: + return d.argKmin(K=k,dim=1) + + def __sort_clusters(self,x,lab,store_x=True): + perm=np.argsort(lab.flatten()) + if store_x: + self.__x_perm=perm + else: + self.__y_perm=perm + return x[perm],lab[perm] + + def __unsort(self,nn): + return np.take(self.__x_perm[nn],self.__y_perm.argsort(),axis=0) + + def fit(self,x,clusters=50,a=5,use_gpu=True,n=15): + ''' + Fits the main dataset + ''' + if type(x)!=np.ndarray: + raise ValueError('Input must be a numpy ndarray') + if type(clusters)!=int: + raise ValueError('Clusters must be an integer') + if clusters>=len(x): + raise ValueError('Number of clusters must be less than length of dataset') + if type(a)!=int: + raise ValueError('Number of clusters to search over must be an integer') + if a>clusters: + raise ValueError('Number of clusters to search over must be less than total number of clusters') + if len(x.shape)!=2: + raise ValueError('Input must be a 2D array') + + if use_gpu and not pykeops.config.gpu_available: + raise ValueError('use_gpu = True but GPU not detected') + self.__use_gpu=use_gpu + cl, c = self.__KMeans(x,clusters,Niter=n) + self.__c=c + cl=self.__assign(x) + + ncl=self.__k_argmin(c,c,k=a) + self.__x_ranges, _, _ = cluster_ranges_centroids(x, cl) + x, x_labels = self.__sort_clusters(x,cl,store_x=True) + self.__x=x + + r=np.arange(clusters).repeat(a).T.reshape(-1) + self.__keep= np.zeros([clusters,clusters], dtype=bool) + self.__keep[r,ncl.flatten()]=True + return self + + + def __assign(self,x,c=None): + if c is None: + c=self.__c + return self.__k_argmin(x,c) + + def kneighbors(self,y,sparse=True): + ''' + Obtain the k nearest neighbors of the query dataset y + ''' + if self.__x is None: + raise ValueError('Input dataset not fitted yet! 
Call .fit() first!') + if type(y)!=np.ndarray: + raise ValueError("Query dataset must be a numpy ndarray") + if len(y.shape)!=2: + raise ValueError('Query dataset must be a 2D array') + if self.__x.shape[-1]!=y.shape[-1]: + raise ValueError('Query and dataset must have same dimensions') + + y_labels=self.__assign(y,self.__c) + y_ranges,_,_ = cluster_ranges_centroids(y, y_labels) + + y, y_labels = self.__sort_clusters(y, y_labels,store_x=False) + + x_LT=LazyTensor(np.expand_dims(self.__x,0)) + y_LT=LazyTensor(np.expand_dims(y,1)) + D_ij=((y_LT-x_LT)**2).sum(-1) + ranges_ij = from_matrix(y_ranges,self.__x_ranges,self.__keep) + D_ij.ranges=ranges_ij + if self.__use_gpu: + D_ij.backend='GPU' + else: + D_ij.backend='CPU' + nn=D_ij.argKmin(K=self.__k,axis=1) + return self.__unsort(nn) + + def brute_force(self,x,y,k=5): + x_LT=LazyTensor(np.expand_dims(x,0)) + y_LT=LazyTensor(np.expand_dims(y,1)) + D_ij=((y_LT-x_LT)**2).sum(-1) + return D_ij.argKmin(K=k,axis=1) diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index 5ef5e4b28..17629bd24 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -440,7 +440,7 @@ def test_LazyTensor_sum(self): ############################################################ def test_IVF(self): ########################################################### - from pykeops.numpy.nn.ivf_np import ivf + from pykeops.numpy.nn.ivf import ivf import numpy as np np.random.seed(0) @@ -467,7 +467,6 @@ def test_IVF(self): # Record accuracies accuracy = float(accuracy/k) - print(a,accuracy) self.assertTrue(accuracy >= 0.8, f'Failed at {a}, {accuracy}') diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index 053149b40..b23ab525c 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -676,7 +676,7 @@ def invert_permutation_numpy(permutation): ############################################################ def test_IVF(self): ############################################################ - from pykeops.torch.nn.ivf_torch import ivf + from pykeops.torch.nn.ivf import ivf import torch torch.manual_seed(0) @@ -703,7 +703,6 @@ def test_IVF(self): # Record accuracies accuracy = float(accuracy/k) - print(a,accuracy) self.assertTrue(accuracy >= 0.8, f'Failed at {a}, {accuracy}') diff --git a/pykeops/torch/nn/__init__.py b/pykeops/torch/nn/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pykeops/torch/nn/ivf.py b/pykeops/torch/nn/ivf.py new file mode 100644 index 000000000..5cfe88204 --- /dev/null +++ b/pykeops/torch/nn/ivf.py @@ -0,0 +1,137 @@ +from pykeops.torch import LazyTensor +from pykeops.torch.cluster import cluster_ranges_centroids +from pykeops.torch.cluster import from_matrix + +import torch + +use_cuda = torch.cuda.is_available() +if use_cuda: + torch.cuda.synchronize() + +class ivf(): + def __init__(self,k=5): + self.__c=None + self.__k=k + self.__x=None + self.__keep=None + self.__x_ranges=None + self.__x_perm=None + self.__y_perm=None + self.__device=None + + def __KMeans(self,x, K=10, Niter=15): + N, D = x.shape + c = x[:K, :].clone() + x_i = LazyTensor(x.view(N, 1, D).to(self.__device)) + for i in range(Niter): + c_j = LazyTensor(c.view(1, K, D).to(self.__device)) + D_ij = ((x_i - c_j) ** 2).sum(-1) + cl = D_ij.argmin(dim=1).long().view(-1) + c.zero_() + c.scatter_add_(0, cl[:, None].repeat(1, D), x) + Ncl = torch.bincount(cl, minlength=K).type_as(c).view(K, 1) + c /= Ncl + return cl, c + + def __k_argmin(self,x,y,k=1): + if 
use_cuda: + torch.cuda.synchronize() + x_LT=LazyTensor(x.unsqueeze(1).to(self.__device)) + y_LT=LazyTensor(y.unsqueeze(0).to(self.__device)) + d=((x_LT-y_LT)**2).sum(-1) + if k==1: + return d.argmin(dim=1).long().view(-1) + else: + return d.argKmin(K=k,dim=1).long() + + def __sort_clusters(self,x,lab,store_x=True): + lab, perm = torch.sort(lab.view(-1)) + if store_x: + self.__x_perm=perm + else: + self.__y_perm=perm + return x[perm],lab + + def __unsort(self,nn): + return torch.index_select(self.__x_perm[nn],0,self.__y_perm.argsort()) + + def fit(self,x,clusters=50,a=5,n=15): + ''' + Fits the main dataset + ''' + if type(x)!=torch.Tensor: + raise ValueError('Input must be a torch tensor') + if type(clusters)!=int: + raise ValueError('Clusters must be an integer') + if clusters>=len(x): + raise ValueError('Number of clusters must be less than length of dataset') + if type(a)!=int: + raise ValueError('Number of clusters to search over must be an integer') + if a>clusters: + raise ValueError('Number of clusters to search over must be less than total number of clusters') + if len(x.shape)!=2: + raise ValueError('Input must be a 2D array') + x=x.contiguous() + self.__device=x.device + cl, c = self.__KMeans(x,clusters,Niter=n) + self.__c=c + + cl=self.__assign(x) + if use_cuda: + torch.cuda.synchronize() + + ncl=self.__k_argmin(c,c,k=a) + self.__x_ranges, _, _ = cluster_ranges_centroids(x, cl) + + x, x_labels = self.__sort_clusters(x,cl,store_x=True) + self.__x=x + r=torch.arange(clusters).repeat(a,1).T.reshape(-1).long() + self.__keep= torch.zeros([clusters,clusters], dtype=torch.bool).to(self.__device) + self.__keep[r,ncl.flatten()]=True + return self + + + def __assign(self,x,c=None): + if c is None: + c=self.__c + return self.__k_argmin(x,c) + + def kneighbors(self,y): + ''' + Obtain the k nearest neighbors of the query dataset y + ''' + if self.__x is None: + raise ValueError('Input dataset not fitted yet! 
Call .fit() first!') + if type(y)!=torch.Tensor: + raise ValueError("Query dataset must be a torch tensor") + if y.device!=self.__device: + raise ValueError('Input dataset and query dataset must be on same device') + if len(y.shape)!=2: + raise ValueError('Query dataset must be a 2D tensor') + if self.__x.shape[-1]!=y.shape[-1]: + raise ValueError('Query and dataset must have same dimensions') + if use_cuda: + torch.cuda.synchronize() + y=y.contiguous() + y_labels=self.__assign(y) + + + y_ranges,_,_ = cluster_ranges_centroids(y,y_labels) + self.__y_ranges=y_ranges + y, y_labels = self.__sort_clusters(y, y_labels,store_x=False) + x_LT=LazyTensor(self.__x.unsqueeze(0).to(self.__device).contiguous()) + y_LT=LazyTensor(y.unsqueeze(1).to(self.__device).contiguous()) + D_ij=((y_LT-x_LT)**2).sum(-1) + + ranges_ij = from_matrix(y_ranges, self.__x_ranges, self.__keep) + D_ij.ranges=ranges_ij + nn=D_ij.argKmin(K=self.__k,axis=1) + return self.__unsort(nn) + + def brute_force(self,x,y,k=5): + if use_cuda: + torch.cuda.synchronize() + x_LT=LazyTensor(x.unsqueeze(0)) + y_LT=LazyTensor(y.unsqueeze(1)) + D_ij=((y_LT-x_LT)**2).sum(-1) + return D_ij.argKmin(K=k,axis=1) \ No newline at end of file From 9f3b308ddcf4c05c8739543b957e10dabd66a2a9 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Wed, 24 Feb 2021 19:39:40 +0000 Subject: [PATCH 008/111] add empty init files --- pykeops/numpy/nystrom/__init__.py | 0 pykeops/torch/nystrom/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 pykeops/numpy/nystrom/__init__.py create mode 100644 pykeops/torch/nystrom/__init__.py diff --git a/pykeops/numpy/nystrom/__init__.py b/pykeops/numpy/nystrom/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pykeops/torch/nystrom/__init__.py b/pykeops/torch/nystrom/__init__.py new file mode 100644 index 000000000..e69de29bb From bbf9876c3a5dd7d11de0d5ffc9c53abe7581097c Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Wed, 24 Feb 2021 19:45:30 +0000 Subject: [PATCH 009/111] make lint happy --- pykeops/numpy/nn/ivf.py | 261 +++++++++++++++-------------- pykeops/numpy/nystrom/Nystrom.py | 227 +++++++++++++------------ pykeops/test/unit_tests_numpy.py | 30 ++-- pykeops/test/unit_tests_pytorch.py | 22 +-- pykeops/torch/nn/ivf.py | 257 ++++++++++++++-------------- pykeops/torch/nystrom/nystrom.py | 116 +++++++------ 6 files changed, 477 insertions(+), 436 deletions(-) diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index d4685d3d2..c4715d3fd 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -3,132 +3,135 @@ from pykeops.numpy.cluster import from_matrix import pykeops.config import numpy as np -class ivf(): - def __init__(self,k=5): - self.__c=None - self.__k=k - self.__x=None - self.__keep=None - self.__x_ranges=None - self.__x_perm=None - self.__y_perm=None - self.__use_gpu=None - - def __KMeans(self,x, K=10, Niter=15): - N, D = x.shape - c = np.copy(x[:K, :]) - x_i = LazyTensor(x[:, None, :]) - for i in range(Niter): - c_j = LazyTensor(c[None, :, :]) - D_ij = ((x_i - c_j) ** 2).sum(-1) - if self.__use_gpu: - D_ij.backend='GPU' - else: - D_ij.backend='CPU' - cl = D_ij.argmin(axis=1).astype(int).reshape(N) - - Ncl = np.bincount(cl).astype(dtype = "float32") - for d in range(D): - c[:, d] = np.bincount(cl, weights=x[:, d]) / Ncl - return cl, c - - def __k_argmin(self,x,y,k=1): - - x_LT=LazyTensor(np.expand_dims(x, 1)) - y_LT=LazyTensor(np.expand_dims(y, 0)) - 
d=((x_LT-y_LT)**2).sum(-1) - if self.__use_gpu: - d.backend='GPU' - else: - d.backend='CPU' - if k==1: - return d.argmin(dim=1).flatten() - else: - return d.argKmin(K=k,dim=1) - - def __sort_clusters(self,x,lab,store_x=True): - perm=np.argsort(lab.flatten()) - if store_x: - self.__x_perm=perm - else: - self.__y_perm=perm - return x[perm],lab[perm] - - def __unsort(self,nn): - return np.take(self.__x_perm[nn],self.__y_perm.argsort(),axis=0) - - def fit(self,x,clusters=50,a=5,use_gpu=True,n=15): - ''' - Fits the main dataset - ''' - if type(x)!=np.ndarray: - raise ValueError('Input must be a numpy ndarray') - if type(clusters)!=int: - raise ValueError('Clusters must be an integer') - if clusters>=len(x): - raise ValueError('Number of clusters must be less than length of dataset') - if type(a)!=int: - raise ValueError('Number of clusters to search over must be an integer') - if a>clusters: - raise ValueError('Number of clusters to search over must be less than total number of clusters') - if len(x.shape)!=2: - raise ValueError('Input must be a 2D array') - - if use_gpu and not pykeops.config.gpu_available: - raise ValueError('use_gpu = True but GPU not detected') - self.__use_gpu=use_gpu - cl, c = self.__KMeans(x,clusters,Niter=n) - self.__c=c - cl=self.__assign(x) - - ncl=self.__k_argmin(c,c,k=a) - self.__x_ranges, _, _ = cluster_ranges_centroids(x, cl) - x, x_labels = self.__sort_clusters(x,cl,store_x=True) - self.__x=x - - r=np.arange(clusters).repeat(a).T.reshape(-1) - self.__keep= np.zeros([clusters,clusters], dtype=bool) - self.__keep[r,ncl.flatten()]=True - return self - - - def __assign(self,x,c=None): - if c is None: - c=self.__c - return self.__k_argmin(x,c) - - def kneighbors(self,y,sparse=True): - ''' - Obtain the k nearest neighbors of the query dataset y - ''' - if self.__x is None: - raise ValueError('Input dataset not fitted yet! 
Call .fit() first!') - if type(y)!=np.ndarray: - raise ValueError("Query dataset must be a numpy ndarray") - if len(y.shape)!=2: - raise ValueError('Query dataset must be a 2D array') - if self.__x.shape[-1]!=y.shape[-1]: - raise ValueError('Query and dataset must have same dimensions') - - y_labels=self.__assign(y,self.__c) - y_ranges,_,_ = cluster_ranges_centroids(y, y_labels) - - y, y_labels = self.__sort_clusters(y, y_labels,store_x=False) - - x_LT=LazyTensor(np.expand_dims(self.__x,0)) - y_LT=LazyTensor(np.expand_dims(y,1)) - D_ij=((y_LT-x_LT)**2).sum(-1) - ranges_ij = from_matrix(y_ranges,self.__x_ranges,self.__keep) - D_ij.ranges=ranges_ij - if self.__use_gpu: - D_ij.backend='GPU' - else: - D_ij.backend='CPU' - nn=D_ij.argKmin(K=self.__k,axis=1) - return self.__unsort(nn) - - def brute_force(self,x,y,k=5): - x_LT=LazyTensor(np.expand_dims(x,0)) - y_LT=LazyTensor(np.expand_dims(y,1)) - D_ij=((y_LT-x_LT)**2).sum(-1) - return D_ij.argKmin(K=k,axis=1) + + +class ivf: + def __init__(self, k=5): + self.__c = None + self.__k = k + self.__x = None + self.__keep = None + self.__x_ranges = None + self.__x_perm = None + self.__y_perm = None + self.__use_gpu = None + + def __KMeans(self, x, K=10, Niter=15): + N, D = x.shape + c = np.copy(x[:K, :]) + x_i = LazyTensor(x[:, None, :]) + for i in range(Niter): + c_j = LazyTensor(c[None, :, :]) + D_ij = ((x_i - c_j) ** 2).sum(-1) + if self.__use_gpu: + D_ij.backend = "GPU" + else: + D_ij.backend = "CPU" + cl = D_ij.argmin(axis=1).astype(int).reshape(N) + + Ncl = np.bincount(cl).astype(dtype="float32") + for d in range(D): + c[:, d] = np.bincount(cl, weights=x[:, d]) / Ncl + return cl, c + + def __k_argmin(self, x, y, k=1): + + x_LT = LazyTensor(np.expand_dims(x, 1)) + y_LT = LazyTensor(np.expand_dims(y, 0)) + d = ((x_LT - y_LT) ** 2).sum(-1) + if self.__use_gpu: + d.backend = "GPU" + else: + d.backend = "CPU" + if k == 1: + return d.argmin(dim=1).flatten() + else: + return d.argKmin(K=k, dim=1) + + def __sort_clusters(self, x, lab, store_x=True): + perm = np.argsort(lab.flatten()) + if store_x: + self.__x_perm = perm + else: + self.__y_perm = perm + return x[perm], lab[perm] + + def __unsort(self, nn): + return np.take(self.__x_perm[nn], self.__y_perm.argsort(), axis=0) + + def fit(self, x, clusters=50, a=5, use_gpu=True, n=15): + """ + Fits the main dataset + """ + if type(x) != np.ndarray: + raise ValueError("Input must be a numpy ndarray") + if type(clusters) != int: + raise ValueError("Clusters must be an integer") + if clusters >= len(x): + raise ValueError("Number of clusters must be less than length of dataset") + if type(a) != int: + raise ValueError("Number of clusters to search over must be an integer") + if a > clusters: + raise ValueError( + "Number of clusters to search over must be less than total number of clusters" + ) + if len(x.shape) != 2: + raise ValueError("Input must be a 2D array") + + if use_gpu and not pykeops.config.gpu_available: + raise ValueError("use_gpu = True but GPU not detected") + self.__use_gpu = use_gpu + cl, c = self.__KMeans(x, clusters, Niter=n) + self.__c = c + cl = self.__assign(x) + + ncl = self.__k_argmin(c, c, k=a) + self.__x_ranges, _, _ = cluster_ranges_centroids(x, cl) + x, x_labels = self.__sort_clusters(x, cl, store_x=True) + self.__x = x + + r = np.arange(clusters).repeat(a).T.reshape(-1) + self.__keep = np.zeros([clusters, clusters], dtype=bool) + self.__keep[r, ncl.flatten()] = True + return self + + def __assign(self, x, c=None): + if c is None: + c = self.__c + return self.__k_argmin(x, c) + + 
def kneighbors(self, y, sparse=True): + """ + Obtain the k nearest neighbors of the query dataset y + """ + if self.__x is None: + raise ValueError("Input dataset not fitted yet! Call .fit() first!") + if type(y) != np.ndarray: + raise ValueError("Query dataset must be a numpy ndarray") + if len(y.shape) != 2: + raise ValueError("Query dataset must be a 2D array") + if self.__x.shape[-1] != y.shape[-1]: + raise ValueError("Query and dataset must have same dimensions") + + y_labels = self.__assign(y, self.__c) + y_ranges, _, _ = cluster_ranges_centroids(y, y_labels) + + y, y_labels = self.__sort_clusters(y, y_labels, store_x=False) + + x_LT = LazyTensor(np.expand_dims(self.__x, 0)) + y_LT = LazyTensor(np.expand_dims(y, 1)) + D_ij = ((y_LT - x_LT) ** 2).sum(-1) + ranges_ij = from_matrix(y_ranges, self.__x_ranges, self.__keep) + D_ij.ranges = ranges_ij + if self.__use_gpu: + D_ij.backend = "GPU" + else: + D_ij.backend = "CPU" + nn = D_ij.argKmin(K=self.__k, axis=1) + return self.__unsort(nn) + + def brute_force(self, x, y, k=5): + x_LT = LazyTensor(np.expand_dims(x, 0)) + y_LT = LazyTensor(np.expand_dims(y, 1)) + D_ij = ((y_LT - x_LT) ** 2).sum(-1) + return D_ij.argKmin(K=k, axis=1) diff --git a/pykeops/numpy/nystrom/Nystrom.py b/pykeops/numpy/nystrom/Nystrom.py index e4adb1f6e..70bce7cd0 100644 --- a/pykeops/numpy/nystrom/Nystrom.py +++ b/pykeops/numpy/nystrom/Nystrom.py @@ -17,35 +17,48 @@ ################################################################################# + class Nystrom_NK: - ''' - Class to implement Nystrom using numpy and PyKeops. - * The fit method computes K^{-1}_q. - * The transform method maps the data into the feature space underlying - the Nystrom-approximated kernel. - * The method K_approx directly computes the Nystrom approximation. - Parameters: - n_components [int] = how many samples to select from data. - kernel [str] = type of kernel to use. Current options = {rbf}. - sigma [float] = exponential constant for the RBF kernel. - exp_sigma [float] = exponential constant for the exponential kernel. - eps[float] = size for square bins in block-sparse preprocessing. - k_means[int] = number of centroids for KMeans algorithm in block-sparse - preprocessing. - n_iter[int] = number of iterations for KMeans - dtype[type] = type of data: np.float32 or np.float64 - inv_eps[float] = additive invertibility constant for matrix decomposition. - backend[string] = "GPU" or "CPU" mode - verbose[boolean] = set True to print details - random_state=[None, float] = to set a random seed for the random - sampling of the samples. To be used when - reproducibility is needed. - ''' - - def __init__(self, n_components=100, kernel='rbf', sigma:float = 1., - exp_sigma:float = 1.0, eps:float = 0.05, mask_radius:float = None, - k_means = 10, n_iter:int = 10, inv_eps:float = None, dtype = np.float32, - backend = None, verbose = False, random_state=None): + """ + Class to implement Nystrom using numpy and PyKeops. + * The fit method computes K^{-1}_q. + * The transform method maps the data into the feature space underlying + the Nystrom-approximated kernel. + * The method K_approx directly computes the Nystrom approximation. + Parameters: + n_components [int] = how many samples to select from data. + kernel [str] = type of kernel to use. Current options = {rbf}. + sigma [float] = exponential constant for the RBF kernel. + exp_sigma [float] = exponential constant for the exponential kernel. + eps[float] = size for square bins in block-sparse preprocessing. 
+ k_means[int] = number of centroids for KMeans algorithm in block-sparse + preprocessing. + n_iter[int] = number of iterations for KMeans + dtype[type] = type of data: np.float32 or np.float64 + inv_eps[float] = additive invertibility constant for matrix decomposition. + backend[string] = "GPU" or "CPU" mode + verbose[boolean] = set True to print details + random_state=[None, float] = to set a random seed for the random + sampling of the samples. To be used when + reproducibility is needed. + """ + + def __init__( + self, + n_components=100, + kernel="rbf", + sigma: float = 1.0, + exp_sigma: float = 1.0, + eps: float = 0.05, + mask_radius: float = None, + k_means=10, + n_iter: int = 10, + inv_eps: float = None, + dtype=np.float32, + backend=None, + verbose=False, + random_state=None, + ): self.n_components = n_components self.kernel = kernel @@ -60,37 +73,38 @@ def __init__(self, n_components=100, kernel='rbf', sigma:float = 1., self.verbose = verbose if not backend: - self.backend = 'GPU' if pykeops.config.gpu_available else 'CPU' + self.backend = "GPU" if pykeops.config.gpu_available else "CPU" else: self.backend = backend if inv_eps: self.inv_eps = inv_eps else: - if kernel == 'linear': + if kernel == "linear": self.inv_eps = 1e-4 else: self.inv_eps = 1e-8 if not mask_radius: - if kernel == 'rbf': - self.mask_radius = 2* np.sqrt(2) * self.sigma - elif kernel == 'exp': + if kernel == "rbf": + self.mask_radius = 2 * np.sqrt(2) * self.sigma + elif kernel == "exp": self.mask_radius = 8 * self.exp_sigma - - def fit(self, x:np.ndarray): - ''' + def fit(self, x: np.ndarray): + """ Args: x = numpy array of shape (n_samples, n_features) Returns: Fitted instance of the class - ''' + """ if self.verbose: - print(f'Working with backend = {self.backend}') - + print(f"Working with backend = {self.backend}") + # Basic checks - assert type(x) == np.ndarray, 'Input to fit(.) must be an array.' - assert x.shape[0] >= self.n_components, 'The application needs X.shape[0] >= n_components.' - assert self.exp_sigma > 0, 'Should be working with decaying exponential.' + assert type(x) == np.ndarray, "Input to fit(.) must be an array." + assert ( + x.shape[0] >= self.n_components + ), "The application needs X.shape[0] >= n_components." + assert self.exp_sigma > 0, "Should be working with decaying exponential." # Update dtype self._update_dtype(x) @@ -98,8 +112,8 @@ def fit(self, x:np.ndarray): n_samples = x.shape[0] # Define basis rnd = check_random_state(self.random_state) - inds = rnd.permutation(n_samples) - basis_inds = inds[:self.n_components] + inds = rnd.permutation(n_samples) + basis_inds = inds[: self.n_components] basis = x[basis_inds] # Build smaller kernel basis_kernel = self._pairwise_kernels(basis) @@ -112,111 +126,108 @@ def fit(self, x:np.ndarray): return self - - def _spectral(self, X_i:LazyTensor): - ''' + def _spectral(self, X_i: LazyTensor): + """ Helper function to compute eigendecomposition of K_q. Written using LinearOperators which are lazy representations of sparse and/or structured data. 
Args: X_i[numpy LazyTensor] Returns S[np.array] eigenvalues, U[np.array] eigenvectors - ''' + """ K_linear = aslinearoperator(X_i) # K <- K + eps - K_linear = K_linear + IdentityOperator(K_linear.shape, dtype=self.dtype) * self.inv_eps + K_linear = ( + K_linear + IdentityOperator(K_linear.shape, dtype=self.dtype) * self.inv_eps + ) k = K_linear.shape[0] - 1 - S, U = eigsh(K_linear, k=k, which='LM') + S, U = eigsh(K_linear, k=k, which="LM") return S, U - - def transform(self, x:np.ndarray) -> np.array: - ''' Applies transform on the data. - + def transform(self, x: np.ndarray) -> np.array: + """Applies transform on the data. + Args: X [np.array] = data to transform Returns X [np.array] = data after transformation - ''' - + """ + K_nq = self._pairwise_kernels(x, self.components_) x_new = K_nq @ self.normalization_.T return x_new - - def K_approx(self, x:np.array) -> np.array: - ''' Function to return Nystrom approximation to the kernel. - + def K_approx(self, x: np.array) -> np.array: + """Function to return Nystrom approximation to the kernel. + Args: X[np.array] = data used in fit(.) function. Returns - K[np.array] = Nystrom approximation to kernel''' - + K[np.array] = Nystrom approximation to kernel""" + K_nq = self._pairwise_kernels(x, self.components_) # For arrays: K_approx = K_nq @ K_q_inv @ K_nq.T # But to use @ with lazy tensors we have: K_q_inv = self.normalization_.T @ self.normalization_ - K_approx = K_nq @ (K_nq @ K_q_inv ).T - - return K_approx.T + K_approx = K_nq @ (K_nq @ K_q_inv).T + return K_approx.T + + def _pairwise_kernels(self, x: np.array, y: np.array = None) -> LazyTensor: + """Helper function to build kernel - def _pairwise_kernels(self, x:np.array, y:np.array = None) -> LazyTensor: - '''Helper function to build kernel - Args: X = torch tensor of dimension 2, K_type = type of Kernel to return. 
Returns: K_ij[LazyTensor] - ''' + """ if y is None: y = x - if self.kernel == 'linear': - K_ij = x @ y.T - elif self.kernel == 'rbf': + if self.kernel == "linear": + K_ij = x @ y.T + elif self.kernel == "rbf": x /= self.sigma y /= self.sigma x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :]) - K_ij = ( -(( (x_i - x_j)**2 ).sum(dim=2) ) ).exp() + K_ij = (-(((x_i - x_j) ** 2).sum(dim=2))).exp() # block-sparse reduction preprocess K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) - elif self.kernel == 'exp': + elif self.kernel == "exp": x /= self.exp_sigma y /= self.exp_sigma x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :]) - K_ij = (- ( ((x_i - x_j) ** 2).sqrt().sum(2))).exp() + K_ij = (-(((x_i - x_j) ** 2).sqrt().sum(2))).exp() # block-sparse reduction preprocess - K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) # TODO - + K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) # TODO + K_ij.backend = self.backend - - return K_ij + return K_ij - def _Gauss_block_sparse_pre(self, x:np.array, y:np.array, K_ij:LazyTensor): - ''' + def _Gauss_block_sparse_pre(self, x: np.array, y: np.array, K_ij: LazyTensor): + """ Helper function to preprocess data for block-sparse reduction of the Gaussian kernel - - Args: + + Args: x[np.array], y[np.array] = arrays giving rise to Gaussian kernel K(x,y) K_ij[LazyTensor_n] = symbolic representation of K(x,y) eps[float] = size for square bins Returns: - K_ij[LazyTensor_n] = symbolic representation of K(x,y) with + K_ij[LazyTensor_n] = symbolic representation of K(x,y) with set sparse ranges - ''' + """ # labels for low dimensions if x.shape[1] < 4 or y.shape[1] < 4: - x_labels = grid_cluster(x, self.eps) - y_labels = grid_cluster(y, self.eps) + x_labels = grid_cluster(x, self.eps) + y_labels = grid_cluster(y, self.eps) # range and centroid per class x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels) y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels) else: - # labels for higher dimensions + # labels for higher dimensions x_labels, x_centroids = self._KMeans(x) y_labels, y_centroids = self._KMeans(y) # compute ranges @@ -225,12 +236,14 @@ def _Gauss_block_sparse_pre(self, x:np.array, y:np.array, K_ij:LazyTensor): # sort points x, x_labels = sort_clusters(x, x_labels) - y, y_labels = sort_clusters(y, y_labels) + y, y_labels = sort_clusters(y, y_labels) # Compute a coarse Boolean mask: - if self.kernel == 'rbf': + if self.kernel == "rbf": D = np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2) - elif self.kernel == 'exp': - D = np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2).sqrt() + elif self.kernel == "exp": + D = np.sum( + (x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2 + ).sqrt() keep = D < (self.mask_radius) ** 2 # mask -> set of integer tensors ranges_ij = from_matrix(x_ranges, y_ranges, keep) @@ -238,9 +251,8 @@ def _Gauss_block_sparse_pre(self, x:np.array, y:np.array, K_ij:LazyTensor): return K_ij - - def _KMeans(self,x:np.array): - ''' KMeans with Pykeops to do binning of original data. + def _KMeans(self, x: np.array): + """KMeans with Pykeops to do binning of original data. 
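+        (Lloyd-style iterations: nearest-centroid assignment through a
+        LazyTensor argmin, centroid updates through np.bincount.)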
Args: x[np.array] = data k_means[int] = number of bins to build @@ -248,31 +260,32 @@ def _KMeans(self,x:np.array): Returns: labels[np.array] = class labels for each point in x clusters[np.array] = coordinates for each centroid - ''' - N, D = x.shape - clusters = np.copy(x[:self.k_means, :]) # initialization of clusters - x_i = LazyTensor_n(x[:, None, :]) + """ + N, D = x.shape + clusters = np.copy(x[: self.k_means, :]) # initialization of clusters + x_i = LazyTensor_n(x[:, None, :]) for i in range(self.n_iter): - clusters_j = LazyTensor_n(clusters[None, :, :]) + clusters_j = LazyTensor_n(clusters[None, :, :]) D_ij = ((x_i - clusters_j) ** 2).sum(-1) # points-clusters kernel - labels = D_ij.argmin(axis=1).astype(int).reshape(N) # Points -> Nearest cluster + labels = ( + D_ij.argmin(axis=1).astype(int).reshape(N) + ) # Points -> Nearest cluster Ncl = np.bincount(labels).astype(self.dtype) # Class weights for d in range(D): # Compute the cluster centroids with np.bincount: clusters[:, d] = np.bincount(labels, weights=x[:, d]) / Ncl return labels, clusters - - def _update_dtype(self,x): - ''' Helper function that sets dtype to that of + def _update_dtype(self, x): + """Helper function that sets dtype to that of the given data in the fitting step. - + Args: x [np.array] = raw data to remap Returns: nothing - ''' + """ self.dtype = x.dtype - self.inv_eps = np.array([self.inv_eps]).astype(np.float32)[0] \ No newline at end of file + self.inv_eps = np.array([self.inv_eps]).astype(np.float32)[0] diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index 17629bd24..c2458c857 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -436,7 +436,7 @@ def test_LazyTensor_sum(self): for (res_keops, res_numpy) in zip(full_results[0], full_results[1]): self.assertTrue(res_keops.shape == res_numpy.shape) self.assertTrue(np.allclose(res_keops, res_numpy, atol=1e-3)) - + ############################################################ def test_IVF(self): ########################################################### @@ -444,30 +444,34 @@ def test_IVF(self): import numpy as np np.random.seed(0) - N, D, K, k, a = 10**3, 3, 50, 5, 5 - + N, D, K, k, a = 10 ** 3, 3, 50, 5, 5 + # Generate random datapoints x, y x = 0.7 * np.random.normal(size=(N, D)) + 0.3 y = 0.7 * np.random.normal(size=(N, D)) + 0.3 # Ground truth K nearest neighbours - truth = np.argsort(((np.expand_dims(y,1)-np.expand_dims(x,0))**2).sum(-1),axis=1) - truth = truth[:,:k] - + truth = np.argsort( + ((np.expand_dims(y, 1) - np.expand_dims(x, 0)) ** 2).sum(-1), axis=1 + ) + truth = truth[:, :k] + # IVF K nearest neighbours IVF = ivf() - IVF.fit(x,a=a) + IVF.fit(x, a=a) ivf_fit = IVF.kneighbors(y) - + # Calculate accuracy accuracy = 0 for i in range(k): - accuracy += float(np.sum(ivf_fit == truth))/N - truth = np.roll(truth, 1, -1) # Create a rolling window (index positions may not match) + accuracy += float(np.sum(ivf_fit == truth)) / N + truth = np.roll( + truth, 1, -1 + ) # Create a rolling window (index positions may not match) # Record accuracies - accuracy = float(accuracy/k) - - self.assertTrue(accuracy >= 0.8, f'Failed at {a}, {accuracy}') + accuracy = float(accuracy / k) + + self.assertTrue(accuracy >= 0.8, f"Failed at {a}, {accuracy}") if __name__ == "__main__": diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index b23ab525c..c9c3d272b 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -672,7 +672,7 @@ def 
invert_permutation_numpy(permutation): self.assertTrue( torch.allclose(grad_keops.flatten(), grad_torch.flatten(), rtol=1e-4) ) - + ############################################################ def test_IVF(self): ############################################################ @@ -680,30 +680,32 @@ def test_IVF(self): import torch torch.manual_seed(0) - N, D, K, k, a = 10**3, 3, 50, 5, 5 - + N, D, K, k, a = 10 ** 3, 3, 50, 5, 5 + # Generate random datapoints x, y x = 0.7 * torch.randn(N, D) + 0.3 y = 0.7 * torch.randn(N, D) + 0.3 # Ground truth K nearest neighbours - truth = torch.argsort(((y.unsqueeze(1)-x.unsqueeze(0))**2).sum(-1),dim=1) - truth = truth[:,:k] + truth = torch.argsort(((y.unsqueeze(1) - x.unsqueeze(0)) ** 2).sum(-1), dim=1) + truth = truth[:, :k] # IVF K nearest neighbours IVF = ivf() - IVF.fit(x,a=a) + IVF.fit(x, a=a) ivf_fit = IVF.kneighbors(y) # Calculate accuracy accuracy = 0 for i in range(k): - accuracy += torch.sum(ivf_fit == truth).float()/N - truth = torch.roll(truth, 1, -1) # Create a rolling window (index positions may not match) + accuracy += torch.sum(ivf_fit == truth).float() / N + truth = torch.roll( + truth, 1, -1 + ) # Create a rolling window (index positions may not match) # Record accuracies - accuracy = float(accuracy/k) + accuracy = float(accuracy / k) - self.assertTrue(accuracy >= 0.8, f'Failed at {a}, {accuracy}') + self.assertTrue(accuracy >= 0.8, f"Failed at {a}, {accuracy}") if __name__ == "__main__": diff --git a/pykeops/torch/nn/ivf.py b/pykeops/torch/nn/ivf.py index 5cfe88204..aab533b50 100644 --- a/pykeops/torch/nn/ivf.py +++ b/pykeops/torch/nn/ivf.py @@ -8,130 +8,133 @@ if use_cuda: torch.cuda.synchronize() -class ivf(): - def __init__(self,k=5): - self.__c=None - self.__k=k - self.__x=None - self.__keep=None - self.__x_ranges=None - self.__x_perm=None - self.__y_perm=None - self.__device=None - - def __KMeans(self,x, K=10, Niter=15): - N, D = x.shape - c = x[:K, :].clone() - x_i = LazyTensor(x.view(N, 1, D).to(self.__device)) - for i in range(Niter): - c_j = LazyTensor(c.view(1, K, D).to(self.__device)) - D_ij = ((x_i - c_j) ** 2).sum(-1) - cl = D_ij.argmin(dim=1).long().view(-1) - c.zero_() - c.scatter_add_(0, cl[:, None].repeat(1, D), x) - Ncl = torch.bincount(cl, minlength=K).type_as(c).view(K, 1) - c /= Ncl - return cl, c - - def __k_argmin(self,x,y,k=1): - if use_cuda: - torch.cuda.synchronize() - x_LT=LazyTensor(x.unsqueeze(1).to(self.__device)) - y_LT=LazyTensor(y.unsqueeze(0).to(self.__device)) - d=((x_LT-y_LT)**2).sum(-1) - if k==1: - return d.argmin(dim=1).long().view(-1) - else: - return d.argKmin(K=k,dim=1).long() - - def __sort_clusters(self,x,lab,store_x=True): - lab, perm = torch.sort(lab.view(-1)) - if store_x: - self.__x_perm=perm - else: - self.__y_perm=perm - return x[perm],lab - - def __unsort(self,nn): - return torch.index_select(self.__x_perm[nn],0,self.__y_perm.argsort()) - - def fit(self,x,clusters=50,a=5,n=15): - ''' - Fits the main dataset - ''' - if type(x)!=torch.Tensor: - raise ValueError('Input must be a torch tensor') - if type(clusters)!=int: - raise ValueError('Clusters must be an integer') - if clusters>=len(x): - raise ValueError('Number of clusters must be less than length of dataset') - if type(a)!=int: - raise ValueError('Number of clusters to search over must be an integer') - if a>clusters: - raise ValueError('Number of clusters to search over must be less than total number of clusters') - if len(x.shape)!=2: - raise ValueError('Input must be a 2D array') - x=x.contiguous() - self.__device=x.device - cl, c 
= self.__KMeans(x,clusters,Niter=n) - self.__c=c - - cl=self.__assign(x) - if use_cuda: - torch.cuda.synchronize() - - ncl=self.__k_argmin(c,c,k=a) - self.__x_ranges, _, _ = cluster_ranges_centroids(x, cl) - - x, x_labels = self.__sort_clusters(x,cl,store_x=True) - self.__x=x - r=torch.arange(clusters).repeat(a,1).T.reshape(-1).long() - self.__keep= torch.zeros([clusters,clusters], dtype=torch.bool).to(self.__device) - self.__keep[r,ncl.flatten()]=True - return self - - - def __assign(self,x,c=None): - if c is None: - c=self.__c - return self.__k_argmin(x,c) - - def kneighbors(self,y): - ''' - Obtain the k nearest neighbors of the query dataset y - ''' - if self.__x is None: - raise ValueError('Input dataset not fitted yet! Call .fit() first!') - if type(y)!=torch.Tensor: - raise ValueError("Query dataset must be a torch tensor") - if y.device!=self.__device: - raise ValueError('Input dataset and query dataset must be on same device') - if len(y.shape)!=2: - raise ValueError('Query dataset must be a 2D tensor') - if self.__x.shape[-1]!=y.shape[-1]: - raise ValueError('Query and dataset must have same dimensions') - if use_cuda: - torch.cuda.synchronize() - y=y.contiguous() - y_labels=self.__assign(y) - - - y_ranges,_,_ = cluster_ranges_centroids(y,y_labels) - self.__y_ranges=y_ranges - y, y_labels = self.__sort_clusters(y, y_labels,store_x=False) - x_LT=LazyTensor(self.__x.unsqueeze(0).to(self.__device).contiguous()) - y_LT=LazyTensor(y.unsqueeze(1).to(self.__device).contiguous()) - D_ij=((y_LT-x_LT)**2).sum(-1) - - ranges_ij = from_matrix(y_ranges, self.__x_ranges, self.__keep) - D_ij.ranges=ranges_ij - nn=D_ij.argKmin(K=self.__k,axis=1) - return self.__unsort(nn) - - def brute_force(self,x,y,k=5): - if use_cuda: - torch.cuda.synchronize() - x_LT=LazyTensor(x.unsqueeze(0)) - y_LT=LazyTensor(y.unsqueeze(1)) - D_ij=((y_LT-x_LT)**2).sum(-1) - return D_ij.argKmin(K=k,axis=1) \ No newline at end of file + +class ivf: + def __init__(self, k=5): + self.__c = None + self.__k = k + self.__x = None + self.__keep = None + self.__x_ranges = None + self.__x_perm = None + self.__y_perm = None + self.__device = None + + def __KMeans(self, x, K=10, Niter=15): + N, D = x.shape + c = x[:K, :].clone() + x_i = LazyTensor(x.view(N, 1, D).to(self.__device)) + for i in range(Niter): + c_j = LazyTensor(c.view(1, K, D).to(self.__device)) + D_ij = ((x_i - c_j) ** 2).sum(-1) + cl = D_ij.argmin(dim=1).long().view(-1) + c.zero_() + c.scatter_add_(0, cl[:, None].repeat(1, D), x) + Ncl = torch.bincount(cl, minlength=K).type_as(c).view(K, 1) + c /= Ncl + return cl, c + + def __k_argmin(self, x, y, k=1): + if use_cuda: + torch.cuda.synchronize() + x_LT = LazyTensor(x.unsqueeze(1).to(self.__device)) + y_LT = LazyTensor(y.unsqueeze(0).to(self.__device)) + d = ((x_LT - y_LT) ** 2).sum(-1) + if k == 1: + return d.argmin(dim=1).long().view(-1) + else: + return d.argKmin(K=k, dim=1).long() + + def __sort_clusters(self, x, lab, store_x=True): + lab, perm = torch.sort(lab.view(-1)) + if store_x: + self.__x_perm = perm + else: + self.__y_perm = perm + return x[perm], lab + + def __unsort(self, nn): + return torch.index_select(self.__x_perm[nn], 0, self.__y_perm.argsort()) + + def fit(self, x, clusters=50, a=5, n=15): + """ + Fits the main dataset + """ + if type(x) != torch.Tensor: + raise ValueError("Input must be a torch tensor") + if type(clusters) != int: + raise ValueError("Clusters must be an integer") + if clusters >= len(x): + raise ValueError("Number of clusters must be less than length of dataset") + if type(a) != int: 
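+            # 'a' is the number of nearest clusters searched per query point in kneighbors().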
+ raise ValueError("Number of clusters to search over must be an integer") + if a > clusters: + raise ValueError( + "Number of clusters to search over must be less than total number of clusters" + ) + if len(x.shape) != 2: + raise ValueError("Input must be a 2D array") + x = x.contiguous() + self.__device = x.device + cl, c = self.__KMeans(x, clusters, Niter=n) + self.__c = c + + cl = self.__assign(x) + if use_cuda: + torch.cuda.synchronize() + + ncl = self.__k_argmin(c, c, k=a) + self.__x_ranges, _, _ = cluster_ranges_centroids(x, cl) + + x, x_labels = self.__sort_clusters(x, cl, store_x=True) + self.__x = x + r = torch.arange(clusters).repeat(a, 1).T.reshape(-1).long() + self.__keep = torch.zeros([clusters, clusters], dtype=torch.bool).to( + self.__device + ) + self.__keep[r, ncl.flatten()] = True + return self + + def __assign(self, x, c=None): + if c is None: + c = self.__c + return self.__k_argmin(x, c) + + def kneighbors(self, y): + """ + Obtain the k nearest neighbors of the query dataset y + """ + if self.__x is None: + raise ValueError("Input dataset not fitted yet! Call .fit() first!") + if type(y) != torch.Tensor: + raise ValueError("Query dataset must be a torch tensor") + if y.device != self.__device: + raise ValueError("Input dataset and query dataset must be on same device") + if len(y.shape) != 2: + raise ValueError("Query dataset must be a 2D tensor") + if self.__x.shape[-1] != y.shape[-1]: + raise ValueError("Query and dataset must have same dimensions") + if use_cuda: + torch.cuda.synchronize() + y = y.contiguous() + y_labels = self.__assign(y) + + y_ranges, _, _ = cluster_ranges_centroids(y, y_labels) + self.__y_ranges = y_ranges + y, y_labels = self.__sort_clusters(y, y_labels, store_x=False) + x_LT = LazyTensor(self.__x.unsqueeze(0).to(self.__device).contiguous()) + y_LT = LazyTensor(y.unsqueeze(1).to(self.__device).contiguous()) + D_ij = ((y_LT - x_LT) ** 2).sum(-1) + + ranges_ij = from_matrix(y_ranges, self.__x_ranges, self.__keep) + D_ij.ranges = ranges_ij + nn = D_ij.argKmin(K=self.__k, axis=1) + return self.__unsort(nn) + + def brute_force(self, x, y, k=5): + if use_cuda: + torch.cuda.synchronize() + x_LT = LazyTensor(x.unsqueeze(0)) + y_LT = LazyTensor(y.unsqueeze(1)) + D_ij = ((y_LT - x_LT) ** 2).sum(-1) + return D_ij.argKmin(K=k, axis=1) diff --git a/pykeops/torch/nystrom/nystrom.py b/pykeops/torch/nystrom/nystrom.py index ee7d6458f..94b7d288c 100644 --- a/pykeops/torch/nystrom/nystrom.py +++ b/pykeops/torch/nystrom/nystrom.py @@ -44,28 +44,40 @@ ################################################################################ # Same as LazyNystrom_T but written with pyKeOps + class LazyNystrom_TK: - ''' - Class to implement Nystrom on torch LazyTensors. - This class works as an interface between lazy tensors and - the Nystrom algorithm in NumPy. - * The fit method computes K^{-1}_q. - * The transform method maps the data into the feature space underlying - the Nystrom-approximated kernel. - * The method K_approx directly computes the Nystrom approximation. - Parameters: - n_components [int] = how many samples to select from data. - kernel [str] = type of kernel to use. Current options = {linear, rbf}. - gamma [float] = exponential constant for the RBF kernel. - random_state=[None, float] = to set a random seed for the random - sampling of the samples. To be used when - reproducibility is needed. 
- ''' - - def __init__(self, n_components=100, kernel='rbf', sigma: float = 1., - exp_sigma: float = 1.0, eps: float = 0.05, mask_radius: float = None, - k_means=10, n_iter: int = 10, inv_eps: float = None, dtype=np.float32, - backend='CPU', random_state=None): + """ + Class to implement Nystrom on torch LazyTensors. + This class works as an interface between lazy tensors and + the Nystrom algorithm in NumPy. + * The fit method computes K^{-1}_q. + * The transform method maps the data into the feature space underlying + the Nystrom-approximated kernel. + * The method K_approx directly computes the Nystrom approximation. + Parameters: + n_components [int] = how many samples to select from data. + kernel [str] = type of kernel to use. Current options = {linear, rbf}. + gamma [float] = exponential constant for the RBF kernel. + random_state=[None, float] = to set a random seed for the random + sampling of the samples. To be used when + reproducibility is needed. + """ + + def __init__( + self, + n_components=100, + kernel="rbf", + sigma: float = 1.0, + exp_sigma: float = 1.0, + eps: float = 0.05, + mask_radius: float = None, + k_means=10, + n_iter: int = 10, + inv_eps: float = None, + dtype=np.float32, + backend="CPU", + random_state=None, + ): self.n_components = n_components self.kernel = kernel @@ -81,33 +93,35 @@ def __init__(self, n_components=100, kernel='rbf', sigma: float = 1., if inv_eps: self.inv_eps = inv_eps else: - if kernel == 'linear': + if kernel == "linear": self.inv_eps = 1e-4 else: self.inv_eps = 1e-8 if not mask_radius: - if kernel == 'rbf': + if kernel == "rbf": self.mask_radius = 2 * np.sqrt(2) * self.sigma - if kernel == 'exp': + if kernel == "exp": self.mask_radius = 8 * self.exp_sigma def fit(self, X: torch.tensor): - ''' + """ Args: X = torch tensor with features of shape (1, n_samples, n_features) Returns: Fitted instance of the class - ''' + """ # Basic checks: we have a lazy tensor and n_components isn't too large - assert type(X) == torch.Tensor, 'Input to fit(.) must be a Tensor.' - assert X.size(0) >= self.n_components, f'The application needs X.shape[1] >= n_components.' + assert type(X) == torch.Tensor, "Input to fit(.) must be a Tensor." + assert ( + X.size(0) >= self.n_components + ), f"The application needs X.shape[1] >= n_components." # self._update_dtype(X) # Number of samples n_samples = X.size(0) # Define basis rnd = check_random_state(self.random_state) inds = rnd.permutation(n_samples) - basis_inds = inds[:self.n_components] + basis_inds = inds[: self.n_components] basis = X[basis_inds] # Build smaller kernel basis_kernel = self._pairwise_kernels(basis, kernel=self.kernel) @@ -121,39 +135,40 @@ def fit(self, X: torch.tensor): return self def transform(self, X: torch.tensor) -> torch.tensor: - ''' Applies transform on the data. + """Applies transform on the data. Args: X [LazyTensor] = data to transform Returns X [LazyTensor] = data after transformation - ''' + """ K_nq = self._pairwise_kernels(X, self.components_, self.kernel) return K_nq @ self.normalization_.t() def K_approx(self, X: torch.tensor) -> torch.tensor: - ''' Function to return Nystrom approximation to the kernel. + """Function to return Nystrom approximation to the kernel. Args: X[torch.tensor] = data used in fit(.) function. 
Returns - K[torch.tensor] = Nystrom approximation to kernel''' + K[torch.tensor] = Nystrom approximation to kernel""" K_nq = self._pairwise_kernels(X, self.components_, self.kernel) K_approx = K_nq @ self.normalization_ @ K_nq.t() return K_approx - def _pairwise_kernels(self, x: torch.tensor, y: torch.tensor = None, kernel='rbf', - sigma: float = 1.) -> LazyTensor: - '''Helper function to build kernel + def _pairwise_kernels( + self, x: torch.tensor, y: torch.tensor = None, kernel="rbf", sigma: float = 1.0 + ) -> LazyTensor: + """Helper function to build kernel Args: X = torch tensor of dimension 2. K_type = type of Kernel to return Returns: K_ij[LazyTensor] - ''' + """ if y is None: y = x - if kernel == 'linear': + if kernel == "linear": K_ij = x @ y.T - elif kernel == 'rbf': + elif kernel == "rbf": x /= sigma y /= sigma @@ -163,8 +178,7 @@ def _pairwise_kernels(self, x: torch.tensor, y: torch.tensor = None, kernel='rbf # block-sparse reduction preprocess K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) - - elif kernel == 'exp': + elif kernel == "exp": x_i, x_j = LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :]) K_ij = (-1 * ((x_i - x_j) ** 2).sum().sqrt()).exp() # block-sparse reduction preprocess @@ -175,8 +189,10 @@ def _pairwise_kernels(self, x: torch.tensor, y: torch.tensor = None, kernel='rbf K_ij.backend = self.backend return K_ij - def _Gauss_block_sparse_pre(self, x: torch.tensor, y: torch.tensor, K_ij: LazyTensor): - ''' + def _Gauss_block_sparse_pre( + self, x: torch.tensor, y: torch.tensor, K_ij: LazyTensor + ): + """ Helper function to preprocess data for block-sparse reduction of the Gaussian kernel @@ -187,7 +203,7 @@ def _Gauss_block_sparse_pre(self, x: torch.tensor, y: torch.tensor, K_ij: LazyTe Returns: K_ij[LazyTensor_n] = symbolic representation of K(x,y) with set sparse ranges - ''' + """ # labels for low dimensions if x.shape[1] < 4 or y.shape[1] < 4: @@ -219,7 +235,7 @@ def _Gauss_block_sparse_pre(self, x: torch.tensor, y: torch.tensor, K_ij: LazyTe return K_ij def _KMeans(self, x: torch.tensor): - ''' KMeans with Pykeops to do binning of original data. + """KMeans with Pykeops to do binning of original data. Args: x[np.array] = data k_means[int] = number of bins to build @@ -227,10 +243,10 @@ def _KMeans(self, x: torch.tensor): Returns: labels[np.array] = class labels for each point in x clusters[np.array] = coordinates for each centroid - ''' + """ N, D = x.shape - clusters = torch.clone(x[:self.k_means, :]) # initialization of clusters + clusters = torch.clone(x[: self.k_means, :]) # initialization of clusters x_i = LazyTensor(x[:, None, :]) for i in range(self.n_iter): @@ -245,13 +261,13 @@ def _KMeans(self, x: torch.tensor): return labels, clusters def _update_dtype(self, x): - ''' Helper function that sets inv_eps to dtype to that of + """Helper function that sets inv_eps to dtype to that of the given data in the fitting step. 
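+        (inv_eps is recast as well, so the identity regularization matches the data dtype.)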
Args: x [np.array] = raw data to remap Returns: nothing - ''' + """ self.dtype = x.dtype - self.inv_eps = np.array([self.inv_eps]).astype(self.dtype)[0] \ No newline at end of file + self.inv_eps = np.array([self.inv_eps]).astype(self.dtype)[0] From b68514366d31314eb8aa8fed1443dd4a39900d4f Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Wed, 24 Feb 2021 20:03:17 +0000 Subject: [PATCH 010/111] changed default use_gpu setting to false --- pykeops/numpy/nn/ivf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index c4715d3fd..dddf508fa 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -59,7 +59,7 @@ def __sort_clusters(self, x, lab, store_x=True): def __unsort(self, nn): return np.take(self.__x_perm[nn], self.__y_perm.argsort(), axis=0) - def fit(self, x, clusters=50, a=5, use_gpu=True, n=15): + def fit(self, x, clusters=50, a=5, use_gpu=False, n=15): """ Fits the main dataset """ From 9da09b9540d1f4143fe37330a033a1c2b138f417 Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Thu, 4 Mar 2021 14:11:01 +0000 Subject: [PATCH 011/111] added unit tests for nystrom --- pykeops/test/unit_tests_numpy.py | 37 +++++++++++++++++++++++++++ pykeops/test/unit_tests_pytorch.py | 40 ++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index c2458c857..0cd45a6fd 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -473,6 +473,43 @@ def test_IVF(self): self.assertTrue(accuracy >= 0.8, f"Failed at {a}, {accuracy}") + ############################################################ + def test_Nystrom_k_approx(self): + ############################################################ + from pykeops.numpy.nystrom import Nystrom + + num_sampling = 20 + x = np.random.randint(1,10,(100,3)).astype(np.float32) + + kernels = ['rbf', 'exp'] + + for kernel in kernels: + N_NK = Nystrom(n_components=num_sampling, kernel=kernel, + random_state=0).fit(x) + K = N_NK.K_approx(x) + x_new = N_NK.transform(x) + + ML2_error = np.linalg.norm(x_new @ x_new.T - K) / K.size + + self.assertTrue(ML2_error < 0.01) + + ############################################################ + def test_Nystrom_k_shape(self): + ############################################################ + from pykeops.numpy.nystrom import Nystrom + + length = 100 + num_sampling = 20 + x = np.random.randint(1,10,(length,3)).astype(np.float32) + + kernels = ['rbf', 'exp'] + + for kernel in kernels: + N_NK = Nystrom(n_components=num_sampling, kernel=kernel, random_state=0).fit(x) + + self.assertTrue(N_NK.normalization_.shape == (num_sampling, num_sampling)) + self.assertTrue(N_NK.transform(x).shape == (length, num_sampling)) + if __name__ == "__main__": unittest.main() diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index c9c3d272b..3ae518bee 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -707,7 +707,47 @@ def test_IVF(self): self.assertTrue(accuracy >= 0.8, f"Failed at {a}, {accuracy}") + ############################################################ + def test_Nystrom_K_approx(self): + ############################################################ + from pykeops.torch.nystrom import nystrom as Nystrom_TK + import torch + + length = 100 + num_sampling = 20 + x = torch.rand(length,3)*100 + + kernels = ['rbf', 'exp'] + + for kernel in kernels: + 
N_TK = Nystrom_TK(n_components=num_sampling, kernel=kernel, random_state=0).fit(x) + K = N_TK.K_approx(x) + x_new = N_TK.transform(x) + + ML2_error = np.linalg.norm(x_new @ x_new.T - K) / K.shape[0] + + self.assertTrue(ML2_error < 0.01) + + ############################################################ + def test_Nystrom_K_shape(self): + ############################################################ + + from pykeops.torch.nystrom import nystrom as Nystrom_TK + import torch + + length = 100 + num_sampling = 20 + x = torch.rand(length,3)*100 + + kernels = ['rbf', 'exp'] + + for kernel in kernels: + N_NT = Nystrom_TK(n_components=num_sampling, kernel=kernel, random_state=0).fit(x) + + self.assertTrue(N_NT.normalization_.shape == (num_sampling, num_sampling)) + self.assertTrue(N_NT.transform(x).shape == (length, num_sampling)) + if __name__ == "__main__": """ run tests From f0da4b188e726c5fc34aee45ceb929fd03ae6b94 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Thu, 4 Mar 2021 14:18:01 +0000 Subject: [PATCH 012/111] linter --- pykeops/test/unit_tests_numpy.py | 27 ++++++++++++---------- pykeops/test/unit_tests_pytorch.py | 37 +++++++++++++++++------------- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index 0cd45a6fd..b45439653 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -479,33 +479,36 @@ def test_Nystrom_k_approx(self): from pykeops.numpy.nystrom import Nystrom num_sampling = 20 - x = np.random.randint(1,10,(100,3)).astype(np.float32) + x = np.random.randint(1, 10, (100, 3)).astype(np.float32) + + kernels = ["rbf", "exp"] - kernels = ['rbf', 'exp'] - for kernel in kernels: - N_NK = Nystrom(n_components=num_sampling, kernel=kernel, - random_state=0).fit(x) + N_NK = Nystrom( + n_components=num_sampling, kernel=kernel, random_state=0 + ).fit(x) K = N_NK.K_approx(x) x_new = N_NK.transform(x) - + ML2_error = np.linalg.norm(x_new @ x_new.T - K) / K.size self.assertTrue(ML2_error < 0.01) - ############################################################ + ############################################################ def test_Nystrom_k_shape(self): ############################################################ from pykeops.numpy.nystrom import Nystrom - + length = 100 num_sampling = 20 - x = np.random.randint(1,10,(length,3)).astype(np.float32) + x = np.random.randint(1, 10, (length, 3)).astype(np.float32) + + kernels = ["rbf", "exp"] - kernels = ['rbf', 'exp'] - for kernel in kernels: - N_NK = Nystrom(n_components=num_sampling, kernel=kernel, random_state=0).fit(x) + N_NK = Nystrom( + n_components=num_sampling, kernel=kernel, random_state=0 + ).fit(x) self.assertTrue(N_NK.normalization_.shape == (num_sampling, num_sampling)) self.assertTrue(N_NK.transform(x).shape == (length, num_sampling)) diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index 3ae518bee..9a06e6d88 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -707,47 +707,52 @@ def test_IVF(self): self.assertTrue(accuracy >= 0.8, f"Failed at {a}, {accuracy}") - ############################################################ + ############################################################ def test_Nystrom_K_approx(self): - ############################################################ + ############################################################ from pykeops.torch.nystrom import nystrom as Nystrom_TK import torch 
- + length = 100 num_sampling = 20 - x = torch.rand(length,3)*100 + x = torch.rand(length, 3) * 100 + + kernels = ["rbf", "exp"] - kernels = ['rbf', 'exp'] - for kernel in kernels: - N_TK = Nystrom_TK(n_components=num_sampling, kernel=kernel, random_state=0).fit(x) + N_TK = Nystrom_TK( + n_components=num_sampling, kernel=kernel, random_state=0 + ).fit(x) K = N_TK.K_approx(x) x_new = N_TK.transform(x) - + ML2_error = np.linalg.norm(x_new @ x_new.T - K) / K.shape[0] self.assertTrue(ML2_error < 0.01) - ############################################################ + ############################################################ def test_Nystrom_K_shape(self): - ############################################################ + ############################################################ from pykeops.torch.nystrom import nystrom as Nystrom_TK import torch - + length = 100 num_sampling = 20 - x = torch.rand(length,3)*100 + x = torch.rand(length, 3) * 100 + + kernels = ["rbf", "exp"] - kernels = ['rbf', 'exp'] - for kernel in kernels: - N_NT = Nystrom_TK(n_components=num_sampling, kernel=kernel, random_state=0).fit(x) + N_NT = Nystrom_TK( + n_components=num_sampling, kernel=kernel, random_state=0 + ).fit(x) self.assertTrue(N_NT.normalization_.shape == (num_sampling, num_sampling)) self.assertTrue(N_NT.transform(x).shape == (length, num_sampling)) - + + if __name__ == "__main__": """ run tests From 8c6124b08edc0b39626631f756879f0df74e562c Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Thu, 4 Mar 2021 19:43:27 +0000 Subject: [PATCH 013/111] removing sklearn function --- pykeops/numpy/nystrom/Nystrom.py | 251 ++++++++++++++++--------------- 1 file changed, 128 insertions(+), 123 deletions(-) diff --git a/pykeops/numpy/nystrom/Nystrom.py b/pykeops/numpy/nystrom/Nystrom.py index 70bce7cd0..df1ee0c43 100644 --- a/pykeops/numpy/nystrom/Nystrom.py +++ b/pykeops/numpy/nystrom/Nystrom.py @@ -1,5 +1,6 @@ import numpy as np import pykeops +import numbers from pykeops.numpy import LazyTensor as LazyTensor_n from pykeops.numpy.cluster import grid_cluster @@ -7,7 +8,6 @@ from pykeops.numpy.cluster import cluster_ranges_centroids, cluster_ranges from pykeops.numpy.cluster import sort_clusters -from sklearn.utils import check_random_state from pykeops.torch import LazyTensor # For LinearOperator math @@ -15,50 +15,55 @@ from scipy.sparse.linalg.interface import IdentityOperator -################################################################################# - - -class Nystrom_NK: +# Note: this is a function taken from Sklearn +def check_random_state(seed): + """Turn seed into a np.random.RandomState instance + Parameters + ---------- + seed : None, int or instance of RandomState + If seed is None, return the RandomState singleton used by np.random. + If seed is an int, return a new RandomState instance seeded with seed. + If seed is already a RandomState instance, return it. + Otherwise raise ValueError. """ - Class to implement Nystrom using numpy and PyKeops. - * The fit method computes K^{-1}_q. - * The transform method maps the data into the feature space underlying - the Nystrom-approximated kernel. - * The method K_approx directly computes the Nystrom approximation. - Parameters: - n_components [int] = how many samples to select from data. - kernel [str] = type of kernel to use. Current options = {rbf}. - sigma [float] = exponential constant for the RBF kernel. - exp_sigma [float] = exponential constant for the exponential kernel. - eps[float] = size for square bins in block-sparse preprocessing. 
- k_means[int] = number of centroids for KMeans algorithm in block-sparse - preprocessing. - n_iter[int] = number of iterations for KMeans - dtype[type] = type of data: np.float32 or np.float64 - inv_eps[float] = additive invertibility constant for matrix decomposition. - backend[string] = "GPU" or "CPU" mode - verbose[boolean] = set True to print details - random_state=[None, float] = to set a random seed for the random - sampling of the samples. To be used when - reproducibility is needed. - """ - - def __init__( - self, - n_components=100, - kernel="rbf", - sigma: float = 1.0, - exp_sigma: float = 1.0, - eps: float = 0.05, - mask_radius: float = None, - k_means=10, - n_iter: int = 10, - inv_eps: float = None, - dtype=np.float32, - backend=None, - verbose=False, - random_state=None, - ): + if seed is None or seed is np.random: + return np.random.mtrand._rand + if isinstance(seed, numbers.Integral): + return np.random.RandomState(seed) + if isinstance(seed, np.random.RandomState): + return seed + raise ValueError('%r cannot be used to seed a numpy.random.RandomState' + ' instance' % seed) + +class Nystrom_NK: + ''' + Class to implement Nystrom using numpy and PyKeops. + * The fit method computes K^{-1}_q. + * The transform method maps the data into the feature space underlying + the Nystrom-approximated kernel. + * The method K_approx directly computes the Nystrom approximation. + Parameters: + n_components [int] = how many samples to select from data. + kernel [str] = type of kernel to use. Current options = {rbf}. + sigma [float] = exponential constant for the RBF kernel. + exp_sigma [float] = exponential constant for the exponential kernel. + eps[float] = size for square bins in block-sparse preprocessing. + k_means[int] = number of centroids for KMeans algorithm in block-sparse + preprocessing. + n_iter[int] = number of iterations for KMeans + dtype[type] = type of data: np.float32 or np.float64 + inv_eps[float] = additive invertibility constant for matrix decomposition. + backend[string] = "GPU" or "CPU" mode + verbose[boolean] = set True to print details + random_state=[None, float] = to set a random seed for the random + sampling of the samples. To be used when + reproducibility is needed. + ''' + + def __init__(self, n_components=100, kernel='rbf', sigma:float = 1., + exp_sigma:float = 1.0, eps:float = 0.05, mask_radius:float = None, + k_means = 10, n_iter:int = 10, inv_eps:float = None, dtype = np.float32, + backend = None, verbose = False, random_state=None): self.n_components = n_components self.kernel = kernel @@ -73,38 +78,37 @@ def __init__( self.verbose = verbose if not backend: - self.backend = "GPU" if pykeops.config.gpu_available else "CPU" + self.backend = 'GPU' if pykeops.config.gpu_available else 'CPU' else: self.backend = backend if inv_eps: self.inv_eps = inv_eps else: - if kernel == "linear": + if kernel == 'linear': self.inv_eps = 1e-4 else: self.inv_eps = 1e-8 if not mask_radius: - if kernel == "rbf": - self.mask_radius = 2 * np.sqrt(2) * self.sigma - elif kernel == "exp": + if kernel == 'rbf': + self.mask_radius = 2* np.sqrt(2) * self.sigma + elif kernel == 'exp': self.mask_radius = 8 * self.exp_sigma - def fit(self, x: np.ndarray): - """ + + def fit(self, x:np.ndarray): + ''' Args: x = numpy array of shape (n_samples, n_features) Returns: Fitted instance of the class - """ + ''' if self.verbose: - print(f"Working with backend = {self.backend}") - + print(f'Working with backend = {self.backend}') + # Basic checks - assert type(x) == np.ndarray, "Input to fit(.) 
must be an array." - assert ( - x.shape[0] >= self.n_components - ), "The application needs X.shape[0] >= n_components." - assert self.exp_sigma > 0, "Should be working with decaying exponential." + assert type(x) == np.ndarray, 'Input to fit(.) must be an array.' + assert x.shape[0] >= self.n_components, 'The application needs X.shape[0] >= n_components.' + assert self.exp_sigma > 0, 'Should be working with decaying exponential.' # Update dtype self._update_dtype(x) @@ -112,8 +116,8 @@ def fit(self, x: np.ndarray): n_samples = x.shape[0] # Define basis rnd = check_random_state(self.random_state) - inds = rnd.permutation(n_samples) - basis_inds = inds[: self.n_components] + inds = rnd.permutation(n_samples) + basis_inds = inds[:self.n_components] basis = x[basis_inds] # Build smaller kernel basis_kernel = self._pairwise_kernels(basis) @@ -126,108 +130,111 @@ def fit(self, x: np.ndarray): return self - def _spectral(self, X_i: LazyTensor): - """ + + def _spectral(self, X_i:LazyTensor): + ''' Helper function to compute eigendecomposition of K_q. Written using LinearOperators which are lazy representations of sparse and/or structured data. Args: X_i[numpy LazyTensor] Returns S[np.array] eigenvalues, U[np.array] eigenvectors - """ + ''' K_linear = aslinearoperator(X_i) # K <- K + eps - K_linear = ( - K_linear + IdentityOperator(K_linear.shape, dtype=self.dtype) * self.inv_eps - ) + K_linear = K_linear + IdentityOperator(K_linear.shape, dtype=self.dtype) * self.inv_eps k = K_linear.shape[0] - 1 - S, U = eigsh(K_linear, k=k, which="LM") + S, U = eigsh(K_linear, k=k, which='LM') return S, U + - def transform(self, x: np.ndarray) -> np.array: - """Applies transform on the data. - + def transform(self, x:np.ndarray) -> np.array: + ''' Applies transform on the data. + Args: X [np.array] = data to transform Returns X [np.array] = data after transformation - """ - + ''' + K_nq = self._pairwise_kernels(x, self.components_) x_new = K_nq @ self.normalization_.T return x_new - def K_approx(self, x: np.array) -> np.array: - """Function to return Nystrom approximation to the kernel. - + + def K_approx(self, x:np.array) -> np.array: + ''' Function to return Nystrom approximation to the kernel. + Args: X[np.array] = data used in fit(.) function. Returns - K[np.array] = Nystrom approximation to kernel""" - + K[np.array] = Nystrom approximation to kernel''' + K_nq = self._pairwise_kernels(x, self.components_) # For arrays: K_approx = K_nq @ K_q_inv @ K_nq.T # But to use @ with lazy tensors we have: K_q_inv = self.normalization_.T @ self.normalization_ - K_approx = K_nq @ (K_nq @ K_q_inv).T - - return K_approx.T + K_approx = K_nq @ (K_nq @ K_q_inv ).T + + return K_approx.T - def _pairwise_kernels(self, x: np.array, y: np.array = None) -> LazyTensor: - """Helper function to build kernel + def _pairwise_kernels(self, x:np.array, y:np.array = None) -> LazyTensor: + '''Helper function to build kernel + Args: X = torch tensor of dimension 2, K_type = type of Kernel to return. 
Returns: K_ij[LazyTensor] - """ + ''' if y is None: y = x - if self.kernel == "linear": - K_ij = x @ y.T - elif self.kernel == "rbf": + if self.kernel == 'linear': + K_ij = x @ y.T + elif self.kernel == 'rbf': x /= self.sigma y /= self.sigma x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :]) - K_ij = (-(((x_i - x_j) ** 2).sum(dim=2))).exp() + K_ij = ( -(( (x_i - x_j)**2 ).sum(dim=2) ) ).exp() # block-sparse reduction preprocess K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) - elif self.kernel == "exp": + elif self.kernel == 'exp': x /= self.exp_sigma y /= self.exp_sigma x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :]) - K_ij = (-(((x_i - x_j) ** 2).sqrt().sum(2))).exp() + K_ij = (- ( ((x_i - x_j) ** 2).sum(-1) )).sqrt().exp() # block-sparse reduction preprocess - K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) # TODO - + K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) # TODO + K_ij.backend = self.backend - + return K_ij - def _Gauss_block_sparse_pre(self, x: np.array, y: np.array, K_ij: LazyTensor): - """ + + def _Gauss_block_sparse_pre(self, x:np.array, y:np.array, K_ij:LazyTensor): + ''' Helper function to preprocess data for block-sparse reduction of the Gaussian kernel - - Args: + + Args: x[np.array], y[np.array] = arrays giving rise to Gaussian kernel K(x,y) K_ij[LazyTensor_n] = symbolic representation of K(x,y) eps[float] = size for square bins Returns: - K_ij[LazyTensor_n] = symbolic representation of K(x,y) with + K_ij[LazyTensor_n] = symbolic representation of K(x,y) with set sparse ranges - """ + ''' # labels for low dimensions if x.shape[1] < 4 or y.shape[1] < 4: - x_labels = grid_cluster(x, self.eps) - y_labels = grid_cluster(y, self.eps) + x_labels = grid_cluster(x, self.eps) + y_labels = grid_cluster(y, self.eps) # range and centroid per class x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels) y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels) else: - # labels for higher dimensions + # labels for higher dimensions x_labels, x_centroids = self._KMeans(x) y_labels, y_centroids = self._KMeans(y) # compute ranges @@ -236,14 +243,12 @@ def _Gauss_block_sparse_pre(self, x: np.array, y: np.array, K_ij: LazyTensor): # sort points x, x_labels = sort_clusters(x, x_labels) - y, y_labels = sort_clusters(y, y_labels) + y, y_labels = sort_clusters(y, y_labels) # Compute a coarse Boolean mask: - if self.kernel == "rbf": + if self.kernel == 'rbf': D = np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2) - elif self.kernel == "exp": - D = np.sum( - (x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2 - ).sqrt() + elif self.kernel == 'exp': + D = np.sqrt(np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2)) keep = D < (self.mask_radius) ** 2 # mask -> set of integer tensors ranges_ij = from_matrix(x_ranges, y_ranges, keep) @@ -251,8 +256,9 @@ def _Gauss_block_sparse_pre(self, x: np.array, y: np.array, K_ij: LazyTensor): return K_ij - def _KMeans(self, x: np.array): - """KMeans with Pykeops to do binning of original data. + + def _KMeans(self,x:np.array): + ''' KMeans with Pykeops to do binning of original data. 
Args: x[np.array] = data k_means[int] = number of bins to build @@ -260,32 +266,31 @@ def _KMeans(self, x: np.array): Returns: labels[np.array] = class labels for each point in x clusters[np.array] = coordinates for each centroid - """ - N, D = x.shape - clusters = np.copy(x[: self.k_means, :]) # initialization of clusters - x_i = LazyTensor_n(x[:, None, :]) + ''' + N, D = x.shape + clusters = np.copy(x[:self.k_means, :]) # initialization of clusters + x_i = LazyTensor_n(x[:, None, :]) for i in range(self.n_iter): - clusters_j = LazyTensor_n(clusters[None, :, :]) + clusters_j = LazyTensor_n(clusters[None, :, :]) D_ij = ((x_i - clusters_j) ** 2).sum(-1) # points-clusters kernel - labels = ( - D_ij.argmin(axis=1).astype(int).reshape(N) - ) # Points -> Nearest cluster + labels = D_ij.argmin(axis=1).astype(int).reshape(N) # Points -> Nearest cluster Ncl = np.bincount(labels).astype(self.dtype) # Class weights for d in range(D): # Compute the cluster centroids with np.bincount: clusters[:, d] = np.bincount(labels, weights=x[:, d]) / Ncl return labels, clusters - def _update_dtype(self, x): - """Helper function that sets dtype to that of + + def _update_dtype(self,x): + ''' Helper function that sets dtype to that of the given data in the fitting step. - + Args: x [np.array] = raw data to remap Returns: nothing - """ + ''' self.dtype = x.dtype - self.inv_eps = np.array([self.inv_eps]).astype(np.float32)[0] + self.inv_eps = np.array([self.inv_eps]).astype(np.float32)[0] \ No newline at end of file From e7e980c3100403c2c491d2845968af905138cc2e Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Thu, 4 Mar 2021 19:49:11 +0000 Subject: [PATCH 014/111] applied black linting --- pykeops/numpy/nystrom/Nystrom.py | 234 ++++++++++++++++--------------- 1 file changed, 124 insertions(+), 110 deletions(-) diff --git a/pykeops/numpy/nystrom/Nystrom.py b/pykeops/numpy/nystrom/Nystrom.py index df1ee0c43..abb67d7fd 100644 --- a/pykeops/numpy/nystrom/Nystrom.py +++ b/pykeops/numpy/nystrom/Nystrom.py @@ -32,38 +32,52 @@ def check_random_state(seed): return np.random.RandomState(seed) if isinstance(seed, np.random.RandomState): return seed - raise ValueError('%r cannot be used to seed a numpy.random.RandomState' - ' instance' % seed) - + raise ValueError( + "%r cannot be used to seed a numpy.random.RandomState" " instance" % seed + ) + + class Nystrom_NK: - ''' - Class to implement Nystrom using numpy and PyKeops. - * The fit method computes K^{-1}_q. - * The transform method maps the data into the feature space underlying - the Nystrom-approximated kernel. - * The method K_approx directly computes the Nystrom approximation. - Parameters: - n_components [int] = how many samples to select from data. - kernel [str] = type of kernel to use. Current options = {rbf}. - sigma [float] = exponential constant for the RBF kernel. - exp_sigma [float] = exponential constant for the exponential kernel. - eps[float] = size for square bins in block-sparse preprocessing. - k_means[int] = number of centroids for KMeans algorithm in block-sparse - preprocessing. - n_iter[int] = number of iterations for KMeans - dtype[type] = type of data: np.float32 or np.float64 - inv_eps[float] = additive invertibility constant for matrix decomposition. - backend[string] = "GPU" or "CPU" mode - verbose[boolean] = set True to print details - random_state=[None, float] = to set a random seed for the random - sampling of the samples. To be used when - reproducibility is needed. 
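+    Example (illustrative sketch, not part of the API docs; here x is any
+    float32 array of shape (n, d) with n >= n_components):
+        nystrom = Nystrom_NK(n_components=50, kernel="rbf").fit(x)
+        x_new = nystrom.transform(x)   # (n, 50) Nystrom feature map
+        K_hat = nystrom.K_approx(x)    # (n, n) approximation of the kernel matrix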
- ''' - - def __init__(self, n_components=100, kernel='rbf', sigma:float = 1., - exp_sigma:float = 1.0, eps:float = 0.05, mask_radius:float = None, - k_means = 10, n_iter:int = 10, inv_eps:float = None, dtype = np.float32, - backend = None, verbose = False, random_state=None): + """ + Class to implement Nystrom using numpy and PyKeops. + * The fit method computes K^{-1}_q. + * The transform method maps the data into the feature space underlying + the Nystrom-approximated kernel. + * The method K_approx directly computes the Nystrom approximation. + Parameters: + n_components [int] = how many samples to select from data. + kernel [str] = type of kernel to use. Current options = {rbf}. + sigma [float] = exponential constant for the RBF kernel. + exp_sigma [float] = exponential constant for the exponential kernel. + eps[float] = size for square bins in block-sparse preprocessing. + k_means[int] = number of centroids for KMeans algorithm in block-sparse + preprocessing. + n_iter[int] = number of iterations for KMeans + dtype[type] = type of data: np.float32 or np.float64 + inv_eps[float] = additive invertibility constant for matrix decomposition. + backend[string] = "GPU" or "CPU" mode + verbose[boolean] = set True to print details + random_state=[None, float] = to set a random seed for the random + sampling of the samples. To be used when + reproducibility is needed. + """ + + def __init__( + self, + n_components=100, + kernel="rbf", + sigma: float = 1.0, + exp_sigma: float = 1.0, + eps: float = 0.05, + mask_radius: float = None, + k_means=10, + n_iter: int = 10, + inv_eps: float = None, + dtype=np.float32, + backend=None, + verbose=False, + random_state=None, + ): self.n_components = n_components self.kernel = kernel @@ -78,37 +92,38 @@ def __init__(self, n_components=100, kernel='rbf', sigma:float = 1., self.verbose = verbose if not backend: - self.backend = 'GPU' if pykeops.config.gpu_available else 'CPU' + self.backend = "GPU" if pykeops.config.gpu_available else "CPU" else: self.backend = backend if inv_eps: self.inv_eps = inv_eps else: - if kernel == 'linear': + if kernel == "linear": self.inv_eps = 1e-4 else: self.inv_eps = 1e-8 if not mask_radius: - if kernel == 'rbf': - self.mask_radius = 2* np.sqrt(2) * self.sigma - elif kernel == 'exp': + if kernel == "rbf": + self.mask_radius = 2 * np.sqrt(2) * self.sigma + elif kernel == "exp": self.mask_radius = 8 * self.exp_sigma - - def fit(self, x:np.ndarray): - ''' + def fit(self, x: np.ndarray): + """ Args: x = numpy array of shape (n_samples, n_features) Returns: Fitted instance of the class - ''' + """ if self.verbose: - print(f'Working with backend = {self.backend}') - + print(f"Working with backend = {self.backend}") + # Basic checks - assert type(x) == np.ndarray, 'Input to fit(.) must be an array.' - assert x.shape[0] >= self.n_components, 'The application needs X.shape[0] >= n_components.' - assert self.exp_sigma > 0, 'Should be working with decaying exponential.' + assert type(x) == np.ndarray, "Input to fit(.) must be an array." + assert ( + x.shape[0] >= self.n_components + ), "The application needs X.shape[0] >= n_components." + assert self.exp_sigma > 0, "Should be working with decaying exponential." 
# Update dtype self._update_dtype(x) @@ -116,8 +131,8 @@ def fit(self, x:np.ndarray): n_samples = x.shape[0] # Define basis rnd = check_random_state(self.random_state) - inds = rnd.permutation(n_samples) - basis_inds = inds[:self.n_components] + inds = rnd.permutation(n_samples) + basis_inds = inds[: self.n_components] basis = x[basis_inds] # Build smaller kernel basis_kernel = self._pairwise_kernels(basis) @@ -130,111 +145,108 @@ def fit(self, x:np.ndarray): return self - - def _spectral(self, X_i:LazyTensor): - ''' + def _spectral(self, X_i: LazyTensor): + """ Helper function to compute eigendecomposition of K_q. Written using LinearOperators which are lazy representations of sparse and/or structured data. Args: X_i[numpy LazyTensor] Returns S[np.array] eigenvalues, U[np.array] eigenvectors - ''' + """ K_linear = aslinearoperator(X_i) # K <- K + eps - K_linear = K_linear + IdentityOperator(K_linear.shape, dtype=self.dtype) * self.inv_eps + K_linear = ( + K_linear + IdentityOperator(K_linear.shape, dtype=self.dtype) * self.inv_eps + ) k = K_linear.shape[0] - 1 - S, U = eigsh(K_linear, k=k, which='LM') + S, U = eigsh(K_linear, k=k, which="LM") return S, U - - def transform(self, x:np.ndarray) -> np.array: - ''' Applies transform on the data. - + def transform(self, x: np.ndarray) -> np.array: + """Applies transform on the data. + Args: X [np.array] = data to transform Returns X [np.array] = data after transformation - ''' - + """ + K_nq = self._pairwise_kernels(x, self.components_) x_new = K_nq @ self.normalization_.T return x_new - - def K_approx(self, x:np.array) -> np.array: - ''' Function to return Nystrom approximation to the kernel. - + def K_approx(self, x: np.array) -> np.array: + """Function to return Nystrom approximation to the kernel. + Args: X[np.array] = data used in fit(.) function. Returns - K[np.array] = Nystrom approximation to kernel''' - + K[np.array] = Nystrom approximation to kernel""" + K_nq = self._pairwise_kernels(x, self.components_) # For arrays: K_approx = K_nq @ K_q_inv @ K_nq.T # But to use @ with lazy tensors we have: K_q_inv = self.normalization_.T @ self.normalization_ - K_approx = K_nq @ (K_nq @ K_q_inv ).T - - return K_approx.T + K_approx = K_nq @ (K_nq @ K_q_inv).T + + return K_approx.T + def _pairwise_kernels(self, x: np.array, y: np.array = None) -> LazyTensor: + """Helper function to build kernel - def _pairwise_kernels(self, x:np.array, y:np.array = None) -> LazyTensor: - '''Helper function to build kernel - Args: X = torch tensor of dimension 2, K_type = type of Kernel to return. 
Returns: K_ij[LazyTensor] - ''' + """ if y is None: y = x - if self.kernel == 'linear': - K_ij = x @ y.T - elif self.kernel == 'rbf': + if self.kernel == "linear": + K_ij = x @ y.T + elif self.kernel == "rbf": x /= self.sigma y /= self.sigma x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :]) - K_ij = ( -(( (x_i - x_j)**2 ).sum(dim=2) ) ).exp() + K_ij = (-(((x_i - x_j) ** 2).sum(dim=2))).exp() # block-sparse reduction preprocess K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) - elif self.kernel == 'exp': + elif self.kernel == "exp": x /= self.exp_sigma y /= self.exp_sigma x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :]) - K_ij = (- ( ((x_i - x_j) ** 2).sum(-1) )).sqrt().exp() + K_ij = (-(((x_i - x_j) ** 2).sum(-1))).sqrt().exp() # block-sparse reduction preprocess - K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) # TODO - + K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) # TODO + K_ij.backend = self.backend - - return K_ij + return K_ij - def _Gauss_block_sparse_pre(self, x:np.array, y:np.array, K_ij:LazyTensor): - ''' + def _Gauss_block_sparse_pre(self, x: np.array, y: np.array, K_ij: LazyTensor): + """ Helper function to preprocess data for block-sparse reduction of the Gaussian kernel - - Args: + + Args: x[np.array], y[np.array] = arrays giving rise to Gaussian kernel K(x,y) K_ij[LazyTensor_n] = symbolic representation of K(x,y) eps[float] = size for square bins Returns: - K_ij[LazyTensor_n] = symbolic representation of K(x,y) with + K_ij[LazyTensor_n] = symbolic representation of K(x,y) with set sparse ranges - ''' + """ # labels for low dimensions if x.shape[1] < 4 or y.shape[1] < 4: - x_labels = grid_cluster(x, self.eps) - y_labels = grid_cluster(y, self.eps) + x_labels = grid_cluster(x, self.eps) + y_labels = grid_cluster(y, self.eps) # range and centroid per class x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels) y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels) else: - # labels for higher dimensions + # labels for higher dimensions x_labels, x_centroids = self._KMeans(x) y_labels, y_centroids = self._KMeans(y) # compute ranges @@ -243,12 +255,14 @@ def _Gauss_block_sparse_pre(self, x:np.array, y:np.array, K_ij:LazyTensor): # sort points x, x_labels = sort_clusters(x, x_labels) - y, y_labels = sort_clusters(y, y_labels) + y, y_labels = sort_clusters(y, y_labels) # Compute a coarse Boolean mask: - if self.kernel == 'rbf': + if self.kernel == "rbf": D = np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2) - elif self.kernel == 'exp': - D = np.sqrt(np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2)) + elif self.kernel == "exp": + D = np.sqrt( + np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2) + ) keep = D < (self.mask_radius) ** 2 # mask -> set of integer tensors ranges_ij = from_matrix(x_ranges, y_ranges, keep) @@ -256,9 +270,8 @@ def _Gauss_block_sparse_pre(self, x:np.array, y:np.array, K_ij:LazyTensor): return K_ij - - def _KMeans(self,x:np.array): - ''' KMeans with Pykeops to do binning of original data. + def _KMeans(self, x: np.array): + """KMeans with Pykeops to do binning of original data. 
Args: x[np.array] = data k_means[int] = number of bins to build @@ -266,31 +279,32 @@ def _KMeans(self,x:np.array): Returns: labels[np.array] = class labels for each point in x clusters[np.array] = coordinates for each centroid - ''' - N, D = x.shape - clusters = np.copy(x[:self.k_means, :]) # initialization of clusters - x_i = LazyTensor_n(x[:, None, :]) + """ + N, D = x.shape + clusters = np.copy(x[: self.k_means, :]) # initialization of clusters + x_i = LazyTensor_n(x[:, None, :]) for i in range(self.n_iter): - clusters_j = LazyTensor_n(clusters[None, :, :]) + clusters_j = LazyTensor_n(clusters[None, :, :]) D_ij = ((x_i - clusters_j) ** 2).sum(-1) # points-clusters kernel - labels = D_ij.argmin(axis=1).astype(int).reshape(N) # Points -> Nearest cluster + labels = ( + D_ij.argmin(axis=1).astype(int).reshape(N) + ) # Points -> Nearest cluster Ncl = np.bincount(labels).astype(self.dtype) # Class weights for d in range(D): # Compute the cluster centroids with np.bincount: clusters[:, d] = np.bincount(labels, weights=x[:, d]) / Ncl return labels, clusters - - def _update_dtype(self,x): - ''' Helper function that sets dtype to that of + def _update_dtype(self, x): + """Helper function that sets dtype to that of the given data in the fitting step. - + Args: x [np.array] = raw data to remap Returns: nothing - ''' + """ self.dtype = x.dtype - self.inv_eps = np.array([self.inv_eps]).astype(np.float32)[0] \ No newline at end of file + self.inv_eps = np.array([self.inv_eps]).astype(np.float32)[0] From e4be739d3c09cf9867c964d87796385d3ab798e0 Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Thu, 4 Mar 2021 20:07:22 +0000 Subject: [PATCH 015/111] applied black linting --- pykeops/torch/nystrom/nystrom.py | 52 ++++++++++++++------------------ 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/pykeops/torch/nystrom/nystrom.py b/pykeops/torch/nystrom/nystrom.py index 94b7d288c..5432c6b08 100644 --- a/pykeops/torch/nystrom/nystrom.py +++ b/pykeops/torch/nystrom/nystrom.py @@ -3,7 +3,7 @@ # https://colab.research.google.com/drive/1vF2cOSddbRFM5PLqxkIzyZ9XkuzO5DKN?usp=sharing import numpy as np import torch -import pykeops +import numbers from pykeops.torch.cluster import grid_cluster from pykeops.torch.cluster import from_matrix @@ -11,34 +11,26 @@ from pykeops.torch.cluster import sort_clusters from pykeops.torch import LazyTensor -from sklearn.utils import check_random_state - -from scipy.sparse.linalg import aslinearoperator, eigsh -from scipy.sparse.linalg.interface import IdentityOperator - - -################################################################################ -# Same as LazyNystrom_T but written with pyKeOps -import numpy as np -import torch -import pykeops - -from pykeops.numpy import LazyTensor as LazyTensor_n -from pykeops.torch.cluster import grid_cluster -from pykeops.torch.cluster import from_matrix -from pykeops.torch.cluster import cluster_ranges_centroids, cluster_ranges -from pykeops.torch.cluster import sort_clusters -from pykeops.torch import LazyTensor - -from sklearn.utils import check_random_state, as_float_array -from scipy.linalg import svd - -from scipy.sparse.linalg import aslinearoperator, eigsh -from scipy.sparse.linalg.interface import IdentityOperator -from pykeops.torch import Genred - -import matplotlib.pyplot as plt -import time +# Note: this is a function taken from Sklearn +def check_random_state(seed): + """Turn seed into a np.random.RandomState instance + Parameters + ---------- + seed : None, int or instance of RandomState + If seed is None, 
return the RandomState singleton used by np.random. + If seed is an int, return a new RandomState instance seeded with seed. + If seed is already a RandomState instance, return it. + Otherwise raise ValueError. + """ + if seed is None or seed is np.random: + return np.random.mtrand._rand + if isinstance(seed, numbers.Integral): + return np.random.RandomState(seed) + if isinstance(seed, np.random.RandomState): + return seed + raise ValueError( + "%r cannot be used to seed a numpy.random.RandomState" " instance" % seed + ) ################################################################################ @@ -114,7 +106,7 @@ def fit(self, X: torch.tensor): assert type(X) == torch.Tensor, "Input to fit(.) must be a Tensor." assert ( X.size(0) >= self.n_components - ), f"The application needs X.shape[1] >= n_components." + ), "The application needs X.shape[1] >= n_components." # self._update_dtype(X) # Number of samples n_samples = X.size(0) From 73a58c3e06d35c945157968acb42a77fac9e5675 Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Thu, 4 Mar 2021 20:48:57 +0000 Subject: [PATCH 016/111] minor changes and black linting --- pykeops/test/unit_tests_numpy.py | 10 +++++----- pykeops/test/unit_tests_pytorch.py | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index b45439653..4c23f7d65 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -476,15 +476,15 @@ def test_IVF(self): ############################################################ def test_Nystrom_k_approx(self): ############################################################ - from pykeops.numpy.nystrom import Nystrom + from pykeops.numpy.nystrom.Nystrom import Nystrom_NK num_sampling = 20 x = np.random.randint(1, 10, (100, 3)).astype(np.float32) - kernels = ["rbf", "exp"] + kernels = ["rbf"] for kernel in kernels: - N_NK = Nystrom( + N_NK = Nystrom_NK( n_components=num_sampling, kernel=kernel, random_state=0 ).fit(x) K = N_NK.K_approx(x) @@ -497,7 +497,7 @@ def test_Nystrom_k_approx(self): ############################################################ def test_Nystrom_k_shape(self): ############################################################ - from pykeops.numpy.nystrom import Nystrom + from pykeops.numpy.nystrom.Nystrom import Nystrom_NK length = 100 num_sampling = 20 @@ -506,7 +506,7 @@ def test_Nystrom_k_shape(self): kernels = ["rbf", "exp"] for kernel in kernels: - N_NK = Nystrom( + N_NK = Nystrom_NK( n_components=num_sampling, kernel=kernel, random_state=0 ).fit(x) diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index 9a06e6d88..eb2c5ddb5 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -711,7 +711,7 @@ def test_IVF(self): def test_Nystrom_K_approx(self): ############################################################ - from pykeops.torch.nystrom import nystrom as Nystrom_TK + from pykeops.torch.nystrom.nystrom import LazyNystrom_TK import torch length = 100 @@ -721,7 +721,7 @@ def test_Nystrom_K_approx(self): kernels = ["rbf", "exp"] for kernel in kernels: - N_TK = Nystrom_TK( + N_TK = LazyNystrom_TK( n_components=num_sampling, kernel=kernel, random_state=0 ).fit(x) K = N_TK.K_approx(x) @@ -735,7 +735,7 @@ def test_Nystrom_K_approx(self): def test_Nystrom_K_shape(self): ############################################################ - from pykeops.torch.nystrom import nystrom as Nystrom_TK + from pykeops.torch.nystrom.nystrom import 
LazyNystrom_TK import torch length = 100 @@ -745,7 +745,7 @@ def test_Nystrom_K_shape(self): kernels = ["rbf", "exp"] for kernel in kernels: - N_NT = Nystrom_TK( + N_NT = LazyNystrom_TK( n_components=num_sampling, kernel=kernel, random_state=0 ).fit(x) From 70c67182fcc30183718109f397d6f023bac8aeb0 Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Thu, 4 Mar 2021 21:23:35 +0000 Subject: [PATCH 017/111] changing maximum -> max for older torch --- pykeops/torch/nystrom/nystrom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pykeops/torch/nystrom/nystrom.py b/pykeops/torch/nystrom/nystrom.py index 5432c6b08..2bace7709 100644 --- a/pykeops/torch/nystrom/nystrom.py +++ b/pykeops/torch/nystrom/nystrom.py @@ -119,7 +119,7 @@ def fit(self, X: torch.tensor): basis_kernel = self._pairwise_kernels(basis, kernel=self.kernel) # Get SVD U, S, V = torch.svd(basis_kernel) - S = torch.maximum(S, torch.ones(S.size()) * 1e-12) + S = torch.max(S, torch.ones(S.size()) * 1e-12) self.normalization_ = torch.mm(U / np.sqrt(S), V.t()) self.components_ = basis self.component_indices_ = inds From fab4ae310e6c8399f99befbb53d5da233b0c7435 Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Fri, 26 Mar 2021 13:28:29 +0000 Subject: [PATCH 018/111] updated exp kernel --- pykeops/numpy/nystrom/Nystrom.py | 109 ++++++++++++++++--------------- 1 file changed, 56 insertions(+), 53 deletions(-) diff --git a/pykeops/numpy/nystrom/Nystrom.py b/pykeops/numpy/nystrom/Nystrom.py index abb67d7fd..7068132fb 100644 --- a/pykeops/numpy/nystrom/Nystrom.py +++ b/pykeops/numpy/nystrom/Nystrom.py @@ -15,28 +15,6 @@ from scipy.sparse.linalg.interface import IdentityOperator -# Note: this is a function taken from Sklearn -def check_random_state(seed): - """Turn seed into a np.random.RandomState instance - Parameters - ---------- - seed : None, int or instance of RandomState - If seed is None, return the RandomState singleton used by np.random. - If seed is an int, return a new RandomState instance seeded with seed. - If seed is already a RandomState instance, return it. - Otherwise raise ValueError. - """ - if seed is None or seed is np.random: - return np.random.mtrand._rand - if isinstance(seed, numbers.Integral): - return np.random.RandomState(seed) - if isinstance(seed, np.random.RandomState): - return seed - raise ValueError( - "%r cannot be used to seed a numpy.random.RandomState" " instance" % seed - ) - - class Nystrom_NK: """ Class to implement Nystrom using numpy and PyKeops. @@ -46,7 +24,8 @@ class Nystrom_NK: * The method K_approx directly computes the Nystrom approximation. Parameters: n_components [int] = how many samples to select from data. - kernel [str] = type of kernel to use. Current options = {rbf}. + kernel [str] = type of kernel to use. Current options = {rbf:Gaussian, + exp: exponential}. sigma [float] = exponential constant for the RBF kernel. exp_sigma [float] = exponential constant for the exponential kernel. eps[float] = size for square bins in block-sparse preprocessing. 
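For orientation, a minimal usage sketch of the Nystrom_NK API as it stands after this patch; the array sizes, the n_components value and the final consistency check are illustrative assumptions, not part of the test suite:

    import numpy as np
    from pykeops.numpy.nystrom.Nystrom import Nystrom_NK

    x = np.random.randn(1000, 3).astype(np.float32)

    N = Nystrom_NK(n_components=50, kernel="rbf", random_state=0).fit(x)
    x_new = N.transform(x)  # data mapped to the Nystrom feature space, (1000, 50)
    K = N.K_approx(x)       # Nystrom estimate of the full kernel, (1000, 1000)

    # Since K_approx = K_nq @ K_q_inv @ K_nq.T and normalization_ is symmetric,
    # K should match the Gram matrix of the transformed features:
    print(np.abs(K - x_new @ x_new.T).max())  # expected to be close to 0
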
@@ -99,10 +78,7 @@ def __init__( if inv_eps: self.inv_eps = inv_eps else: - if kernel == "linear": - self.inv_eps = 1e-4 - else: - self.inv_eps = 1e-8 + self.inv_eps = 1e-8 if not mask_radius: if kernel == "rbf": @@ -130,12 +106,12 @@ def fit(self, x: np.ndarray): # Number of samples n_samples = x.shape[0] # Define basis - rnd = check_random_state(self.random_state) + rnd = self._check_random_state(self.random_state) inds = rnd.permutation(n_samples) basis_inds = inds[: self.n_components] basis = x[basis_inds] # Build smaller kernel - basis_kernel = self._pairwise_kernels(basis) + basis_kernel = self._pairwise_kernels(basis, dense=False) # Spectral decomposition S, U = self._spectral(basis_kernel) S = np.maximum(S, 1e-12) @@ -150,9 +126,11 @@ def _spectral(self, X_i: LazyTensor): Helper function to compute eigendecomposition of K_q. Written using LinearOperators which are lazy representations of sparse and/or structured data. - Args: X_i[numpy LazyTensor] - Returns S[np.array] eigenvalues, - U[np.array] eigenvectors + Args: + X_i[numpy LazyTensor] + Returns + S[np.array] eigenvalues, + U[np.array] eigenvectors """ K_linear = aslinearoperator(X_i) # K <- K + eps @@ -173,9 +151,8 @@ def transform(self, x: np.ndarray) -> np.array: X [np.array] = data after transformation """ - K_nq = self._pairwise_kernels(x, self.components_) + K_nq = self._pairwise_kernels(x, self.components_, dense=True) x_new = K_nq @ self.normalization_.T - return x_new def K_approx(self, x: np.array) -> np.array: @@ -186,42 +163,54 @@ def K_approx(self, x: np.array) -> np.array: Returns K[np.array] = Nystrom approximation to kernel""" - K_nq = self._pairwise_kernels(x, self.components_) + K_nq = self._pairwise_kernels(x, self.components_, dense=True) # For arrays: K_approx = K_nq @ K_q_inv @ K_nq.T # But to use @ with lazy tensors we have: K_q_inv = self.normalization_.T @ self.normalization_ K_approx = K_nq @ (K_nq @ K_q_inv).T - return K_approx.T - def _pairwise_kernels(self, x: np.array, y: np.array = None) -> LazyTensor: + def _pairwise_kernels( + self, x: np.array, y: np.array = None, dense: bool = False + ) -> LazyTensor: """Helper function to build kernel - Args: X = torch tensor of dimension 2, - K_type = type of Kernel to return. 
+ Args: x[np.array] = data + y[np.array] = array + dense[bool] = False to work with lazy tensor reduction, + True to work with dense arrays Returns: - K_ij[LazyTensor] + K_ij[LazyTensor] if dense = False + K_ij[np.array] if dense = True + """ if y is None: y = x - if self.kernel == "linear": - K_ij = x @ y.T - elif self.kernel == "rbf": + if self.kernel == "rbf": x /= self.sigma y /= self.sigma - x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :]) - K_ij = (-(((x_i - x_j) ** 2).sum(dim=2))).exp() - # block-sparse reduction preprocess - K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) + if dense: + x_i, x_j = x[:, None, :], y[None, :, :] + K_ij = np.exp(-(((x_i - x_j) ** 2).sum(axis=2))) + else: + x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :]) + K_ij = (-(((x_i - x_j) ** 2).sum(dim=2))).exp() + # block-sparse reduction preprocess + K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) elif self.kernel == "exp": x /= self.exp_sigma y /= self.exp_sigma - x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :]) - K_ij = (-(((x_i - x_j) ** 2).sum(-1))).sqrt().exp() - # block-sparse reduction preprocess - K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) # TODO + if dense: + x_i, x_j = x[:, None, :], y[None, :, :] + K_ij = np.exp(-np.sqrt((((x_i - x_j) ** 2).sum(axis=2)))) + else: + x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :]) + K_ij = (-(((x_i - x_j) ** 2).sum(-1)).sqrt()).exp() + # block-sparse reduction preprocess + K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) # TODO - K_ij.backend = self.backend + if not dense: + K_ij.backend = self.backend return K_ij @@ -307,4 +296,18 @@ def _update_dtype(self, x): nothing """ self.dtype = x.dtype - self.inv_eps = np.array([self.inv_eps]).astype(np.float32)[0] + self.inv_eps = np.array([self.inv_eps]).astype(self.dtype)[0] + + def _check_random_state(self, seed): + """Set/get np.random.RandomState instance for permutation + + Args + seed[None, int] + Returns: + numpy random state + """ + if seed is None: + return np.random.mtrand._rand + elif type(seed) == int: + return np.random.RandomState(seed) + raise ValueError(f"Seed {seed} must be None or an integer.") From 50aa46d1661574789d9658e9587471f81b7347b7 Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Fri, 26 Mar 2021 13:36:23 +0000 Subject: [PATCH 019/111] updated exp kernel --- pykeops/test/unit_tests_numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index 4c23f7d65..f4a050d91 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -481,7 +481,7 @@ def test_Nystrom_k_approx(self): num_sampling = 20 x = np.random.randint(1, 10, (100, 3)).astype(np.float32) - kernels = ["rbf"] + kernels = ["rbf", "exp"] for kernel in kernels: N_NK = Nystrom_NK( From 17cd242b046254c311810d6d50c05a510eb95907 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Thu, 1 Apr 2021 14:08:54 +0100 Subject: [PATCH 020/111] add IVF superclass --- pykeops/common/ivf.py | 120 ++++++++++++++++++++++++++++++++++ pykeops/numpy/nn/ivf.py | 139 ++++------------------------------------ pykeops/numpy/utils.py | 72 +++++++++++++++++++++ pykeops/torch/nn/ivf.py | 138 ++++----------------------------------- pykeops/torch/utils.py | 75 ++++++++++++++++++++++ 5 files changed, 291 insertions(+), 253 deletions(-) create mode 100644 pykeops/common/ivf.py diff --git a/pykeops/common/ivf.py b/pykeops/common/ivf.py new file 
mode 100644 index 000000000..1d5b74db9 --- /dev/null +++ b/pykeops/common/ivf.py @@ -0,0 +1,120 @@ +class GenericIVF: + def __init__(self, k, metric, normalise): + self.__k = k + self.__normalise = normalise + self.__distance = self.tools.distance_function(metric) + self.__metric = metric + + def __get_tools(self): + pass + + def __k_argmin(self, x, y, k=1): + x_LT = LazyTensor(self.tools.to(self.tools.unsqueeze(x, 1), self.__device)) + y_LT = LazyTensor(self.tools.to(self.tools.unsqueeze(y, 0), self.__device)) + + d = self.__distance(x_LT, y_LT) + if not self.tools.is_tensor(x): + if self.__backend: + d.backend = self.__backend + + if k == 1: + return self.tools.view(self.tools.long(d.argmin(dim=1)), -1) + else: + return self.tools.long(d.argKmin(K=k, dim=1)) + + def __sort_clusters(self, x, lab, store_x=True): + lab, perm = self.tools.sort(self.tools.view(lab, -1)) + if store_x: + self.__x_perm = perm + else: + self.__y_perm = perm + return x[perm], lab + + def __unsort(self, nn): + return self.tools.index_select(self.__x_perm[nn], 0, self.__y_perm.argsort()) + + def _fit(self, x, clusters=50, a=5, Niter=15, device=None, backend=None): + """ + Fits the main dataset + """ + if type(clusters) != int: + raise ValueError("Clusters must be an integer") + if clusters >= len(x): + raise ValueError("Number of clusters must be less than length of dataset") + if type(a) != int: + raise ValueError("Number of clusters to search over must be an integer") + if a > clusters: + raise ValueError( + "Number of clusters to search over must be less than total number of clusters" + ) + if len(x.shape) != 2: + raise ValueError("Input must be a 2D array") + if self.__normalise: + x = x / self.tools.repeat(self.tools.norm(x, 2, -1), x.shape[1]).reshape( + -1, x.shape[1] + ) + x = self.tools.contiguous(x) + self.__device = device + self.__backend = backend + + cl, c = self.tools.kmeans( + x, clusters, Niter=Niter, metric=self.__metric, device=self.__device + ) + + self.__c = c + + cl = self.__assign(x) + + ncl = self.__k_argmin(c, c, k=a) + self.__x_ranges, _, _ = cluster_ranges_centroids(x, cl) + + x, x_labels = self.__sort_clusters(x, cl, store_x=True) + self.__x = x + r = self.tools.repeat(self.tools.arange(clusters, device=self.__device), a) + self.__keep = self.tools.zeros( + [clusters, clusters], dtype=bool, device=self.__device + ) + self.__keep[r, ncl.flatten()] = True + + return self + + def __assign(self, x, c=None): + if c is None: + c = self.__c + return self.__k_argmin(x, c) + + def _kneighbors(self, y): + """ + Obtain the k nearest neighbors of the query dataset y + """ + if self.__x is None: + raise ValueError("Input dataset not fitted yet! 
Call .fit() first!") + if self.__device and self.tools.device(y) != self.__device: + raise ValueError("Input dataset and query dataset must be on same device") + if len(y.shape) != 2: + raise ValueError("Query dataset must be a 2D tensor") + if self.__x.shape[-1] != y.shape[-1]: + raise ValueError("Query and dataset must have same dimensions") + if self.__normalise: + y = y / self.tools.repeat(self.tools.norm(y, 2, -1), y.shape[1]).reshape( + -1, y.shape[1] + ) + y = self.tools.contiguous(y) + y_labels = self.__assign(y) + + y_ranges, _, _ = cluster_ranges_centroids(y, y_labels) + self.__y_ranges = y_ranges + y, y_labels = self.__sort_clusters(y, y_labels, store_x=False) + x_LT = LazyTensor(self.tools.unsqueeze(self.__x, 0)) + y_LT = LazyTensor(self.tools.unsqueeze(y, 1)) + D_ij = self.__distance(y_LT, x_LT) + ranges_ij = from_matrix(y_ranges, self.__x_ranges, self.__keep) + D_ij.ranges = ranges_ij + nn = D_ij.argKmin(K=self.__k, axis=1) + return self.__unsort(nn) + + def brute_force(self, x, y, k=5): + x_LT = LazyTensor(self.tools.unsqueeze(x, 0)) + y_LT = LazyTensor(self.tools.unsqueeze(y, 1)) + D_ij = self.__distance(y_LT, x_LT) + return D_ij.argKmin(K=k, axis=1) diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index dddf508fa..3d45917bb 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -1,137 +1,24 @@ from pykeops.numpy import LazyTensor from pykeops.numpy.cluster import cluster_ranges_centroids from pykeops.numpy.cluster import from_matrix -import pykeops.config +from pykeops.common.ivf import GenericIVF import numpy as np -class ivf: - def __init__(self, k=5): - self.__c = None - self.__k = k - self.__x = None - self.__keep = None - self.__x_ranges = None - self.__x_perm = None - self.__y_perm = None - self.__use_gpu = None +class IVF(GenericIVF): + def __init__(self, k=5, metric="euclidean", normalise=False): + self.__get_tools() + super().__init__(k=k, metric=metric, normalise=normalise) - def __KMeans(self, x, K=10, Niter=15): - N, D = x.shape - c = np.copy(x[:K, :]) - x_i = LazyTensor(x[:, None, :]) - for i in range(Niter): - c_j = LazyTensor(c[None, :, :]) - D_ij = ((x_i - c_j) ** 2).sum(-1) - if self.__use_gpu: - D_ij.backend = "GPU" - else: - D_ij.backend = "CPU" - cl = D_ij.argmin(axis=1).astype(int).reshape(N) + def __get_tools(self): + self.tools = numpytools - Ncl = np.bincount(cl).astype(dtype="float32") - for d in range(D): - c[:, d] = np.bincount(cl, weights=x[:, d]) / Ncl - return cl, c - - def __k_argmin(self, x, y, k=1): - - x_LT = LazyTensor(np.expand_dims(x, 1)) - y_LT = LazyTensor(np.expand_dims(y, 0)) - d = ((x_LT - y_LT) ** 2).sum(-1) - if self.__use_gpu: - d.backend = "GPU" - else: - d.backend = "CPU" - if k == 1: - return d.argmin(dim=1).flatten() - else: - return d.argKmin(K=k, dim=1) - - def __sort_clusters(self, x, lab, store_x=True): - perm = np.argsort(lab.flatten()) - if store_x: - self.__x_perm = perm - else: - self.__y_perm = perm - return x[perm], lab[perm] - - def __unsort(self, nn): - return np.take(self.__x_perm[nn], self.__y_perm.argsort(), axis=0) - - def fit(self, x, clusters=50, a=5, use_gpu=False, n=15): - """ - Fits the main dataset - """ + def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU"): if type(x) != np.ndarray: - raise ValueError("Input must be a numpy ndarray") - if type(clusters) != int: - raise ValueError("Clusters must be an integer") - if clusters >= len(x): - raise ValueError("Number of clusters must be less than length of dataset") - if type(a) != int: - raise ValueError("Number of 
clusters to search over must be an integer") - if a > clusters: - raise ValueError( - "Number of clusters to search over must be less than total number of clusters" - ) - if len(x.shape) != 2: - raise ValueError("Input must be a 2D array") - - if use_gpu and not pykeops.config.gpu_available: - raise ValueError("use_gpu = True but GPU not detected") - self.__use_gpu = use_gpu - cl, c = self.__KMeans(x, clusters, Niter=n) - self.__c = c - cl = self.__assign(x) - - ncl = self.__k_argmin(c, c, k=a) - self.__x_ranges, _, _ = cluster_ranges_centroids(x, cl) - x, x_labels = self.__sort_clusters(x, cl, store_x=True) - self.__x = x + raise ValueError("Input dataset must be a np array") + return self._fit(x, clusters=clusters, a=a, Niter=Niter, backend=backend) - r = np.arange(clusters).repeat(a).T.reshape(-1) - self.__keep = np.zeros([clusters, clusters], dtype=bool) - self.__keep[r, ncl.flatten()] = True - return self - - def __assign(self, x, c=None): - if c is None: - c = self.__c - return self.__k_argmin(x, c) - - def kneighbors(self, y, sparse=True): - """ - Obtain the k nearest neighbors of the query dataset y - """ - if self.__x is None: - raise ValueError("Input dataset not fitted yet! Call .fit() first!") + def kneighbors(self, y): if type(y) != np.ndarray: - raise ValueError("Query dataset must be a numpy ndarray") - if len(y.shape) != 2: - raise ValueError("Query dataset must be a 2D array") - if self.__x.shape[-1] != y.shape[-1]: - raise ValueError("Query and dataset must have same dimensions") - - y_labels = self.__assign(y, self.__c) - y_ranges, _, _ = cluster_ranges_centroids(y, y_labels) - - y, y_labels = self.__sort_clusters(y, y_labels, store_x=False) - - x_LT = LazyTensor(np.expand_dims(self.__x, 0)) - y_LT = LazyTensor(np.expand_dims(y, 1)) - D_ij = ((y_LT - x_LT) ** 2).sum(-1) - ranges_ij = from_matrix(y_ranges, self.__x_ranges, self.__keep) - D_ij.ranges = ranges_ij - if self.__use_gpu: - D_ij.backend = "GPU" - else: - D_ij.backend = "CPU" - nn = D_ij.argKmin(K=self.__k, axis=1) - return self.__unsort(nn) - - def brute_force(self, x, y, k=5): - x_LT = LazyTensor(np.expand_dims(x, 0)) - y_LT = LazyTensor(np.expand_dims(y, 1)) - D_ij = ((y_LT - x_LT) ** 2).sum(-1) - return D_ij.argKmin(K=k, axis=1) + raise ValueError("Query dataset must be a np array") + return self._kneighbors(y) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index d09c4114c..2161728b6 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -90,6 +90,78 @@ def array(x, dtype=default_dtype, device=None): def device(x): return "cpu" + @staticmethod + def distance_function(metric): + def euclidean(x, y): + return ((x - y) ** 2).sum(-1) + + def manhattan(x, y): + return ((x - y).abs()).sum(-1) + + def angular(x, y): + return x | y + + def hyperbolic(x, y): + return ((x - y) ** 2).sum(-1) / (x[0] * y[0]) + + if metric == "euclidean": + return euclidean + elif metric == "manhattan": + return manhattan + elif metric == "angular": + return angular + elif metric == "hyperbolic": + return hyperbolic + else: + raise ValueError("Unknown metric") + + @staticmethod + def sort(x): + perm = np.argsort(x) + return x[perm], perm + + @staticmethod + def unsqueeze(x, n): + return np.expand_dims(x, n) + + @staticmethod + def arange(n, device="cpu"): + return np.arange(n) + + @staticmethod + def repeat(x, n): + return np.repeat(x, n) + + @staticmethod + def to(x, device): + return x + + @staticmethod + def index_select(input, dim, index): + return np.take(input, index, axis=dim) + + @staticmethod + def 
norm(x, p=2, dim=-1): + return np.linalg.norm(x, ord=p, axis=dim) + + @staticmethod + def kmeans(x, K=10, Niter=15, metric="euclidean", device="CPU"): + from pykeops.numpy import LazyTensor + + distance = numpytools.distance_function(metric) + N, D = x.shape + c = np.copy(x[:K, :]) + x_i = LazyTensor(x[:, None, :]) + for i in range(Niter): + c_j = LazyTensor(c[None, :, :]) + D_ij = distance(x_i, c_j) + D_ij.backend = device + cl = D_ij.argmin(axis=1).astype(int).reshape(N) + Ncl = np.bincount(cl).astype(dtype="float32") + for d in range(D): + c[:, d] = np.bincount(cl, weights=x[:, d]) / Ncl + return cl, c + def squared_distances(x, y): x_norm = (x ** 2).sum(1).reshape(-1, 1) diff --git a/pykeops/torch/nn/ivf.py b/pykeops/torch/nn/ivf.py index aab533b50..e98fbba58 100644 --- a/pykeops/torch/nn/ivf.py +++ b/pykeops/torch/nn/ivf.py @@ -1,140 +1,24 @@ from pykeops.torch import LazyTensor from pykeops.torch.cluster import cluster_ranges_centroids from pykeops.torch.cluster import from_matrix - +from pykeops.common.ivf import GenericIVF import torch -use_cuda = torch.cuda.is_available() -if use_cuda: - torch.cuda.synchronize() - - -class ivf: - def __init__(self, k=5): - self.__c = None - self.__k = k - self.__x = None - self.__keep = None - self.__x_ranges = None - self.__x_perm = None - self.__y_perm = None - self.__device = None - def __KMeans(self, x, K=10, Niter=15): - N, D = x.shape - c = x[:K, :].clone() - x_i = LazyTensor(x.view(N, 1, D).to(self.__device)) - for i in range(Niter): - c_j = LazyTensor(c.view(1, K, D).to(self.__device)) - D_ij = ((x_i - c_j) ** 2).sum(-1) - cl = D_ij.argmin(dim=1).long().view(-1) - c.zero_() - c.scatter_add_(0, cl[:, None].repeat(1, D), x) - Ncl = torch.bincount(cl, minlength=K).type_as(c).view(K, 1) - c /= Ncl - return cl, c +class IVF(GenericIVF): + def __init__(self, k=5, metric="euclidean", normalise=False): + self.__get_tools() + super().__init__(k=k, metric=metric, normalise=normalise) - def __k_argmin(self, x, y, k=1): - if use_cuda: - torch.cuda.synchronize() - x_LT = LazyTensor(x.unsqueeze(1).to(self.__device)) - y_LT = LazyTensor(y.unsqueeze(0).to(self.__device)) - d = ((x_LT - y_LT) ** 2).sum(-1) - if k == 1: - return d.argmin(dim=1).long().view(-1) - else: - return d.argKmin(K=k, dim=1).long() + def __get_tools(self): + self.tools = torchtools - def __sort_clusters(self, x, lab, store_x=True): - lab, perm = torch.sort(lab.view(-1)) - if store_x: - self.__x_perm = perm - else: - self.__y_perm = perm - return x[perm], lab - - def __unsort(self, nn): - return torch.index_select(self.__x_perm[nn], 0, self.__y_perm.argsort()) - - def fit(self, x, clusters=50, a=5, n=15): - """ - Fits the main dataset - """ + def fit(self, x, clusters=50, a=5, Niter=15): if type(x) != torch.Tensor: - raise ValueError("Input must be a torch tensor") - if type(clusters) != int: - raise ValueError("Clusters must be an integer") - if clusters >= len(x): - raise ValueError("Number of clusters must be less than length of dataset") - if type(a) != int: - raise ValueError("Number of clusters to search over must be an integer") - if a > clusters: - raise ValueError( - "Number of clusters to search over must be less than total number of clusters" - ) - if len(x.shape) != 2: - raise ValueError("Input must be a 2D array") - x = x.contiguous() - self.__device = x.device - cl, c = self.__KMeans(x, clusters, Niter=n) - self.__c = c - - cl = self.__assign(x) - if use_cuda: - torch.cuda.synchronize() - - ncl = self.__k_argmin(c, c, k=a) - self.__x_ranges, _, _ = 
cluster_ranges_centroids(x, cl) - - x, x_labels = self.__sort_clusters(x, cl, store_x=True) - self.__x = x - r = torch.arange(clusters).repeat(a, 1).T.reshape(-1).long() - self.__keep = torch.zeros([clusters, clusters], dtype=torch.bool).to( - self.__device - ) - self.__keep[r, ncl.flatten()] = True - return self - - def __assign(self, x, c=None): - if c is None: - c = self.__c - return self.__k_argmin(x, c) + raise ValueError("Input dataset must be a torch tensor") + return self._fit(x, clusters=clusters, a=a, Niter=Niter, device=x.device) def kneighbors(self, y): - """ - Obtain the k nearest neighbors of the query dataset y - """ - if self.__x is None: - raise ValueError("Input dataset not fitted yet! Call .fit() first!") if type(y) != torch.Tensor: raise ValueError("Query dataset must be a torch tensor") - if y.device != self.__device: - raise ValueError("Input dataset and query dataset must be on same device") - if len(y.shape) != 2: - raise ValueError("Query dataset must be a 2D tensor") - if self.__x.shape[-1] != y.shape[-1]: - raise ValueError("Query and dataset must have same dimensions") - if use_cuda: - torch.cuda.synchronize() - y = y.contiguous() - y_labels = self.__assign(y) - - y_ranges, _, _ = cluster_ranges_centroids(y, y_labels) - self.__y_ranges = y_ranges - y, y_labels = self.__sort_clusters(y, y_labels, store_x=False) - x_LT = LazyTensor(self.__x.unsqueeze(0).to(self.__device).contiguous()) - y_LT = LazyTensor(y.unsqueeze(1).to(self.__device).contiguous()) - D_ij = ((y_LT - x_LT) ** 2).sum(-1) - - ranges_ij = from_matrix(y_ranges, self.__x_ranges, self.__keep) - D_ij.ranges = ranges_ij - nn = D_ij.argKmin(K=self.__k, axis=1) - return self.__unsort(nn) - - def brute_force(self, x, y, k=5): - if use_cuda: - torch.cuda.synchronize() - x_LT = LazyTensor(x.unsqueeze(0)) - y_LT = LazyTensor(y.unsqueeze(1)) - D_ij = ((y_LT - x_LT) ** 2).sum(-1) - return D_ij.argKmin(K=k, axis=1) + return self._kneighbors(y) diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py index bf138b9cb..c8c0488cb 100644 --- a/pykeops/torch/utils.py +++ b/pykeops/torch/utils.py @@ -127,6 +127,81 @@ def device(x): else: return None + @staticmethod + def distance_function(metric): + def euclidean(x, y): + return ((x - y) ** 2).sum(-1) + + def manhattan(x, y): + return ((x - y).abs()).sum(-1) + + def angular(x, y): + return x | y + + def hyperbolic(x, y): + return ((x - y) ** 2).sum(-1) / (x[0] * y[0]) + + if metric == "euclidean": + return euclidean + elif metric == "manhattan": + return manhattan + elif metric == "angular": + return angular + elif metric == "hyperbolic": + return hyperbolic + else: + raise ValueError("Unknown metric") + + @staticmethod + def sort(x): + return torch.sort(x) + + @staticmethod + def unsqueeze(x, n): + return torch.unsqueeze(x, n) + + @staticmethod + def arange(n, device="cpu"): + return torch.arange(n, device=device) + + @staticmethod + def repeat(x, n): + return torch.repeat_interleave(x, n) + + @staticmethod + def to(x, device): + if isinstance(x, torch.Tensor): + return x.to(device) + return x + + @staticmethod + def index_select(input, dim, index): + return torch.index_select(input, dim, index) + + @staticmethod + def norm(x, p=2, dim=-1): + return torch.norm(x, p=p, dim=dim) + + @staticmethod + def kmeans(x, K=10, Niter=15, metric="euclidean", device="cuda"): + from pykeops.torch import LazyTensor + + distance = torchtools.distance_function(metric) + N, D = x.shape + c = x[:K, :].clone() + x_i = LazyTensor(x.view(N, 1, D).to(device)) + for i in range(Niter): + 
c_j = LazyTensor(c.view(1, K, D).to(device)) + D_ij = distance(x_i, c_j) + cl = D_ij.argmin(dim=1).long().view(-1) + c.zero_() + c.scatter_add_(0, cl[:, None].repeat(1, D), x) + Ncl = torch.bincount(cl, minlength=K).type_as(c).view(K, 1) + c /= Ncl + if torch.any(torch.isnan(c)) and metric == "angular": + raise ValueError("Please normalise inputs") + return cl, c + def squared_distances(x, y): x_norm = (x ** 2).sum(1).reshape(-1, 1) From d541aeb17fc4e32edc14a6ef95bf728d0631c361 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Thu, 1 Apr 2021 14:17:07 +0100 Subject: [PATCH 021/111] typo correction --- pykeops/common/lazy_tensor.py | 664 ++++++++++++++++++----------- pykeops/test/unit_tests_numpy.py | 2 +- pykeops/test/unit_tests_pytorch.py | 2 +- 3 files changed, 416 insertions(+), 252 deletions(-) diff --git a/pykeops/common/lazy_tensor.py b/pykeops/common/lazy_tensor.py index 60ebca4bf..8b529af75 100644 --- a/pykeops/common/lazy_tensor.py +++ b/pykeops/common/lazy_tensor.py @@ -1,8 +1,9 @@ import copy import re -import numpy as np +import math +import numpy as np from pykeops.common.utils import check_broadcasting @@ -20,17 +21,19 @@ def is_scalar_and_equals(x, val): return False +def is_complex_lazytensor(x): + return isinstance(x, ComplexGenericLazyTensor) + + class GenericLazyTensor: r"""Symbolic wrapper for NumPy arrays and PyTorch tensors. This is the abstract class, end user should use :class:`pykeops.numpy.LazyTensor` or :class:`pykeops.torch.LazyTensor`. - :class:`LazyTensor` encode numerical arrays through the combination of a symbolic, **mathematical formula** and a list of **small data arrays**. They can be used to implement efficient algorithms on objects that are **easy to define**, but **impossible to store** in memory (e.g. the matrix of pairwise distances between two large point clouds). - :class:`LazyTensor` may be created from standard NumPy arrays or PyTorch tensors, combined using simple mathematical operations and converted back to NumPy arrays or PyTorch tensors with @@ -52,15 +55,13 @@ class GenericLazyTensor: axis = None ranges = None # Block-sparsity pattern backend = None # "CPU", "GPU", "GPU_2D", etc. - dtype = None - float_types = [] + _dtype = None + is_complex = False def __init__(self, x=None, axis=None): r"""Creates a KeOps symbolic variable. - Args: x: May be either: - - A *float*, a *list of floats*, a *NumPy float*, a *0D or 1D NumPy array*, a *0D or 1D PyTorch tensor*, in which case the :class:`LazyTensor` represents a constant **vector of parameters**, to be broadcasted @@ -77,11 +78,8 @@ def __init__(self, x=None, axis=None): - An *integer*, in which case the :class:`LazyTensor` represents an **integer constant** handled efficiently at compilation time. - **None**, for internal use. - axis (int): should be equal to 0 or 1 if **x** is a 2D tensor, and **None** otherwise. - .. warning:: - A :class:`LazyTensor` constructed from a NumPy array or a PyTorch tensor retains its **dtype** (float16, float32 or float64) and **device** properties (is it stored on the GPU?). @@ -136,7 +134,7 @@ def __init__(self, x=None, axis=None): # Float numbers must be encoded as Parameters, as C++'s templating system cannot deal # with floating point arithmetics. - elif typex in self.float_types: + elif typex in self.tools.float_types: x = [x] # Convert to list and go to stage 2 typex = list @@ -155,89 +153,109 @@ def __init__(self, x=None, axis=None): self.formula = "Var({},{},2)".format(id(x), self.ndim) return # That's it! 
else: - self.dtype = self.tools.dtypename(self.tools.dtype(x)) + self._dtype = self.tools.dtypename(self.tools.dtype(x)) - def lt_constructor(self, x=None, axis=None): - r"""This method is specialized in :class:`pykeops.numpy.LazyTensor` and :class:`pykeops.torch.LazyTensor`. It - returns a new instance of a LazyTensor (numpy or pytorch).""" - pass + typex = type(x) - def get_tools(self): - r"""This method is specialized in :class:`pykeops.numpy.LazyTensor` and :class:`pykeops.torch.LazyTensor`. It - populate the tools class.""" - pass + if ( + typex + not in [type(None), tuple, int, float, list, self.tools.arraytype] + + self.tools.float_types + ): + raise TypeError( + "LazyTensors should be built from " + self.tools.arrayname + ", " + "float/integer numbers, lists of floats or 3-uples of integers. " + "Received: {}".format(typex) + ) - def infer_dim(self, x, axis): - if len(x.shape) >= 3: # Infer axis from the input shape - # If x is a 3D+ array, its shape must be either (..,M,1,D) or (..,1,N,D) or (..,1,1,D). - # We infer axis from shape and squeeze out the "1" dimensions: - if axis is not None: - raise ValueError( - "'axis' parameter should not be given when 'x' is a 3D tensor." - ) + if typex == self.tools.arraytype and len(x.shape) == 0: + x = x.view(1) + elif typex in self.tools.float_types: + x = self.tools.arraytype([x]).view(1) - if len(x.shape) > 3: # We're in "batch mode" - self.batchdims = tuple(x.shape[:-3]) + if typex == self.tools.arraytype: + if len(x.shape) >= 3: # Infer axis from the input shape + # If x is a 3D+ array, its shape must be either (..,M,1,D) or (..,1,N,D) or (..,1,1,D). + # We infer axis from shape and squeeze out the "1" dimensions: + if axis is not None: + raise ValueError( + "'axis' parameter should not be given when 'x' is a 3D tensor." + ) - if x.shape[-3] == 1: - if x.shape[-2] == 1: # (..,1,1,D) -> Pm(D) - x = x.squeeze(-2).squeeze(-2) - axis = 2 - else: # (..,1,N,D) -> Vj(D) - x = x.squeeze(-3) - axis = 1 + if len(x.shape) > 3: # We're in "batch mode" + self.batchdims = tuple(x.shape[:-3]) - elif x.shape[-2] == 1: # (M,1,D) -> Vi(D) - x = x.squeeze(-2) - axis = 0 - else: - raise ValueError( - "If 'x' is a 3D+ tensor, its shape should be one of (..,M,1,D), (..,1,N,D) or (..,1,1,D)." - ) + if x.shape[-3] == 1: + if x.shape[-2] == 1: # (..,1,1,D) -> Pm(D) + x = x.squeeze(-2).squeeze(-2) + axis = 2 + else: # (..,1,N,D) -> Vj(D) + x = x.squeeze(-3) + axis = 1 - # Stage 4: x is now encoded as a 2D or 1D array + batch dimensions -------------------- - if ( - len(x.shape) >= 2 and axis != 2 - ): # shape is (..,M,D) or (..,N,D), with an explicit 'axis' parameter - if axis is None or axis not in (0, 1): - raise ValueError( - "When 'x' is encoded as a 2D array, LazyTensor expects an explicit 'axis' value in {0,1}." - ) + elif x.shape[-2] == 1: # (M,1,D) -> Vi(D) + x = x.squeeze(-2) + axis = 0 + else: + raise ValueError( + "If 'x' is a 3D+ tensor, its shape should be one of (..,M,1,D), (..,1,N,D) or (..,1,1,D)." + ) - # id(x) is used as temporary identifier for KeOps "Var", - # this identifier will be changed when calling method "fixvariables" - # But first we do a small hack, in order to distinguish same array involved twice in a formula but with - # different axis (e.g. 
Vi(x)-Vj(x) formula): we do a dummy reshape in order to get a different id - if axis == 1: - x = self.tools.view(x, x.shape) + # Stage 4: x is now encoded as a 2D or 1D array + batch dimensions -------------------- + if ( + len(x.shape) >= 2 and axis != 2 + ): # shape is (..,M,D) or (..,N,D), with an explicit 'axis' parameter + if axis is None or axis not in (0, 1): + raise ValueError( + "When 'x' is encoded as a 2D array, LazyTensor expects an explicit 'axis' value in {0,1}." + ) - self.variables = (x,) - self.ndim = x.shape[-1] - self.axis = axis - self.formula = "Var({},{},{})".format(id(x), self.ndim, self.axis) + # id(x) is used as temporary identifier for KeOps "Var", + # this identifier will be changed when calling method "fixvariables" + # But first we do a small hack, in order to distinguish same array involved twice in a formula but with + # different axis (e.g. Vi(x)-Vj(x) formula): we do a dummy reshape in order to get a different id + if axis == 1: + x = self.tools.view(x, x.shape) - if axis == 0: - self.ni = x.shape[-2] - else: - self.nj = x.shape[-2] + self.variables = (x,) + self.ndim = x.shape[-1] + self.axis = axis + self.formula = "Var({},{},{})".format(id(x), self.ndim, self.axis) - self.dtype = self.tools.dtypename(self.tools.dtype(x)) + if axis == 0: + self.ni = x.shape[-2] + else: + self.nj = x.shape[-2] - elif len(x.shape) == 1 or axis == 2: # shape is (D,): x is a "Pm(D)" parameter - if axis is not None and axis != 2: + self._dtype = self.tools.dtypename(self.tools.dtype(x)) + + elif ( + len(x.shape) == 1 or axis == 2 + ): # shape is (D,): x is a "Pm(D)" parameter + if axis is not None and axis != 2: + raise ValueError( + "When 'x' is encoded as a 1D or 0D array, 'axis' must be None or 2 (= Parameter variable)." + ) + self.variables = (x,) + self.ndim = x.shape[-1] + self.axis = 2 + self.formula = "Var({},{},2)".format(id(x), self.ndim) + + else: raise ValueError( - "When 'x' is encoded as a 1D or 0D array, 'axis' must be None or 2 (= Parameter variable)." + "LazyTensors can be built from 0D, 1D, 2D or 3D+ tensors. " + + "Received x of shape: {}.".format(x.shape) ) - self.variables = (x,) - self.ndim = x.shape[-1] - self.axis = 2 - self.formula = "Var({},{},2)".format(id(x), self.ndim) - else: - raise ValueError( - "LazyTensors can be built from 0D, 1D, 2D or 3D+ tensors. " - + "Received x of shape: {}.".format(x.shape) - ) + def lt_constructor(self, x=None, axis=None): + r"""This method is specialized in :class:`pykeops.numpy.LazyTensor` and :class:`pykeops.torch.LazyTensor`. It + returns a new instance of a LazyTensor (numpy or pytorch).""" + pass + + def get_tools(self): + r"""This method is specialized in :class:`pykeops.numpy.LazyTensor` and :class:`pykeops.torch.LazyTensor`. It + populate the tools class.""" + pass def fixvariables(self): r"""If needed, assigns final labels to each variable and pads their batch dimensions prior to a :mod:`Genred()` call.""" @@ -255,7 +273,7 @@ def fixvariables(self): for v in self.variables: idv = id(v) if type(v) == list: - v = self.tools.array(v, self.dtype, device) + v = self.tools.array(v, self._dtype, device) # Replace "Var(idv," by "Var(i," and increment 'i': tag = "Var({},".format(idv) @@ -312,11 +330,11 @@ def separate_kwargs(self, kwargs): kwargs_call = dict(kwargs_call) return kwargs_init, kwargs_call - def promote(self, other, props): + def promote(self, other, props, is_complex=False): r""" Creates a new :class:`LazyTensor` whose **None** properties are set to those of **self** or **other**. 
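        To make the promotion rule concrete, a standalone behavioral sketch
        (a simplification of the loop in the method body, not the method itself):
        each listed property keeps whichever operand's value is set, and two
        conflicting non-None values are rejected.

            def promote_property(x_val, y_val):
                # Per-property promotion: None defers to the other operand;
                # two different non-None values cannot be merged.
                if x_val is None:
                    return y_val
                if y_val is None or x_val == y_val:
                    return x_val
                raise ValueError("Incompatible values of a LazyTensor property.")
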
""" - res = self.lt_constructor() + res = self.lt_constructor(is_complex=is_complex) for prop in props: y, x = getattr(self, prop), getattr(other, prop) @@ -341,13 +359,13 @@ def promote(self, other, props): setattr(res, prop, y) return res - def init(self): + def init(self, is_complex=False): r""" Creates a copy of a :class:`LazyTensor`, without **formula** attribute. """ - res = self.lt_constructor() + res = self.lt_constructor(is_complex=is_complex) res.tools = self.tools - res.dtype = self.dtype + res._dtype = self._dtype res.Genred = self.Genred res.KernelSolve = self.KernelSolve res.batchdims = self.batchdims @@ -359,7 +377,7 @@ def init(self): res.symbolic_variables = self.symbolic_variables return res - def join(self, other): + def join(self, other, is_complex=False): r""" Merges the variables and attributes of two :class:`LazyTensor`, with a compatibility check. This method concatenates tuples of variables, without paying attention to repetitions. @@ -367,7 +385,7 @@ def join(self, other): res = self.promote( other, ( - "dtype", + "_dtype", "tools", "Genred", "KernelSolve", @@ -376,6 +394,7 @@ def join(self, other): "ranges", "backend", ), + is_complex=is_complex, ) res.symbolic_variables = self.symbolic_variables + other.symbolic_variables @@ -388,13 +407,17 @@ def join(self, other): # Prototypes for unary and binary operations ============================== - def unary(self, operation, dimres=None, opt_arg=None, opt_arg2=None): + def unary( + self, operation, dimres=None, opt_arg=None, opt_arg2=None, is_complex=None + ): r""" Symbolically applies **operation** to **self**, with optional arguments if needed. - The optional argument **dimres** may be used to specify the dimension of the output **result**. """ + if is_complex is None: + is_complex = self.is_complex + # we must prevent any operation if self is the output of a reduction operation, # i.e. if it has a reduction_op field if hasattr(self, "reduction_op"): @@ -405,7 +428,7 @@ def unary(self, operation, dimres=None, opt_arg=None, opt_arg2=None): if not dimres: dimres = self.ndim - res = self.init() # Copy of self, without a formula + res = self.init(is_complex) # Copy of self, without a formula if opt_arg2 is not None: res.formula = "{}({},{},{})".format( operation, self.formula, opt_arg, opt_arg2 @@ -427,9 +450,9 @@ def binary( opt_arg=None, opt_pos="last", rversion=False, + is_complex=None, ): r"""Symbolically applies **operation** to **self**, with optional arguments if needed. - Keyword args: - dimres (int): May be used to specify the dimension of the output **result**. - is_operator (bool, default=False): May be used to specify if **operation** is @@ -438,10 +461,14 @@ def binary( Supported values are ``"same"``, ``"sameor1"``, or **None**. - rversion (Boolean): shall we invert lhs and rhs of the binary op, e.g. as in __radd__, __rmut__, etc... """ + # If needed, convert float numbers / lists / arrays / tensors to LazyTensors: if not hasattr(other, "__GenericLazyTensor__"): other = self.lt_constructor(other) + if is_complex is None: + is_complex = True if (self.is_complex or other.is_complex) else False + # we must prevent any operation if self or other is the output of a reduction operation, # i.e. 
if it has a reduction_op field if hasattr(self, "reduction_op") or hasattr(other, "reduction_op"): @@ -474,7 +501,10 @@ def binary( elif dimcheck != None: raise ValueError("incorrect dimcheck keyword in binary operation") - res = self.join(other) # Merge the attributes and variables of both operands + res = self.join( + other, is_complex=is_complex + ) # Merge the attributes and variables of both operands + res.ndim = dimres if not rversion: @@ -511,7 +541,6 @@ def ternary( self, other1, other2, operation, dimres=None, dimcheck="sameor1", opt_arg=None ): r"""Symbolically applies **operation** to **self**, with optional arguments if needed. - Keyword args: - dimres (int): May be used to specify the dimension of the output **result**. - is_operator (bool, default=False): May be used to specify if **operation** is @@ -594,13 +623,13 @@ def reduction( axis=None, dim=None, call=True, + is_complex=None, **kwargs ): r""" Applies a reduction to a :class:`LazyTensor`. This method is used internally by the LazyTensor class. Args: reduction_op (string): the string identifier of the reduction, which will be passed to the KeOps routines. - Keyword Args: other: May be used to specify some **weights** ; depends on the reduction. opt_arg: typically, some integer needed by ArgKMin reductions ; depends on the reduction. @@ -648,6 +677,12 @@ def reduction( with formulas involving large dimension variables. """ + if is_complex is None: + if other is None: + is_complex = self.is_complex + else: + is_complex = self.is_complex or other.is_complex + if axis is None: axis = dim # NumPy uses axis, PyTorch uses dim... if axis - self.nbatchdims not in (0, 1): @@ -656,10 +691,10 @@ def reduction( ) if other is None: - res = self.init() # ~ self.copy() + res = self.init(is_complex=is_complex) # ~ self.copy() res.formula2 = None else: - res = self.join(other) + res = self.join(other, is_complex=is_complex) res.formula2 = other.formula res.formula = self.formula @@ -680,7 +715,7 @@ def reduction( res.rec_multVar_highdim = id(self.rec_multVar_highdim[1].variables[0]) else: res.rec_multVar_highdim = None - if res.dtype is not None: + if res._dtype is not None: res.fixvariables() # Turn the "id(x)" numbers into consecutive labels # "res" now becomes a callable object: res.callfun = res.Genred( @@ -688,13 +723,13 @@ def reduction( [], res.reduction_op, res.axis, - res.dtype, + res._dtype, res.opt_arg, res.formula2, **kwargs_init, rec_multVar_highdim=res.rec_multVar_highdim ) - if call and len(res.symbolic_variables) == 0 and res.dtype is not None: + if call and len(res.symbolic_variables) == 0 and res._dtype is not None: return res() else: return res @@ -702,11 +737,9 @@ def reduction( def solve(self, other, var=None, call=True, **kwargs): r""" Solves a positive definite linear system of the form ``sum(self) = other`` or ``sum(self*var) = other`` , using a conjugate gradient solver. - Args: self (:class:`LazyTensor`): KeOps variable that encodes a symmetric positive definite matrix / linear operator. other (:class:`LazyTensor`): KeOps variable that encodes the second member of the equation. - Keyword args: var (:class:`LazyTensor`): If **var** is **None**, **solve** will return the solution @@ -752,9 +785,7 @@ def solve(self, other, var=None, call=True, **kwargs): accuracy for large sized data. enable_chunks (bool, default True): enable automatic selection of special "chunked" computation mode for accelerating reductions with formulas involving large dimension variables. - .. 
warning:: - Please note that **no check** of symmetry and definiteness will be performed prior to our conjugate gradient descent. """ @@ -803,20 +834,20 @@ def solve(self, other, var=None, call=True, **kwargs): else: res.rec_multVar_highdim = None - if res.dtype is not None: + if res._dtype is not None: res.fixvariables() res.callfun = res.KernelSolve( res.formula, [], res.varformula, res.axis, - res.dtype, + res._dtype, **kwargs_init, rec_multVar_highdim=res.rec_multVar_highdim ) # we call if call=True, if other is not symbolic, and if the dtype is set - if call and len(other.symbolic_variables) == 0 and res.dtype is not None: + if call and len(other.symbolic_variables) == 0 and res._dtype is not None: return res() else: return res @@ -839,11 +870,11 @@ def __call__(self, *args, **kwargs): self.kwargs.update({"backend": self.backend}) if ( - self.dtype is None + self._dtype is None ): # This can only happen if we haven't encountered 2D or 3D arrays just yet... self.get_tools() - self.dtype = self.tools.dtypename( + self._dtype = self.tools.dtypename( self.tools.dtype(args[0]) ) # crash if LazyTensor is called self.fixvariables() @@ -856,7 +887,7 @@ def __call__(self, *args, **kwargs): [], self.formula2, self.axis, - self.dtype, + self._dtype, **kwargs_init, rec_multVar_highdim=self.rec_multVar_highdim ) @@ -866,7 +897,7 @@ def __call__(self, *args, **kwargs): [], self.reduction_op, self.axis, - self.dtype, + self._dtype, self.opt_arg, self.formula2, **kwargs_init, @@ -886,7 +917,7 @@ def __str__(self): r""" Returns a verbose string identifier. """ - tmp = self.init() # ~ self.copy() + tmp = self.init(is_complex=self.is_complex) # ~ self.copy() tmp.formula = self.formula tmp.formula2 = None if not hasattr(self, "formula2") else self.formula2 @@ -913,21 +944,27 @@ def __str__(self): return string @property - def shape(self): - btch = () if self.batchdims is None else self.batchdims - ni = 1 if self.ni is None else self.ni - nj = 1 if self.nj is None else self.nj - ndim = 1 if self.ndim is None else self.ndim - return btch + (ni, nj) if ndim == 1 else btch + (ni, nj, ndim) + def dtype(self): + return self._dtype @property def _shape(self): + r"""returns the internal shape of the LazyTensor.""" btch = () if self.batchdims is None else self.batchdims ni = 1 if self.ni is None else self.ni nj = 1 if self.nj is None else self.nj ndim = 1 if self.ndim is None else self.ndim return btch + (ni, nj, ndim) + @property + def shape(self): + r"""returns the shape of the LazyTensor""" + s = self._shape + if s[-1] == 1: + return s[:-1] + else: + return s + def dim(self): r""" Just as in PyTorch, returns the number of dimensions of a :class:`LazyTensor`. @@ -948,58 +985,67 @@ def nbatchdims(self): __array_ufunc__ = None # Arithmetics -------------------------------------------------------------- + + def addop(self, other, **kwargs): + return self.binary(other, "+", is_operator=True, **kwargs) + def __add__(self, other): r""" Broadcasted addition operator - a binary operation. - ``x + y`` returns a :class:`LazyTensor` that encodes, symbolically, the addition of ``x`` and ``y``. """ if is_scalar_and_equals(other, 0): return self + elif is_complex_lazytensor(other) and not is_complex_lazytensor(self): + return self.real2complex().addop(other) else: - return self.binary(other, "+", is_operator=True) + return self.addop(other) def __radd__(self, other): r""" Broadcasted addition operator - a binary operation. - ``x + y`` returns a :class:`LazyTensor` that encodes, symbolically, the addition of ``x`` and ``y``. 
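        A quick illustration of the scalar shortcuts above (shapes are
        illustrative; the NumPy frontend is shown, the PyTorch one behaves
        identically):

            import numpy as np
            from pykeops.numpy import LazyTensor

            x_i = LazyTensor(np.random.randn(100, 1, 3))  # "i"-indexed variable
            y_j = LazyTensor(np.random.randn(1, 200, 3))  # "j"-indexed variable

            s_ij = x_i + y_j           # purely symbolic broadcasted sum
            assert (0 + s_ij) is s_ij  # adding zero is optimized away

            # Nothing is computed until a reduction is requested:
            res = s_ij.sum(dim=2).sum(axis=1)  # (100, 1) NumPy array
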
""" if is_scalar_and_equals(other, 0): return self else: - return self.binary(other, "+", is_operator=True, rversion=True) + return self.addop(other, rversion=True) + + def subop(self, other, **kwargs): + return self.binary(other, "-", is_operator=True, **kwargs) def __sub__(self, other): r""" Broadcasted subtraction operator - a binary operation. - ``x - y`` returns a :class:`LazyTensor` that encodes, symbolically, the subtraction of ``x`` and ``y``. """ if is_scalar_and_equals(other, 0): return self + elif is_complex_lazytensor(other) and not is_complex_lazytensor(self): + return self.real2complex().subop(other) else: - return self.binary(other, "-", is_operator=True) + return self.subop(other) def __rsub__(self, other): r""" Broadcasted subtraction operator - a binary operation. - ``x - y`` returns a :class:`LazyTensor` that encodes, symbolically, the subtraction of ``x`` and ``y``. """ if is_scalar_and_equals(other, 0): return self.unary("Minus") else: - return self.binary(other, "-", is_operator=True, rversion=True) + return self.subop(other, rversion=True) + + def mulop(self, other, **kwargs): + return self.binary(other, "*", is_operator=True, **kwargs) def __mul__(self, other): r""" Broadcasted elementwise product - a binary operation. - ``x * y`` returns a :class:`LazyTensor` that encodes, symbolically, the elementwise product of ``x`` and ``y``. """ @@ -1009,13 +1055,16 @@ def __mul__(self, other): return self elif is_scalar_and_equals(other, -1): return self.unary("Minus") + elif is_complex_lazytensor(other) and not is_complex_lazytensor(self): + return other.mulop(self) + elif self.tools.detect_complex(other) and not is_complex_lazytensor(self): + return self.lt_constructor(other).mulop(self) else: - return self.binary(other, "*", is_operator=True) + return self.mulop(other) def __rmul__(self, other): r""" Broadcasted elementwise product - a binary operation. - ``x * y`` returns a :class:`LazyTensor` that encodes, symbolically, the elementwise product of ``x`` and ``y``. """ @@ -1025,25 +1074,30 @@ def __rmul__(self, other): return self elif is_scalar_and_equals(other, -1): return self.unary("Minus") + elif self.tools.detect_complex(other) and not is_complex_lazytensor(self): + return self.real2complex().mulop(self.lt_constructor(other)) else: - return self.binary(other, "*", is_operator=True, rversion=True) + return self.mulop(other, rversion=True) + + def divop(self, other, **kwargs): + return self.binary(other, "/", is_operator=True, **kwargs) def __truediv__(self, other): r""" Broadcasted elementwise division - a binary operation. - ``x / y`` returns a :class:`LazyTensor` that encodes, symbolically, the elementwise division of ``x`` by ``y``. """ if is_scalar_and_equals(other, 1): return self + elif is_complex_lazytensor(other) and not is_complex_lazytensor(self): + return self.real2complex().divop(other) else: - return self.binary(other, "/", is_operator=True) + return self.divop(other) def __rtruediv__(self, other): r""" Broadcasted elementwise division - a binary operation. - ``x / y`` returns a :class:`LazyTensor` that encodes, symbolically, the elementwise division of ``x`` by ``y``. """ @@ -1052,12 +1106,11 @@ def __rtruediv__(self, other): elif is_scalar_and_equals(other, 1): return self.unary("Inv") else: - return self.binary(other, "/", is_operator=True, rversion=True) + return self.divop(other, rversion=True) def __or__(self, other): r""" Euclidean scalar product - a binary operation. 
- ``(x|y)`` returns a :class:`LazyTensor` that encodes, symbolically, the scalar product of ``x`` and ``y`` which are assumed to have the same shape. """ @@ -1066,7 +1119,6 @@ def __or__(self, other): def __ror__(self, other): r""" Euclidean scalar product - a binary operation. - ``(x|y)`` returns a :class:`LazyTensor` that encodes, symbolically, the scalar product of ``x`` and ``y`` which are assumed to have the same shape. """ @@ -1079,7 +1131,6 @@ def __ror__(self, other): def __abs__(self): r""" Element-wise absolute value - a unary operation. - ``abs(x)`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise absolute value of ``x``. """ @@ -1088,7 +1139,6 @@ def __abs__(self): def abs(self): r""" Element-wise absolute value - a unary operation. - ``x.abs()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise absolute value of ``x``. """ @@ -1097,7 +1147,6 @@ def abs(self): def __neg__(self): r""" Element-wise minus - a unary operation. - ``-x`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise opposite of ``x``. """ @@ -1108,7 +1157,6 @@ def __neg__(self): def exp(self): r""" Element-wise exponential - a unary operation. - ``x.exp()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise exponential of ``x``. """ @@ -1117,7 +1165,6 @@ def exp(self): def log(self): r""" Element-wise logarithm - a unary operation. - ``x.log()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise logarithm of ``x``. """ @@ -1126,7 +1173,6 @@ def log(self): def xlogx(self): r""" Element-wise x*log(x) function - a unary operation. - ``x.xlogx()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise ``x`` times logarithm of ``x`` (with value 0 at 0). """ @@ -1135,7 +1181,6 @@ def xlogx(self): def cos(self): r""" Element-wise cosine - a unary operation. - ``x.cos()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise cosine of ``x``. """ @@ -1144,16 +1189,30 @@ def cos(self): def sin(self): r""" Element-wise sine - a unary operation. - ``x.sin()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise sine of ``x``. """ return self.unary("Sin") + def sinxdivx(self): + r""" + Element-wise sin(x)/x function - a unary operation. + ``x.sinxdivx()`` returns a :class:`LazyTensor` that encodes, symbolically, + the element-wise sinxdivx function of ``x``. + """ + return self.unary("SinXDivX") + + def sinc(self): + r""" + Element-wise sinc(x) = sin(pi x) / (pi x) function - a unary operation. + ``x.sinc()`` returns a :class:`LazyTensor` that encodes, symbolically, + the element-wise sinc function of ``x``. + """ + return (math.pi * self).sinxdivx() + def asin(self): r""" Element-wise arcsine - a unary operation. - ``x.asin()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise arcsine of ``x``. """ @@ -1162,7 +1221,6 @@ def asin(self): def acos(self): r""" Element-wise arccosine - a unary operation. - ``x.acos()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise arccosine of ``x``. """ @@ -1171,16 +1229,22 @@ def acos(self): def atan(self): r""" Element-wise arctangent - a unary operation. - ``x.atan()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise arctangent of ``x``. """ return self.unary("Atan") + def atan2(self, other): + r""" + Element-wise atan2 - a binary operation. 
+ ``y.atan2(x)`` returns a :class:`LazyTensor` that encodes, symbolically, + the element-wise atan2 of ``x`` and ``y``. + """ + return self.binary(other, "Atan2", dimcheck="same") + def sqrt(self): r""" Element-wise square root - a unary operation. - ``x.sqrt()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise square root of ``x``. """ @@ -1189,7 +1253,6 @@ def sqrt(self): def rsqrt(self): r""" Element-wise inverse square root - a unary operation. - ``x.rsqrt()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise inverse square root of ``x``. """ @@ -1198,10 +1261,8 @@ def rsqrt(self): def __pow__(self, other): r""" Broadcasted element-wise power operator - a binary operation. - ``x**y`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise value of ``x`` to the power ``y``. - Note: - if **y = 2**, ``x**y`` relies on the ``"Square"`` KeOps operation; - if **y = 0.5**, ``x**y`` uses on the ``"Sqrt"`` KeOps operation; @@ -1236,7 +1297,6 @@ def __pow__(self, other): def power(self, other): r""" Broadcasted element-wise power operator - a binary operation. - ``pow(x,y)`` is equivalent to ``x**y``. """ return self ** other @@ -1244,7 +1304,6 @@ def power(self, other): def square(self): r""" Element-wise square - a unary operation. - ``x.square()`` is equivalent to ``x**2`` and returns a :class:`LazyTensor` that encodes, symbolically, the element-wise square of ``x``. """ @@ -1253,7 +1312,6 @@ def square(self): def sign(self): r""" Element-wise sign in {-1,0,+1} - a unary operation. - ``x.sign()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise sign of ``x``. """ @@ -1262,7 +1320,6 @@ def sign(self): def step(self): r""" Element-wise step function - a unary operation. - ``x.step()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise sign of ``x``. """ @@ -1271,7 +1328,6 @@ def step(self): def relu(self): r""" Element-wise ReLU function - a unary operation. - ``x.relu()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise positive part of ``x``. """ @@ -1280,20 +1336,45 @@ def relu(self): def clamp(self, other1, other2): r""" Element-wise Clamp function - a ternary operation. - ``x.clamp(a,b)`` returns a :class:`LazyTensor` that encodes, symbolically, - the element-wise clamping of ``x`` in ``(a,b)``. Braoodcasting rules apply. - a and b may be fixed integers or floats, or other LazyTensors + the element-wise clamping of ``x`` in ``(a,b)``. Broadcasting rules apply. + a and b may be fixed integers or floats, or other LazyTensors. """ if (type(other1) == int) and (type(other2) == int): return self.unary("ClampInt", opt_arg=other1, opt_arg2=other2) else: return self.ternary(other1, other2, "Clamp", dimcheck="sameor1") + def ifelse(self, other1, other2): + r""" + Element-wise if-else function - a ternary operation. + ``x.ifelse(a,b)`` returns a :class:`LazyTensor` that encodes, symbolically, + ``a`` where ``x >= 0`` and ``b`` where ``x < 0``. Broadcasting rules apply. + a and b may be fixed integers or floats, or other LazyTensors. + """ + return self.ternary(other1, other2, "IfElse", dimcheck="sameor1") + + def mod(self, modulus, offset=0): + r""" + Element-wise modulo with offset function - a ternary operation. + ``x.mod(a,b)`` returns a :class:`LazyTensor` that encodes, symbolically, + the element-wise modulo of ``x`` with modulus ``a`` and offset ``b``. + By default b=0, so that x.mod(a) becomes equivalent to the NumPy function mod. 
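        For reference, the offset-modulo convention assumed here can be checked
        against NumPy (mod_ref is an illustrative helper, not part of the API):

            import numpy as np

            def mod_ref(x, a, b=0):
                # wraps x into the half-open interval [b, b + a)
                return x - a * np.floor((x - b) / a)

            x = np.linspace(-3.0, 3.0, 7)
            assert np.allclose(mod_ref(x, 2.0), np.mod(x, 2.0))  # b = 0 matches np.mod
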
+ + def round(self, other=0): + r""" + Element-wise rounding function - a unary operation. + ``x.round(d)`` returns a :class:`LazyTensor` that encodes, symbolically, + the element-wise rounding of ``x`` to ``d`` decimal places. ``d`` is an integer. + """ + return self.unary("Round", opt_arg=other) + def sqnorm2(self): r""" Squared Euclidean norm - a unary operation. - ``x.sqnorm2()`` returns a :class:`LazyTensor` that encodes, symbolically, the squared Euclidean norm of a vector ``x``. """ @@ -1302,7 +1383,6 @@ def sqnorm2(self): def norm2(self): r""" Euclidean norm - a unary operation. - ``x.norm2()`` returns a :class:`LazyTensor` that encodes, symbolically, the Euclidean norm of a vector ``x``. """ @@ -1311,7 +1391,6 @@ def norm2(self): def norm(self, dim): r""" Euclidean norm - a unary operation. - ``x.norm(-1)`` is equivalent to ``x.norm2()`` and returns a :class:`LazyTensor` that encodes, symbolically, the Euclidean norm of a vector ``x``. """ @@ -1322,7 +1401,6 @@ def norm(self, dim): def normalize(self): r""" Vector normalization - a unary operation. - ``x.normalize()`` returns a :class:`LazyTensor` that encodes, symbolically, a vector ``x`` divided by its Euclidean norm. """ @@ -1331,7 +1409,6 @@ def normalize(self): def sqdist(self, other): r""" Squared distance - a binary operation. - ``x.sqdist(y)`` returns a :class:`LazyTensor` that encodes, symbolically, the squared Euclidean distance between two vectors ``x`` and ``y``. """ @@ -1340,7 +1417,6 @@ def sqdist(self, other): def weightedsqnorm(self, other): r""" Weighted squared norm of a LazyTensor ``x`` - a binary operation. - ``x.weightedsqnorm(s)`` returns a :class:`LazyTensor` that encodes, symbolically, the weighted squared norm of a vector ``x`` with weights stored in the LazyTensor ``s`` - see the :doc:`main reference page <../../../api/math-operations>` for details. @@ -1362,7 +1438,6 @@ def weightedsqnorm(self, other): def weightedsqdist(self, g, s): r""" Weighted squared distance. - ``x.weightedsqdist(y, s)`` is equivalent to ``(x - y).weightedsqnorm(s)``. """ if not hasattr(g, "__GenericLazyTensor__"): @@ -1375,7 +1450,6 @@ def weightedsqdist(self, g, s): def elem(self, i): r""" Indexing of a vector - a unary operation. - ``x.elem(i)`` returns a :class:`LazyTensor` that encodes, symbolically, the i-th element ``x[i]`` of the vector ``x``. """ @@ -1390,7 +1464,6 @@ def elem(self, i): def extract(self, i, d): r""" Range indexing - a unary operation. - ``x.extract(i, d)`` returns a :class:`LazyTensor` that encodes, symbolically, the sub-vector ``x[i:i+d]`` of the vector ``x``. """ @@ -1405,10 +1478,8 @@ def extract(self, i, d): def __getitem__(self, key): r""" Element or range indexing - a unary operation. - ``x[key]`` redirects to the :meth:`elem` or :meth:`extract` methods, depending on the ``key`` argument. Supported values are: - - an integer ``k``, in which case ``x[key]`` redirects to ``elem(x,k)``, - a tuple ``..,:,:,k`` with ``k`` an integer, @@ -1451,7 +1522,6 @@ def __getitem__(self, key): def one_hot(self, D): r""" Encodes a (rounded) scalar value as a one-hot vector of dimension D. - ``x.one_hot(D)`` returns a :class:`LazyTensor` that encodes, symbolically, a vector of length D whose round(x)-th coordinate is equal to 1, and the other ones to zero. """
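As an illustration of the indexing helpers documented above, a short sketch (hypothetical shapes, PyTorch backend):
>>> import torch
>>> from pykeops.torch import LazyTensor
>>> labels = torch.randint(0, 10, (1000, 1)).float()
>>> l_i = LazyTensor(labels[:, None, :])  # (1000, 1, 1) scalar variable
>>> onehot_i = l_i.one_hot(10)            # symbolic (1000, 1, 10) one-hot vector
>>> first_i = onehot_i[0]                 # integer key: redirects to elem(x, 0)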
@@ -1467,7 +1537,6 @@ def one_hot(self, D): def concat(self, other): r""" Concatenation of two :class:`LazyTensor` - a binary operation. - ``x.concat(y)`` returns a :class:`LazyTensor` that encodes, symbolically, the concatenation of ``x`` and ``y`` along their last dimension. """ @@ -1479,7 +1548,6 @@ def concat(self, other): def concatenate(tuple_of_lt, axis=-1): r""" Concatenation of a tuple of :class:`GenericLazyTensor`. - ``GenericLazyTensor.concatenate( (x_1, x_2, ..., x_n), -1)`` returns a :class:`GenericLazyTensor` that encodes, symbolically, the concatenation of ``x_1``, ``x_2``, ..., ``x_n`` along their last dimension. Note that **axis** should be equal to -1 or 2 (if the ``x_i``'s are 3D GenericLazyTensor): @@ -1512,7 +1580,6 @@ def concatenate(tuple_of_lt, axis=-1): def cat(tuple_of_lt, dim): r""" Concatenation of a tuple of LazyTensors. - ``LazyTensor.cat( (x_1, x_2, ..., x_n), -1)`` is a PyTorch-friendly alias for ``LazyTensor.concatenate( (x_1, x_2, ..., x_n), -1)``; just like indexing operations, it is only supported along the last dimension. @@ -1522,7 +1589,6 @@ def cat(tuple_of_lt, dim): def matvecmult(self, other): r""" Matrix-vector product - a binary operation. - If ``x._shape[-1] == A*B`` and ``y._shape[-1] == B``, ``z = x.matvecmult(y)`` returns a :class:`GenericLazyTensor` such that ``z._shape[-1] == A`` which encodes, symbolically, @@ -1537,7 +1603,6 @@ def matvecmult(self, other): def vecmatmult(self, other): r""" Vector-matrix product - a binary operation. - If ``x._shape[-1] == A`` and ``y._shape[-1] == A*B``, ``z = x.vecmatmult(y)`` returns a :class:`GenericLazyTensor` such that ``z._shape[-1] == B`` which encodes, symbolically, @@ -1552,7 +1617,6 @@ def vecmatmult(self, other): def tensorprod(self, other): r""" Tensor product of vectors - a binary operation. - If ``x._shape[-1] == A`` and ``y._shape[-1] == B``, ``z = x.tensorprod(y)`` returns a :class:`GenericLazyTensor` such that ``z._shape[-1] == A*B`` which encodes, symbolically, @@ -1567,7 +1631,6 @@ def tensorprod(self, other): def keops_tensordot(self, other, dimfa, dimfb, contfa, contfb, *args): """ Tensor dot product (on KeOps internal dimensions) - a binary operation. - :param other: a LazyTensor :param dimfa: tuple of int :param dimfb: tuple of int @@ -1597,7 +1660,6 @@ def keops_tensordot(self, other, dimfa, dimfb, contfa, contfb, *args): def grad(self, other, gradin): r""" Symbolic gradient operation. - ``z = x.grad(v,e)`` returns a :class:`LazyTensor` which encodes, symbolically, the gradient (more precisely, the adjoint of the differential operator) of ``x``, with @@ -1619,13 +1681,10 @@ def grad(self, other, gradin): def sum(self, axis=-1, dim=None, **kwargs): r""" Summation unary operation, or Sum reduction. - ``sum(axis, dim, **kwargs)`` will: - - if **axis or dim = 0**, return the sum reduction of **self** over the "i" indexes. - if **axis or dim = 1**, return the sum reduction of **self** over the "j" indexes. - if **axis or dim = 2**, return a new :class:`LazyTensor` object representing the sum of the values of the vector **self**. - Keyword Args: axis (integer): reduction dimension, which should be equal to the number of batch dimensions plus 0 (= reduction over :math:`i`), or 1 (= reduction over :math:`j`), or 2 (= reduction over the dimension of the vector variable). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ if dim is not None: axis = dim
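As an illustration, the two flavours of ``sum`` compose naturally in a Gaussian convolution (a minimal sketch, assuming small 3D point clouds and the PyTorch backend):
>>> import torch
>>> from pykeops.torch import LazyTensor
>>> x, y = torch.randn(1000, 3), torch.randn(2000, 3)
>>> b = torch.randn(2000, 2)
>>> x_i, y_j = LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :])
>>> D_ij = ((x_i - y_j) ** 2).sum(-1)  # "axis = 2" flavour: a symbolic scalar per pair
>>> K_ij = (-D_ij).exp()               # symbolic Gaussian kernel matrix
>>> a_i = (K_ij * LazyTensor(b[None, :, :])).sum(axis=1)  # reduction flavour: (1000, 2)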
@@ -1645,31 +1703,24 @@ def sum(self, axis=-1, dim=None, **kwargs): def sum_reduction(self, axis=None, dim=None, **kwargs): r""" Sum reduction. - ``sum_reduction(axis, dim, **kwargs)`` will return the sum reduction of **self**. - Keyword Args: axis (integer): reduction dimension, which should be equal to the number of batch dimensions plus 0 (= reduction over :math:`i`), or 1 (= reduction over :math:`j`). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ return self.reduction("Sum", axis=axis, dim=dim, **kwargs) def logsumexp(self, axis=None, dim=None, weight=None, **kwargs): r""" Log-Sum-Exp reduction. - ``logsumexp(axis, dim, weight, **kwargs)`` will: - - if **axis or dim = 0**, return the "log-sum-exp" reduction of **self** over the "i" indexes. - if **axis or dim = 1**, return the "log-sum-exp" reduction of **self** over the "j" indexes. - For details, please check the documentation of the KeOps reductions ``LogSumExp`` and ``LogSumExpWeight`` in the :doc:`main reference page <../../../api/math-operations>`. - Keyword Args: axis (integer): reduction dimension, which should be equal to the number of batch dimensions plus 0 (= reduction over :math:`i`), or 1 (= reduction over :math:`j`). dim (integer): alternative keyword for the axis parameter. weight (:class:`LazyTensor`): optional object that specifies scalar or vector-valued weights in the log-sum-exp operation. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ if weight is None: return self.reduction("LogSumExp", axis=axis, dim=dim, **kwargs) @@ -1696,15 +1746,11 @@ def logsumexp(self, axis=None, dim=None, weight=None, **kwargs): def sumsoftmaxweight(self, weight, axis=None, dim=None, **kwargs): r""" Sum of weighted Soft-Max reduction. - ``sumsoftmaxweight(weight, axis, dim, **kwargs)`` will: - - if **axis or dim = 0**, return the "sum of weighted Soft-Max" reduction of **self** over the "i" indexes. - if **axis or dim = 1**, return the "sum of weighted Soft-Max" reduction of **self** over the "j" indexes. - For details, please check the documentation of the KeOps reduction ``SumSoftMaxWeight`` in the :doc:`main reference page <../../../api/math-operations>`. - Keyword Args: weight (:class:`LazyTensor`): object that specifies scalar or vector-valued weights. axis (integer): reduction dimension, which should be equal to the number of batch dimensions plus 0 (= reduction over :math:`i`), or 1 (= reduction over :math:`j`). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ return self.reduction( "SumSoftMaxWeight", other=weight, axis=axis, dim=dim, **kwargs ) @@ -1727,13 +1772,10 @@ def sumsoftmaxweight_reduction(self, **kwargs): def min(self, axis=-1, dim=None, **kwargs): r""" Minimum unary operation, or Min reduction. - ``min(axis, dim, **kwargs)`` will: - - if **axis or dim = 0**, return the min reduction of **self** over the "i" indexes. - if **axis or dim = 1**, return the min reduction of **self** over the "j" indexes. - if **axis or dim = 2**, return a new :class:`LazyTensor` object representing the min of the values of the vector **self**. - Keyword Args: axis (integer): reduction dimension, which should be equal to the number of batch dimensions plus 0 (= reduction over :math:`i`), or 1 (= reduction over :math:`j`), or 2 (= reduction over the dimension of the vector variable). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ if dim is not None: axis = dim if axis in [-1, len(self._shape) - 1]: return self.unary("Min") else: return self.reduction("Min", axis=axis, **kwargs)
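The ``min`` reduction above, together with the ``argmin`` reduction below, supports the classic brute-force nearest-neighbor pattern (an illustrative sketch):
>>> import torch
>>> from pykeops.torch import LazyTensor
>>> x, y = torch.randn(1000, 3), torch.randn(2000, 3)
>>> x_i, y_j = LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :])
>>> D_ij = ((x_i - y_j) ** 2).sum(-1)  # symbolic matrix of squared distances
>>> d_i = D_ij.min(axis=1)     # (1000, 1): squared distance to the closest y_j
>>> j_i = D_ij.argmin(axis=1)  # (1000, 1): index of that closest neighbor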
- """ if dim is not None: axis = dim @@ -1753,16 +1794,13 @@ def min(self, axis=-1, dim=None, **kwargs): def min_reduction(self, axis=None, dim=None, **kwargs): r""" Min reduction. - ``min_reduction(axis, dim, **kwargs)`` will return the min reduction of **self**. - Keyword Args: axis (integer): reduction dimension, which should be equal to the number of batch dimensions plus 0 (= reduction over :math:`i`), or 1 (= reduction over :math:`j`). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ return self.reduction("Min", axis=axis, dim=dim, **kwargs) @@ -1775,13 +1813,10 @@ def __min__(self, **kwargs): def argmin(self, axis=-1, dim=None, **kwargs): r""" ArgMin unary operation, or ArgMin reduction. - ``argmin(axis, dim, **kwargs)`` will: - - if **axis or dim = 0**, return the argmin reduction of **self** over the "i" indexes. - if **axis or dim = 1**, return the argmin reduction of **self** over the "j" indexes. - if **axis or dim = 2**, return a new :class:`LazyTensor` object representing the argmin of the values of the vector **self**, - Keyword Args: axis (integer): reduction dimension, which should be equal to the number of batch dimensions plus 0 (= reduction over :math:`i`), @@ -1789,7 +1824,6 @@ def argmin(self, axis=-1, dim=None, **kwargs): dimension of the vector variable). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ if dim is not None: axis = dim @@ -1801,35 +1835,28 @@ def argmin(self, axis=-1, dim=None, **kwargs): def argmin_reduction(self, axis=None, dim=None, **kwargs): r""" ArgMin reduction. - ``argmin_reduction(axis, dim, **kwargs)`` will return the argmin reduction of **self**. - Keyword Args: axis (integer): reduction dimension, which should be equal to the number of batch dimensions plus 0 (= reduction over :math:`i`), or 1 (= reduction over :math:`j`). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ return self.reduction("ArgMin", axis=axis, dim=dim, **kwargs) def min_argmin(self, axis=None, dim=None, **kwargs): r""" Min-ArgMin reduction. - ``min_argmin(axis, dim, **kwargs)`` will: - - if **axis or dim = 0**, return the minimal values and its indices of **self** over the "i" indexes. - if **axis or dim = 1**, return the minimal values and its indices of **self** over the "j" indexes. - Keyword Args: axis (integer): reduction dimension, which should be equal to the number of batch dimensions plus 0 (= reduction over :math:`i`), or 1 (= reduction over :math:`j`). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ return self.reduction("Min_ArgMin", axis=axis, dim=dim, **kwargs) @@ -1842,13 +1869,10 @@ def min_argmin_reduction(self, **kwargs): def max(self, axis=-1, dim=None, **kwargs): r""" Miaximum unary operation, or Max reduction. - ``max(axis, dim, **kwargs)`` will: - - if **axis or dim = 0**, return the max reduction of **self** over the "i" indexes. - if **axis or dim = 1**, return the max reduction of **self** over the "j" indexes. 
@@ -1868,16 +1891,13 @@ def max(self, axis=-1, dim=None, **kwargs): def max_reduction(self, axis=None, dim=None, **kwargs): r""" Max reduction. - ``max_reduction(axis, dim, **kwargs)`` will return the max reduction of **self**. - Keyword Args: axis (integer): reduction dimension, which should be equal to the number of batch dimensions plus 0 (= reduction over :math:`i`), or 1 (= reduction over :math:`j`). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ return self.reduction("Max", axis=axis, dim=dim, **kwargs) @@ -1890,13 +1910,10 @@ def __max__(self, **kwargs): def argmax(self, axis=-1, dim=None, **kwargs): r""" ArgMax unary operation, or ArgMax reduction. - ``argmax(axis, dim, **kwargs)`` will: - - if **axis or dim = 0**, return the argmax reduction of **self** over the "i" indexes. - if **axis or dim = 1**, return the argmax reduction of **self** over the "j" indexes. - if **axis or dim = 2**, return a new :class:`LazyTensor` object representing the argmax of the values of the vector **self**. - Keyword Args: axis (integer): reduction dimension, which should be equal to the number of batch dimensions plus 0 (= reduction over :math:`i`), or 1 (= reduction over :math:`j`), or 2 (= reduction over the dimension of the vector variable). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ if dim is not None: axis = dim @@ -1916,35 +1932,28 @@ def argmax(self, axis=-1, dim=None, **kwargs): def argmax_reduction(self, axis=None, dim=None, **kwargs): r""" ArgMax reduction. - ``argmax_reduction(axis, dim, **kwargs)`` will return the argmax reduction of **self**. - Keyword Args: axis (integer): reduction dimension, which should be equal to the number of batch dimensions plus 0 (= reduction over :math:`i`), or 1 (= reduction over :math:`j`). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ return self.reduction("ArgMax", axis=axis, dim=dim, **kwargs) def max_argmax(self, axis=None, dim=None, **kwargs): r""" Max-ArgMax reduction. - ``max_argmax(axis, dim, **kwargs)`` will: - - if **axis or dim = 0**, return the maximal values and their indices of **self** over the "i" indexes. - if **axis or dim = 1**, return the maximal values and their indices of **self** over the "j" indexes. - Keyword Args: axis (integer): reduction dimension, which should be equal to the number of batch dimensions plus 0 (= reduction over :math:`i`), or 1 (= reduction over :math:`j`). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ return self.reduction("Max_ArgMax", axis=axis, dim=dim, **kwargs)
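A short sketch of the fused ``max_argmax`` reduction (illustrative only; it computes the maximal values together with their indices in a single pass):
>>> import torch
>>> from pykeops.torch import LazyTensor
>>> x, y = torch.randn(500, 3), torch.randn(800, 3)
>>> x_i, y_j = LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :])
>>> s_ij = (x_i | y_j)  # symbolic matrix of scalar products
>>> out = s_ij.max_argmax(axis=1)  # maximal score and its index, for each row i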
- """ return self.reduction("Max_ArgMax", axis=axis, dim=dim, **kwargs) @@ -1957,12 +1966,9 @@ def max_argmax_reduction(self, **kwargs): def Kmin(self, K, axis=None, dim=None, **kwargs): r""" K-Min reduction. - ``Kmin(K, axis, dim, **kwargs)`` will: - - if **axis or dim = 0**, return the K minimal values of **self** over the "i" indexes. - if **axis or dim = 1**, return the K minimal values of **self** over the "j" indexes. - Keyword Args: K (integer): number of minimal values required axis (integer): reduction dimension, which should be equal to the number @@ -1970,7 +1976,6 @@ def Kmin(self, K, axis=None, dim=None, **kwargs): or 1 (= reduction over :math:`j`). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ return self.reduction("KMin", opt_arg=K, axis=axis, dim=dim, **kwargs) @@ -1983,12 +1988,9 @@ def Kmin_reduction(self, **kwargs): def argKmin(self, K, axis=None, dim=None, **kwargs): r""" argKmin reduction. - ``argKmin(K, axis, dim, **kwargs)`` will: - - if **axis or dim = 0**, return the indices of the K minimal values of **self** over the "i" indexes. - if **axis or dim = 1**, return the indices of the K minimal values of **self** over the "j" indexes. - Keyword Args: K (integer): number of minimal values required axis (integer): reduction dimension, which should be equal to the number @@ -1996,7 +1998,6 @@ def argKmin(self, K, axis=None, dim=None, **kwargs): or 1 (= reduction over :math:`j`). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ return self.reduction("ArgKMin", opt_arg=K, axis=axis, dim=dim, **kwargs) @@ -2009,12 +2010,9 @@ def argKmin_reduction(self, **kwargs): def Kmin_argKmin(self, K, axis=None, dim=None, **kwargs): r""" K-Min-argK-min reduction. - ``Kmin_argKmin(K, axis, dim, **kwargs)`` will: - - if **axis or dim = 0**, return the K minimal values and its indices of **self** over the "i" indexes. - if **axis or dim = 1**, return the K minimal values and its indices of **self** over the "j" indexes. - Keyword Args: K (integer): number of minimal values required axis (integer): reduction dimension, which should be equal to the number @@ -2022,7 +2020,6 @@ def Kmin_argKmin(self, K, axis=None, dim=None, **kwargs): or 1 (= reduction over :math:`j`). dim (integer): alternative keyword for the axis parameter. **kwargs: optional parameters that are passed to the :meth:`reduction` method. - """ return self.reduction("KMin_ArgKMin", opt_arg=K, axis=axis, dim=dim, **kwargs) @@ -2037,13 +2034,11 @@ def Kmin_argKmin_reduction(self, **kwargs): def __matmul__(self, v, **kwargs): r""" Matrix-vector or Matrix-matrix product, supporting batch dimensions. - If ``K`` is a :class:`LazyTensor` whose trailing dimension ``K._shape[-1]`` is equal to 1, we can understand it as a linear operator and apply it to arbitrary NumPy arrays or PyTorch Tensors. Assuming that ``v`` is a 1D (resp. ND) tensor such that ``K.shape[-1] == v.shape[-1]`` (resp. ``v.shape[-2]``), ``K @ v`` denotes the matrix-vector (resp. matrix-matrix) product between the two objects, encoded as a vanilla NumPy or PyTorch 1D (resp. ND) tensor. - Example: >>> x, y = torch.randn(1000, 3), torch.randn(2000, 3) >>> x_i, y_j = LazyTensor( x[:,None,:] ), LazyTensor( y[None,:,:] ) @@ -2075,11 +2070,9 @@ def __matmul__(self, v, **kwargs): def t(self): r""" Matrix transposition, permuting the axes of :math:`i`- and :math:`j`-variables. 
@@ -2075,11 +2070,9 @@ def __matmul__(self, v, **kwargs): def t(self): r""" Matrix transposition, permuting the axes of :math:`i`- and :math:`j`-variables. - For instance, if ``K`` is a LazyTensor of shape ``(B,M,N,D)``, ``K.t()`` returns a symbolic copy of ``K`` whose axes 1 and 2 have been switched with each other: ``K.t().shape == (B,N,M,D)``. - Example: >>> x, y = torch.randn(1000, 3), torch.randn(2000, 3) >>> x_i, y_j = LazyTensor( x[:,None,:] ), LazyTensor( y[None,:,:] ) @@ -2133,12 +2126,10 @@ def T(self): def matvec(self, v): r""" Alias for the matrix-vector product, added for compatibility with :mod:`scipy.sparse.linalg`. - If ``K`` is a :class:`LazyTensor` whose trailing dimension ``K._shape[-1]`` is equal to 1, we can understand it as a linear operator and wrap it into a :mod:`scipy.sparse.linalg.LinearOperator` object, thus getting access to robust solvers and spectral routines. - Example: >>> import numpy as np >>> x = np.random.randn(1000,3) @@ -2156,7 +2147,180 @@ def matvec(self, v): def rmatvec(self, v): r""" Alias for the transposed matrix-vector product, added for compatibility with :mod:`scipy.sparse.linalg`. - See :meth:`matvec` for further reference. """ return self.T @ v + + def real2complex(self): + r""" + Element-wise "real 2 complex" operation - a unary operation. + ``x.real2complex()`` returns a :class:`ComplexLazyTensor` that encodes, symbolically, + the same tensor as ``x``, but seen as complex-valued (with zero imaginary part for each coefficient). + """ + return self.unary("Real2Complex", dimres=2 * self._shape[-1], is_complex=True) + + def imag2complex(self): + r""" + Element-wise "imag 2 complex" operation - a unary operation. + ``x.imag2complex()`` returns a :class:`ComplexLazyTensor` that encodes, symbolically, + the multiplication of ``1j`` with ``x``. + """ + return self.unary("Imag2Complex", dimres=2 * self._shape[-1], is_complex=True) + + def exp1j(self): + r""" + Element-wise "complex exponential of 1j x" operation - a unary operation. + ``x.exp1j()`` returns a :class:`ComplexLazyTensor` that encodes, symbolically, + the complex exponential of ``1j*x``. + """ + return self.unary("ComplexExp1j", dimres=2 * self._shape[-1], is_complex=True) + + +class ComplexGenericLazyTensor(GenericLazyTensor): + r"""Extension of the LazyTensor class for complex operations.""" + + def __init__(self, x=None, axis=None): + r"""Creates a KeOps symbolic variable of complex dtype.""" + self.get_tools() + if type(x) == complex: + x = [x] + if type(x) == list: + x_ = [None] * (2 * len(x)) + for i in range(len(x)): + x_[2 * i] = x[i].real + x_[2 * i + 1] = x[i].imag + x = x_ + elif self.tools.is_tensor(x): + x = self.tools.view_as_real(x) + super().__init__(x=x, axis=axis) + self.is_complex = True + + def __call__(self, *args, **kwargs): + res = super().__call__(*args, **kwargs) + return self.tools.view_as_complex(res) + + @property + def dtype(self): + if self._dtype == "float32": + return "complex64" + elif self._dtype == "float64": + return "complex128" + + @property + def shape(self): + r"""returns the shape of the complex LazyTensor.""" + s = super()._shape + s = s[:-1] + (s[-1] // 2,) + if s[-1] == 1: + return s[:-1] + else: + return s + + # List of supported operations ============================================ + + @property + def real(self): + r""" + Element-wise real part of complex - a unary operation. + ``z.real`` returns a :class:`LazyTensor` that encodes, symbolically, + the element-wise real part of ``z``. + """ + return self.unary("ComplexReal", dimres=self._shape[-1] // 2, is_complex=False)
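An illustrative sketch of the complex interface introduced here (assuming that the :class:`LazyTensor` constructor routes complex-valued inputs to this class, as set up in this patch):
>>> import torch
>>> from pykeops.torch import LazyTensor
>>> z = torch.randn(1000, 1, 4, dtype=torch.complex64)
>>> z_i = LazyTensor(z)               # complex symbolic variable
>>> r_i, a_i = abs(z_i), z_i.angle()  # modulus and argument, real-valued
>>> w_i = z_i.conj() * z_i            # still a complex LazyTensor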
+ + @property + def imag(self): + r""" + Element-wise imaginary part of complex - a unary operation. + ``z.imag`` returns a :class:`LazyTensor` that encodes, symbolically, + the element-wise imaginary part of ``z``. + """ + return self.unary("ComplexImag", dimres=self._shape[-1] // 2, is_complex=False) + + def angle(self): + r""" + Element-wise angle (or argument) of complex - a unary operation. + ``z.angle()`` returns a :class:`LazyTensor` that encodes, symbolically, + the element-wise angle of ``z``. + """ + return self.unary("ComplexAngle", dimres=self._shape[-1] // 2, is_complex=False) + + def conj(self): + r""" + Element-wise complex conjugate - a unary operation. + ``z.conj()`` returns a :class:`ComplexLazyTensor` that encodes, symbolically, + the element-wise complex conjugate of ``z``. + """ + return self.unary("Conj", dimres=self._shape[-1], is_complex=True) + + def sum(self, axis=-1, dim=None, **kwargs): + if dim is not None: + axis = dim + if axis in [-1, len(self._shape) - 1]: + return self.unary("ComplexSum", dimres=2, is_complex=True) + else: + return self.reduction("Sum", axis=axis, **kwargs) + + def __abs__(self): + r""" + Element-wise absolute value (or modulus) of complex - a unary operation. + ``abs(z)`` returns a :class:`LazyTensor` that encodes, symbolically, + the element-wise absolute value of ``z``. + """ + return self.unary("ComplexAbs", dimres=self._shape[-1] // 2, is_complex=False) + + def exp(self): + r""" + Element-wise complex exponential - a unary operation. + ``z.exp()`` returns a :class:`ComplexLazyTensor` that encodes, symbolically, + the element-wise complex exponential of ``z``. + """ + return self.unary("ComplexExp", dimres=self._shape[-1], is_complex=True) + + def mulop(self, other, **kwargs): + if other._shape[-1] == 1: + return other.binary(self, "ComplexRealScal", **kwargs, is_complex=True) + elif not is_complex_lazytensor(other): + return self.mulop(other.real2complex()) + elif self._shape[-1] == 2: + return self.binary(other, "ComplexScal", **kwargs, is_complex=True, dimcheck=None) + elif other._shape[-1] == 2: + return other.binary(self, "ComplexScal", **kwargs, is_complex=True, dimcheck=None) + else: + return self.binary(other, "ComplexMult", **kwargs, is_complex=True) + + def addop(self, other, **kwargs): + if not is_complex_lazytensor(other): + return self.addop(other.real2complex()) + elif self._shape[-1] == other._shape[-1]: + return self.binary(other, "Add", **kwargs, is_complex=True) + else: + raise ValueError("incompatible shapes for addition.") + + def subop(self, other, **kwargs): + if not is_complex_lazytensor(other): + return self.subop(other.real2complex()) + elif self._shape[-1] == other._shape[-1]: + return self.binary(other, "Subtract", **kwargs, is_complex=True) + else: + raise ValueError("incompatible shapes for subtraction.") + + def divop(self, other, **kwargs): + if not is_complex_lazytensor(other): + return self.divop(other.real2complex()) + elif self._shape[-1] == other._shape[-1]: + return self.binary(other, "ComplexDivide", **kwargs, is_complex=True) + else: + raise ValueError("incompatible shapes for division.") + + def real2complex(self): + raise ValueError("real2complex cannot be applied to a complex LazyTensor.") + + def imag2complex(self): + raise ValueError("imag2complex cannot be applied to a complex LazyTensor.") + + def exp1j(self): + raise ValueError("exp1j cannot be applied to a complex LazyTensor.") \ No newline at end of file diff --git a/pykeops/test/unit_tests_numpy.py
b/pykeops/test/unit_tests_numpy.py index f4a050d91..17f6eb808 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -440,7 +440,7 @@ def test_LazyTensor_sum(self): ############################################################ def test_IVF(self): ########################################################### - from pykeops.numpy.nn.ivf import ivf + from pykeops.numpy.nn.ivf import IVF import numpy as np np.random.seed(0) diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index eb2c5ddb5..a889ffe70 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -676,7 +676,7 @@ def invert_permutation_numpy(permutation): ############################################################ def test_IVF(self): ############################################################ - from pykeops.torch.nn.ivf import ivf + from pykeops.torch.nn.ivf import IVF import torch torch.manual_seed(0) From e311bab85e6cd3079e5b2147ac36848379b683d3 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Thu, 1 Apr 2021 14:22:02 +0100 Subject: [PATCH 022/111] Revert "typo correction" This reverts commit d541aeb17fc4e32edc14a6ef95bf728d0631c361. --- pykeops/common/lazy_tensor.py | 664 +++++++++++------------------ pykeops/test/unit_tests_numpy.py | 2 +- pykeops/test/unit_tests_pytorch.py | 2 +- 3 files changed, 252 insertions(+), 416 deletions(-) diff --git a/pykeops/common/lazy_tensor.py b/pykeops/common/lazy_tensor.py index 8b529af75..60ebca4bf 100644 --- a/pykeops/common/lazy_tensor.py +++ b/pykeops/common/lazy_tensor.py @@ -1,9 +1,8 @@ import copy import re -import math - import numpy as np + from pykeops.common.utils import check_broadcasting @@ -21,19 +20,17 @@ def is_scalar_and_equals(x, val): return False -def is_complex_lazytensor(x): - return isinstance(x, ComplexGenericLazyTensor) - - class GenericLazyTensor: r"""Symbolic wrapper for NumPy arrays and PyTorch tensors. This is the abstract class, end user should use :class:`pykeops.numpy.LazyTensor` or :class:`pykeops.torch.LazyTensor`. + :class:`LazyTensor` encode numerical arrays through the combination of a symbolic, **mathematical formula** and a list of **small data arrays**. They can be used to implement efficient algorithms on objects that are **easy to define**, but **impossible to store** in memory (e.g. the matrix of pairwise distances between two large point clouds). + :class:`LazyTensor` may be created from standard NumPy arrays or PyTorch tensors, combined using simple mathematical operations and converted back to NumPy arrays or PyTorch tensors with @@ -55,13 +52,15 @@ class GenericLazyTensor: axis = None ranges = None # Block-sparsity pattern backend = None # "CPU", "GPU", "GPU_2D", etc. - _dtype = None - is_complex = False + dtype = None + float_types = [] def __init__(self, x=None, axis=None): r"""Creates a KeOps symbolic variable. + Args: x: May be either: + - A *float*, a *list of floats*, a *NumPy float*, a *0D or 1D NumPy array*, a *0D or 1D PyTorch tensor*, in which case the :class:`LazyTensor` represents a constant **vector of parameters**, to be broadcasted @@ -78,8 +77,11 @@ def __init__(self, x=None, axis=None): - An *integer*, in which case the :class:`LazyTensor` represents an **integer constant** handled efficiently at compilation time. - **None**, for internal use. + axis (int): should be equal to 0 or 1 if **x** is a 2D tensor, and **None** otherwise. + .. 
warning:: + A :class:`LazyTensor` constructed from a NumPy array or a PyTorch tensor retains its **dtype** (float16, float32 or float64) and **device** properties (is it stored on the GPU?). @@ -134,7 +136,7 @@ def __init__(self, x=None, axis=None): # Float numbers must be encoded as Parameters, as C++'s templating system cannot deal # with floating point arithmetics. - elif typex in self.tools.float_types: + elif typex in self.float_types: x = [x] # Convert to list and go to stage 2 typex = list @@ -153,109 +155,89 @@ def __init__(self, x=None, axis=None): self.formula = "Var({},{},2)".format(id(x), self.ndim) return # That's it! else: - self._dtype = self.tools.dtypename(self.tools.dtype(x)) - - typex = type(x) + self.dtype = self.tools.dtypename(self.tools.dtype(x)) - if ( - typex - not in [type(None), tuple, int, float, list, self.tools.arraytype] - + self.tools.float_types - ): - raise TypeError( - "LazyTensors should be built from " + self.tools.arrayname + ", " - "float/integer numbers, lists of floats or 3-uples of integers. " - "Received: {}".format(typex) - ) - - if typex == self.tools.arraytype and len(x.shape) == 0: - x = x.view(1) - elif typex in self.tools.float_types: - x = self.tools.arraytype([x]).view(1) + def lt_constructor(self, x=None, axis=None): + r"""This method is specialized in :class:`pykeops.numpy.LazyTensor` and :class:`pykeops.torch.LazyTensor`. It + returns a new instance of a LazyTensor (numpy or pytorch).""" + pass - if typex == self.tools.arraytype: - if len(x.shape) >= 3: # Infer axis from the input shape - # If x is a 3D+ array, its shape must be either (..,M,1,D) or (..,1,N,D) or (..,1,1,D). - # We infer axis from shape and squeeze out the "1" dimensions: - if axis is not None: - raise ValueError( - "'axis' parameter should not be given when 'x' is a 3D tensor." - ) + def get_tools(self): + r"""This method is specialized in :class:`pykeops.numpy.LazyTensor` and :class:`pykeops.torch.LazyTensor`. It + populate the tools class.""" + pass - if len(x.shape) > 3: # We're in "batch mode" - self.batchdims = tuple(x.shape[:-3]) + def infer_dim(self, x, axis): + if len(x.shape) >= 3: # Infer axis from the input shape + # If x is a 3D+ array, its shape must be either (..,M,1,D) or (..,1,N,D) or (..,1,1,D). + # We infer axis from shape and squeeze out the "1" dimensions: + if axis is not None: + raise ValueError( + "'axis' parameter should not be given when 'x' is a 3D tensor." + ) - if x.shape[-3] == 1: - if x.shape[-2] == 1: # (..,1,1,D) -> Pm(D) - x = x.squeeze(-2).squeeze(-2) - axis = 2 - else: # (..,1,N,D) -> Vj(D) - x = x.squeeze(-3) - axis = 1 + if len(x.shape) > 3: # We're in "batch mode" + self.batchdims = tuple(x.shape[:-3]) - elif x.shape[-2] == 1: # (M,1,D) -> Vi(D) - x = x.squeeze(-2) - axis = 0 - else: - raise ValueError( - "If 'x' is a 3D+ tensor, its shape should be one of (..,M,1,D), (..,1,N,D) or (..,1,1,D)." - ) + if x.shape[-3] == 1: + if x.shape[-2] == 1: # (..,1,1,D) -> Pm(D) + x = x.squeeze(-2).squeeze(-2) + axis = 2 + else: # (..,1,N,D) -> Vj(D) + x = x.squeeze(-3) + axis = 1 - # Stage 4: x is now encoded as a 2D or 1D array + batch dimensions -------------------- - if ( - len(x.shape) >= 2 and axis != 2 - ): # shape is (..,M,D) or (..,N,D), with an explicit 'axis' parameter - if axis is None or axis not in (0, 1): - raise ValueError( - "When 'x' is encoded as a 2D array, LazyTensor expects an explicit 'axis' value in {0,1}." 
- ) + elif x.shape[-2] == 1: # (M,1,D) -> Vi(D) + x = x.squeeze(-2) + axis = 0 + else: + raise ValueError( + "If 'x' is a 3D+ tensor, its shape should be one of (..,M,1,D), (..,1,N,D) or (..,1,1,D)." + ) - # id(x) is used as temporary identifier for KeOps "Var", - # this identifier will be changed when calling method "fixvariables" - # But first we do a small hack, in order to distinguish same array involved twice in a formula but with - # different axis (e.g. Vi(x)-Vj(x) formula): we do a dummy reshape in order to get a different id - if axis == 1: - x = self.tools.view(x, x.shape) + # Stage 4: x is now encoded as a 2D or 1D array + batch dimensions -------------------- + if ( + len(x.shape) >= 2 and axis != 2 + ): # shape is (..,M,D) or (..,N,D), with an explicit 'axis' parameter + if axis is None or axis not in (0, 1): + raise ValueError( + "When 'x' is encoded as a 2D array, LazyTensor expects an explicit 'axis' value in {0,1}." + ) - self.variables = (x,) - self.ndim = x.shape[-1] - self.axis = axis - self.formula = "Var({},{},{})".format(id(x), self.ndim, self.axis) + # id(x) is used as temporary identifier for KeOps "Var", + # this identifier will be changed when calling method "fixvariables" + # But first we do a small hack, in order to distinguish same array involved twice in a formula but with + # different axis (e.g. Vi(x)-Vj(x) formula): we do a dummy reshape in order to get a different id + if axis == 1: + x = self.tools.view(x, x.shape) - if axis == 0: - self.ni = x.shape[-2] - else: - self.nj = x.shape[-2] + self.variables = (x,) + self.ndim = x.shape[-1] + self.axis = axis + self.formula = "Var({},{},{})".format(id(x), self.ndim, self.axis) - self._dtype = self.tools.dtypename(self.tools.dtype(x)) + if axis == 0: + self.ni = x.shape[-2] + else: + self.nj = x.shape[-2] - elif ( - len(x.shape) == 1 or axis == 2 - ): # shape is (D,): x is a "Pm(D)" parameter - if axis is not None and axis != 2: - raise ValueError( - "When 'x' is encoded as a 1D or 0D array, 'axis' must be None or 2 (= Parameter variable)." - ) - self.variables = (x,) - self.ndim = x.shape[-1] - self.axis = 2 - self.formula = "Var({},{},2)".format(id(x), self.ndim) + self.dtype = self.tools.dtypename(self.tools.dtype(x)) - else: + elif len(x.shape) == 1 or axis == 2: # shape is (D,): x is a "Pm(D)" parameter + if axis is not None and axis != 2: raise ValueError( - "LazyTensors can be built from 0D, 1D, 2D or 3D+ tensors. " - + "Received x of shape: {}.".format(x.shape) + "When 'x' is encoded as a 1D or 0D array, 'axis' must be None or 2 (= Parameter variable)." ) + self.variables = (x,) + self.ndim = x.shape[-1] + self.axis = 2 + self.formula = "Var({},{},2)".format(id(x), self.ndim) - def lt_constructor(self, x=None, axis=None): - r"""This method is specialized in :class:`pykeops.numpy.LazyTensor` and :class:`pykeops.torch.LazyTensor`. It - returns a new instance of a LazyTensor (numpy or pytorch).""" - pass - - def get_tools(self): - r"""This method is specialized in :class:`pykeops.numpy.LazyTensor` and :class:`pykeops.torch.LazyTensor`. It - populate the tools class.""" - pass + else: + raise ValueError( + "LazyTensors can be built from 0D, 1D, 2D or 3D+ tensors. 
" + + "Received x of shape: {}.".format(x.shape) + ) def fixvariables(self): r"""If needed, assigns final labels to each variable and pads their batch dimensions prior to a :mod:`Genred()` call.""" @@ -273,7 +255,7 @@ def fixvariables(self): for v in self.variables: idv = id(v) if type(v) == list: - v = self.tools.array(v, self._dtype, device) + v = self.tools.array(v, self.dtype, device) # Replace "Var(idv," by "Var(i," and increment 'i': tag = "Var({},".format(idv) @@ -330,11 +312,11 @@ def separate_kwargs(self, kwargs): kwargs_call = dict(kwargs_call) return kwargs_init, kwargs_call - def promote(self, other, props, is_complex=False): + def promote(self, other, props): r""" Creates a new :class:`LazyTensor` whose **None** properties are set to those of **self** or **other**. """ - res = self.lt_constructor(is_complex=is_complex) + res = self.lt_constructor() for prop in props: y, x = getattr(self, prop), getattr(other, prop) @@ -359,13 +341,13 @@ def promote(self, other, props, is_complex=False): setattr(res, prop, y) return res - def init(self, is_complex=False): + def init(self): r""" Creates a copy of a :class:`LazyTensor`, without **formula** attribute. """ - res = self.lt_constructor(is_complex=is_complex) + res = self.lt_constructor() res.tools = self.tools - res._dtype = self._dtype + res.dtype = self.dtype res.Genred = self.Genred res.KernelSolve = self.KernelSolve res.batchdims = self.batchdims @@ -377,7 +359,7 @@ def init(self, is_complex=False): res.symbolic_variables = self.symbolic_variables return res - def join(self, other, is_complex=False): + def join(self, other): r""" Merges the variables and attributes of two :class:`LazyTensor`, with a compatibility check. This method concatenates tuples of variables, without paying attention to repetitions. @@ -385,7 +367,7 @@ def join(self, other, is_complex=False): res = self.promote( other, ( - "_dtype", + "dtype", "tools", "Genred", "KernelSolve", @@ -394,7 +376,6 @@ def join(self, other, is_complex=False): "ranges", "backend", ), - is_complex=is_complex, ) res.symbolic_variables = self.symbolic_variables + other.symbolic_variables @@ -407,17 +388,13 @@ def join(self, other, is_complex=False): # Prototypes for unary and binary operations ============================== - def unary( - self, operation, dimres=None, opt_arg=None, opt_arg2=None, is_complex=None - ): + def unary(self, operation, dimres=None, opt_arg=None, opt_arg2=None): r""" Symbolically applies **operation** to **self**, with optional arguments if needed. + The optional argument **dimres** may be used to specify the dimension of the output **result**. """ - if is_complex is None: - is_complex = self.is_complex - # we must prevent any operation if self is the output of a reduction operation, # i.e. if it has a reduction_op field if hasattr(self, "reduction_op"): @@ -428,7 +405,7 @@ def unary( if not dimres: dimres = self.ndim - res = self.init(is_complex) # Copy of self, without a formula + res = self.init() # Copy of self, without a formula if opt_arg2 is not None: res.formula = "{}({},{},{})".format( operation, self.formula, opt_arg, opt_arg2 @@ -450,9 +427,9 @@ def binary( opt_arg=None, opt_pos="last", rversion=False, - is_complex=None, ): r"""Symbolically applies **operation** to **self**, with optional arguments if needed. + Keyword args: - dimres (int): May be used to specify the dimension of the output **result**. 
- is_operator (bool, default=False): May be used to specify if **operation** is @@ -461,14 +438,10 @@ def binary( Supported values are ``"same"``, ``"sameor1"``, or **None**. - rversion (Boolean): shall we invert lhs and rhs of the binary op, e.g. as in __radd__, __rmut__, etc... """ - # If needed, convert float numbers / lists / arrays / tensors to LazyTensors: if not hasattr(other, "__GenericLazyTensor__"): other = self.lt_constructor(other) - if is_complex is None: - is_complex = True if (self.is_complex or other.is_complex) else False - # we must prevent any operation if self or other is the output of a reduction operation, # i.e. if it has a reduction_op field if hasattr(self, "reduction_op") or hasattr(other, "reduction_op"): @@ -501,10 +474,7 @@ def binary( elif dimcheck != None: raise ValueError("incorrect dimcheck keyword in binary operation") - res = self.join( - other, is_complex=is_complex - ) # Merge the attributes and variables of both operands - + res = self.join(other) # Merge the attributes and variables of both operands res.ndim = dimres if not rversion: @@ -541,6 +511,7 @@ def ternary( self, other1, other2, operation, dimres=None, dimcheck="sameor1", opt_arg=None ): r"""Symbolically applies **operation** to **self**, with optional arguments if needed. + Keyword args: - dimres (int): May be used to specify the dimension of the output **result**. - is_operator (bool, default=False): May be used to specify if **operation** is @@ -623,13 +594,13 @@ def reduction( axis=None, dim=None, call=True, - is_complex=None, **kwargs ): r""" Applies a reduction to a :class:`LazyTensor`. This method is used internally by the LazyTensor class. Args: reduction_op (string): the string identifier of the reduction, which will be passed to the KeOps routines. + Keyword Args: other: May be used to specify some **weights** ; depends on the reduction. opt_arg: typically, some integer needed by ArgKMin reductions ; depends on the reduction. @@ -677,12 +648,6 @@ def reduction( with formulas involving large dimension variables. """ - if is_complex is None: - if other is None: - is_complex = self.is_complex - else: - is_complex = self.is_complex or other.is_complex - if axis is None: axis = dim # NumPy uses axis, PyTorch uses dim... if axis - self.nbatchdims not in (0, 1): @@ -691,10 +656,10 @@ def reduction( ) if other is None: - res = self.init(is_complex=is_complex) # ~ self.copy() + res = self.init() # ~ self.copy() res.formula2 = None else: - res = self.join(other, is_complex=is_complex) + res = self.join(other) res.formula2 = other.formula res.formula = self.formula @@ -715,7 +680,7 @@ def reduction( res.rec_multVar_highdim = id(self.rec_multVar_highdim[1].variables[0]) else: res.rec_multVar_highdim = None - if res._dtype is not None: + if res.dtype is not None: res.fixvariables() # Turn the "id(x)" numbers into consecutive labels # "res" now becomes a callable object: res.callfun = res.Genred( @@ -723,13 +688,13 @@ def reduction( [], res.reduction_op, res.axis, - res._dtype, + res.dtype, res.opt_arg, res.formula2, **kwargs_init, rec_multVar_highdim=res.rec_multVar_highdim ) - if call and len(res.symbolic_variables) == 0 and res._dtype is not None: + if call and len(res.symbolic_variables) == 0 and res.dtype is not None: return res() else: return res @@ -737,9 +702,11 @@ def reduction( def solve(self, other, var=None, call=True, **kwargs): r""" Solves a positive definite linear system of the form ``sum(self) = other`` or ``sum(self*var) = other`` , using a conjugate gradient solver. 
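Example (a minimal kernel-interpolation sketch; ``alpha`` denotes the ridge regularization parameter of the solver):
>>> import torch
>>> from pykeops.torch import LazyTensor
>>> x = torch.randn(1000, 3)
>>> b = torch.randn(1000, 1)
>>> x_i, x_j = LazyTensor(x[:, None, :]), LazyTensor(x[None, :, :])
>>> K_ij = (-((x_i - x_j) ** 2).sum(-1)).exp()  # symmetric Gaussian kernel
>>> a = K_ij.solve(b, alpha=0.1)  # solves (alpha * Id + K) @ a = b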
+ Args: self (:class:`LazyTensor`): KeOps variable that encodes a symmetric positive definite matrix / linear operator. other (:class:`LazyTensor`): KeOps variable that encodes the second member of the equation. + Keyword args: var (:class:`LazyTensor`): If **var** is **None**, **solve** will return the solution @@ -785,7 +752,9 @@ def solve(self, other, var=None, call=True, **kwargs): accuracy for large sized data. enable_chunks (bool, default True): enable automatic selection of special "chunked" computation mode for accelerating reductions with formulas involving large dimension variables. + .. warning:: + Please note that **no check** of symmetry and definiteness will be performed prior to our conjugate gradient descent. """ @@ -834,20 +803,20 @@ def solve(self, other, var=None, call=True, **kwargs): else: res.rec_multVar_highdim = None - if res._dtype is not None: + if res.dtype is not None: res.fixvariables() res.callfun = res.KernelSolve( res.formula, [], res.varformula, res.axis, - res._dtype, + res.dtype, **kwargs_init, rec_multVar_highdim=res.rec_multVar_highdim ) # we call if call=True, if other is not symbolic, and if the dtype is set - if call and len(other.symbolic_variables) == 0 and res._dtype is not None: + if call and len(other.symbolic_variables) == 0 and res.dtype is not None: return res() else: return res @@ -870,11 +839,11 @@ def __call__(self, *args, **kwargs): self.kwargs.update({"backend": self.backend}) if ( - self._dtype is None + self.dtype is None ): # This can only happen if we haven't encountered 2D or 3D arrays just yet... self.get_tools() - self._dtype = self.tools.dtypename( + self.dtype = self.tools.dtypename( self.tools.dtype(args[0]) ) # crash if LazyTensor is called self.fixvariables() @@ -887,7 +856,7 @@ def __call__(self, *args, **kwargs): [], self.formula2, self.axis, - self._dtype, + self.dtype, **kwargs_init, rec_multVar_highdim=self.rec_multVar_highdim ) @@ -897,7 +866,7 @@ def __call__(self, *args, **kwargs): [], self.reduction_op, self.axis, - self._dtype, + self.dtype, self.opt_arg, self.formula2, **kwargs_init, @@ -917,7 +886,7 @@ def __str__(self): r""" Returns a verbose string identifier. """ - tmp = self.init(is_complex=self.is_complex) # ~ self.copy() + tmp = self.init() # ~ self.copy() tmp.formula = self.formula tmp.formula2 = None if not hasattr(self, "formula2") else self.formula2 @@ -944,27 +913,21 @@ def __str__(self): return string @property - def dtype(self): - return self._dtype + def shape(self): + btch = () if self.batchdims is None else self.batchdims + ni = 1 if self.ni is None else self.ni + nj = 1 if self.nj is None else self.nj + ndim = 1 if self.ndim is None else self.ndim + return btch + (ni, nj) if ndim == 1 else btch + (ni, nj, ndim) @property def _shape(self): - r"""returns the internal shape of the LazyTensor.""" btch = () if self.batchdims is None else self.batchdims ni = 1 if self.ni is None else self.ni nj = 1 if self.nj is None else self.nj ndim = 1 if self.ndim is None else self.ndim return btch + (ni, nj, ndim) - @property - def shape(self): - r"""returns the shape of the LazyTensor""" - s = self._shape - if s[-1] == 1: - return s[:-1] - else: - return s - def dim(self): r""" Just as in PyTorch, returns the number of dimensions of a :class:`LazyTensor`. 
@@ -985,67 +948,58 @@ def nbatchdims(self): __array_ufunc__ = None # Arithmetics -------------------------------------------------------------- - - def addop(self, other, **kwargs): - return self.binary(other, "+", is_operator=True, **kwargs) - def __add__(self, other): r""" Broadcasted addition operator - a binary operation. + ``x + y`` returns a :class:`LazyTensor` that encodes, symbolically, the addition of ``x`` and ``y``. """ if is_scalar_and_equals(other, 0): return self - elif is_complex_lazytensor(other) and not is_complex_lazytensor(self): - return self.real2complex().addop(other) else: - return self.addop(other) + return self.binary(other, "+", is_operator=True) def __radd__(self, other): r""" Broadcasted addition operator - a binary operation. + ``x + y`` returns a :class:`LazyTensor` that encodes, symbolically, the addition of ``x`` and ``y``. """ if is_scalar_and_equals(other, 0): return self else: - return self.addop(other, rversion=True) - - def subop(self, other, **kwargs): - return self.binary(other, "-", is_operator=True, **kwargs) + return self.binary(other, "+", is_operator=True, rversion=True) def __sub__(self, other): r""" Broadcasted subtraction operator - a binary operation. + ``x - y`` returns a :class:`LazyTensor` that encodes, symbolically, the subtraction of ``x`` and ``y``. """ if is_scalar_and_equals(other, 0): return self - elif is_complex_lazytensor(other) and not is_complex_lazytensor(self): - return self.real2complex().subop(other) else: - return self.subop(other) + return self.binary(other, "-", is_operator=True) def __rsub__(self, other): r""" Broadcasted subtraction operator - a binary operation. + ``x - y`` returns a :class:`LazyTensor` that encodes, symbolically, the subtraction of ``x`` and ``y``. """ if is_scalar_and_equals(other, 0): return self.unary("Minus") else: - return self.subop(other, rversion=True) - - def mulop(self, other, **kwargs): - return self.binary(other, "*", is_operator=True, **kwargs) + return self.binary(other, "-", is_operator=True, rversion=True) def __mul__(self, other): r""" Broadcasted elementwise product - a binary operation. + ``x * y`` returns a :class:`LazyTensor` that encodes, symbolically, the elementwise product of ``x`` and ``y``. """ @@ -1055,16 +1009,13 @@ def __mul__(self, other): return self elif is_scalar_and_equals(other, -1): return self.unary("Minus") - elif is_complex_lazytensor(other) and not is_complex_lazytensor(self): - return other.mulop(self) - elif self.tools.detect_complex(other) and not is_complex_lazytensor(self): - return self.lt_constructor(other).mulop(self) else: - return self.mulop(other) + return self.binary(other, "*", is_operator=True) def __rmul__(self, other): r""" Broadcasted elementwise product - a binary operation. + ``x * y`` returns a :class:`LazyTensor` that encodes, symbolically, the elementwise product of ``x`` and ``y``. """ @@ -1074,30 +1025,25 @@ def __rmul__(self, other): return self elif is_scalar_and_equals(other, -1): return self.unary("Minus") - elif self.tools.detect_complex(other) and not is_complex_lazytensor(self): - return self.real2complex().mulop(self.lt_constructor(other)) else: - return self.mulop(other, rversion=True) - - def divop(self, other, **kwargs): - return self.binary(other, "/", is_operator=True, **kwargs) + return self.binary(other, "*", is_operator=True, rversion=True) def __truediv__(self, other): r""" Broadcasted elementwise division - a binary operation. 
+ ``x / y`` returns a :class:`LazyTensor` that encodes, symbolically, the elementwise division of ``x`` by ``y``. """ if is_scalar_and_equals(other, 1): return self - elif is_complex_lazytensor(other) and not is_complex_lazytensor(self): - return self.real2complex().divop(other) else: - return self.divop(other) + return self.binary(other, "/", is_operator=True) def __rtruediv__(self, other): r""" Broadcasted elementwise division - a binary operation. + ``x / y`` returns a :class:`LazyTensor` that encodes, symbolically, the elementwise division of ``x`` by ``y``. """ @@ -1106,11 +1052,12 @@ def __rtruediv__(self, other): elif is_scalar_and_equals(other, 1): return self.unary("Inv") else: - return self.divop(other, rversion=True) + return self.binary(other, "/", is_operator=True, rversion=True) def __or__(self, other): r""" Euclidean scalar product - a binary operation. + ``(x|y)`` returns a :class:`LazyTensor` that encodes, symbolically, the scalar product of ``x`` and ``y`` which are assumed to have the same shape. """ @@ -1119,6 +1066,7 @@ def __or__(self, other): def __ror__(self, other): r""" Euclidean scalar product - a binary operation. + ``(x|y)`` returns a :class:`LazyTensor` that encodes, symbolically, the scalar product of ``x`` and ``y`` which are assumed to have the same shape. """ @@ -1131,6 +1079,7 @@ def __ror__(self, other): def __abs__(self): r""" Element-wise absolute value - a unary operation. + ``abs(x)`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise absolute value of ``x``. """ @@ -1139,6 +1088,7 @@ def __abs__(self): def abs(self): r""" Element-wise absolute value - a unary operation. + ``x.abs()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise absolute value of ``x``. """ @@ -1147,6 +1097,7 @@ def abs(self): def __neg__(self): r""" Element-wise minus - a unary operation. + ``-x`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise opposite of ``x``. """ @@ -1157,6 +1108,7 @@ def __neg__(self): def exp(self): r""" Element-wise exponential - a unary operation. + ``x.exp()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise exponential of ``x``. """ @@ -1165,6 +1117,7 @@ def exp(self): def log(self): r""" Element-wise logarithm - a unary operation. + ``x.log()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise logarithm of ``x``. """ @@ -1173,6 +1126,7 @@ def log(self): def xlogx(self): r""" Element-wise x*log(x) function - a unary operation. + ``x.xlogx()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise ``x`` times logarithm of ``x`` (with value 0 at 0). """ @@ -1181,6 +1135,7 @@ def xlogx(self): def cos(self): r""" Element-wise cosine - a unary operation. + ``x.cos()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise cosine of ``x``. """ @@ -1189,30 +1144,16 @@ def cos(self): def sin(self): r""" Element-wise sine - a unary operation. + ``x.sin()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise sine of ``x``. """ return self.unary("Sin") - def sinxdivx(self): - r""" - Element-wise sin(x)/x function - a unary operation. - ``x.sinxdivx()`` returns a :class:`LazyTensor` that encodes, symbolically, - the element-wise sinxdivx function of ``x``. - """ - return self.unary("SinXDivX") - - def sinc(self): - r""" - Element-wise sinc(x) = sin(pi x) / (pi x) function - a unary operation. 
- ``x.sinc()`` returns a :class:`LazyTensor` that encodes, symbolically, - the element-wise sinc function of ``x``. - """ - return (math.pi * self).sinxdivx() - def asin(self): r""" Element-wise arcsine - a unary operation. + ``x.asin()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise arcsine of ``x``. """ @@ -1221,6 +1162,7 @@ def asin(self): def acos(self): r""" Element-wise arccosine - a unary operation. + ``x.acos()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise arccosine of ``x``. """ @@ -1229,22 +1171,16 @@ def acos(self): def atan(self): r""" Element-wise arctangent - a unary operation. + ``x.atan()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise arctangent of ``x``. """ return self.unary("Atan") - def atan2(self, other): - r""" - Element-wise atan2 - a binary operation. - ``y.atan2(x)`` returns a :class:`LazyTensor` that encodes, symbolically, - the element-wise atan2 of ``x`` and ``y``. - """ - return self.binary(other, "Atan2", dimcheck="same") - def sqrt(self): r""" Element-wise square root - a unary operation. + ``x.sqrt()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise square root of ``x``. """ @@ -1253,6 +1189,7 @@ def sqrt(self): def rsqrt(self): r""" Element-wise inverse square root - a unary operation. + ``x.rsqrt()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise inverse square root of ``x``. """ @@ -1261,8 +1198,10 @@ def rsqrt(self): def __pow__(self, other): r""" Broadcasted element-wise power operator - a binary operation. + ``x**y`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise value of ``x`` to the power ``y``. + Note: - if **y = 2**, ``x**y`` relies on the ``"Square"`` KeOps operation; - if **y = 0.5**, ``x**y`` uses on the ``"Sqrt"`` KeOps operation; @@ -1297,6 +1236,7 @@ def __pow__(self, other): def power(self, other): r""" Broadcasted element-wise power operator - a binary operation. + ``pow(x,y)`` is equivalent to ``x**y``. """ return self ** other @@ -1304,6 +1244,7 @@ def power(self, other): def square(self): r""" Element-wise square - a unary operation. + ``x.square()`` is equivalent to ``x**2`` and returns a :class:`LazyTensor` that encodes, symbolically, the element-wise square of ``x``. """ @@ -1312,6 +1253,7 @@ def square(self): def sign(self): r""" Element-wise sign in {-1,0,+1} - a unary operation. + ``x.sign()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise sign of ``x``. """ @@ -1320,6 +1262,7 @@ def sign(self): def step(self): r""" Element-wise step function - a unary operation. + ``x.step()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise sign of ``x``. """ @@ -1328,6 +1271,7 @@ def step(self): def relu(self): r""" Element-wise ReLU function - a unary operation. + ``x.relu()`` returns a :class:`LazyTensor` that encodes, symbolically, the element-wise positive part of ``x``. """ @@ -1336,45 +1280,20 @@ def relu(self): def clamp(self, other1, other2): r""" Element-wise Clamp function - a ternary operation. + ``x.clamp(a,b)`` returns a :class:`LazyTensor` that encodes, symbolically, - the element-wise clamping of ``x`` in ``(a,b)``. Broadcasting rules apply. - a and b may be fixed integers or floats, or other LazyTensors. + the element-wise clamping of ``x`` in ``(a,b)``. Braoodcasting rules apply. 
+ a and b may be fixed integers or floats, or other LazyTensors """ if (type(other1) == int) and (type(other2) == int): return self.unary("ClampInt", opt_arg=other1, opt_arg2=other2) else: return self.ternary(other1, other2, "Clamp", dimcheck="sameor1") - def ifelse(self, other1, other2): - r""" - Element-wise if-else function - a ternary operation. - ``x.ifelse(a,b)`` returns a :class:`LazyTensor` that encodes, symbolically, - ``a`` where ``x >= 0`` and ``b`` where ``x < 0``. Broadcasting rules apply. - a and b may be fixed integers or floats, or other LazyTensors. - """ - return self.ternary(other1, other2, "IfElse", dimcheck="sameor1") - - def mod(self, modulus, offset=0): - r""" - Element-wise modulo with offset function - a ternary operation. - ``x.mod(a,b)`` returns a :class:`LazyTensor` that encodes, symbolically, - the element-wise modulo of ``x`` with modulus ``a`` and offset ``b``. - By default b=0, so that x.mod(a) becomes equivalent to the NumPy function mod. - Broadcasting rules apply. a and b are fixed integers or float. - """ - return self.ternary(modulus, offset, "Mod", dimcheck="sameor1") - - def round(self, other=0): - r""" - Element-wise rounding function - a unary operation. - ``x.round(d)`` returns a :class:`LazyTensor` that encodes, symbolically, - the element-wise rounding of ``x`` to d decimal places. d is int. - """ - return self.unary("Round", opt_arg=other) - def sqnorm2(self): r""" Squared Euclidean norm - a unary operation. + ``x.sqnorm2()`` returns a :class:`LazyTensor` that encodes, symbolically, the squared Euclidean norm of a vector ``x``. """ @@ -1383,6 +1302,7 @@ def sqnorm2(self): def norm2(self): r""" Euclidean norm - a unary operation. + ``x.norm2()`` returns a :class:`LazyTensor` that encodes, symbolically, the Euclidean norm of a vector ``x``. """ @@ -1391,6 +1311,7 @@ def norm2(self): def norm(self, dim): r""" Euclidean norm - a unary operation. + ``x.norm(-1)`` is equivalent to ``x.norm2()`` and returns a :class:`LazyTensor` that encodes, symbolically, the Euclidean norm of a vector ``x``. """ @@ -1401,6 +1322,7 @@ def norm(self, dim): def normalize(self): r""" Vector normalization - a unary operation. + ``x.normalize()`` returns a :class:`LazyTensor` that encodes, symbolically, a vector ``x`` divided by its Euclidean norm. """ @@ -1409,6 +1331,7 @@ def normalize(self): def sqdist(self, other): r""" Squared distance - a binary operation. + ``x.sqdist(y)`` returns a :class:`LazyTensor` that encodes, symbolically, the squared Euclidean distance between two vectors ``x`` and ``y``. """ @@ -1417,6 +1340,7 @@ def sqdist(self, other): def weightedsqnorm(self, other): r""" Weighted squared norm of a LazyTensor ``x`` - a binary operation. + ``x.weightedsqnorm(s)`` returns a :class:`LazyTensor` that encodes, symbolically, the weighted squared Norm of a vector ``x`` with weights stored in the LazyTensor ``s``- see the :doc:`main reference page <../../../api/math-operations>` for details. @@ -1438,6 +1362,7 @@ def weightedsqnorm(self, other): def weightedsqdist(self, g, s): r""" Weighted squared distance. + ``x.weightedsqdist(y, s)`` is equivalent to ``(x - y).weightedsqnorm(s)``. """ if not hasattr(g, "__GenericLazyTensor__"): @@ -1450,6 +1375,7 @@ def weightedsqdist(self, g, s): def elem(self, i): r""" Indexing of a vector - a unary operation. + ``x.elem(i)`` returns a :class:`LazyTensor` that encodes, symbolically, the i-th element ``x[i]`` of the vector ``x``. 
""" @@ -1464,6 +1390,7 @@ def elem(self, i): def extract(self, i, d): r""" Range indexing - a unary operation. + ``x.extract(i, d)`` returns a :class:`LazyTensor` that encodes, symbolically, the sub-vector ``x[i:i+d]`` of the vector ``x``. """ @@ -1478,8 +1405,10 @@ def extract(self, i, d): def __getitem__(self, key): r""" Element or range indexing - a unary operation. + ``x[key]`` redirects to the :meth:`elem` or :meth:`extract` methods, depending on the ``key`` argument. Supported values are: + - an integer ``k``, in which case ``x[key]`` redirects to ``elem(x,k)``, - a tuple ``..,:,:,k`` with ``k`` an integer, @@ -1522,6 +1451,7 @@ def __getitem__(self, key): def one_hot(self, D): r""" Encodes a (rounded) scalar value as a one-hot vector of dimension D. + ``x.one_hot(D)`` returns a :class:`LazyTensor` that encodes, symbolically, a vector of length D whose round(x)-th coordinate is equal to 1, and the other ones to zero. """ @@ -1537,6 +1467,7 @@ def one_hot(self, D): def concat(self, other): r""" Concatenation of two :class:`LazyTensor` - a binary operation. + ``x.concat(y)`` returns a :class:`LazyTensor` that encodes, symbolically, the concatenation of ``x`` and ``y`` along their last dimension. """ @@ -1548,6 +1479,7 @@ def concat(self, other): def concatenate(tuple_of_lt, axis=-1): r""" Concatenation of a tuple of :class:`GenericLazyTensor`. + ``GenericLazyTensor.concatenate( (x_1, x_2, ..., x_n), -1)`` returns a :class:`GenericLazyTensor` that encodes, symbolically, the concatenation of ``x_1``, ``x_2``, ..., ``x_n`` along their last dimension. Note that **axis** should be equal to -1 or 2 (if the ``x_i``'s are 3D GenericLazyTensor): @@ -1580,6 +1512,7 @@ def concatenate(tuple_of_lt, axis=-1): def cat(tuple_of_lt, dim): r""" Concatenation of a tuple of LazyTensors. + ``LazyTensor.cat( (x_1, x_2, ..., x_n), -1)`` is a PyTorch-friendly alias for ``LazyTensor.concatenate( (x_1, x_2, ..., x_n), -1)``; just like indexing operations, it is only supported along the last dimension. @@ -1589,6 +1522,7 @@ def cat(tuple_of_lt, dim): def matvecmult(self, other): r""" Matrix-vector product - a binary operation. + If ``x._shape[-1] == A*B`` and ``y._shape[-1] == B``, ``z = x.matvecmult(y)`` returns a :class:`GenericLazyTensor` such that ``z._shape[-1] == A`` which encodes, symbolically, @@ -1603,6 +1537,7 @@ def matvecmult(self, other): def vecmatmult(self, other): r""" Vector-matrix product - a binary operation. + If ``x._shape[-1] == A`` and ``y._shape[-1] == A*B``, ``z = x.vecmatmult(y)`` returns a :class:`GenericLazyTensor` such that ``z._shape[-1] == B`` which encodes, symbolically, @@ -1617,6 +1552,7 @@ def vecmatmult(self, other): def tensorprod(self, other): r""" Tensor product of vectors - a binary operation. + If ``x._shape[-1] == A`` and ``y._shape[-1] == B``, ``z = x.tensorprod(y)`` returns a :class:`GenericLazyTensor` such that ``z._shape[-1] == A*B`` which encodes, symbolically, @@ -1631,6 +1567,7 @@ def tensorprod(self, other): def keops_tensordot(self, other, dimfa, dimfb, contfa, contfb, *args): """ Tensor dot product (on KeOps internal dimensions) - a binary operation. + :param other: a LazyTensor :param dimfa: tuple of int :param dimfb: tuple of int @@ -1660,6 +1597,7 @@ def keops_tensordot(self, other, dimfa, dimfb, contfa, contfb, *args): def grad(self, other, gradin): r""" Symbolic gradient operation. 
+
         ``z = x.grad(v,e)`` returns a :class:`LazyTensor`
         which encodes, symbolically,
         the gradient (more precisely, the adjoint of the differential operator) of ``x``, with
@@ -1681,10 +1619,13 @@ def grad(self, other, gradin):
     def sum(self, axis=-1, dim=None, **kwargs):
         r"""
         Summation unary operation, or Sum reduction.
+
         ``sum(axis, dim, **kwargs)`` will:
+
         - if **axis or dim = 0**, return the sum reduction of **self** over the "i" indexes.
         - if **axis or dim = 1**, return the sum reduction of **self** over the "j" indexes.
         - if **axis or dim = 2**, return a new :class:`LazyTensor` object representing the sum of the values of the vector **self**,
+
         Keyword Args:
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 1 (= reduction over :math:`j`) or 2 (i.e. -1, sum along the
                 dimension of the vector variable).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         if dim is not None:
             axis = dim
@@ -1703,24 +1645,31 @@ def sum(self, axis=-1, dim=None, **kwargs):
     def sum_reduction(self, axis=None, dim=None, **kwargs):
         r"""
         Sum reduction.
+
         ``sum_reduction(axis, dim, **kwargs)`` will return the sum reduction of **self**.
+
         Keyword Args:
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 or 1 (= reduction over :math:`j`).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         return self.reduction("Sum", axis=axis, dim=dim, **kwargs)
 
     def logsumexp(self, axis=None, dim=None, weight=None, **kwargs):
         r"""
         Log-Sum-Exp reduction.
+
         ``logsumexp(axis, dim, weight, **kwargs)`` will:
+
         - if **axis or dim = 0**, return the "log-sum-exp" reduction of **self** over the "i" indexes.
         - if **axis or dim = 1**, return the "log-sum-exp" reduction of **self** over the "j" indexes.
+
         For details, please check the documentation of the KeOps reductions ``LogSumExp`` and ``LogSumExpWeight`` in
         the :doc:`main reference page <../../../api/math-operations>`.
+
         Keyword Args:
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 or 1 (= reduction over :math:`j`).
             dim (integer): alternative keyword for the axis parameter.
             weight (:class:`LazyTensor`): optional object that specifies scalar or vector-valued weights
                 in the log-sum-exp operation
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         if weight is None:
             return self.reduction("LogSumExp", axis=axis, dim=dim, **kwargs)
@@ -1746,11 +1696,15 @@ def logsumexp_reduction(self, **kwargs):
     def sumsoftmaxweight(self, weight, axis=None, dim=None, **kwargs):
         r"""
         Sum of weighted Soft-Max reduction.
+
         ``sumsoftmaxweight(weight, axis, dim, **kwargs)`` will:
+
         - if **axis or dim = 0**, return the "sum of weighted Soft-Max" reduction of **self** over the "i" indexes.
         - if **axis or dim = 1**, return the "sum of weighted Soft-Max" reduction of **self** over the "j" indexes.
+
         For details, please check the documentation of the KeOps reduction ``SumSoftMaxWeight`` in
         the :doc:`main reference page <../../../api/math-operations>`.
+
         Keyword Args:
             weight (:class:`LazyTensor`): object that specifies scalar or vector-valued weights. 
            axis (integer): reduction dimension, which should be equal to the number
@@ -1758,6 +1712,7 @@ def sumsoftmaxweight(self, weight, axis=None, dim=None, **kwargs):
                 or 1 (= reduction over :math:`j`).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         return self.reduction(
             "SumSoftMaxWeight", other=weight, axis=axis, dim=dim, **kwargs
         )
@@ -1772,10 +1727,13 @@ def sumsoftmaxweight_reduction(self, **kwargs):
     def min(self, axis=-1, dim=None, **kwargs):
         r"""
         Minimum unary operation, or Min reduction.
+
         ``min(axis, dim, **kwargs)`` will:
+
         - if **axis or dim = 0**, return the min reduction of **self** over the "i" indexes.
         - if **axis or dim = 1**, return the min reduction of **self** over the "j" indexes.
         - if **axis or dim = 2**, return a new :class:`LazyTensor` object representing the min of the values of the vector **self**,
+
         Keyword Args:
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 1 (= reduction over :math:`j`) or 2 (i.e. -1, min along the
                 dimension of the vector variable).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         if dim is not None:
             axis = dim
@@ -1794,13 +1753,16 @@ def min(self, axis=-1, dim=None, **kwargs):
     def min_reduction(self, axis=None, dim=None, **kwargs):
         r"""
         Min reduction.
+
         ``min_reduction(axis, dim, **kwargs)`` will return the min reduction of **self**.
+
         Keyword Args:
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 or 1 (= reduction over :math:`j`).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         return self.reduction("Min", axis=axis, dim=dim, **kwargs)
 
@@ -1813,10 +1775,13 @@ def __min__(self, **kwargs):
     def argmin(self, axis=-1, dim=None, **kwargs):
         r"""
         ArgMin unary operation, or ArgMin reduction.
+
         ``argmin(axis, dim, **kwargs)`` will:
+
         - if **axis or dim = 0**, return the argmin reduction of **self** over the "i" indexes.
         - if **axis or dim = 1**, return the argmin reduction of **self** over the "j" indexes.
         - if **axis or dim = 2**, return a new :class:`LazyTensor` object representing the argmin of the values of the vector **self**,
+
         Keyword Args:
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 1 (= reduction over :math:`j`) or 2 (i.e. -1, argmin along the
                 dimension of the vector variable).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         if dim is not None:
             axis = dim
@@ -1835,28 +1801,35 @@ def argmin_reduction(self, axis=None, dim=None, **kwargs):
         r"""
         ArgMin reduction.
+
         ``argmin_reduction(axis, dim, **kwargs)`` will return the argmin reduction of **self**.
+
         Keyword Args:
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 or 1 (= reduction over :math:`j`).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method. 
+
         """
         return self.reduction("ArgMin", axis=axis, dim=dim, **kwargs)
 
     def min_argmin(self, axis=None, dim=None, **kwargs):
         r"""
         Min-ArgMin reduction.
+
         ``min_argmin(axis, dim, **kwargs)`` will:
+
         - if **axis or dim = 0**, return the minimal values and their indices of **self** over the "i" indexes.
         - if **axis or dim = 1**, return the minimal values and their indices of **self** over the "j" indexes.
+
         Keyword Args:
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 or 1 (= reduction over :math:`j`).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         return self.reduction("Min_ArgMin", axis=axis, dim=dim, **kwargs)
 
@@ -1869,10 +1842,13 @@ def min_argmin_reduction(self, **kwargs):
     def max(self, axis=-1, dim=None, **kwargs):
         r"""
         Maximum unary operation, or Max reduction.
+
         ``max(axis, dim, **kwargs)`` will:
+
         - if **axis or dim = 0**, return the max reduction of **self** over the "i" indexes.
         - if **axis or dim = 1**, return the max reduction of **self** over the "j" indexes.
         - if **axis or dim = 2**, return a new :class:`LazyTensor` object representing the max of the values of the vector **self**,
+
         Keyword Args:
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 1 (= reduction over :math:`j`) or 2 (i.e. -1, max along the
                 dimension of the vector variable).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         if dim is not None:
             axis = dim
@@ -1891,13 +1868,16 @@ def max(self, axis=-1, dim=None, **kwargs):
     def max_reduction(self, axis=None, dim=None, **kwargs):
         r"""
         Max reduction.
+
         ``max_reduction(axis, dim, **kwargs)`` will return the max reduction of **self**.
+
         Keyword Args:
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 or 1 (= reduction over :math:`j`).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         return self.reduction("Max", axis=axis, dim=dim, **kwargs)
 
@@ -1910,10 +1890,13 @@ def __max__(self, **kwargs):
     def argmax(self, axis=-1, dim=None, **kwargs):
         r"""
         ArgMax unary operation, or ArgMax reduction.
+
         ``argmax(axis, dim, **kwargs)`` will:
+
         - if **axis or dim = 0**, return the argmax reduction of **self** over the "i" indexes.
         - if **axis or dim = 1**, return the argmax reduction of **self** over the "j" indexes.
         - if **axis or dim = 2**, return a new :class:`LazyTensor` object representing the argmax of the values of the vector **self**,
+
         Keyword Args:
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 1 (= reduction over :math:`j`) or 2 (i.e. -1, argmax along the
                 dimension of the vector variable).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         if dim is not None:
             axis = dim
@@ -1932,28 +1916,35 @@ def argmax_reduction(self, axis=None, dim=None, **kwargs):
         r"""
         ArgMax reduction.
+
         ``argmax_reduction(axis, dim, **kwargs)`` will return the argmax reduction of **self**. 
+
         Keyword Args:
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 or 1 (= reduction over :math:`j`).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         return self.reduction("ArgMax", axis=axis, dim=dim, **kwargs)
 
     def max_argmax(self, axis=None, dim=None, **kwargs):
         r"""
         Max-ArgMax reduction.
+
         ``max_argmax(axis, dim, **kwargs)`` will:
+
         - if **axis or dim = 0**, return the maximal values and their indices of **self** over the "i" indexes.
         - if **axis or dim = 1**, return the maximal values and their indices of **self** over the "j" indexes.
+
         Keyword Args:
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 or 1 (= reduction over :math:`j`).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         return self.reduction("Max_ArgMax", axis=axis, dim=dim, **kwargs)
 
@@ -1966,9 +1957,12 @@ def max_argmax_reduction(self, **kwargs):
     def Kmin(self, K, axis=None, dim=None, **kwargs):
         r"""
         K-Min reduction.
+
         ``Kmin(K, axis, dim, **kwargs)`` will:
+
         - if **axis or dim = 0**, return the K minimal values of **self** over the "i" indexes.
         - if **axis or dim = 1**, return the K minimal values of **self** over the "j" indexes.
+
         Keyword Args:
             K (integer): number of minimal values required
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 or 1 (= reduction over :math:`j`).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         return self.reduction("KMin", opt_arg=K, axis=axis, dim=dim, **kwargs)
 
@@ -1988,9 +1983,12 @@ def Kmin_reduction(self, **kwargs):
     def argKmin(self, K, axis=None, dim=None, **kwargs):
         r"""
         argKmin reduction.
+
         ``argKmin(K, axis, dim, **kwargs)`` will:
+
         - if **axis or dim = 0**, return the indices of the K minimal values of **self** over the "i" indexes.
         - if **axis or dim = 1**, return the indices of the K minimal values of **self** over the "j" indexes.
+
         Keyword Args:
             K (integer): number of minimal values required
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 or 1 (= reduction over :math:`j`).
             dim (integer): alternative keyword for the axis parameter.
             **kwargs: optional parameters that are passed to the :meth:`reduction` method.
+
         """
         return self.reduction("ArgKMin", opt_arg=K, axis=axis, dim=dim, **kwargs)
 
@@ -2010,9 +2009,12 @@ def argKmin_reduction(self, **kwargs):
     def Kmin_argKmin(self, K, axis=None, dim=None, **kwargs):
         r"""
         K-Min-argK-min reduction.
+
         ``Kmin_argKmin(K, axis, dim, **kwargs)`` will:
+
         - if **axis or dim = 0**, return the K minimal values and their indices of **self** over the "i" indexes.
         - if **axis or dim = 1**, return the K minimal values and their indices of **self** over the "j" indexes.
+
         Keyword Args:
             K (integer): number of minimal values required
             axis (integer): reduction dimension, which should be equal to the number
                 of batch dimensions plus 0 (= reduction over :math:`i`),
                 or 1 (= reduction over :math:`j`).
             dim (integer): alternative keyword for the axis parameter. 
**kwargs: optional parameters that are passed to the :meth:`reduction` method. + """ return self.reduction("KMin_ArgKMin", opt_arg=K, axis=axis, dim=dim, **kwargs) @@ -2034,11 +2037,13 @@ def Kmin_argKmin_reduction(self, **kwargs): def __matmul__(self, v, **kwargs): r""" Matrix-vector or Matrix-matrix product, supporting batch dimensions. + If ``K`` is a :class:`LazyTensor` whose trailing dimension ``K._shape[-1]`` is equal to 1, we can understand it as a linear operator and apply it to arbitrary NumPy arrays or PyTorch Tensors. Assuming that ``v`` is a 1D (resp. ND) tensor such that ``K.shape[-1] == v.shape[-1]`` (resp. ``v.shape[-2]``), ``K @ v`` denotes the matrix-vector (resp. matrix-matrix) product between the two objects, encoded as a vanilla NumPy or PyTorch 1D (resp. ND) tensor. + Example: >>> x, y = torch.randn(1000, 3), torch.randn(2000, 3) >>> x_i, y_j = LazyTensor( x[:,None,:] ), LazyTensor( y[None,:,:] ) @@ -2070,9 +2075,11 @@ def __matmul__(self, v, **kwargs): def t(self): r""" Matrix transposition, permuting the axes of :math:`i`- and :math:`j`-variables. + For instance, if ``K`` is a LazyTensor of shape ``(B,M,N,D)``, ``K.t()`` returns a symbolic copy of ``K`` whose axes 1 and 2 have been switched with each other: ``K.t().shape == (B,N,M,D)``. + Example: >>> x, y = torch.randn(1000, 3), torch.randn(2000, 3) >>> x_i, y_j = LazyTensor( x[:,None,:] ), LazyTensor( y[None,:,:] ) @@ -2126,10 +2133,12 @@ def T(self): def matvec(self, v): r""" Alias for the matrix-vector product, added for compatibility with :mod:`scipy.sparse.linalg`. + If ``K`` is a :class:`LazyTensor` whose trailing dimension ``K._shape[-1]`` is equal to 1, we can understand it as a linear operator and wrap it into a :mod:`scipy.sparse.linalg.LinearOperator` object, thus getting access to robust solvers and spectral routines. + Example: >>> import numpy as np >>> x = np.random.randn(1000,3) @@ -2147,180 +2156,7 @@ def matvec(self, v): def rmatvec(self, v): r""" Alias for the transposed matrix-vector product, added for compatibility with :mod:`scipy.sparse.linalg`. + See :meth:`matvec` for further reference. """ return self.T @ v - - def real2complex(self): - r""" - Element-wise "real 2 complex" operation - a unary operation. - ``x.real2complex()`` returns a :class:`ComplexLazyTensor` that encodes, symbolically, - the same tensor as ``x``, but seen as complex-valued (with zero imaginary part for each coefficient) - """ - return self.unary("Real2Complex", dimres=2 * self._shape[-1], is_complex=True) - - def imag2complex(self): - r""" - Element-wise "imag 2 complex" operation - a unary operation. - ``x.real2complex()`` returns a :class:`ComplexLazyTensor` that encodes, symbolically, - the multiplication of ``1j`` with ``x``. - """ - return self.unary("Imag2Complex", dimres=2 * self._shape[-1], is_complex=True) - - def exp1j(self): - r""" - Element-wise "complex exponential of 1j x" operation - a unary operation. - ``x.exp1j()`` returns a :class:`ComplexLazyTensor` that encodes, symbolically, - the complex exponential of ``1j*x``. 
- """ - return self.unary("ComplexExp1j", dimres=2 * self._shape[-1], is_complex=True) - - -class ComplexGenericLazyTensor(GenericLazyTensor): - r"""Extension of the LazyTensor class for complex operations.""" - - def __init__(self, x=None, axis=None): - r"""Creates a KeOps symbolic variable of complex dtype.""" - self.get_tools() - if type(x) == complex: - x = [x] - if type(x) == list: - x_ = [None] * (2 * len(x)) - for i in range(len(x)): - x_[2 * i] = x[i].real - x_[2 * i + 1] = x[i].imag - x = x_ - elif self.tools.is_tensor(x): - x = self.tools.view_as_real(x) - super().__init__(x=x, axis=axis) - self.is_complex = True - - def __call__(self, *args, **kwargs): - res = super().__call__(*args, **kwargs) - return self.tools.view_as_complex(res) - - @property - def dtype(self): - if self._dtype == "float32": - return "complex64" - elif self._dtype == "float64": - return "complex128" - - @property - def shape(self): - r"""returns the shape of the complex LazyTensor.""" - s = super()._shape - s = s[:-1] + (s[-1] // 2,) - if s[-1] == 1: - return s[:-1] - else: - return s - - # List of supported operations ============================================ - - @property - def real(self): - r""" - Element-wise real part of complex - a unary operation. - ``z.real`` returns a :class:`LazyTensor` that encodes, symbolically, - the element-wise real part of ``z``. - """ - return self.unary("ComplexReal", dimres=self._shape[-1] // 2, is_complex=False) - - @property - def imag(self): - r""" - Element-wise imaginary part of complex - a unary operation. - ``z.imag`` returns a :class:`LazyTensor` that encodes, symbolically, - the element-wise imaginary part of ``z``. - """ - return self.unary("ComplexImag", dimres=self._shape[-1] // 2, is_complex=False) - - def angle(self): - r""" - Element-wise angle (or argument) of complex - a unary operation. - ``z.angle()`` returns a :class:`LazyTensor` that encodes, symbolically, - the element-wise angle of ``z``. - """ - return self.unary("ComplexAngle", dimres=self._shape[-1] // 2, is_complex=False) - - def conj(self): - r""" - Element-wise complex conjugate - a unary operation. - ``z.conj()`` returns a :class:`ComplexLazyTensor` that encodes, symbolically, - the element-wise complex conjugate of ``z``. - """ - return self.unary("Conj", dimres=self._shape[-1], is_complex=True) - - def sum(self, axis=-1, dim=None, **kwargs): - if dim is not None: - axis = dim - if axis in [-1, len(self._shape) - 1]: - return self.unary("ComplexSum", dimres=2, is_complex=True) - else: - return self.reduction("Sum", axis=axis, **kwargs) - - def __abs__(self): - r""" - Element-wise absolute value (or modulus) of complex - a unary operation. - ``z.abs()`` returns a :class:`LazyTensor` that encodes, symbolically, - the element-wise absolute value of ``z``. - """ - return self.unary("ComplexAbs", dimres=self._shape[-1] // 2, is_complex=False) - - def exp(self): - r""" - Element-wise complex exponential - a unary operation. - ``z.exp()`` returns a :class:`ComplexLazyTensor` that encodes, symbolically, - the element-wise complex exponential of ``z``. 
- """ - return self.unary("ComplexExp", dimres=self._shape[-1], is_complex=True) - - def mulop(self, other, **kwargs): - if other._shape[-1] == 1: - return other.binary(self, "ComplexRealScal", **kwargs, is_complex=True) - elif not is_complex_lazytensor(other): - return self.mulop(other.real2complex()) - elif self._shape[-1] == 2: - return self.binary(other, "ComplexScal", **kwargs, is_complex=True, dimcheck=None) - elif other._shape[-1] == 2: - return other.binary(self, "ComplexScal", **kwargs, is_complex=True, dimcheck=None) - else: - return self.binary(other, "ComplexMult", **kwargs, is_complex=True) - - def addop(self, other, **kwargs): - if not is_complex_lazytensor(other): - return self.addop(other.real2complex()) - elif self._shape[-1] == other._shape[-1]: - return self.binary(other, "Add", **kwargs, is_complex=True) - else: - raise ValueError("incompatible shapes for addition.") - - def subop(self, other, **kwargs): - if not is_complex_lazytensor(other): - return self.subop(other.real2complex()) - elif self._shape[-1] == other._shape[-1]: - return self.binary(other, "Subtract", **kwargs, is_complex=True) - else: - raise ValueError("incompatible shapes for subtraction.") - - def divop(self, other, **kwargs): - if not is_complex_lazytensor(other): - return self.divop(other.real2complex()) - elif self._shape[-1] == other._shape[-1]: - return self.binary(other, "ComplexDivide", **kwargs, is_complex=True) - else: - raise ValueError("incompatible shapes for division.") - - def real2complex(self): - raise ValueError("real2complex cannot be applied to a complex LazyTensor.") - - def imag2complex(self): - raise ValueError("imag2complex cannot be applied to a complex LazyTensor.") - - def exp1j(self): - raise ValueError("exp1j cannot be applied to a complex LazyTensor.") - - def __call__(self, *args, **kwargs): - res = super().__call__(*args, **kwargs) - return self.tools.view_as_complex(res) \ No newline at end of file diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index 17f6eb808..f4a050d91 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -440,7 +440,7 @@ def test_LazyTensor_sum(self): ############################################################ def test_IVF(self): ########################################################### - from pykeops.numpy.nn.ivf import IVF + from pykeops.numpy.nn.ivf import ivf import numpy as np np.random.seed(0) diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index a889ffe70..eb2c5ddb5 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -676,7 +676,7 @@ def invert_permutation_numpy(permutation): ############################################################ def test_IVF(self): ############################################################ - from pykeops.torch.nn.ivf import IVF + from pykeops.torch.nn.ivf import ivf import torch torch.manual_seed(0) From fc60335b7308dd27cb3a9645e62daa448b473119 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Thu, 1 Apr 2021 14:24:59 +0100 Subject: [PATCH 023/111] changing tests --- pykeops/test/unit_tests_numpy.py | 4 ++-- pykeops/test/unit_tests_pytorch.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index f4a050d91..e4fd2d22f 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -440,7 +440,7 @@ def 
test_LazyTensor_sum(self): ############################################################ def test_IVF(self): ########################################################### - from pykeops.numpy.nn.ivf import ivf + from pykeops.numpy.nn.ivf import IVF import numpy as np np.random.seed(0) @@ -457,7 +457,7 @@ def test_IVF(self): truth = truth[:, :k] # IVF K nearest neighbours - IVF = ivf() + IVF = IVF() IVF.fit(x, a=a) ivf_fit = IVF.kneighbors(y) diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index eb2c5ddb5..d60bf74b9 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -676,7 +676,7 @@ def invert_permutation_numpy(permutation): ############################################################ def test_IVF(self): ############################################################ - from pykeops.torch.nn.ivf import ivf + from pykeops.torch.nn.ivf import IVF import torch torch.manual_seed(0) @@ -691,7 +691,7 @@ def test_IVF(self): truth = truth[:, :k] # IVF K nearest neighbours - IVF = ivf() + IVF = IVF() IVF.fit(x, a=a) ivf_fit = IVF.kneighbors(y) From 3686d61f9ed11229863fa51658727ecd9ae18693 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Thu, 1 Apr 2021 14:31:26 +0100 Subject: [PATCH 024/111] import utils correctly --- pykeops/numpy/nn/ivf.py | 2 ++ pykeops/torch/nn/ivf.py | 1 + 2 files changed, 3 insertions(+) diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index 3d45917bb..af455ff4c 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -2,6 +2,8 @@ from pykeops.numpy.cluster import cluster_ranges_centroids from pykeops.numpy.cluster import from_matrix from pykeops.common.ivf import GenericIVF +from pykeops.numpy.utils import numpytools + import numpy as np diff --git a/pykeops/torch/nn/ivf.py b/pykeops/torch/nn/ivf.py index e98fbba58..5fc43224b 100644 --- a/pykeops/torch/nn/ivf.py +++ b/pykeops/torch/nn/ivf.py @@ -2,6 +2,7 @@ from pykeops.torch.cluster import cluster_ranges_centroids from pykeops.torch.cluster import from_matrix from pykeops.common.ivf import GenericIVF +from pykeops.torch.utils import torchtools import torch From 63d1782d74ca314aaf0cd8e76d5a73aae64a1c18 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Thu, 1 Apr 2021 14:38:43 +0100 Subject: [PATCH 025/111] add lazytensor import to base ivf class --- pykeops/common/ivf.py | 16 ++++++++-------- pykeops/numpy/nn/ivf.py | 2 +- pykeops/torch/nn/ivf.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pykeops/common/ivf.py b/pykeops/common/ivf.py index 1d5b74db9..6ffcd4eec 100644 --- a/pykeops/common/ivf.py +++ b/pykeops/common/ivf.py @@ -1,16 +1,16 @@ class GenericIVF: - def __init__(self, k, metric, normalise): + def __init__(self, k, metric, normalise,LazyTensor): self.__k = k self.__normalise = normalise self.__distance = self.tools.distance_function(metric) self.__metric = metric - + self.__LazyTensor=LazyTensor def __get_tools(self): pass def __k_argmin(self, x, y, k=1): - x_LT = LazyTensor(self.tools.to(self.tools.unsqueeze(x, 1), self.__device)) - y_LT = LazyTensor(self.tools.to(self.tools.unsqueeze(y, 0), self.__device)) + x_LT = self.__LazyTensor(self.tools.to(self.tools.unsqueeze(x, 1), self.__device)) + y_LT = self.__LazyTensor(self.tools.to(self.tools.unsqueeze(y, 0), self.__device)) d = self.__distance(x_LT, y_LT) if not self.tools.is_tensor(x): @@ -105,8 +105,8 @@ def _kneighbors(self, y): y_ranges, _, _ = 
cluster_ranges_centroids(y, y_labels) self.__y_ranges = y_ranges y, y_labels = self.__sort_clusters(y, y_labels, store_x=False) - x_LT = LazyTensor(self.tools.unsqueeze(self.__x, 0)) - y_LT = LazyTensor(self.tools.unsqueeze(y, 1)) + x_LT = self.__LazyTensor(self.tools.unsqueeze(self.__x, 0)) + y_LT = self.__LazyTensor(self.tools.unsqueeze(y, 1)) D_ij = self.__distance(y_LT, x_LT) ranges_ij = from_matrix(y_ranges, self.__x_ranges, self.__keep) D_ij.ranges = ranges_ij @@ -114,7 +114,7 @@ def _kneighbors(self, y): return self.__unsort(nn) def brute_force(self, x, y, k=5): - x_LT = LazyTensor(self.tools.unsqueeze(x, 0)) - y_LT = LazyTensor(self.tools.unsqueeze(y, 1)) + x_LT = self.__LazyTensor(self.tools.unsqueeze(x, 0)) + y_LT = self.__LazyTensor(self.tools.unsqueeze(y, 1)) D_ij = self.__distance(y_LT, x_LT) return D_ij.argKmin(K=k, axis=1) diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index af455ff4c..b53217268 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -10,7 +10,7 @@ class IVF(GenericIVF): def __init__(self, k=5, metric="euclidean", normalise=False): self.__get_tools() - super().__init__(k=k, metric=metric, normalise=normalise) + super().__init__(k=k, metric=metric, normalise=normalise,LazyTensor=LazyTensor) def __get_tools(self): self.tools = numpytools diff --git a/pykeops/torch/nn/ivf.py b/pykeops/torch/nn/ivf.py index 5fc43224b..b065138c0 100644 --- a/pykeops/torch/nn/ivf.py +++ b/pykeops/torch/nn/ivf.py @@ -9,7 +9,7 @@ class IVF(GenericIVF): def __init__(self, k=5, metric="euclidean", normalise=False): self.__get_tools() - super().__init__(k=k, metric=metric, normalise=normalise) + super().__init__(k=k, metric=metric, normalise=normalise,LazyTensor=LazyTensor) def __get_tools(self): self.tools = torchtools From 86c2380accfac9936eb588634c4b9d3a47d8bb70 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Thu, 1 Apr 2021 14:47:12 +0100 Subject: [PATCH 026/111] black --- pykeops/common/ivf.py | 13 +++++++++---- pykeops/numpy/nn/ivf.py | 2 +- pykeops/torch/nn/ivf.py | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pykeops/common/ivf.py b/pykeops/common/ivf.py index 6ffcd4eec..4b1031ad7 100644 --- a/pykeops/common/ivf.py +++ b/pykeops/common/ivf.py @@ -1,16 +1,21 @@ class GenericIVF: - def __init__(self, k, metric, normalise,LazyTensor): + def __init__(self, k, metric, normalise, LazyTensor): self.__k = k self.__normalise = normalise self.__distance = self.tools.distance_function(metric) self.__metric = metric - self.__LazyTensor=LazyTensor + self.__LazyTensor = LazyTensor + def __get_tools(self): pass def __k_argmin(self, x, y, k=1): - x_LT = self.__LazyTensor(self.tools.to(self.tools.unsqueeze(x, 1), self.__device)) - y_LT = self.__LazyTensor(self.tools.to(self.tools.unsqueeze(y, 0), self.__device)) + x_LT = self.__LazyTensor( + self.tools.to(self.tools.unsqueeze(x, 1), self.__device) + ) + y_LT = self.__LazyTensor( + self.tools.to(self.tools.unsqueeze(y, 0), self.__device) + ) d = self.__distance(x_LT, y_LT) if not self.tools.is_tensor(x): diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index b53217268..3427da84d 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -10,7 +10,7 @@ class IVF(GenericIVF): def __init__(self, k=5, metric="euclidean", normalise=False): self.__get_tools() - super().__init__(k=k, metric=metric, normalise=normalise,LazyTensor=LazyTensor) + super().__init__(k=k, metric=metric, normalise=normalise, LazyTensor=LazyTensor) def 
__get_tools(self): self.tools = numpytools diff --git a/pykeops/torch/nn/ivf.py b/pykeops/torch/nn/ivf.py index b065138c0..3eacb8e24 100644 --- a/pykeops/torch/nn/ivf.py +++ b/pykeops/torch/nn/ivf.py @@ -9,7 +9,7 @@ class IVF(GenericIVF): def __init__(self, k=5, metric="euclidean", normalise=False): self.__get_tools() - super().__init__(k=k, metric=metric, normalise=normalise,LazyTensor=LazyTensor) + super().__init__(k=k, metric=metric, normalise=normalise, LazyTensor=LazyTensor) def __get_tools(self): self.tools = torchtools From 876525728bed4d0e48e7192145cec82bde27bdd5 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Thu, 1 Apr 2021 14:57:53 +0100 Subject: [PATCH 027/111] add clustering functions as input --- pykeops/common/ivf.py | 12 ++++++++---- pykeops/numpy/nn/ivf.py | 9 ++++++++- pykeops/torch/nn/ivf.py | 9 ++++++++- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/pykeops/common/ivf.py b/pykeops/common/ivf.py index 4b1031ad7..45d6fdc41 100644 --- a/pykeops/common/ivf.py +++ b/pykeops/common/ivf.py @@ -1,10 +1,14 @@ class GenericIVF: - def __init__(self, k, metric, normalise, LazyTensor): + def __init__( + self, k, metric, normalise, LazyTensor, cluster_ranges_centroids, from_matrix + ): self.__k = k self.__normalise = normalise self.__distance = self.tools.distance_function(metric) self.__metric = metric self.__LazyTensor = LazyTensor + self.__cluster_ranges_centroids = cluster_ranges_centroids + self.__from_matrix = from_matrix def __get_tools(self): pass @@ -71,7 +75,7 @@ def _fit(self, x, clusters=50, a=5, Niter=15, device=None, backend=None): cl = self.__assign(x) ncl = self.__k_argmin(c, c, k=a) - self.__x_ranges, _, _ = cluster_ranges_centroids(x, cl) + self.__x_ranges, _, _ = self.__cluster_ranges_centroids(x, cl) x, x_labels = self.__sort_clusters(x, cl, store_x=True) self.__x = x @@ -107,13 +111,13 @@ def _kneighbors(self, y): y = self.tools.contiguous(y) y_labels = self.__assign(y) - y_ranges, _, _ = cluster_ranges_centroids(y, y_labels) + y_ranges, _, _ = self.__cluster_ranges_centroids(y, y_labels) self.__y_ranges = y_ranges y, y_labels = self.__sort_clusters(y, y_labels, store_x=False) x_LT = self.__LazyTensor(self.tools.unsqueeze(self.__x, 0)) y_LT = self.__LazyTensor(self.tools.unsqueeze(y, 1)) D_ij = self.__distance(y_LT, x_LT) - ranges_ij = from_matrix(y_ranges, self.__x_ranges, self.__keep) + ranges_ij = self.__from_matrix(y_ranges, self.__x_ranges, self.__keep) D_ij.ranges = ranges_ij nn = D_ij.argKmin(K=self.__k, axis=1) return self.__unsort(nn) diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index 3427da84d..d01c6a65f 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -10,7 +10,14 @@ class IVF(GenericIVF): def __init__(self, k=5, metric="euclidean", normalise=False): self.__get_tools() - super().__init__(k=k, metric=metric, normalise=normalise, LazyTensor=LazyTensor) + super().__init__( + k=k, + metric=metric, + normalise=normalise, + LazyTensor=LazyTensor, + cluster_ranges_centroids=cluster_ranges_centroids, + from_matrix=from_matrix, + ) def __get_tools(self): self.tools = numpytools diff --git a/pykeops/torch/nn/ivf.py b/pykeops/torch/nn/ivf.py index 3eacb8e24..48eb90d60 100644 --- a/pykeops/torch/nn/ivf.py +++ b/pykeops/torch/nn/ivf.py @@ -9,7 +9,14 @@ class IVF(GenericIVF): def __init__(self, k=5, metric="euclidean", normalise=False): self.__get_tools() - super().__init__(k=k, metric=metric, normalise=normalise, LazyTensor=LazyTensor) + super().__init__( + k=k, + 
metric=metric, + normalise=normalise, + LazyTensor=LazyTensor, + cluster_ranges_centroids=cluster_ranges_centroids, + from_matrix=from_matrix, + ) def __get_tools(self): self.tools = torchtools From 838f68a7827ed9d01f6f6575062765a03d9e0aa1 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Thu, 1 Apr 2021 15:03:28 +0100 Subject: [PATCH 028/111] added unused device to np utils zeros --- pykeops/numpy/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 2161728b6..184461d48 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -75,7 +75,7 @@ def randn(m, n, dtype=default_dtype): return np.random.randn(m, n).astype(dtype) @staticmethod - def zeros(shape, dtype=default_dtype): + def zeros(shape, dtype=default_dtype, device=None): return np.zeros(shape).astype(dtype) @staticmethod From 96536b3bef6b4616d09b42deb8753412b92279fe Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Mon, 5 Apr 2021 15:12:57 +0100 Subject: [PATCH 029/111] updated utils --- pykeops/numpy/utils.py | 31 ++++++++++++++++++++++++++----- pykeops/torch/utils.py | 24 ++++++++++++++++++++---- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 184461d48..ca675a44c 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -2,7 +2,16 @@ from pykeops.numpy import Genred, default_dtype, KernelSolve from pykeops.numpy.cluster import swap_axes as np_swap_axes +from pykeops.numpy.cluster import grid_cluster as np_grid_cluster +from pykeops.numpy.cluster import from_matrix as np_from_matrix +from pykeops.numpy.cluster import ( + cluster_ranges_centroids as np_cluster_ranges_centroids, +) +from pykeops.numpy.cluster import cluster_ranges as np_cluster_ranges +from pykeops.numpy.cluster import sort_clusters as np_sort_clusters + import pykeops.config +from pykeops.numpy import LazyTensor class numpytools: @@ -10,13 +19,18 @@ class numpytools: arraysum = np.sum exp = np.exp log = np.log + sqrt = np.sqrt + copy = np.copy + Genred = Genred KernelSolve = KernelSolve + LazyTensor = LazyTensor swap_axes = np_swap_axes - - @staticmethod - def copy(x): - return np.copy(x) + grid_cluster = np_grid_cluster + from_matrix = np_from_matrix + cluster_ranges_centroids = np_cluster_ranges_centroids + cluster_ranges = np_cluster_ranges + sort_clusters = np_sort_clusters @staticmethod def eq(x, y): @@ -146,7 +160,6 @@ def norm(x, p=2, dim=-1): @staticmethod def kmeans(x, K=10, Niter=15, metric="euclidean", device="CPU"): - from pykeops.numpy import LazyTensor distance = numpytools.distance_function(metric) N, D = x.shape @@ -162,6 +175,14 @@ def kmeans(x, K=10, Niter=15, metric="euclidean", device="CPU"): c[:, d] = np.bincount(cl, weights=x[:, d]) / Ncl return cl, c + @staticmethod + def is_tensor(x): + return isinstance(x, np.ndarray) + + @staticmethod + def LazyTensor(x): + return LazyTensor(x) + def squared_distances(x, y): x_norm = (x ** 2).sum(1).reshape(-1, 1) diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py index c8c0488cb..9eeb71ccb 100644 --- a/pykeops/torch/utils.py +++ b/pykeops/torch/utils.py @@ -2,7 +2,14 @@ from pykeops.torch import Genred, KernelSolve, default_dtype from pykeops.torch.cluster import swap_axes as torch_swap_axes - +from pykeops.torch import LazyTensor +from pykeops.torch.cluster import grid_cluster as torch_grid_cluster +from pykeops.torch.cluster import from_matrix as torch_from_matrix +from 
pykeops.torch.cluster import ( + cluster_ranges_centroids as torch_cluster_ranges_centroids, +) +from pykeops.torch.cluster import cluster_ranges as torch_cluster_ranges +from pykeops.torch.cluster import sort_clusters as torch_sort_clusters # from pykeops.torch.generic.generic_red import GenredLowlevel @@ -16,11 +23,17 @@ class torchtools: exp = torch.exp log = torch.log norm = torch.norm - - swap_axes = torch_swap_axes + sqrt = torch.sqrt Genred = Genred KernelSolve = KernelSolve + LazyTensor = LazyTensor + swap_axes = torch_swap_axes + grid_cluster = torch_grid_cluster + from_matrix = torch_from_matrix + cluster_ranges_centroids = torch_cluster_ranges_centroids + cluster_ranges = torch_cluster_ranges + sort_clusters = torch_sort_clusters # GenredLowlevel = GenredLowlevel @@ -184,7 +197,6 @@ def norm(x, p=2, dim=-1): @staticmethod def kmeans(x, K=10, Niter=15, metric="euclidean", device="cuda"): - from pykeops.torch import LazyTensor distance = torchtools.distance_function(metric) N, D = x.shape @@ -202,6 +214,10 @@ def kmeans(x, K=10, Niter=15, metric="euclidean", device="cuda"): raise ValueError("Please normalise inputs") return cl, c + @staticmethod + def is_tensor(x): + return isinstance(x, torch.Tensor) + def squared_distances(x, y): x_norm = (x ** 2).sum(1).reshape(-1, 1) From f627cf8e82e3597994eec9224abc708ae204a02b Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Mon, 5 Apr 2021 15:45:47 +0100 Subject: [PATCH 030/111] added utils --- pykeops/numpy/utils.py | 20 +++++++++++--------- pykeops/torch/utils.py | 10 +++++----- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index ca675a44c..5db2bbbf9 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -20,7 +20,6 @@ class numpytools: exp = np.exp log = np.log sqrt = np.sqrt - copy = np.copy Genred = Genred KernelSolve = KernelSolve @@ -32,6 +31,17 @@ class numpytools: cluster_ranges = np_cluster_ranges sort_clusters = np_sort_clusters + arraytype = np.ndarray + float_types = [float, np.float16, np.float32, np.float64] + + @staticmethod + def is_tensor(x): + return isinstance(x, np.ndarray) + + @staticmethod + def copy(x): + return np.copy(x) + @staticmethod def eq(x, y): return np.equal(x, y) @@ -154,10 +164,6 @@ def to(x, device): def index_select(input, dim, index): return np.take(input, index, axis=dim) - @staticmethod - def norm(x, p=2, dim=-1): - return np.linalg.norm(x, ord=p, axis=dim) - @staticmethod def kmeans(x, K=10, Niter=15, metric="euclidean", device="CPU"): @@ -175,10 +181,6 @@ def kmeans(x, K=10, Niter=15, metric="euclidean", device="CPU"): c[:, d] = np.bincount(cl, weights=x[:, d]) / Ncl return cl, c - @staticmethod - def is_tensor(x): - return isinstance(x, np.ndarray) - @staticmethod def LazyTensor(x): return LazyTensor(x) diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py index 9eeb71ccb..3e01c4e9d 100644 --- a/pykeops/torch/utils.py +++ b/pykeops/torch/utils.py @@ -25,16 +25,20 @@ class torchtools: norm = torch.norm sqrt = torch.sqrt + swap_axes = torch_swap_axes + Genred = Genred KernelSolve = KernelSolve LazyTensor = LazyTensor - swap_axes = torch_swap_axes grid_cluster = torch_grid_cluster from_matrix = torch_from_matrix cluster_ranges_centroids = torch_cluster_ranges_centroids cluster_ranges = torch_cluster_ranges sort_clusters = torch_sort_clusters + arraytype = torch.Tensor + float_types = [float] + # GenredLowlevel = GenredLowlevel @staticmethod @@ -191,10 +195,6 @@ def to(x, device): def index_select(input, dim, 
index): return torch.index_select(input, dim, index) - @staticmethod - def norm(x, p=2, dim=-1): - return torch.norm(x, p=p, dim=dim) - @staticmethod def kmeans(x, K=10, Niter=15, metric="euclidean", device="cuda"): From 14774a1ed69ead60f13143adc660e1512548e489 Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Mon, 5 Apr 2021 16:26:09 +0100 Subject: [PATCH 031/111] added numpy utils --- pykeops/numpy/utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 5db2bbbf9..db68779ec 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -20,20 +20,18 @@ class numpytools: exp = np.exp log = np.log sqrt = np.sqrt - Genred = Genred KernelSolve = KernelSolve LazyTensor = LazyTensor swap_axes = np_swap_axes + arraytype = np.ndarray + float_types = [float, np.float16, np.float32, np.float64] grid_cluster = np_grid_cluster from_matrix = np_from_matrix cluster_ranges_centroids = np_cluster_ranges_centroids cluster_ranges = np_cluster_ranges sort_clusters = np_sort_clusters - arraytype = np.ndarray - float_types = [float, np.float16, np.float32, np.float64] - @staticmethod def is_tensor(x): return isinstance(x, np.ndarray) From 5e8b39eb2a81f2e8b297fd493806adee86645344 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Mon, 5 Apr 2021 16:34:36 +0100 Subject: [PATCH 032/111] testing rearranging np utils --- pykeops/numpy/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index db68779ec..cefee5879 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -23,14 +23,15 @@ class numpytools: Genred = Genred KernelSolve = KernelSolve LazyTensor = LazyTensor - swap_axes = np_swap_axes - arraytype = np.ndarray - float_types = [float, np.float16, np.float32, np.float64] grid_cluster = np_grid_cluster from_matrix = np_from_matrix cluster_ranges_centroids = np_cluster_ranges_centroids cluster_ranges = np_cluster_ranges sort_clusters = np_sort_clusters + swap_axes = np_swap_axes + arraytype = np.ndarray + float_types = [float, np.float16, np.float32, np.float64] + @staticmethod def is_tensor(x): From 6e8043665a0c70b459d5eb22f4fb88ec2d186396 Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Mon, 5 Apr 2021 16:35:17 +0100 Subject: [PATCH 033/111] added numpy utils --- pykeops/numpy/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index db68779ec..cbad6e670 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -2,8 +2,8 @@ from pykeops.numpy import Genred, default_dtype, KernelSolve from pykeops.numpy.cluster import swap_axes as np_swap_axes -from pykeops.numpy.cluster import grid_cluster as np_grid_cluster -from pykeops.numpy.cluster import from_matrix as np_from_matrix +from pykeops.numpy.cluster.grid_cluster import grid_cluster as np_grid_cluster +from pykeops.numpy.cluster.matrix import from_matrix as np_from_matrix from pykeops.numpy.cluster import ( cluster_ranges_centroids as np_cluster_ranges_centroids, ) From 32a3fe7b21ea99addbc8c12768e49e448e786565 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Mon, 5 Apr 2021 16:35:59 +0100 Subject: [PATCH 034/111] remove 1 space --- pykeops/numpy/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index cefee5879..9a27ea4c1 100644 --- a/pykeops/numpy/utils.py +++ 
b/pykeops/numpy/utils.py @@ -32,7 +32,6 @@ class numpytools: arraytype = np.ndarray float_types = [float, np.float16, np.float32, np.float64] - @staticmethod def is_tensor(x): return isinstance(x, np.ndarray) From 83d2e646b917d1b8a50d78abccfd79061e942089 Mon Sep 17 00:00:00 2001 From: Anna Hledikova Date: Tue, 6 Apr 2021 11:26:09 +0100 Subject: [PATCH 035/111] removed LazyTensor from utils --- pykeops/numpy/utils.py | 7 +------ pykeops/torch/utils.py | 3 +-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 77b148cb3..de5eb8794 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -11,7 +11,6 @@ from pykeops.numpy.cluster import sort_clusters as np_sort_clusters import pykeops.config -from pykeops.numpy import LazyTensor class numpytools: @@ -22,7 +21,6 @@ class numpytools: sqrt = np.sqrt Genred = Genred KernelSolve = KernelSolve - LazyTensor = LazyTensor grid_cluster = np_grid_cluster from_matrix = np_from_matrix cluster_ranges_centroids = np_cluster_ranges_centroids @@ -164,6 +162,7 @@ def index_select(input, dim, index): @staticmethod def kmeans(x, K=10, Niter=15, metric="euclidean", device="CPU"): + from pykeops.numpy import LazyTensor distance = numpytools.distance_function(metric) N, D = x.shape @@ -179,10 +178,6 @@ def kmeans(x, K=10, Niter=15, metric="euclidean", device="CPU"): c[:, d] = np.bincount(cl, weights=x[:, d]) / Ncl return cl, c - @staticmethod - def LazyTensor(x): - return LazyTensor(x) - def squared_distances(x, y): x_norm = (x ** 2).sum(1).reshape(-1, 1) diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py index 3e01c4e9d..2fc0bab13 100644 --- a/pykeops/torch/utils.py +++ b/pykeops/torch/utils.py @@ -2,7 +2,6 @@ from pykeops.torch import Genred, KernelSolve, default_dtype from pykeops.torch.cluster import swap_axes as torch_swap_axes -from pykeops.torch import LazyTensor from pykeops.torch.cluster import grid_cluster as torch_grid_cluster from pykeops.torch.cluster import from_matrix as torch_from_matrix from pykeops.torch.cluster import ( @@ -29,7 +28,6 @@ class torchtools: Genred = Genred KernelSolve = KernelSolve - LazyTensor = LazyTensor grid_cluster = torch_grid_cluster from_matrix = torch_from_matrix cluster_ranges_centroids = torch_cluster_ranges_centroids @@ -197,6 +195,7 @@ def index_select(input, dim, index): @staticmethod def kmeans(x, K=10, Niter=15, metric="euclidean", device="cuda"): + from pykeops.torch import LazyTensor distance = torchtools.distance_function(metric) N, D = x.shape From 5c00d65b5659f1310db74e3af0bdd70a8d7fbe9b Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 6 Apr 2021 16:23:21 +0100 Subject: [PATCH 036/111] update to add kmeans optimisation approximation --- pykeops/common/ivf.py | 69 ++++++++++++++++++++++++++++++++--------- pykeops/numpy/nn/ivf.py | 11 +++---- pykeops/numpy/utils.py | 7 ++--- pykeops/torch/nn/ivf.py | 17 +++------- pykeops/torch/utils.py | 5 +++ 5 files changed, 71 insertions(+), 38 deletions(-) diff --git a/pykeops/common/ivf.py b/pykeops/common/ivf.py index 45d6fdc41..68ef6be6a 100644 --- a/pykeops/common/ivf.py +++ b/pykeops/common/ivf.py @@ -1,14 +1,33 @@ class GenericIVF: - def __init__( - self, k, metric, normalise, LazyTensor, cluster_ranges_centroids, from_matrix - ): + def __init__(self, k, metric, normalise, LazyTensor): self.__k = k self.__normalise = normalise - self.__distance = self.tools.distance_function(metric) - self.__metric = metric + + 
self.__update_metric(metric) self.__LazyTensor = LazyTensor - self.__cluster_ranges_centroids = cluster_ranges_centroids - self.__from_matrix = from_matrix + + self.__c = None + + def __update_metric(self, metric): + if isinstance(metric, str): + self.__distance = self.tools.distance_function(metric) + self.__metric = metric + elif callable(metric): + self.__distance = metric + self.__metric = "custom" + else: + raise ValueError("Unrecognised metric input type") + + @property + def metric(self): + return self.__metric + + @property + def c(self): + if self.__c is not None: + return self.__c + else: + raise ValueError("Run .fit() first!") def __get_tools(self): pass @@ -42,7 +61,17 @@ def __sort_clusters(self, x, lab, store_x=True): def __unsort(self, nn): return self.tools.index_select(self.__x_perm[nn], 0, self.__y_perm.argsort()) - def _fit(self, x, clusters=50, a=5, Niter=15, device=None, backend=None): + def _fit( + self, + x, + clusters=50, + a=5, + Niter=15, + device=None, + backend=None, + approx=False, + n=50, + ): """ Fits the main dataset """ @@ -62,26 +91,36 @@ def _fit(self, x, clusters=50, a=5, Niter=15, device=None, backend=None): x = x / self.tools.repeat(self.tools.norm(x, 2, -1), x.shape[1]).reshape( -1, x.shape[1] ) + + # if we want to use the approximation in Kmeans, and our metric is angular, switch to full angular metric + if approx and self.__metric == "angular": + self.__update_metric("angular_full") + x = self.tools.contiguous(x) self.__device = device self.__backend = backend cl, c = self.tools.kmeans( - x, clusters, Niter=Niter, metric=self.__metric, device=self.__device + x, + self.__distance, + clusters, + Niter=Niter, + device=self.__device, + approx=approx, + normalise=self.__normalise, ) self.__c = c - cl = self.__assign(x) ncl = self.__k_argmin(c, c, k=a) - self.__x_ranges, _, _ = self.__cluster_ranges_centroids(x, cl) + self.__x_ranges, _, _ = self.tools.cluster_ranges_centroids(x, cl) x, x_labels = self.__sort_clusters(x, cl, store_x=True) self.__x = x r = self.tools.repeat(self.tools.arange(clusters, device=self.__device), a) - self.__keep = self.tools.zeros( - [clusters, clusters], dtype=bool, device=self.__device + self.__keep = self.tools.to( + self.tools.zeros([clusters, clusters], dtype=bool), self.__device ) self.__keep[r, ncl.flatten()] = True @@ -111,13 +150,13 @@ def _kneighbors(self, y): y = self.tools.contiguous(y) y_labels = self.__assign(y) - y_ranges, _, _ = self.__cluster_ranges_centroids(y, y_labels) + y_ranges, _, _ = self.tools.cluster_ranges_centroids(y, y_labels) self.__y_ranges = y_ranges y, y_labels = self.__sort_clusters(y, y_labels, store_x=False) x_LT = self.__LazyTensor(self.tools.unsqueeze(self.__x, 0)) y_LT = self.__LazyTensor(self.tools.unsqueeze(y, 1)) D_ij = self.__distance(y_LT, x_LT) - ranges_ij = self.__from_matrix(y_ranges, self.__x_ranges, self.__keep) + ranges_ij = self.tools.from_matrix(y_ranges, self.__x_ranges, self.__keep) D_ij.ranges = ranges_ij nn = D_ij.argKmin(K=self.__k, axis=1) return self.__unsort(nn) diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index d01c6a65f..34bde1fc1 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -1,9 +1,6 @@ from pykeops.numpy import LazyTensor -from pykeops.numpy.cluster import cluster_ranges_centroids -from pykeops.numpy.cluster import from_matrix from pykeops.common.ivf import GenericIVF from pykeops.numpy.utils import numpytools - import numpy as np @@ -14,15 +11,15 @@ def __init__(self, k=5, metric="euclidean", normalise=False): k=k, 
metric=metric, normalise=normalise, - LazyTensor=LazyTensor, - cluster_ranges_centroids=cluster_ranges_centroids, - from_matrix=from_matrix, + LazyTensor=LazyTensor ) def __get_tools(self): self.tools = numpytools - def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU"): + def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU", approx=False): + if approx: + raise ValueError("Approximation not supported for numpy") if type(x) != np.ndarray: raise ValueError("Input dataset must be a np array") return self._fit(x, clusters=clusters, a=a, Niter=Niter, backend=backend) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 9a27ea4c1..f4f90c8bb 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -123,9 +123,6 @@ def manhattan(x, y): def angular(x, y): return x | y - def hyperbolic(x, y): - return ((x - y) ** 2).sum(-1) / (x[0] * y[0]) - if metric == "euclidean": return euclidean elif metric == "manhattan": @@ -133,7 +130,9 @@ def hyperbolic(x, y): elif metric == "angular": return angular elif metric == "hyperbolic": - return hyperbolic + raise ValueError( + "Hyperbolic not supported for numpy, please use torch version with approximation" + ) else: raise ValueError("Unknown metric") diff --git a/pykeops/torch/nn/ivf.py b/pykeops/torch/nn/ivf.py index 48eb90d60..9d7bf05db 100644 --- a/pykeops/torch/nn/ivf.py +++ b/pykeops/torch/nn/ivf.py @@ -1,6 +1,4 @@ from pykeops.torch import LazyTensor -from pykeops.torch.cluster import cluster_ranges_centroids -from pykeops.torch.cluster import from_matrix from pykeops.common.ivf import GenericIVF from pykeops.torch.utils import torchtools import torch @@ -9,22 +7,17 @@ class IVF(GenericIVF): def __init__(self, k=5, metric="euclidean", normalise=False): self.__get_tools() - super().__init__( - k=k, - metric=metric, - normalise=normalise, - LazyTensor=LazyTensor, - cluster_ranges_centroids=cluster_ranges_centroids, - from_matrix=from_matrix, - ) + super().__init__(k=k, metric=metric, normalise=normalise, LazyTensor=LazyTensor) def __get_tools(self): self.tools = torchtools - def fit(self, x, clusters=50, a=5, Niter=15): + def fit(self, x, clusters=50, a=5, Niter=15, approx=False, n=50): if type(x) != torch.Tensor: raise ValueError("Input dataset must be a torch tensor") - return self._fit(x, clusters=clusters, a=a, Niter=Niter, device=x.device) + return self._fit( + x, clusters=clusters, a=a, Niter=Niter, device=x.device, approx=approx, n=n + ) def kneighbors(self, y): if type(y) != torch.Tensor: diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py index 3e01c4e9d..f3a9b6552 100644 --- a/pykeops/torch/utils.py +++ b/pykeops/torch/utils.py @@ -155,6 +155,9 @@ def manhattan(x, y): def angular(x, y): return x | y + def angular_full(x, y): + return angular(x, y) / ((angular(x, x) * angular(y, y)).sqrt()) + def hyperbolic(x, y): return ((x - y) ** 2).sum(-1) / (x[0] * y[0]) @@ -164,6 +167,8 @@ def hyperbolic(x, y): return manhattan elif metric == "angular": return angular + elif metric == "angular_full": + return angular_full elif metric == "hyperbolic": return hyperbolic else: From 46ba1fc3bf313a7ba6ff0f2a57b07d3c1f59b6d6 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 6 Apr 2021 16:41:43 +0100 Subject: [PATCH 037/111] changing kmeans inputs --- pykeops/numpy/nn/ivf.py | 2 +- pykeops/numpy/utils.py | 2 +- pykeops/torch/utils.py | 68 ++++++++++++++++++++++++++++++----------- 3 files changed, 52 insertions(+), 20 deletions(-) diff --git a/pykeops/numpy/nn/ivf.py 
b/pykeops/numpy/nn/ivf.py index 34bde1fc1..81472ed9a 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -21,7 +21,7 @@ def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU", approx=False): if approx: raise ValueError("Approximation not supported for numpy") if type(x) != np.ndarray: - raise ValueError("Input dataset must be a np array") + raise ValueError("Input dataset must be np array") return self._fit(x, clusters=clusters, a=a, Niter=Niter, backend=backend) def kneighbors(self, y): diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index d0e75bbe3..56a501a1d 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -160,7 +160,7 @@ def index_select(input, dim, index): return np.take(input, index, axis=dim) @staticmethod - def kmeans(x, K=10, Niter=15, metric="euclidean", device="CPU"): + def kmeans(x, distance, K=10, Niter=15, device="CPU"): from pykeops.numpy import LazyTensor distance = numpytools.distance_function(metric) diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py index deba3e56f..1eaf67c4b 100644 --- a/pykeops/torch/utils.py +++ b/pykeops/torch/utils.py @@ -198,25 +198,57 @@ def to(x, device): def index_select(input, dim, index): return torch.index_select(input, dim, index) - @staticmethod - def kmeans(x, K=10, Niter=15, metric="euclidean", device="cuda"): - from pykeops.torch import LazyTensor - - distance = torchtools.distance_function(metric) - N, D = x.shape - c = x[:K, :].clone() - x_i = LazyTensor(x.view(N, 1, D).to(device)) - for i in range(Niter): - c_j = LazyTensor(c.view(1, K, D).to(device)) - D_ij = distance(x_i, c_j) - cl = D_ij.argmin(dim=1).long().view(-1) - c.zero_() - c.scatter_add_(0, cl[:, None].repeat(1, D), x) + @staticmethod + def kmeans(x,distance,K=10,Niter=15,device='cuda',approx=False,n=10,normalise=False): + + from pykeops.torch import LazyTensor + + def calc_centroid(x,c,cl,n=10): + "Helper function to optimise centroid location" + c=torch.clone(c.detach()).to(device) + c.requires_grad=True + x1=LazyTensor(x.unsqueeze(0)) + op=torch.optim.Adam([c],lr=1/n) + scaling=1/torch.gather(torch.bincount(cl),0,cl).view(-1,1) + scaling.requires_grad=False + with torch.autograd.set_detect_anomaly(True): + for _ in range(n): + c.requires_grad=True + op.zero_grad() + c1=LazyTensor(torch.index_select(c,0,cl).unsqueeze(0)) + d=distance(x1,c1) + loss=(d.sum(0) * scaling).sum() #calculate distance to centroid for each datapoint, divide by total number of points in that cluster, and sum + loss.backward(retain_graph=False) + op.step() + if normalise: + with torch.no_grad(): + c=c/torch.norm(c,dim=-1).repeat_interleave(c.shape[1]).reshape(-1,c.shape[1]) #normalising centroids to have norm 1 + return c.detach() + + N, D = x.shape + c = x[:K, :].clone() + x_i = LazyTensor(x.view(N, 1, D).to(device)) + + for i in range(Niter): + c_j = LazyTensor(c.view(1, K, D).to(device)) + D_ij=distance(x_i,c_j) + cl = D_ij.argmin(dim=1).long().view(-1) + + #updating c: either with approximation or exact + if approx: + #approximate with GD optimisation + c=calc_centroid(x,c,cl,n) + + else: + #exact from average + c.zero_() + c.scatter_add_(0, cl[:, None].repeat(1, D), x) Ncl = torch.bincount(cl, minlength=K).type_as(c).view(K, 1) - c /= Ncl - if torch.any(torch.isnan(c)) and metric == "angular": - raise ValueError("Please normalise inputs") - return cl, c + c /= Ncl + + if torch.any(torch.isnan(c)): + raise ValueError("NaN detected in centroids during KMeans, please check metric is correct") + return cl, c @staticmethod def 
is_tensor(x): From 9ea0af67cbe91756f26b389fc8e08b9b1053a764 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 6 Apr 2021 16:51:42 +0100 Subject: [PATCH 038/111] typo --- pykeops/common/ivf.py | 1 + pykeops/torch/utils.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pykeops/common/ivf.py b/pykeops/common/ivf.py index 68ef6be6a..29ed3ee7e 100644 --- a/pykeops/common/ivf.py +++ b/pykeops/common/ivf.py @@ -108,6 +108,7 @@ def _fit( device=self.__device, approx=approx, normalise=self.__normalise, + n=n, ) self.__c = c diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py index 1eaf67c4b..d61e8f5c6 100644 --- a/pykeops/torch/utils.py +++ b/pykeops/torch/utils.py @@ -198,7 +198,7 @@ def to(x, device): def index_select(input, dim, index): return torch.index_select(input, dim, index) - @staticmethod + @staticmethod def kmeans(x,distance,K=10,Niter=15,device='cuda',approx=False,n=10,normalise=False): from pykeops.torch import LazyTensor From 0337e7f223483d6ea74a7a7762436af1a9d75b79 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 6 Apr 2021 16:59:52 +0100 Subject: [PATCH 039/111] edit spacing to match --- pykeops/numpy/nn/ivf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index 81472ed9a..5f79184f4 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -18,7 +18,7 @@ def __get_tools(self): self.tools = numpytools def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU", approx=False): - if approx: + if approx: raise ValueError("Approximation not supported for numpy") if type(x) != np.ndarray: raise ValueError("Input dataset must be np array") From 8fcdade030a38a42af37fe4026edd50f6bc46e33 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 6 Apr 2021 17:43:18 +0100 Subject: [PATCH 040/111] change tab to space --- pykeops/numpy/nn/ivf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index 5f79184f4..1c87880f0 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -19,7 +19,7 @@ def __get_tools(self): def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU", approx=False): if approx: - raise ValueError("Approximation not supported for numpy") + raise ValueError("Approximation not supported for numpy") if type(x) != np.ndarray: raise ValueError("Input dataset must be np array") return self._fit(x, clusters=clusters, a=a, Niter=Niter, backend=backend) From b6a4c681eec6dbaabf4716b79ac0b733e6b1a433 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 6 Apr 2021 17:54:15 +0100 Subject: [PATCH 041/111] add dummy inputs to np kmeans --- pykeops/numpy/nn/ivf.py | 7 +------ pykeops/numpy/utils.py | 4 +++- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index 1c87880f0..243cb5592 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -7,12 +7,7 @@ class IVF(GenericIVF): def __init__(self, k=5, metric="euclidean", normalise=False): self.__get_tools() - super().__init__( - k=k, - metric=metric, - normalise=normalise, - LazyTensor=LazyTensor - ) + super().__init__(k=k, metric=metric, normalise=normalise, LazyTensor=LazyTensor) def __get_tools(self): self.tools = numpytools diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 
56a501a1d..217ab020b 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -160,7 +160,9 @@ def index_select(input, dim, index): return np.take(input, index, axis=dim) @staticmethod - def kmeans(x, distance, K=10, Niter=15, device="CPU"): + def kmeans(x, distance, K=10, Niter=15, device="CPU", approx=False, n=0): + if approx: + raise ValueError("Approx not supported on numpy version") from pykeops.numpy import LazyTensor distance = numpytools.distance_function(metric) From ad342f426b6a4071f7250d27f8cace489f733023 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 6 Apr 2021 18:17:17 +0100 Subject: [PATCH 042/111] remove normalising in kmeans --- pykeops/common/ivf.py | 3 -- pykeops/numpy/utils.py | 4 +- pykeops/torch/utils.py | 111 ++++++++++++++++++++++------------------- 3 files changed, 64 insertions(+), 54 deletions(-) diff --git a/pykeops/common/ivf.py b/pykeops/common/ivf.py index 29ed3ee7e..094c49be1 100644 --- a/pykeops/common/ivf.py +++ b/pykeops/common/ivf.py @@ -2,10 +2,8 @@ class GenericIVF: def __init__(self, k, metric, normalise, LazyTensor): self.__k = k self.__normalise = normalise - self.__update_metric(metric) self.__LazyTensor = LazyTensor - self.__c = None def __update_metric(self, metric): @@ -107,7 +105,6 @@ def _fit( Niter=Niter, device=self.__device, approx=approx, - normalise=self.__normalise, n=n, ) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 217ab020b..f2b088ffb 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -160,7 +160,9 @@ def index_select(input, dim, index): return np.take(input, index, axis=dim) @staticmethod - def kmeans(x, distance, K=10, Niter=15, device="CPU", approx=False, n=0): + def kmeans(x, distance=None, K=10, Niter=15, device="CPU", approx=False, n=0): + if distance is None: + distance = numpytools.distance_function("euclidean") if approx: raise ValueError("Approx not supported on numpy version") from pykeops.numpy import LazyTensor diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py index d61e8f5c6..bfe100adb 100644 --- a/pykeops/torch/utils.py +++ b/pykeops/torch/utils.py @@ -199,56 +199,67 @@ def index_select(input, dim, index): return torch.index_select(input, dim, index) @staticmethod - def kmeans(x,distance,K=10,Niter=15,device='cuda',approx=False,n=10,normalise=False): - - from pykeops.torch import LazyTensor - - def calc_centroid(x,c,cl,n=10): - "Helper function to optimise centroid location" - c=torch.clone(c.detach()).to(device) - c.requires_grad=True - x1=LazyTensor(x.unsqueeze(0)) - op=torch.optim.Adam([c],lr=1/n) - scaling=1/torch.gather(torch.bincount(cl),0,cl).view(-1,1) - scaling.requires_grad=False - with torch.autograd.set_detect_anomaly(True): - for _ in range(n): - c.requires_grad=True - op.zero_grad() - c1=LazyTensor(torch.index_select(c,0,cl).unsqueeze(0)) - d=distance(x1,c1) - loss=(d.sum(0) * scaling).sum() #calculate distance to centroid for each datapoint, divide by total number of points in that cluster, and sum - loss.backward(retain_graph=False) - op.step() - if normalise: - with torch.no_grad(): - c=c/torch.norm(c,dim=-1).repeat_interleave(c.shape[1]).reshape(-1,c.shape[1]) #normalising centroids to have norm 1 - return c.detach() - - N, D = x.shape - c = x[:K, :].clone() - x_i = LazyTensor(x.view(N, 1, D).to(device)) - - for i in range(Niter): - c_j = LazyTensor(c.view(1, K, D).to(device)) - D_ij=distance(x_i,c_j) - cl = D_ij.argmin(dim=1).long().view(-1) - - #updating c: either with approximation or exact 
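Both the block removed here and its blackened replacement below implement the same idea: when a metric has no closed-form cluster mean, the K-Means centroids are refined by gradient descent on the average within-cluster distance, using Adam with a learning rate of 1/n. A standalone sketch of that update on plain tensors (hypothetical helper name; the real code works on LazyTensors):

    import torch

    def gd_centroids(x, c, cl, distance, n=10):
        # Refine centroids c for points x with cluster labels cl by running
        # n Adam steps on the sum of per-cluster mean distances.
        c = c.clone().requires_grad_(True)
        opt = torch.optim.Adam([c], lr=1 / n)
        w = 1.0 / torch.bincount(cl).float()[cl]  # 1 / size of each point's cluster
        for _ in range(n):
            opt.zero_grad()
            loss = (distance(x, c[cl]) * w).sum()
            loss.backward()
            opt.step()
        return c.detach()

    # Useful for metrics such as the hyperbolic one, which has no closed-form mean:
    hyperbolic = lambda a, b: ((a - b) ** 2).sum(-1) / (a[:, 0] * b[:, 0])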
- if approx: - #approximate with GD optimisation - c=calc_centroid(x,c,cl,n) - - else: - #exact from average - c.zero_() - c.scatter_add_(0, cl[:, None].repeat(1, D), x) - Ncl = torch.bincount(cl, minlength=K).type_as(c).view(K, 1) - c /= Ncl - - if torch.any(torch.isnan(c)): - raise ValueError("NaN detected in centroids during KMeans, please check metric is correct") - return cl, c + def kmeans(x, distance=None, K=10, Niter=15, device="cuda", approx=False, n=10): + + from pykeops.torch import LazyTensor + + if distance is None: + distance = torchtools.distance_function("euclidean") + + def calc_centroid(x, c, cl, n=10): + "Helper function to optimise centroid location" + c = torch.clone(c.detach()).to(device) + c.requires_grad = True + x1 = LazyTensor(x.unsqueeze(0)) + op = torch.optim.Adam([c], lr=1 / n) + scaling = 1 / torch.gather(torch.bincount(cl), 0, cl).view(-1, 1) + scaling.requires_grad = False + with torch.autograd.set_detect_anomaly(True): + for _ in range(n): + c.requires_grad = True + op.zero_grad() + c1 = LazyTensor(torch.index_select(c, 0, cl).unsqueeze(0)) + d = distance(x1, c1) + loss = ( + d.sum(0) * scaling + ).sum() # calculate distance to centroid for each datapoint, divide by total number of points in that cluster, and sum + loss.backward(retain_graph=False) + op.step() + if normalise: + with torch.no_grad(): + c = c / torch.norm(c, dim=-1).repeat_interleave( + c.shape[1] + ).reshape( + -1, c.shape[1] + ) # normalising centroids to have norm 1 + return c.detach() + + N, D = x.shape + c = x[:K, :].clone() + x_i = LazyTensor(x.view(N, 1, D).to(device)) + + for i in range(Niter): + c_j = LazyTensor(c.view(1, K, D).to(device)) + D_ij = distance(x_i, c_j) + cl = D_ij.argmin(dim=1).long().view(-1) + + # updating c: either with approximation or exact + if approx: + # approximate with GD optimisation + c = calc_centroid(x, c, cl, n) + + else: + # exact from average + c.zero_() + c.scatter_add_(0, cl[:, None].repeat(1, D), x) + Ncl = torch.bincount(cl, minlength=K).type_as(c).view(K, 1) + c /= Ncl + + if torch.any(torch.isnan(c)): + raise ValueError( + "NaN detected in centroids during KMeans, please check metric is correct" + ) + return cl, c @staticmethod def is_tensor(x): From 8d23e6c24a3c4fbf209602bf17336ba7dad7b796 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 6 Apr 2021 18:24:49 +0100 Subject: [PATCH 043/111] update var name --- pykeops/numpy/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index f2b088ffb..6e72f0508 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -166,8 +166,7 @@ def kmeans(x, distance=None, K=10, Niter=15, device="CPU", approx=False, n=0): if approx: raise ValueError("Approx not supported on numpy version") from pykeops.numpy import LazyTensor - - distance = numpytools.distance_function(metric) + distance = numpytools.distance_function(distance) N, D = x.shape c = np.copy(x[:K, :]) x_i = LazyTensor(x[:, None, :]) From 3fa27824d08e3be84f1ff8e625992c737e360595 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 6 Apr 2021 18:46:35 +0100 Subject: [PATCH 044/111] correction --- pykeops/numpy/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 6e72f0508..478fa96d4 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -166,7 +166,6 @@ def kmeans(x, distance=None, K=10, Niter=15, device="CPU", 
approx=False, n=0): if approx: raise ValueError("Approx not supported on numpy version") from pykeops.numpy import LazyTensor - distance = numpytools.distance_function(distance) N, D = x.shape c = np.copy(x[:K, :]) x_i = LazyTensor(x[:, None, :]) From 9429712ef44202d9d2586b713e0f54775fc5acfb Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 15:08:05 +0100 Subject: [PATCH 045/111] change angular to negative dot product --- pykeops/numpy/utils.py | 2 +- pykeops/torch/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 478fa96d4..2e944ad0e 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -119,7 +119,7 @@ def manhattan(x, y): return ((x - y).abs()).sum(-1) def angular(x, y): - return x | y + return -(x | y) if metric == "euclidean": return euclidean diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py index bfe100adb..fce97bca5 100644 --- a/pykeops/torch/utils.py +++ b/pykeops/torch/utils.py @@ -151,7 +151,7 @@ def manhattan(x, y): return ((x - y).abs()).sum(-1) def angular(x, y): - return x | y + return -(x | y) def angular_full(x, y): return angular(x, y) / ((angular(x, x) * angular(y, y)).sqrt()) From 085f071ed788245a69adb95b0d6d644dabd86665 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 15:11:59 +0100 Subject: [PATCH 046/111] add import ivf to init files --- pykeops/numpy/__init__.py | 3 +++ pykeops/torch/__init__.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/pykeops/numpy/__init__.py b/pykeops/numpy/__init__.py index b866bf90b..5af5a65b1 100644 --- a/pykeops/numpy/__init__.py +++ b/pykeops/numpy/__init__.py @@ -15,7 +15,9 @@ generic_argmin, generic_argkmin, ) + from .lazytensor.LazyTensor import LazyTensor, Vi, Vj, Pm +from .nn.ivf import IVF __all__ = sorted( [ @@ -29,5 +31,6 @@ "Vi", "Vj", "Pm", + "IVF", ] ) diff --git a/pykeops/torch/__init__.py b/pykeops/torch/__init__.py index 5396d6664..a63d0dac5 100644 --- a/pykeops/torch/__init__.py +++ b/pykeops/torch/__init__.py @@ -36,6 +36,7 @@ ) from .operations import KernelSolve from .lazytensor.LazyTensor import LazyTensor, Vi, Vj, Pm +from .nn.ivf import IVF __all__ = sorted( [ @@ -49,5 +50,6 @@ "Vi", "Vj", "Pm", + "IVF", ] ) From 785b0380bd7a16a946166d31eeb2ee4484aa4f8e Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 15:19:13 +0100 Subject: [PATCH 047/111] trying to resolve merge conflict --- pykeops/numpy/__init__.py | 6 +++--- pykeops/torch/__init__.py | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pykeops/numpy/__init__.py b/pykeops/numpy/__init__.py index 5af5a65b1..2706b4965 100644 --- a/pykeops/numpy/__init__.py +++ b/pykeops/numpy/__init__.py @@ -15,9 +15,9 @@ generic_argmin, generic_argkmin, ) - -from .lazytensor.LazyTensor import LazyTensor, Vi, Vj, Pm from .nn.ivf import IVF +from .lazytensor.LazyTensor import LazyTensor, Vi, Vj, Pm + __all__ = sorted( [ @@ -27,10 +27,10 @@ "generic_argmin", "generic_argkmin", "KernelSolve", + "IVF", "LazyTensor", "Vi", "Vj", "Pm", - "IVF", ] ) diff --git a/pykeops/torch/__init__.py b/pykeops/torch/__init__.py index a63d0dac5..0f926f70b 100644 --- a/pykeops/torch/__init__.py +++ b/pykeops/torch/__init__.py @@ -35,8 +35,9 @@ generic_argkmin, ) from .operations import KernelSolve -from .lazytensor.LazyTensor import LazyTensor, Vi, Vj, Pm from .nn.ivf import IVF +from 
.lazytensor.LazyTensor import LazyTensor, Vi, Vj, Pm + __all__ = sorted( [ @@ -46,10 +47,10 @@ "generic_argmin", "generic_argkmin", "KernelSolve", + "IVF", "LazyTensor", "Vi", "Vj", "Pm", - "IVF", ] ) From cc5c84de8620dd3d93e3da5e2efdfa4950709c05 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 15:23:11 +0100 Subject: [PATCH 048/111] moving around code --- pykeops/numpy/__init__.py | 5 ++--- pykeops/torch/__init__.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pykeops/numpy/__init__.py b/pykeops/numpy/__init__.py index 2706b4965..9f9d35d58 100644 --- a/pykeops/numpy/__init__.py +++ b/pykeops/numpy/__init__.py @@ -15,9 +15,8 @@ generic_argmin, generic_argkmin, ) +from .lazytensor.LazyTensor import LazyTensor, ComplexLazyTensor, Vi, Vj, Pm from .nn.ivf import IVF -from .lazytensor.LazyTensor import LazyTensor, Vi, Vj, Pm - __all__ = sorted( [ @@ -27,10 +26,10 @@ "generic_argmin", "generic_argkmin", "KernelSolve", - "IVF", "LazyTensor", "Vi", "Vj", "Pm", + "IVF", ] ) diff --git a/pykeops/torch/__init__.py b/pykeops/torch/__init__.py index 0f926f70b..2ba440c2a 100644 --- a/pykeops/torch/__init__.py +++ b/pykeops/torch/__init__.py @@ -36,7 +36,7 @@ ) from .operations import KernelSolve from .nn.ivf import IVF -from .lazytensor.LazyTensor import LazyTensor, Vi, Vj, Pm +from .lazytensor.LazyTensor import LazyTensor, ComplexLazyTensor, Vi, Vj, Pm __all__ = sorted( From 17435c8ae559be5feb01263e0fc55672d0d993df Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 15:25:27 +0100 Subject: [PATCH 049/111] rearrange torch init --- pykeops/torch/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pykeops/torch/__init__.py b/pykeops/torch/__init__.py index 2ba440c2a..c227e5880 100644 --- a/pykeops/torch/__init__.py +++ b/pykeops/torch/__init__.py @@ -35,9 +35,8 @@ generic_argkmin, ) from .operations import KernelSolve -from .nn.ivf import IVF from .lazytensor.LazyTensor import LazyTensor, ComplexLazyTensor, Vi, Vj, Pm - +from .nn.ivf import IVF __all__ = sorted( [ @@ -47,10 +46,10 @@ "generic_argmin", "generic_argkmin", "KernelSolve", - "IVF", "LazyTensor", "Vi", "Vj", "Pm", + "IVF", ] ) From 765279b401962fcc1e74498cfa34df4cf89eb850 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 15:34:42 +0100 Subject: [PATCH 050/111] removing space --- pykeops/torch/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pykeops/torch/__init__.py b/pykeops/torch/__init__.py index c227e5880..511a8e0ca 100644 --- a/pykeops/torch/__init__.py +++ b/pykeops/torch/__init__.py @@ -37,7 +37,6 @@ from .operations import KernelSolve from .lazytensor.LazyTensor import LazyTensor, ComplexLazyTensor, Vi, Vj, Pm from .nn.ivf import IVF - __all__ = sorted( [ "Genred", From a4e6c9b458da645e2800fad05e3d1bcefc806f00 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 15:36:14 +0100 Subject: [PATCH 051/111] Revert "removing space" This reverts commit 765279b401962fcc1e74498cfa34df4cf89eb850. 
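The invariant that this string of __init__.py reshuffles has to preserve is simply that both backends keep exporting the same public names. A hypothetical smoke test (assumes both backends import cleanly on the current machine):

    import pykeops.numpy
    import pykeops.torch

    for mod in (pykeops.numpy, pykeops.torch):
        assert "IVF" in mod.__all__
        assert "LazyTensor" in mod.__all__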
--- pykeops/torch/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pykeops/torch/__init__.py b/pykeops/torch/__init__.py index 511a8e0ca..c227e5880 100644 --- a/pykeops/torch/__init__.py +++ b/pykeops/torch/__init__.py @@ -37,6 +37,7 @@ from .operations import KernelSolve from .lazytensor.LazyTensor import LazyTensor, ComplexLazyTensor, Vi, Vj, Pm from .nn.ivf import IVF + __all__ = sorted( [ "Genred", From 4bea5b2e8e8e93867fa693e0e1428bb0c8a81079 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 15:39:29 +0100 Subject: [PATCH 052/111] add space --- pykeops/numpy/__init__.py | 1 + pykeops/torch/__init__.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pykeops/numpy/__init__.py b/pykeops/numpy/__init__.py index 9f9d35d58..8d0f3f3aa 100644 --- a/pykeops/numpy/__init__.py +++ b/pykeops/numpy/__init__.py @@ -18,6 +18,7 @@ from .lazytensor.LazyTensor import LazyTensor, ComplexLazyTensor, Vi, Vj, Pm from .nn.ivf import IVF + __all__ = sorted( [ "Genred", diff --git a/pykeops/torch/__init__.py b/pykeops/torch/__init__.py index c227e5880..7083b880d 100644 --- a/pykeops/torch/__init__.py +++ b/pykeops/torch/__init__.py @@ -38,6 +38,7 @@ from .lazytensor.LazyTensor import LazyTensor, ComplexLazyTensor, Vi, Vj, Pm from .nn.ivf import IVF + __all__ = sorted( [ "Genred", From 41ddcbaa16617bca56b75910069bf1bca059d98e Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 15:42:44 +0100 Subject: [PATCH 053/111] moving code around --- pykeops/numpy/__init__.py | 5 ++--- pykeops/torch/__init__.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pykeops/numpy/__init__.py b/pykeops/numpy/__init__.py index 8d0f3f3aa..e005c46e0 100644 --- a/pykeops/numpy/__init__.py +++ b/pykeops/numpy/__init__.py @@ -6,6 +6,7 @@ # Import pyKeOps routines +from .nn.ivf import IVF from .generic.generic_red import Genred from .operations import KernelSolve from .convolutions.radial_kernel import RadialKernelConv, RadialKernelGrad1conv @@ -16,11 +17,10 @@ generic_argkmin, ) from .lazytensor.LazyTensor import LazyTensor, ComplexLazyTensor, Vi, Vj, Pm -from .nn.ivf import IVF - __all__ = sorted( [ + "IVF", "Genred", "generic_sum", "generic_logsumexp", @@ -31,6 +31,5 @@ "Vi", "Vj", "Pm", - "IVF", ] ) diff --git a/pykeops/torch/__init__.py b/pykeops/torch/__init__.py index 7083b880d..18e3407c4 100644 --- a/pykeops/torch/__init__.py +++ b/pykeops/torch/__init__.py @@ -27,6 +27,7 @@ ########################################################## # Import pyKeOps routines +from .nn.ivf import IVF from .generic.generic_red import Genred from .generic.generic_ops import ( generic_sum, @@ -36,11 +37,10 @@ ) from .operations import KernelSolve from .lazytensor.LazyTensor import LazyTensor, ComplexLazyTensor, Vi, Vj, Pm -from .nn.ivf import IVF - __all__ = sorted( [ + "IVF", "Genred", "generic_sum", "generic_logsumexp", @@ -51,6 +51,5 @@ "Vi", "Vj", "Pm", - "IVF", ] ) From c1a79f03f28d808ea857b21da6dd05f363a898af Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 15:44:55 +0100 Subject: [PATCH 054/111] running black --- pykeops/numpy/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 2e944ad0e..1bb73f4f0 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -166,6 +166,7 @@ def kmeans(x, distance=None, K=10, Niter=15, device="CPU", approx=False, n=0): if approx: 
raise ValueError("Approx not supported on numpy version") from pykeops.numpy import LazyTensor + N, D = x.shape c = np.copy(x[:K, :]) x_i = LazyTensor(x[:, None, :]) From 03952a7218a80005286f0517cff75e92b91f8628 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 15:53:32 +0100 Subject: [PATCH 055/111] test --- pykeops/numpy/__init__.py | 2 +- pykeops/torch/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pykeops/numpy/__init__.py b/pykeops/numpy/__init__.py index e005c46e0..0c29cdd9f 100644 --- a/pykeops/numpy/__init__.py +++ b/pykeops/numpy/__init__.py @@ -16,7 +16,7 @@ generic_argmin, generic_argkmin, ) -from .lazytensor.LazyTensor import LazyTensor, ComplexLazyTensor, Vi, Vj, Pm +from .lazytensor.LazyTensor import LazyTensor, Vi, Vj, Pm __all__ = sorted( [ diff --git a/pykeops/torch/__init__.py b/pykeops/torch/__init__.py index 18e3407c4..969ea8698 100644 --- a/pykeops/torch/__init__.py +++ b/pykeops/torch/__init__.py @@ -36,7 +36,7 @@ generic_argkmin, ) from .operations import KernelSolve -from .lazytensor.LazyTensor import LazyTensor, ComplexLazyTensor, Vi, Vj, Pm +from .lazytensor.LazyTensor import LazyTensor, Vi, Vj, Pm __all__ = sorted( [ From 9de2b853fca3009ab125617caee5960bb106d7f9 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 15:58:31 +0100 Subject: [PATCH 056/111] changed import structure --- pykeops/numpy/nn/ivf.py | 5 +++-- pykeops/torch/nn/ivf.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index 243cb5592..849b96cf7 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -1,11 +1,12 @@ -from pykeops.numpy import LazyTensor from pykeops.common.ivf import GenericIVF -from pykeops.numpy.utils import numpytools import numpy as np class IVF(GenericIVF): def __init__(self, k=5, metric="euclidean", normalise=False): + from pykeops.numpy import LazyTensor + from pykeops.numpy.utils import numpytools + self.__get_tools() super().__init__(k=k, metric=metric, normalise=normalise, LazyTensor=LazyTensor) diff --git a/pykeops/torch/nn/ivf.py b/pykeops/torch/nn/ivf.py index 9d7bf05db..997c1eeee 100644 --- a/pykeops/torch/nn/ivf.py +++ b/pykeops/torch/nn/ivf.py @@ -1,11 +1,12 @@ -from pykeops.torch import LazyTensor from pykeops.common.ivf import GenericIVF -from pykeops.torch.utils import torchtools import torch class IVF(GenericIVF): def __init__(self, k=5, metric="euclidean", normalise=False): + from pykeops.torch import LazyTensor + from pykeops.torch.utils import torchtools + self.__get_tools() super().__init__(k=k, metric=metric, normalise=normalise, LazyTensor=LazyTensor) From ac33af7f4ec4f6ee0b370cdf3d711124ba1360bd Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 16:50:48 +0100 Subject: [PATCH 057/111] changed import structure again --- pykeops/numpy/nn/ivf.py | 3 ++- pykeops/torch/nn/ivf.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index 849b96cf7..a6901ba3b 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -5,12 +5,13 @@ class IVF(GenericIVF): def __init__(self, k=5, metric="euclidean", normalise=False): from pykeops.numpy import LazyTensor - from pykeops.numpy.utils import numpytools self.__get_tools() super().__init__(k=k, metric=metric, normalise=normalise, LazyTensor=LazyTensor) 
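The pattern in this hunk is worth noting: the LazyTensor and numpytools imports move from module scope into __init__, so they are only resolved once an IVF object is actually built. This avoids the circular import that the new exports would otherwise create, since pykeops/numpy/__init__.py now imports .nn.ivf while ivf.py needs pykeops.numpy.LazyTensor. A minimal sketch of the same deferred-import pattern (hypothetical module names):

    # pkg/__init__.py
    from .feature import Feature  # runs while pkg is still initialising

    # pkg/feature.py
    class Feature:
        def __init__(self):
            from pkg import Helper  # deferred: pkg is fully initialised by call time
            self.helper = Helper()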
def __get_tools(self): + from pykeops.numpy.utils import numpytools + self.tools = numpytools def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU", approx=False): diff --git a/pykeops/torch/nn/ivf.py b/pykeops/torch/nn/ivf.py index 997c1eeee..1ae70b686 100644 --- a/pykeops/torch/nn/ivf.py +++ b/pykeops/torch/nn/ivf.py @@ -5,12 +5,13 @@ class IVF(GenericIVF): def __init__(self, k=5, metric="euclidean", normalise=False): from pykeops.torch import LazyTensor - from pykeops.torch.utils import torchtools self.__get_tools() super().__init__(k=k, metric=metric, normalise=normalise, LazyTensor=LazyTensor) def __get_tools(self): + from pykeops.torch.utils import torchtools + self.tools = torchtools def fit(self, x, clusters=50, a=5, Niter=15, approx=False, n=50): From 8e1b404a58c53ad8b43725324b339c3f80c32b9b Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 19:26:35 +0100 Subject: [PATCH 058/111] adding angular full metric --- pykeops/numpy/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 1bb73f4f0..6e8972c59 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -121,12 +121,17 @@ def manhattan(x, y): def angular(x, y): return -(x | y) + def angular_full(x, y): + return angular(x, y) / ((angular(x, x) * angular(y, y)).sqrt()) + if metric == "euclidean": return euclidean elif metric == "manhattan": return manhattan elif metric == "angular": return angular + elif metric == "angular_full": + return angular_full elif metric == "hyperbolic": raise ValueError( "Hyperbolic not supported for numpy, please use torch version with approximation" From 020d4688f4ab6f1cc62a9e02f072c8c6ca9bc853 Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Fri, 9 Apr 2021 19:36:55 +0100 Subject: [PATCH 059/111] added angular, manhattan metrics to numpy test --- pykeops/test/unit_tests_numpy.py | 81 ++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index e4fd2d22f..48edba740 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -444,34 +444,63 @@ def test_IVF(self): import numpy as np np.random.seed(0) - N, D, K, k, a = 10 ** 3, 3, 50, 5, 5 + N, D, clusters, k, a = 10 ** 3, 3, 10, 5, 5 # Generate random datapoints x, y - x = 0.7 * np.random.normal(size=(N, D)) + 0.3 - y = 0.7 * np.random.normal(size=(N, D)) + 0.3 - - # Ground truth K nearest neighbours - truth = np.argsort( - ((np.expand_dims(y, 1) - np.expand_dims(x, 0)) ** 2).sum(-1), axis=1 - ) - truth = truth[:, :k] - - # IVF K nearest neighbours - IVF = IVF() - IVF.fit(x, a=a) - ivf_fit = IVF.kneighbors(y) - - # Calculate accuracy - accuracy = 0 - for i in range(k): - accuracy += float(np.sum(ivf_fit == truth)) / N - truth = np.roll( - truth, 1, -1 - ) # Create a rolling window (index positions may not match) - # Record accuracies - accuracy = float(accuracy / k) - - self.assertTrue(accuracy >= 0.8, f"Failed at {a}, {accuracy}") + x = np.random.normal(size=(N, D)) + y = np.random.normal(size=(N, D)) + + x2 = np.expand_dims(x, 0) + y2 = np.expand_dims(y, 1) + + # Metrics (hyperbolic metric not implemented for numpy) + metrics = ['euclidean','manhattan','angular','angular_full'] + + for metric in metrics: + # Inputs to IVF algorithm + normalise = False + approx = False + + # Brute force distance calculation + if metric == 
'euclidean': + distance = ((y2 - x2) ** 2).sum(-1) + elif metric == 'manhattan': + distance = np.abs(y2 - x2).sum(-1) + elif metric in {'angular','angular_full'}: + x3 = x/np.linalg.norm(x, axis=1, keepdims=True) + y3 = y/np.linalg.norm(y, axis=1, keepdims=True) + distance = -y3@(x3.T) + if metric == 'angular': + # Need to normalize data for angular metric + normalise = True + elif metric == 'hyperbolic': + # Placeholder in case hyperbolic metric is implemented in future + # Need to ensure first dimension is positive for hyperbolic metric + x += 5 + y += 5 + approx = True + distance = ((y2-x2) ** 2).sum(-1) / (np.expand_dims(x[:,0],0) * np.expand_dims(y[:,0],1)) + + # Ground truth K nearest neighbours + truth = np.argsort(distance,axis=1) + truth = truth[:, :k] + + # IVF K nearest neighbours + test = IVF(metric=metric, k=k, normalise=normalise) + test.fit(x, a=a, approx=approx, clusters=clusters) + ivf_fit = test.kneighbors(y) + + # Calculate accuracy + accuracy = 0 + for i in range(k): + accuracy += float(np.sum(ivf_fit == truth)) / N + truth = np.roll( + truth, 1, -1 + ) # Create a rolling window (index positions may not match) + + accuracy = float(accuracy / k) + + self.assertTrue(accuracy >= 0.8, f"Failed at {a}, {accuracy}") ############################################################ def test_Nystrom_k_approx(self): From 2a929a5b474738013cbfbbd0dace80ecf8f691db Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Fri, 9 Apr 2021 19:38:41 +0100 Subject: [PATCH 060/111] added metrics to torch unit test (ivf) --- pykeops/test/unit_tests_pytorch.py | 79 +++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 24 deletions(-) diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index d60bf74b9..b8a896508 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -680,32 +680,63 @@ def test_IVF(self): import torch torch.manual_seed(0) - N, D, K, k, a = 10 ** 3, 3, 50, 5, 5 + N, D, clusters, k, a = 10 ** 3, 3, 10, 5, 5 # Generate random datapoints x, y - x = 0.7 * torch.randn(N, D) + 0.3 - y = 0.7 * torch.randn(N, D) + 0.3 - - # Ground truth K nearest neighbours - truth = torch.argsort(((y.unsqueeze(1) - x.unsqueeze(0)) ** 2).sum(-1), dim=1) - truth = truth[:, :k] - - # IVF K nearest neighbours - IVF = IVF() - IVF.fit(x, a=a) - ivf_fit = IVF.kneighbors(y) - - # Calculate accuracy - accuracy = 0 - for i in range(k): - accuracy += torch.sum(ivf_fit == truth).float() / N - truth = torch.roll( - truth, 1, -1 - ) # Create a rolling window (index positions may not match) - # Record accuracies - accuracy = float(accuracy / k) - - self.assertTrue(accuracy >= 0.8, f"Failed at {a}, {accuracy}") + x = torch.randn(N, D) + y = torch.randn(N, D) + + x2 = x.unsqueeze(0) + y2 = y.unsqueeze(1) + + # Metrics + metrics = ['euclidean','manhattan','angular','angular_full','hyperbolic'] + + for metric in metrics: + # Inputs to IVF algorithm + normalise = False + approx = False + + # Brute force distance calculation + if metric == 'euclidean': + distance = ((y2-x2) ** 2).sum(-1) + elif metric == 'manhattan': + distance = ((y2-x2) .abs()).sum(-1) + elif metric in {'angular', 'angular_full'}: + # Calculate normalised dot product (angular distances) + x3 = x/torch.linalg.norm(x, dim=1, keepdim=True) + y3 = y/torch.linalg.norm(y, dim=1, keepdim=True) + distance = -y3@(x3.T) + if metric == 'angular': + # Need to normalize data for angular metric + normalise = True + elif metric == 'hyperbolic': + # 
Need to ensure first dimension is positive for hyperbolic metric + x += 5 + y += 5 + approx = True + distance = ((y2-x2) ** 2).sum(-1) / (x[:,0].unsqueeze(0) * y[:,0].unsqueeze(1)) + + # Ground truth K nearest neighbours + truth = torch.argsort(distance,dim=1) + truth = truth[:, :k] + + # IVF K nearest neighbours + test = IVF(metric=metric, k=k, normalise=normalise) + test.fit(x, a=a, approx=approx, clusters=clusters) + ivf_fit = test.kneighbors(y) + + # Calculate accuracy + accuracy = 0 + for i in range(k): + accuracy += torch.sum(ivf_fit == truth).float() / N + truth = torch.roll( + truth, 1, -1 + ) # Create a rolling window (index positions may not match) + + accuracy = float(accuracy / k) + + self.assertTrue(accuracy >= 0.8, f"Failed at {a}, {accuracy}") ############################################################ def test_Nystrom_K_approx(self): From d407a62587441b84ee7b4054d6f4f1422b9c952b Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Fri, 9 Apr 2021 19:47:41 +0100 Subject: [PATCH 061/111] calc angular distances without torch.linalg - test --- pykeops/test/unit_tests_pytorch.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index b8a896508..0a1828d99 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -704,9 +704,7 @@ def test_IVF(self): distance = ((y2-x2) .abs()).sum(-1) elif metric in {'angular', 'angular_full'}: # Calculate normalised dot product (angular distances) - x3 = x/torch.linalg.norm(x, dim=1, keepdim=True) - y3 = y/torch.linalg.norm(y, dim=1, keepdim=True) - distance = -y3@(x3.T) + distance = -y@(x.T)/(((x@(x.T)).diag().unsqueeze(0)*(y@(y.T)).diag().unsqueeze(1)).sqrt()) if metric == 'angular': # Need to normalize data for angular metric normalise = True From fb6b5fb1001feca8c3a07804add840d034ddd19d Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 19:54:31 +0100 Subject: [PATCH 062/111] delete normalise --- pykeops/torch/utils.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py index fce97bca5..a7748bb9c 100644 --- a/pykeops/torch/utils.py +++ b/pykeops/torch/utils.py @@ -225,13 +225,6 @@ def calc_centroid(x, c, cl, n=10): ).sum() # calculate distance to centroid for each datapoint, divide by total number of points in that cluster, and sum loss.backward(retain_graph=False) op.step() - if normalise: - with torch.no_grad(): - c = c / torch.norm(c, dim=-1).repeat_interleave( - c.shape[1] - ).reshape( - -1, c.shape[1] - ) # normalising centroids to have norm 1 return c.detach() N, D = x.shape From 7a6dcc76f445a74632841019f2b37e9dac4360f5 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 9 Apr 2021 19:59:02 +0100 Subject: [PATCH 063/111] black --- pykeops/test/unit_tests_numpy.py | 28 +++++++++++++----------- pykeops/test/unit_tests_pytorch.py | 35 ++++++++++++++++++++---------- 2 files changed, 38 insertions(+), 25 deletions(-) diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index 48edba740..51ab5d860 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -454,7 +454,7 @@ def test_IVF(self): y2 = np.expand_dims(y, 1) # Metrics (hyperbolic metric not implemented for numpy) - metrics = ['euclidean','manhattan','angular','angular_full'] + metrics = ["euclidean", 
"manhattan", "angular", "angular_full"] for metric in metrics: # Inputs to IVF algorithm @@ -462,27 +462,29 @@ def test_IVF(self): approx = False # Brute force distance calculation - if metric == 'euclidean': + if metric == "euclidean": distance = ((y2 - x2) ** 2).sum(-1) - elif metric == 'manhattan': + elif metric == "manhattan": distance = np.abs(y2 - x2).sum(-1) - elif metric in {'angular','angular_full'}: - x3 = x/np.linalg.norm(x, axis=1, keepdims=True) - y3 = y/np.linalg.norm(y, axis=1, keepdims=True) - distance = -y3@(x3.T) - if metric == 'angular': - # Need to normalize data for angular metric + elif metric in {"angular", "angular_full"}: + x3 = x / np.linalg.norm(x, axis=1, keepdims=True) + y3 = y / np.linalg.norm(y, axis=1, keepdims=True) + distance = -y3 @ (x3.T) + if metric == "angular": + # Need to normalize data for angular metric normalise = True - elif metric == 'hyperbolic': + elif metric == "hyperbolic": # Placeholder in case hyperbolic metric is implemented in future # Need to ensure first dimension is positive for hyperbolic metric x += 5 y += 5 approx = True - distance = ((y2-x2) ** 2).sum(-1) / (np.expand_dims(x[:,0],0) * np.expand_dims(y[:,0],1)) + distance = ((y2 - x2) ** 2).sum(-1) / ( + np.expand_dims(x[:, 0], 0) * np.expand_dims(y[:, 0], 1) + ) # Ground truth K nearest neighbours - truth = np.argsort(distance,axis=1) + truth = np.argsort(distance, axis=1) truth = truth[:, :k] # IVF K nearest neighbours @@ -497,7 +499,7 @@ def test_IVF(self): truth = np.roll( truth, 1, -1 ) # Create a rolling window (index positions may not match) - + accuracy = float(accuracy / k) self.assertTrue(accuracy >= 0.8, f"Failed at {a}, {accuracy}") diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index 0a1828d99..750aaa4b8 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -690,7 +690,7 @@ def test_IVF(self): y2 = y.unsqueeze(1) # Metrics - metrics = ['euclidean','manhattan','angular','angular_full','hyperbolic'] + metrics = ["euclidean", "manhattan", "angular", "angular_full", "hyperbolic"] for metric in metrics: # Inputs to IVF algorithm @@ -698,25 +698,36 @@ def test_IVF(self): approx = False # Brute force distance calculation - if metric == 'euclidean': - distance = ((y2-x2) ** 2).sum(-1) - elif metric == 'manhattan': - distance = ((y2-x2) .abs()).sum(-1) - elif metric in {'angular', 'angular_full'}: + if metric == "euclidean": + distance = ((y2 - x2) ** 2).sum(-1) + elif metric == "manhattan": + distance = ((y2 - x2).abs()).sum(-1) + elif metric in {"angular", "angular_full"}: # Calculate normalised dot product (angular distances) - distance = -y@(x.T)/(((x@(x.T)).diag().unsqueeze(0)*(y@(y.T)).diag().unsqueeze(1)).sqrt()) - if metric == 'angular': - # Need to normalize data for angular metric + distance = ( + -y + @ (x.T) + / ( + ( + (x @ (x.T)).diag().unsqueeze(0) + * (y @ (y.T)).diag().unsqueeze(1) + ).sqrt() + ) + ) + if metric == "angular": + # Need to normalize data for angular metric normalise = True - elif metric == 'hyperbolic': + elif metric == "hyperbolic": # Need to ensure first dimension is positive for hyperbolic metric x += 5 y += 5 approx = True - distance = ((y2-x2) ** 2).sum(-1) / (x[:,0].unsqueeze(0) * y[:,0].unsqueeze(1)) + distance = ((y2 - x2) ** 2).sum(-1) / ( + x[:, 0].unsqueeze(0) * y[:, 0].unsqueeze(1) + ) # Ground truth K nearest neighbours - truth = torch.argsort(distance,dim=1) + truth = torch.argsort(distance, dim=1) truth = truth[:, :k] # IVF K nearest neighbours From 
d8deff20bd7795b70c4bc10a2436ce52514c9eed Mon Sep 17 00:00:00 2001
From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com>
Date: Sun, 11 Apr 2021 15:41:49 +0100
Subject: [PATCH 064/111] add docstrings + NND

---
 pykeops/common/ivf.py         |  18 ++
 pykeops/numpy/nn/ivf.py       |  34 +++
 pykeops/torch/nn/NNDescent.py | 450 ++++++++++++++++++++++++++++++++++
 pykeops/torch/nn/ivf.py       |  41 ++++
 4 files changed, 543 insertions(+)
 create mode 100644 pykeops/torch/nn/NNDescent.py

diff --git a/pykeops/common/ivf.py b/pykeops/common/ivf.py
index 094c49be1..4fea7499d 100644
--- a/pykeops/common/ivf.py
+++ b/pykeops/common/ivf.py
@@ -1,5 +1,11 @@
 class GenericIVF:
+    """Abstract base class for the IVF-Flat nearest-neighbour search
+
+    End-users should use 'pykeops.numpy.IVF' or 'pykeops.torch.IVF' instead
+
+    """
     def __init__(self, k, metric, normalise, LazyTensor):
+
         self.__k = k
         self.__normalise = normalise
         self.__update_metric(metric)
@@ -18,10 +24,14 @@ def __update_metric(self, metric):
 
     @property
     def metric(self):
+        """Returns the metric used in the search
+        """
         return self.__metric
 
     @property
     def c(self):
+        """Returns the centroids obtained through K-Means
+        """
         if self.__c is not None:
             return self.__c
         else:
@@ -160,6 +170,14 @@ def _kneighbors(self, y):
         return self.__unsort(nn)
 
     def brute_force(self, x, y, k=5):
+        """Performs a brute-force search with KeOps
+
+        Args:
+            x (array): Input dataset
+            y (array): Query dataset
+            k (int): Number of nearest neighbors to obtain
+
+        """
         x_LT = self.__LazyTensor(self.tools.unsqueeze(x, 0))
         y_LT = self.__LazyTensor(self.tools.unsqueeze(y, 1))
         D_ij = self.__distance(y_LT, x_LT)
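The interface documented in these docstrings reduces to a fit/query pair. A minimal usage sketch (hypothetical random data; only names introduced by this patch series are used):

    import numpy as np
    from pykeops.numpy import IVF

    x = np.random.randn(10**4, 3)  # dataset to index
    y = np.random.randn(100, 3)    # query points

    ivf = IVF(k=5, metric="euclidean")
    ivf.fit(x, clusters=50, a=5)        # K-Means into 50 clusters, probe 5 per query
    nn = ivf.kneighbors(y)              # (100, 5) indices into x
    exact = ivf.brute_force(x, y, k=5)  # exact KeOps baseline for comparison

On the torch side, metric may also be a callable taking two LazyTensors, and fit(..., approx=True, n=50) switches to the gradient-based centroid update introduced in the earlier patches.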
diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py
index a6901ba3b..0485f7243 100644
--- a/pykeops/numpy/nn/ivf.py
+++ b/pykeops/numpy/nn/ivf.py
@@ -3,7 +3,23 @@
 class IVF(GenericIVF):
+    """IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset."""
     def __init__(self, k=5, metric="euclidean", normalise=False):
+        """Initialise the IVF-Flat class.
+
+        IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset.
+
+        Args:
+            k (int): Number of nearest neighbours to obtain
+            metric (str): Metric to use.
+                Currently, "euclidean", "manhattan" and "angular" are directly supported.
+                Custom metrics are not supported in numpy; please use the torch version instead.
+                For more information, refer to the tutorial.
+            normalise (bool): Whether or not to normalise all input data to norm 1.
+                This is used mainly for the angular metric;
+                the "angular_full" metric may be used in its place.
+
+        """
         from pykeops.numpy import LazyTensor
 
         self.__get_tools()
@@ -15,6 +31,19 @@ def __get_tools(self):
         self.tools = numpytools
 
     def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU", approx=False):
+        """Fits a dataset to perform the nearest neighbour search over
+
+        K-Means is performed on the dataset to obtain clusters.
+        Then the closest clusters to each cluster are stored for use during query time.
+
+        Args:
+            x (np.ndarray): Numpy array dataset of shape (N, D),
+                where N is the number of points and D is the number of dimensions
+            clusters (int): Total number of clusters to create in K-Means
+            a (int): Number of clusters to search over, must be less than total number of clusters created
+            Niter (int): Number of iterations to run in K-Means algorithm
+
+        """
         if approx:
             raise ValueError("Approximation not supported for numpy")
         if type(x) != np.ndarray:
@@ -22,6 +51,11 @@ def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU", approx=False):
         return self._fit(x, clusters=clusters, a=a, Niter=Niter, backend=backend)
 
     def kneighbors(self, y):
+        """Obtains the nearest neighbors for a query dataset from the fitted dataset
+
+        Args:
+            y (np.ndarray): Query dataset to search over
+        """
         if type(y) != np.ndarray:
             raise ValueError("Query dataset must be a np array")
         return self._kneighbors(y)
diff --git a/pykeops/torch/nn/NNDescent.py b/pykeops/torch/nn/NNDescent.py
new file mode 100644
index 000000000..fd09ed0dc
--- /dev/null
+++ b/pykeops/torch/nn/NNDescent.py
@@ -0,0 +1,450 @@
+import time
+import torch
+
+
+class NNDescent:
+    def __init__(
+        self,
+        data=None,
+        k=5,
+        metric="euclidian",
+        initialization_method="forest",
+        num_trees=5,
+        leaf_multiplier=1,
+        big_leaf_depth=5,
+        verbose=False,
+    ):
+        """Initialize the NNDescent class.
+
+        Initializes the NNDescent class given all relevant parameters. If data is
+        provided, it fits the NNDescent search graph to the data.
+
+        Args:
+            data ((N,d) Tensor): Dataset of N datapoints of dimensionality d.
+            k (int): The number of neighbors to which each node connects in the search graph.
+            metric (string): Name of metric, either "euclidian" or "manhattan".
+            initialization_method (string): The type of initialization to be used for
+                the search graph. Can be "random", "random_big" or "forest".
+            num_trees (int): Number of trees used in "random_big" or "forest" initializations.
+            leaf_multiplier (int): Parameter for the Tree class for tree-based initializations.
+            big_leaf_depth (int): The depth at which the big leaves are taken to be used at
+                the start of search.
+        """
+
+        # Setting parameters
+        self.k = k
+        self.metric = metric
+        self.init_method = initialization_method
+        self.num_trees = num_trees
+        self.leaf_multiplier = leaf_multiplier
+        self.big_leaf_depth = big_leaf_depth
+        self.big_leaves = None
+
+        # If data is provided, we call the fit function.
+        if data is not None:
+            self.fit(data, verbose=verbose)
+
+    def distance(self, x, y):
+        # For "euclidian", return the squared Euclidean distance; the square root is skipped for faster computation.
+ if self.metric == "euclidian": + return ((x - y) ** 2).sum(-1) + elif self.metric == "manhattan": + return ((x - y).abs()).sum(-1) + + def fit(self, X, iter=20, verbose=False): + """Fits the NNDescent search graph to the data set X. + + Args: + X ((N,d) Tensor): Dataset of N datapoints of dimensionality d. + """ + self.data = X + + # A 2D tensor representing a directed graph. + # The value a = graph[i,j] represents an edge from point x_i to x_a. + N = X.shape[0] + self.graph = torch.zeros(size=[N, self.k], dtype=torch.long) + + # Initialize graph + if self.init_method == "random": + self._initialize_graph_randomly() + elif self.init_method == "random_big": + self._initialize_graph_big_random(self.data, self.num_trees) + elif self.init_method == "forest": + self._initialize_graph_forest( + self.data, self.num_trees, self.leaf_multiplier, self.big_leaf_depth + ) + + # A set of tuples (i,j) of indices for which the distance has already been calculated. + self.explored_edges = set() + + # A 2D tensor representing the distance between point x_i and x_graph[i,j] + self.k_distances = torch.zeros([N, self.k]) + + # Update the graph + self._calculate_all_distances() + self._update_graph(iter=iter, verbose=verbose) + + def _update_graph(self, iter=25, verbose=False): + """Updates the graph using algorithm: https://pynndescent.readthedocs.io/en/latest/how_pynndescent_works.html + + Args: + iter (int): Number of iterations to use when updating search graph. + """ + # [STEP 1: Start with random graph.] Iterate + start = time.time() + for it in range(iter): + if verbose: + print( + "Iteration number", + it, + "with average distance of", + torch.mean(self.k_distances).item(), + "Took", + time.time() - start, + "seconds.", + ) + has_changed = False + + # [STEP 2: For each node:] (TODO: Investigate whether this can be vectorized.) + for i, neighbors in enumerate(self.graph): + # Distances of current neighbors + dist_current_neighbors = self.k_distances[i] + + # [STEP 3: Measure distance from the node to the neighbors of its neighbors] + # Find neighbors of neighbors + potential_neighbors = { + a.item() + for a in self.graph[neighbors].flatten() + if a not in neighbors + and a != i + and (i, int(a)) not in self.explored_edges + } + potential_distances = torch.Tensor( + [ + self.distance(self.data[i], self.data[n]) + for n in potential_neighbors + ] + ) + self.explored_edges.update([(i, int(r)) for r in potential_neighbors]) + + # Concatenate potential neighbors to list of neighbors (indices and distances) + cat_idx = torch.cat( + [neighbors, torch.Tensor(list(potential_neighbors))] + ) + cat_dist = torch.cat([self.k_distances[i], potential_distances]) + + # [STEP 4: If any are closer, then update the graph accordingly, and only keep the k closest] + dist_sorted, idx = torch.sort(cat_dist) + if torch.max(idx[: self.k]) >= self.k: + has_changed = True + self.graph[i] = cat_idx[idx[: self.k]] + self.k_distances[i] = dist_sorted[: self.k] + + # [STEP 5: If any changes were made, repeat iteration, otherwise stop] + if not has_changed: + if verbose: + print("Fitting complete! Took", it, "iterations.") + break + + def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): + """Returns k nearest neighbors of input X using NNDescent. + + Our code is largely based on this algorithm: + https://pynndescent.readthedocs.io/en/latest/how_pynndescent_works.html#Searching-using-a-nearest-neighbor-graph + + Args: + X ((N,d) Tensor): A query set for which to find k neighbors. 
+            max_num_steps (int): The maximum number of steps to take during search.
+
+        Returns:
+            The indices of the k nearest neighbors in the fitted data.
+        """
+
+        # N datapoints of dimension d
+        N, d = X.shape
+        k = self.k
+
+        # Boolean mask to keep track of those points whose search is still ongoing
+        is_active = torch.ones(N) == 1
+
+        # If the graph was initialized using trees, we can use information from there to initialize the search in a more diverse manner.
+        if self.big_leaves is not None and tree_init:
+            candidate_idx = self.big_leaves.unsqueeze(0).repeat(N, 1)  # Shape: (N, number of big leaves), i.e. (N, 32) at the default big_leaf_depth of 5
+        else:
+            # Random initialization for starting points of search.
+            candidate_idx = torch.randint(
+                high=len(self.data), size=[N, k + 1], dtype=torch.long
+            )
+
+        # Sort the candidates by distance from X
+        distances = self.distance(self.data[candidate_idx], X.unsqueeze(1))
+        # distances = ((self.data[candidate_idx] - X.unsqueeze(1))**2).sum(-1)
+        sorted, idx = torch.sort(distances, dim=1)
+        candidate_idx = torch.gather(candidate_idx, dim=1, index=idx)
+        # Truncate to k+1 nearest
+        candidate_idx = candidate_idx[:, : (k + 1)]
+
+        # Track the nodes we have explored already, in an N x num_explored tensor
+        num_explored = self.k * 2
+        explored = torch.full(size=[N, num_explored], fill_value=-1)
+
+        start = time.time()
+        # The initialization of candidates and explored set is done. Now we can search.
+        count = 0
+        while count < max_num_steps:
+            if verbose:
+                print(
+                    "Step",
+                    count,
+                    "- Search is completed for",
+                    1 - torch.mean(1.0 * is_active).item(),
+                    "- this step took",
+                    time.time() - start,
+                    "s",
+                )
+                start = time.time()
+
+            # [2. Look at nodes connected by an edge to the best untried node in graph]
+            # diff_bool.shape is (M, k+1, num_explored), where M is the number of active searches
+            diff_bool = (
+                candidate_idx[is_active].unsqueeze(2) - explored[is_active].unsqueeze(1)
+                == 0
+            )
+            in_explored = torch.any(diff_bool, dim=2)
+            # batch_active is true for those who haven't been fully explored in the current batch
+            batch_active = ~torch.all(in_explored[:, :-1], dim=1)
+
+            # Update the is_active mask (a boolean masked scatter). If none are active, break the search
+            is_active[is_active.clone()] = batch_active
+            if not is_active.any():
+                break
+
+            # first_unexplored has the index of the first unexplored element per row
+            first_unexplored = torch.max(~in_explored[batch_active], dim=1)[
+                1
+            ].unsqueeze(1)
+            # Unexplored nodes to be expanded
+            unexplored_idx = torch.gather(
+                candidate_idx[is_active], dim=1, index=first_unexplored
+            ).squeeze(-1)
+            explored[is_active, (count % num_explored)] = unexplored_idx
+
+            # [3. Add all these nodes to our potential candidate pool]
+            # Add neighbors of the first unexplored point to the list of candidates
+            expanded_idx = torch.cat(
+                (self.graph[unexplored_idx], candidate_idx[is_active]), dim=1
+            )
+
+            # We remove repeated indices from consideration by adding float('inf') to them.
+            expanded_idx = torch.sort(expanded_idx)[0]
+            shift = torch.cat(
+                (
+                    torch.full((len(expanded_idx), 1), -1),
+                    torch.sort(expanded_idx, dim=1)[0][:, :-1],
+                ),
+                dim=1,
+            )
+            unwanted_indices = expanded_idx == shift
+
+            # [4. Sort by closeness].
+            distances = self.distance(
+                self.data[expanded_idx], X[is_active].unsqueeze(1)
+            )
+            # distances = ((self.data[expanded_idx] - X[is_active].unsqueeze(1))**2).sum(-1)
+            distances[unwanted_indices] += float("inf")
+            sorted, idx = torch.sort(distances, dim=1)
+            expanded_idx = torch.gather(expanded_idx, dim=1, index=idx)
+
+            # [5.
Truncate to k+1 best] + candidate_idx[is_active] = expanded_idx[:, : (self.k + 1)] + + # [6. Return to step 2. If we have already tried all candidates in pool, we stop in the if not unexplored] + count += 1 + + # Return the k candidates + if verbose: + print( + "Graph search finished after", + count, + "steps. Finished for:", + 1 - torch.mean(1.0 * is_active).item(), + ) + return candidate_idx[:, :-1] + + def _calculate_all_distances(self): + """Updates the distances (self.k_distances) of the edges found in self.graph.""" + # Uses loop for simplicity. + for i, row in enumerate(self.graph): + # Indices of current k neighbors in self.graph + neighbor_indices = [(i, int(r)) for r in row] + # The distances of those neighbors are saved in k_distances + self.k_distances[i] = torch.Tensor( + [self.distance(self.data[a], self.data[b]) for a, b in neighbor_indices] + ) + # Add pairs to explored_edges set + self.explored_edges.update(neighbor_indices) + + def _initialize_graph_randomly(self): + """Initializes self.graph with random values such that each point has k distinct neighbors""" + N, k = self.graph.shape + # Initialize graph randomly, removing self-loops + self.graph = torch.randint(high=N - 1, size=[N, k], dtype=torch.long) + row_indices = torch.arange(N).unsqueeze(1).repeat(1, k) + self.graph[self.graph >= row_indices] += 1 + + def _initialize_graph_big_random(self, data, numtrees): + """Initializes self.graph randomly, but with more neighbours at the start""" + N, k = self.graph.shape + temp_graph = torch.tensor([]) + + # make 'trees', combine into giant graph with each element (row) having k * num_trees neighbours + # this is a small for loop - numtrees and k << datapoints + for j in range(numtrees): + tree_graph = torch.tensor([]) + for i in range(k): + tree_graph = torch.cat( + (tree_graph, torch.randperm(N)), 0 + ) # generate randomly shuffled list of N indices + tree_graph = tree_graph.reshape( + -1, k + ) # creates a N x k tensor with N indices, each appearing k times. This represents 1 'tree' + temp_graph = torch.cat( + (temp_graph, tree_graph), 1 + ) # combine into giant N x (k*num_trees) tensor. 
This represents the forest + + # find KNN for each row in giant graph + # TODO - implement the below without a for loop + for i, row in enumerate(temp_graph): + temp_row = torch.unique(row).type(torch.LongTensor) # remove duplicates + temp_row = temp_row[temp_row != i] # remove self + + temp_points = data[temp_row, :] # pick out elements from dataset + distances = self.distance(temp_points, data[i]) # Euclidean distances + indices = distances.topk( + k=self.k, largest=False + ).indices # find indices of KNN + self.graph[i] = temp_row[indices] # assign KNN to graph + + def _initialize_graph_forest(self, data, numtrees, leaf_multiplier, big_leaf_depth): + """Initializes self.graph with a forest of random trees, such that each point has k distinct neighbors""" + N, k = self.graph.shape + dim = data.shape[1] + + temp_graph = torch.tensor(()) + for j in range(numtrees): + # Create trees, obtain leaves + t = Tree(data, k=k * leaf_multiplier, big_leaf_depth=big_leaf_depth) + + # Create temporary graph, 1 for each tree + # Leaves are of uneven size; select smallest leaf size as graph size + cols = min([len(leaf) for leaf in t.leaves]) + rows = len(t.leaves) + tree_graph = torch.zeros((N, cols)) + leaves = torch.tensor(()) + idx_update = torch.tensor(()) + + # Update graph using leaves + for leaf in t.leaves: + temp_idx = torch.as_strided( + torch.tensor(leaf).repeat(1, 2), + size=[len(leaf), cols], + stride=[1, 1], + storage_offset=1, + ) + tree_graph[ + leaf, : + ] = temp_idx.float() # update graph. a lot of overwriting + # Concatenate all graphs from all trees into 1 giant graph + temp_graph = torch.cat((temp_graph, tree_graph), 1) + + # Add the first tree's big_leaves to the NNDescent's big_leaves + if j == 0: + self.big_leaves = torch.LongTensor(t.big_leaves) + + warning_count = 0 # number of indices for which some neighbours are random + + # find KNN for each row in giant graph + # TODO - implement the below without a for loop + for i, row in enumerate(temp_graph): + temp_row = torch.unique(row).type(torch.LongTensor) # remove duplicates + temp_row = temp_row[temp_row != i] # remove self + + temp_points = data[temp_row, :] # pick out elements from dataset + d = ( + (data[i].reshape(1, dim).unsqueeze(1) - temp_points.unsqueeze(0)) ** 2 + ).sum(-1) + distances, indices = torch.sort(d, dim=1) + indices = indices.flatten()[:k] + + indices = temp_row[indices] + + # pad with random indices if there are not enough neighbours + warning = False # warning flag + while len(indices) < k: + pad = torch.randint( + high=N - 1, + size=[ + k - len(indices), + ], + dtype=torch.long, + ) + indices = torch.cat((indices, pad)) + indices = torch.unique(indices).type( + torch.LongTensor + ) # remove duplicates + indices = indices[indices != i] # remove self + warning = True + + self.graph[i] = indices # assign KNN to graph + + if warning: + warning_count += 1 + + if warning_count: + print("WARNING!", warning_count, " INDICES ARE RANDOM!") + + +class Tree: + """ + Random projection tree class that splits the data evenly per split + Each split is performed by calculating the projection distance of each datapoint to a random unit vector + The datapoints are then split by the median of of these projection distances + The indices of the datapoints are stored in tree.leaves, as a nested list + """ + + def __init__(self, x, k=5, big_leaf_depth=5): + self.min_size = 2 * k - 1 + self.leaves = [] + self.sizes = [] + self.big_leaf_depth = big_leaf_depth + self.big_leaves = [] # leaves at depth = 5 + indices = 
torch.arange(x.shape[0]) + self.tree = self.make_tree(x, indices, depth=0) + + def make_tree(self, x, indices, depth): + if depth == self.big_leaf_depth: # add to big_leaves if depth=5 + self.big_leaves.append(int(indices[0])) + if x.shape[0] > self.min_size: + v = self.choose_rule(x) + distances = torch.tensordot( + x, v, dims=1 + ) # create list of projection distances + median = torch.median(distances) + left_bool = ( + distances <= median + ) # create boolean array where entries are true if distance <= median + right_bool = ~left_bool # inverse of left_bool + left_indices = indices[left_bool] + right_indices = indices[right_bool] + self.make_tree(x[left_bool, :], left_indices, depth + 1) + self.make_tree(x[right_bool, :], right_indices, depth + 1) + elif x.shape[0] != 0: + self.leaves.append(indices.tolist()) + self.sizes.append(x.shape[0]) + return + + def choose_rule(self, x): + dim = x.shape[1] + v = torch.rand(dim) # create random vector + v /= torch.norm(v) # normalize to unit vector + return v diff --git a/pykeops/torch/nn/ivf.py b/pykeops/torch/nn/ivf.py index 1ae70b686..dd6b92584 100644 --- a/pykeops/torch/nn/ivf.py +++ b/pykeops/torch/nn/ivf.py @@ -3,7 +3,24 @@ class IVF(GenericIVF): + """IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset.""" def __init__(self, k=5, metric="euclidean", normalise=False): + """Initialise the IVF-Flat class. + + IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset. + + Args: + k (int): Number of nearest neighbours to obtain + metric (str,function): Metric to use + Currently, "euclidean", "manhattan", "angular" and "hyperbolic" are directly supported, apart from custom metrics + Hyperbolic metric requires the use of approx = True, during the fit() function later + Custom metrics should be in the form of a function with 2 inputs and returns their distance + For more information, refer to the tutorial + normalise (bool): Whether or not to normalise all input data to norm 1 + This is used mainly for angular metric + In place of this, "angular_full" metric may be used instead + + """ from pykeops.torch import LazyTensor self.__get_tools() @@ -15,6 +32,25 @@ def __get_tools(self): self.tools = torchtools def fit(self, x, clusters=50, a=5, Niter=15, approx=False, n=50): + """Fits a dataset to perform the nearest neighbour search over + + K-Means is performed on the dataset to obtain clusters + Then the closest clusters to each cluster is stored for use during query time + + Args: + x (torch.Tensor): Torch tensor dataset of shape N, D + Where N is the number of points and D is the number of dimensions + clusters (int): Total number of clusters to create in K-Means + a (int): Number of clusters to search over, must be less than total number of clusters created + Niter (int): Number of iterations to run in K-Means algorithm + approx (bool): Whether or not to use an approximation step in K-Means + In hyperbolic metric and custom metric, this should be set to True + This is because the optimal cluster centroid may not have a simple closed form expression + n (int): Number of iterations to optimise the cluster centroid, when approx = True + A value of around 50 is recommended + Lower values are faster while higher values give better accuracy in centroid location + + """ if type(x) != torch.Tensor: raise ValueError("Input dataset must be a torch tensor") return self._fit( @@ -22,6 +58,11 
@@ def fit(self, x, clusters=50, a=5, Niter=15, approx=False, n=50): ) def kneighbors(self, y): + """Obtains the nearest neighbors for an input dataset from the fitted dataset + + Args: + y (torch.Tensor): Input dataset to search over + """ if type(y) != torch.Tensor: raise ValueError("Query dataset must be a torch tensor") return self._kneighbors(y) From 73ff7185b66159ba3263c2a8b013cc60668aec09 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Sun, 11 Apr 2021 15:42:59 +0100 Subject: [PATCH 065/111] black --- pykeops/common/ivf.py | 7 +++---- pykeops/numpy/nn/ivf.py | 9 +++++---- pykeops/torch/nn/ivf.py | 9 +++++---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pykeops/common/ivf.py b/pykeops/common/ivf.py index 4fea7499d..e384cea17 100644 --- a/pykeops/common/ivf.py +++ b/pykeops/common/ivf.py @@ -4,6 +4,7 @@ class GenericIVF: End-users should use 'pykeops.numpy.ivf' or 'pykeops.torch.ivf' """ + def __init__(self, k, metric, normalise, LazyTensor): self.__k = k @@ -24,14 +25,12 @@ def __update_metric(self, metric): @property def metric(self): - """Returns the metric used in the search - """ + """Returns the metric used in the search""" return self.__metric @property def c(self): - """Returns the clusters obtained through K-Means - """ + """Returns the clusters obtained through K-Means""" if self.__c is not None: return self.__c else: diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/nn/ivf.py index 0485f7243..6ecd4224c 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/nn/ivf.py @@ -4,10 +4,11 @@ class IVF(GenericIVF): """IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset.""" + def __init__(self, k=5, metric="euclidean", normalise=False): """Initialise the IVF-Flat class. - IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset. + IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset. Args: k (int): Number of nearest neighbours to obtain @@ -32,17 +33,17 @@ def __get_tools(self): def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU", approx=False): """Fits a dataset to perform the nearest neighbour search over - + K-Means is performed on the dataset to obtain clusters Then the closest clusters to each cluster is stored for use during query time Args: - x (torch.Tensor): Torch tensor dataset of shape N, D + x (torch.Tensor): Torch tensor dataset of shape N, D Where N is the number of points and D is the number of dimensions clusters (int): Total number of clusters to create in K-Means a (int): Number of clusters to search over, must be less than total number of clusters created Niter (int): Number of iterations to run in K-Means algorithm - + """ if approx: raise ValueError("Approximation not supported for numpy") diff --git a/pykeops/torch/nn/ivf.py b/pykeops/torch/nn/ivf.py index dd6b92584..64d916411 100644 --- a/pykeops/torch/nn/ivf.py +++ b/pykeops/torch/nn/ivf.py @@ -4,16 +4,17 @@ class IVF(GenericIVF): """IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset.""" + def __init__(self, k=5, metric="euclidean", normalise=False): """Initialise the IVF-Flat class. 
- IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset. + IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset. Args: k (int): Number of nearest neighbours to obtain metric (str,function): Metric to use Currently, "euclidean", "manhattan", "angular" and "hyperbolic" are directly supported, apart from custom metrics - Hyperbolic metric requires the use of approx = True, during the fit() function later + Hyperbolic metric requires the use of approx = True, during the fit() function later Custom metrics should be in the form of a function with 2 inputs and returns their distance For more information, refer to the tutorial normalise (bool): Whether or not to normalise all input data to norm 1 @@ -33,12 +34,12 @@ def __get_tools(self): def fit(self, x, clusters=50, a=5, Niter=15, approx=False, n=50): """Fits a dataset to perform the nearest neighbour search over - + K-Means is performed on the dataset to obtain clusters Then the closest clusters to each cluster is stored for use during query time Args: - x (torch.Tensor): Torch tensor dataset of shape N, D + x (torch.Tensor): Torch tensor dataset of shape N, D Where N is the number of points and D is the number of dimensions clusters (int): Total number of clusters to create in K-Means a (int): Number of clusters to search over, must be less than total number of clusters created From d4b2ca3a499df90790c808cff7d56142d2bc89f3 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Sun, 11 Apr 2021 15:54:36 +0100 Subject: [PATCH 066/111] add imports for NND --- pykeops/torch/nn/NNDescent.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pykeops/torch/nn/NNDescent.py b/pykeops/torch/nn/NNDescent.py index fd09ed0dc..ece1f5af4 100644 --- a/pykeops/torch/nn/NNDescent.py +++ b/pykeops/torch/nn/NNDescent.py @@ -1,3 +1,7 @@ +import torch +import time + + class NNDescent: def __init__( self, From 403be68cfc73554a4c4294413fca80410326382c Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Mon, 12 Apr 2021 16:21:47 +0100 Subject: [PATCH 067/111] fixed euclidean typo --- pykeops/torch/nn/NNDescent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pykeops/torch/nn/NNDescent.py b/pykeops/torch/nn/NNDescent.py index ece1f5af4..59b6b22ec 100644 --- a/pykeops/torch/nn/NNDescent.py +++ b/pykeops/torch/nn/NNDescent.py @@ -22,7 +22,7 @@ def __init__( Args: data ((N,d) Tensor): Dataset of N datapoints of dimensionality d. k (int): The number of neighbors to which each node connects in the search graph. - metric (string): Name of metric, either "euclidian" and "manhattan" + metric (string): Name of metric, either "euclidean" and "manhattan" initialization_method (string): The type of initialization to be used for the search graph. Can be "random", "random_big" or "forest". num_trees (int): Number of trees used in "random_big" or "forest" initializations. @@ -45,8 +45,8 @@ def __init__( self.fit(data, verbose=verbose) def distance(self, x, y): - # Square of euclidian distance. Skip the root for faster computation. - if self.metric == "euclidian": + # Square of euclidean distance. Skip the root for faster computation. 
+ if self.metric == "euclidean": return ((x - y) ** 2).sum(-1) elif self.metric == "manhattan": return ((x - y).abs()).sum(-1) From 6b7e77aae3c1a2a68fce05101f27b00c68f22fa4 Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Mon, 12 Apr 2021 16:45:32 +0100 Subject: [PATCH 068/111] typo + changed leaf_multiplier default --- pykeops/torch/nn/NNDescent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pykeops/torch/nn/NNDescent.py b/pykeops/torch/nn/NNDescent.py index 59b6b22ec..eea647803 100644 --- a/pykeops/torch/nn/NNDescent.py +++ b/pykeops/torch/nn/NNDescent.py @@ -7,10 +7,10 @@ def __init__( self, data=None, k=5, - metric="euclidian", + metric="euclidean", initialization_method="forest", num_trees=5, - leaf_multiplier=1, + leaf_multiplier=10, big_leaf_depth=5, verbose=False, ): From 330645a24a2133f1ff0640ac681bf8d14a628e2c Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Mon, 12 Apr 2021 18:28:20 +0100 Subject: [PATCH 069/111] Add files via upload --- pykeops/benchmarks/plot_benchmark_KNN.py | 890 +++++++++++++++++++++++ 1 file changed, 890 insertions(+) create mode 100644 pykeops/benchmarks/plot_benchmark_KNN.py diff --git a/pykeops/benchmarks/plot_benchmark_KNN.py b/pykeops/benchmarks/plot_benchmark_KNN.py new file mode 100644 index 000000000..4e0fb6434 --- /dev/null +++ b/pykeops/benchmarks/plot_benchmark_KNN.py @@ -0,0 +1,890 @@ +""" +K-Nearest Neighbors search +========================================= + +We compare the performances of PyTorch, JAX, KeOps, Scikit-Learn and FAISS (when applicable) +for K-NN queries on random samples and standard datasets. +A detailed discussion of these results can be found in Section 5.2 +of our `NeurIPS 2020 paper `_. +Generally speaking, generic KeOps routines are orders +of magnitude faster and more memory efficient than +their PyTorch and JAX counterparts. +They perform on par +with the handcrafted CUDA kernels of the FAISS-Flat (bruteforce) method for problems +with **up to 1M samples in dimension 1 to 50-100**, +but are sub-optimal on larger datasets. +Crucially, KeOps is easy to use with **any metric**: +it provides the only competitive run times in the many settings +that are not supported by existing C++ libraries. + +In this demo, we often use exact **bruteforce** computations +(tensorized for PyTorch/JAX, on-the-fly for KeOps) and do not leverage any +quantization scheme or multiscale +decomposition of the distance matrix. +First support for these approximation strategies with KeOps is scheduled for +May-June 2021. Going forward, a major priority for KeOps +is to get closer to the reference run times of the FAISS library +in all settings. +We intend to provide a versatile, generic and pythonic code that is easy to +modify and integrate in other projects. +Hopefully, this will **stimulate research on non-Euclidean metrics**, +such as hyperbolic or discrete spaces. + +.. note:: + Note that timings are always subject to change: + libraries and hardware get better with time. + If you find a way of improving these benchmarks, please + `let us know `_! 
+ +""" + + +############################################## +# Setup +# --------------------- +# +# First, we load some utility routines to run and display benchmarks: + +import numpy as np +import torch +from matplotlib import pyplot as plt +from functools import partial + +from benchmark_utils import ( + full_benchmark, + timer, + tensor, + int_tensor, + jax_tensor, + globalize, +) +from dataset_utils import generate_samples + +use_cuda = torch.cuda.is_available() + + +############################################## +# We then specify the values of K that we will inspect: + +Ks = [1, 10, 50, 100] # Numbers of neighbors to find + + +############################################## +# PyTorch bruteforce implementation +# ------------------------------------------ +# +# As a first baseline, we benchmark a PyTorch K-NN routine on the GPU. +# We implement a collection of distance-like matrices between +# point clouds :math:`(x_i)` and :math:`(y_j)`: +# +# - The squared **Euclidean** distance :math:`\|x-y\|^2 = \sum_k (x[k] - y[k])^2`. +# - The **Manhattan** distance :math:`\|x-y\|_{L^1} = \sum_k |x[k] - y[k]|`. +# - The **cosine similarity** :math:`\langle x, y\rangle = \sum_k (x[k] \cdot y[k])`. +# - The **hyperbolic** distance on the Poincare half-space :math:`\mathbb{H}` +# of vectors :math:`x` such that :math:`x[0] > 0`, +# :math:`\text{d}_{\mathbb{H}}(x, y)= \text{arcosh}(1+ \|x-y\|^2 / (2 \,x[0]y[0]))`. +# Since :math:`d \mapsto \text{arcosh}(1+d/2)` is increasing, +# we only compute the pseudo distance +# :math:`\|x-y\|^2 / x[0]y[0]`. +# +# .. note:: +# Expanding the squared norm :math:`\|x-y\|^2` as a sum +# :math:`\|x\|^2 - 2 \langle x, y \rangle + \|y\|^2` allows us +# to leverage the fast matrix-matrix product of the BLAS/cuBLAS +# libraries. We rely on this identity whenever possible. +# + + +def KNN_torch_fun(x_train, x_train_norm, x_test, K, metric): + + largest = False # Default behaviour is to look for the smallest values + + if metric == "euclidean": + x_test_norm = (x_test ** 2).sum(-1) + diss = ( + x_test_norm.view(-1, 1) + + x_train_norm.view(1, -1) + - 2 * x_test @ x_train.t() # Rely on cuBLAS for better performance! + ) + + elif metric == "manhattan": + diss = (x_test[:, None, :] - x_train[None, :, :]).abs().sum(dim=2) + + elif metric == "angular": + diss = x_test @ x_train.t() + largest = True + + elif metric == "hyperbolic": + x_test_norm = (x_test ** 2).sum(-1) + diss = ( + x_test_norm.view(-1, 1) + + x_train_norm.view(1, -1) + - 2 * x_test @ x_train.t() + ) + diss /= x_test[:, 0].view(-1, 1) * x_train[:, 0].view(1, -1) + else: + raise NotImplementedError(f"The '{metric}' distance is not supported.") + + return diss.topk(K, dim=1, largest=largest).indices + + +############################################################################ +# We rely on the **tensorized** +# implementation above to define a simple K-NN query operator. 
+# We follow the scikit-learn API with "train" and "test" methods:
+
+
+def KNN_torch(K, metric="euclidean", **kwargs):
+    def fit(x_train):
+        # Setup the K-NN estimator:
+        x_train = tensor(x_train)
+        start = timer()
+        # The "training" time here should be negligible:
+        x_train_norm = (x_train ** 2).sum(-1)
+        elapsed = timer() - start
+
+        def f(x_test):
+            x_test = tensor(x_test)
+            start = timer()
+
+            # Actual K-NN query:
+            out = KNN_torch_fun(x_train, x_train_norm, x_test, K, metric)
+
+            elapsed = timer() - start
+            indices = out.cpu().numpy()
+            return indices, elapsed
+
+        return f, elapsed
+
+    return fit
+
+
+#############################################################################
+# Unfortunately, the code above creates a full
+# :math:`\mathrm{N}_{\text{queries}}\times \mathrm{N}_{\text{points}}`
+# distance matrix that may not fit in the GPU memory.
+# To work around this problem and avoid memory overflows, we benchmark a
+# second implementation that works with small batches of queries at a time:
+
+
+def KNN_torch_batch_loop(K, metric="euclidean", **kwargs):
+    def fit(x_train):
+        # Setup the K-NN estimator:
+        x_train = tensor(x_train)
+        Ntrain, D = x_train.shape
+        start = timer()
+        # The "training" time here should be negligible:
+        x_train_norm = (x_train ** 2).sum(-1)
+        elapsed = timer() - start
+
+        def f(x_test):
+            x_test = tensor(x_test)
+
+            # Estimate the largest reasonable batch size:
+            Ntest = x_test.shape[0]
+            av_mem = int(5e8)  # 500 Mb of GPU memory per batch
+            # Remember that a vector of D float32 numbers takes up 4*D bytes:
+            Ntest_loop = min(max(1, av_mem // (4 * D * Ntrain)), Ntest)
+            Nloop = (Ntest - 1) // Ntest_loop + 1
+            out = int_tensor(Ntest, K)
+
+            start = timer()
+            # Actual K-NN query:
+            for k in range(Nloop):
+                x_test_k = x_test[Ntest_loop * k : Ntest_loop * (k + 1), :]
+                out[Ntest_loop * k : Ntest_loop * (k + 1), :] = KNN_torch_fun(
+                    x_train, x_train_norm, x_test_k, K, metric
+                )
+
+            # torch.cuda.empty_cache()
+
+            elapsed = timer() - start
+            indices = out.cpu().numpy()
+            return indices, elapsed
+
+        return f, elapsed
+
+    return fit
+
+
+############################################################################
+# JAX bruteforce implementation
+# ------------------------------------------
+#
+# We now re-implement the same method with JAX-XLA routines.
+#
+# Note that we run this script with the command line option
+# ``XLA_PYTHON_CLIENT_ALLOCATOR=platform``: this prevents JAX
+# from locking up GPU memory and allows
+# us to benchmark JAX, FAISS, PyTorch and KeOps next to each other.
+# This may impact performances - but as a general rule,
+# we found JAX to be orders of magnitude slower than PyTorch
+# and KeOps in these benchmarks, even with unrestrained access to the GPU device memory.
+# Needless to say, this is subject to change with future releases:
+# we stay tuned to keep this documentation up to date and welcome
+# all suggestions!
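##############################################
# For reference, the allocator setting mentioned above can also be applied from
# within Python. A minimal sketch, assuming that it runs before the very first
# JAX import of the process (JAX reads this environment variable when it
# initializes its GPU backend):

import os

# Allocate GPU memory on demand instead of pre-allocating a large pool:
os.environ["XLA_PYTHON_CLIENT_ALLOCATOR"] = "platform"

import jax  # The setting is taken into account once the backend starts up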
+ +from functools import partial +import jax +import jax.numpy as jnp + + +@partial(jax.jit, static_argnums=(2, 3)) +def knn_jax_fun(x_train, x_test, K, metric): + + if metric == "euclidean": + diss = ( + (x_test ** 2).sum(-1)[:, None] + + (x_train ** 2).sum(-1)[None, :] + - 2 * x_test @ x_train.T + ) + + elif metric == "manhattan": + diss = jax.lax.abs(x_test[:, None, :] - x_train[None, :, :]).sum(-1) + + elif metric == "angular": + diss = -x_test @ x_train.T + + elif metric == "hyperbolic": + diss = ( + (x_test ** 2).sum(-1)[:, None] + + (x_train ** 2).sum(-1)[None, :] + - 2 * x_test @ x_train.T + ) + diss = diss / (x_test[:, 0][:, None] * x_train[:, 0][None, :]) + + else: + raise NotImplementedError(f"The '{metric}' distance is not supported.") + + indices = jax.lax.top_k(-diss, K)[1] + return indices + + +############################################################################ +# Straightforward K-NN query, with a scikit-learn interface: + + +def KNN_JAX(K, metric="euclidean", **kwargs): + def fit(x_train): + + # Setup the K-NN estimator: + start = timer(use_torch=False) + x_train = jax_tensor(x_train) + elapsed = timer(use_torch=False) - start + + def f(x_test): + x_test = jax_tensor(x_test) + + # Actual K-NN query: + start = timer(use_torch=False) + indices = knn_jax_fun(x_train, x_test, K, metric) + indices = np.array(indices) + elapsed = timer(use_torch=False) - start + return indices, elapsed + + return f, elapsed + + return fit + + +############################################################################# +# Smarter routine, which relies on small batches to avoid memory overflows: + + +def KNN_JAX_batch_loop(K, metric="euclidean", **kwargs): + def fit(x_train): + + # Setup the K-NN estimator: + start = timer(use_torch=False) + x_train = jax_tensor(x_train) + elapsed = timer(use_torch=False) - start + + def f(x_test): + x_test = jax_tensor(x_test) + + # Estimate the largest reasonable batch size + av_mem = int(5e8) # 500 Mb + Ntrain, D = x_train.shape + Ntest = x_test.shape[0] + Ntest_loop = min(max(1, av_mem // (4 * D * Ntrain)), Ntest) + Nloop = (Ntest - 1) // Ntest_loop + 1 + indices = np.zeros((Ntest, K), dtype=int) + + start = timer(use_torch=False) + # Actual K-NN query: + for k in range(Nloop): + x_test_k = x_test[Ntest_loop * k : Ntest_loop * (k + 1), :] + indices[Ntest_loop * k : Ntest_loop * (k + 1), :] = knn_jax_fun( + x_train, x_test_k, K, metric + ) + elapsed = timer(use_torch=False) - start + return indices, elapsed + + return f, elapsed + + return fit + + +############################################################################ +# KeOps bruteforce implementation +# -------------------------------------- +# +# KeOps lets us implement a bruteforce K-NN search efficiently, +# **without having to worry about memory overflows**. +# We perform all the "symbolic" computations on the distance formulas +# ahead of time, using the advanced ``Vi`` and ``Vj`` helpers that are described +# in `this tutorial <../_auto_tutorials/a_LazyTensors/plot_lazytensors_c.html>`_. +# Note that we could also rely on the simpler ``LazyTensor`` syntax, +# at the cost of a small overhead that is negligible in most settings. 
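############################################################################
# For the record, here is what the Euclidean K-NN query would look like with
# the simpler LazyTensor syntax; a minimal sketch (the helper name is ours),
# assuming that x_test (M, D) and x_train (N, D) are float32 torch tensors
# that live on the GPU:

from pykeops.torch import LazyTensor


def knn_lazytensor_sketch(x_test, x_train, K):
    X_i = LazyTensor(x_test[:, None, :])  # (M, 1, D) symbolic queries
    X_j = LazyTensor(x_train[None, :, :])  # (1, N, D) symbolic dataset
    D_ij = ((X_i - X_j) ** 2).sum(-1)  # (M, N) symbolic squared distances
    return D_ij.argKmin(K, dim=1)  # (M, K) neighbor indices, computed by KeOps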
+
+from pykeops.torch import Vi, Vj
+
+
+def KNN_KeOps(K, metric="euclidean", **kwargs):
+    def fit(x_train):
+        # Setup the K-NN estimator:
+        x_train = tensor(x_train)
+        start = timer()
+
+        # Encoding as KeOps LazyTensors:
+        D = x_train.shape[1]
+        X_i = Vi(0, D)  # Purely symbolic "i" variable, without any data array
+        X_j = Vj(1, D)  # Purely symbolic "j" variable, without any data array
+
+        # Symbolic distance matrix:
+        if metric == "euclidean":
+            D_ij = ((X_i - X_j) ** 2).sum(-1)
+        elif metric == "manhattan":
+            D_ij = (X_i - X_j).abs().sum(-1)
+        elif metric == "angular":
+            D_ij = -(X_i | X_j)
+        elif metric == "hyperbolic":
+            D_ij = ((X_i - X_j) ** 2).sum(-1) / (X_i[0] * X_j[0])
+        else:
+            raise NotImplementedError(f"The '{metric}' distance is not supported.")
+
+        # K-NN query operator:
+        KNN_fun = D_ij.argKmin(K, dim=1)
+
+        # N.B.: The "training" time here should be negligible.
+        elapsed = timer() - start
+
+        def f(x_test):
+            x_test = tensor(x_test)
+            start = timer()
+
+            # Actual K-NN query:
+            indices = KNN_fun(x_test, x_train)
+
+            elapsed = timer() - start
+
+            indices = indices.cpu().numpy()
+            return indices, elapsed
+
+        return f, elapsed
+
+    return fit
+
+
+############################################################################
+# KeOps IVF-Flat implementation
+# --------------------------------------
+#
+# KeOps IVF-Flat is an approximation method that leverages the KeOps engine.
+# It relies on the IVF-Flat approximation algorithm, which comprises 4 steps:
+# (1) split the training data into clusters using K-Means,
+# (2) find the 'a' nearest clusters to each cluster,
+# (3) find the nearest cluster to each query point, and
+# (4) perform the nearest neighbour search restricted to that cluster and to
+# the 'a' clusters that are closest to it.
+# Steps (1) and (2) are performed during fitting, while steps (3) and (4) are
+# performed at query time: they save time by reducing the number of pair-wise
+# distance computations.

+from pykeops.torch.nn.ivf import IVF
+
+
+def KNN_KeOps_ivf_flat(K, metric="euclidean", clusters=100, a=10, **kwargs):
+
+    # Setup the K-NN estimator:
+    if metric == "angular":
+        metric = "angular_full"
+    KNN = IVF(k=K, metric=metric)  # normalise is left at False: the dataset is already normalised
+
+    def fit(x_train):
+        x_train = tensor(x_train)
+        start = timer()
+        KNN.fit(x_train, clusters=clusters, a=a)
+        elapsed = timer() - start
+
+        def f(x_test):
+            x_test = tensor(x_test)
+            start = timer()
+            indices = KNN.kneighbors(x_test)
+            elapsed = timer() - start
+            indices = indices.cpu().numpy()
+
+            return indices, elapsed
+
+        return f, elapsed
+
+    return fit
+
+
+##################################################################
+# The time savings and accuracies achieved depend on the structure of the
+# underlying data, the number of clusters chosen and the 'a' parameter.
+# The algorithm slows down noticeably for more than ~200 clusters.
+# Reducing the proportion of clusters that is searched over (i.e. the
+# a/clusters ratio) speeds up the algorithm, but lowers its accuracy.
+# For structured data (e.g. MNIST), high accuracies (>90%) can be reached by
+# searching over just 10% of the clusters. However, for uniformly distributed
+# random data, over 80% of the clusters need to be searched over to attain
+# >90% accuracy.

+# Here, we propose 2 sets of parameters that work well on real data (e.g.
MNIST, GloVe): + +KNN_KeOps_gpu_IVFFlat_fast = partial(KNN_KeOps_ivf_flat, clusters=10, a=1) +KNN_KeOps_gpu_IVFFlat_slow = partial(KNN_KeOps_ivf_flat, clusters=200, a=40) + + +################################################################################ +# SciKit-Learn tree-based and bruteforce methods +# ----------------------------------------------------- +# +# As a standard baseline, we include the scikit-learn K-NN operators +# in our benchmark. Note that these routines only run on the CPU +# and don't perform well on high-dimensional datasets: + +from sklearn.neighbors import NearestNeighbors + + +def KNN_sklearn(K, metric="euclidean", algorithm=None, **kwargs): + + if metric in ["euclidean", "angular"]: + p = 2 + elif metric == "manhattan": + p = 1 + else: + raise NotImplementedError(f"The '{metric}' distance is not supported.") + + KNN_meth = NearestNeighbors(n_neighbors=K, algorithm=algorithm, p=p, n_jobs=-1) + + def fit(x_train): + # Setup the K-NN estimator: + start = timer() + KNN_fun = KNN_meth.fit(x_train).kneighbors + elapsed = timer() - start + + def f(x_test): + start = timer() + distances, indices = KNN_fun(x_test) + elapsed = timer() - start + + return indices, elapsed + + return f, elapsed + + return fit + + +KNN_sklearn_auto = partial(KNN_sklearn, algorithm="auto") +KNN_sklearn_ball_tree = partial(KNN_sklearn, algorithm="ball_tree") +KNN_sklearn_kd_tree = partial(KNN_sklearn, algorithm="kd_tree") +KNN_sklearn_brute = partial(KNN_sklearn, algorithm="brute") + + +######################################################## +# FAISS approximate and brute-force methods +# -------------------------------------------------- +# +# Finally, we include run times for the reference FAISS library: +# out of the many (excellent) packages that are showcased on the +# `ANN-benchmarks website `_, +# it is probably the most popular option and the package that provides the +# best GPU support. +# +# A first baseline method is given by the +# Hierarchical Navigable Small World graphs algorithm (**HNSW**), on the **CPU**. +# Note that the reference implementation provided by the +# `Non-Metric Space Library `_ +# would probably be even more efficient. +# + +import faiss + + +def KNN_faiss_HNSW(K, metric="euclidean", M=36, **kwargs): + def fit(x_train): + from benchmark_utils import timer + + D = x_train.shape[1] + + if metric in ["euclidean", "angular"]: + index = faiss.IndexHNSWFlat(D, M) + index.hnsw.efConstruction = 500 + else: + raise NotImplementedError(f"The '{metric}' distance is not supported.") + + # Pre-processing: + start = timer(use_torch=False) + index.add(x_train) + elapsed = timer(use_torch=False) - start + + # Return an operator for actual KNN queries: + def f(x_test, efSearch=10): + faiss.ParameterSpace().set_index_parameter(index, "efSearch", efSearch) + start = timer(use_torch=False) + distances, indices = index.search(x_test, K) + elapsed = timer(use_torch=False) - start + return indices, elapsed + + return f, elapsed + + return fit + + +################################################################## +# Choosing good parameter values for approximate nearest neighbors schemes +# is a non-trivial problem. 
+# To keep things simple, we stick to the guidelines of the +# reference `ANN-Benchmarks website `_ +# and consider two configurations with an **increasing level of precision**, +# but **slower run times**: +# + +KNN_faiss_HNSW_fast = partial(KNN_faiss_HNSW, M=4) +KNN_faiss_HNSW_slow = partial(KNN_faiss_HNSW, M=36) + + +############################################## +# We also benchmark two of the **fast GPU methods** provided by the FAISS library: +# +# - a **bruteforce "Flat"** method, with no parameters; +# - the **approximate "IVF-Flat"** method, with two main parameters (`nlist` and `nprobe`). +# +# Crucially, we do **not** benchmark the most advanced schemes +# provided by FAISS, such as the quantization-based +# **IVF-PQ** algorithm. These methods are powerful and very efficient, but come with many +# caveats and parameters to tune: we lack the expertise to +# use them properly and leave them aside for the moment. +# + +# Load FAISS on the GPU: +# (The library pre-allocates a cache file of around ~1Gb on the device.) +res = faiss.StandardGpuResources() +deviceId = 0 +res.initializeForDevice(deviceId) + + +def KNN_faiss_gpu( + K, + metric, + algorithm="flat", + nlist=8192, + nprobe=100, + m=None, + use_float16=False, + **kwargs, +): + def fit(x_train): + + D = x_train.shape[1] + + co = faiss.GpuClonerOptions() + co.useFloat16 = use_float16 + + if metric in ["euclidean", "angular"]: + + if algorithm == "flat": + index = faiss.IndexFlatL2(D) # May be used as quantizer + index = faiss.index_cpu_to_gpu(res, deviceId, index, co) + + elif algorithm == "ivfflat": + quantizer = faiss.IndexFlatL2(D) # the other index + faiss_metric = ( + faiss.METRIC_L2 + if metric == "euclidean" + else faiss.METRIC_INNER_PRODUCT + ) + index = faiss.IndexIVFFlat(quantizer, D, nlist, faiss_metric) + index = faiss.index_cpu_to_gpu(res, deviceId, index, co) + + assert not index.is_trained + index.train(x_train) # add vectors to the index + assert index.is_trained + + else: + raise NotImplementedError(f"The '{metric}' distance is not supported.") + + # Pre-processing: + start = timer(use_torch=False) + index.add(x_train) + index.nprobe = nprobe + elapsed = timer(use_torch=False) - start + + # Return an operator for actual KNN queries: + def f(x_test): + start = timer(use_torch=False) + distances, indices = index.search(x_test, K) + elapsed = timer(use_torch=False) - start + return indices, elapsed + + return f, elapsed + + return fit + + +################################################################## +# Using the FAISS-Flat bruteforce routines is straightforward: +# + +KNN_faiss_gpu_Flat = partial(KNN_faiss_gpu, algorithm="flat") + +####################################### +# On the other hand, the FAISS-IVF-Flat method is a bit more complex. +# Just as we did for the HNSW algorithm, we rely on the +# `ANN-Benchmarks guidelines `_ +# and define two routines with **increasing levels of precision**: + +KNN_faiss_gpu_IVFFlat_fast = partial( + KNN_faiss_gpu, algorithm="ivfflat", nlist=400, nprobe=1 +) +KNN_faiss_gpu_IVFFlat_slow = partial( + KNN_faiss_gpu, algorithm="ivfflat", nlist=4096, nprobe=40 +) + + +############################################## +# Benchmark parameters +# -------------------------------------------------------- +# +# Finally, we compare all our methods through a unified interface. +# +# .. 
note:: +# Fitting KeOps, JAX, PyTorch and FAISS in a single script is not easy: +# all these libraries have different failure modes, +# with some of the C++ errors thrown by JAX and FAISS being +# very hard to "catch" in a proper Python structure. +# To keep things simple, we use environment variables +# to make a few "pre-compilation runs" prior to the +# final benchmark that is rendered on this website. + +import os + +getenv = lambda s: bool(os.getenv(s, "False").lower() in ["true", "1"]) + +keops_only = getenv("KEOPS_DOC_PRECOMPILE") +jax_only = getenv("KEOPS_DOC_PRECOMPILE_JAX") + + +def run_KNN_benchmark(name, loops=[1]): + + # Load the dataset and some info: + dataset = generate_samples(name)(1) + N_train, dimension = dataset["train"].shape + N_test, _ = dataset["test"].shape + metric = dataset["metric"] + + # Routines to benchmark: + if keops_only: + routines = [(KNN_KeOps, "KeOps (GPU)", {})] + elif jax_only: + routines = [(KNN_JAX_batch_loop, "JAX (small batches, GPU)", {})] + else: + routines = [ + (KNN_KeOps_gpu_IVFFlat_fast, "IVF-Flat Keops (GPU, nprobe=1)", {}), + (KNN_KeOps_gpu_IVFFlat_slow, "IVF-Flat Keops (GPU, nprobe=40)", {}), + (KNN_KeOps, "KeOps (GPU)", {}), + (KNN_faiss_gpu_Flat, "FAISS-Flat (GPU)", {}), + (KNN_faiss_gpu_IVFFlat_fast, "FAISS-IVF-Flat (GPU, nprobe=1)", {}), + (KNN_faiss_gpu_IVFFlat_slow, "FAISS-IVF-Flat (GPU, nprobe=40)", {}), + (KNN_torch, "PyTorch (GPU)", {}), + (KNN_torch_batch_loop, "PyTorch (small batches, GPU)", {}), + (KNN_JAX_batch_loop, "JAX (small batches, GPU)", {}), + (KNN_faiss_HNSW_fast, "FAISS-HNSW (CPU, M=4)", {}), + (KNN_faiss_HNSW_slow, "FAISS-HNSW (CPU, M=36)", {}), + (KNN_sklearn_ball_tree, "sklearn, Ball-tree (CPU)", {}), + (KNN_sklearn_kd_tree, "sklearn, KD-tree (CPU)", {}), + (KNN_sklearn_brute, "sklearn, bruteforce (CPU)", {}), + ] + + # Actual run: + full_benchmark( + f"K-NN search on {name}: {N_test:,} queries on a dataset of {N_train:,} points\nin dimension {dimension:,} with a {metric} metric.", + routines, + generate_samples(name), + min_time=1e-4, + max_time=1 if (keops_only or jax_only) else 10, + loops=loops, + problem_sizes=Ks, + xlabel="Number of neighbours K", + frequency=True, + ylabel="Queries per second (Hz = 1/s)", + legend_location="upper right", + linestyles=[ + "o-", + "s-", + "^:", + "<:", + "v-", + "x-", + "+-", + "*--", + "p--", + "s-.", + "^-.", + "<-.", + ], + ) + + +############################################## +# Random samples in a Euclidean space +# -------------------------------------------------------- +# +# Small dataset of **10k points in dimension 3**, as is typical in +# e.g. **shape analysis** and point cloud processing. +# In this scenario, bruteforce approaches are most efficient: +# **KeOps slightly edges FAISS-Flat**, with both methods out-performing +# other routines by **an order of magnitude**. +# Note that the HNSW, IVF-Flat and scikit-learn functions +# incur a significant "training" pre-processing time, +# detailed below the curves. +# + +run_KNN_benchmark("R^D a", loops=[10, 1]) + +######################################## +# Large dataset of **1M points in dimension 3**, as is typical +# in **computer graphics**. +# In this setting, taking some time to create a multiscale +# index of the input dataset can be worthwhile: +# the IVF-Flat and HNSW methods provide **faster queries** at the cost +# of significant **pre-processing times**. +# Among "on-the-fly" bruteforce methods, KeOps edges +# the FAISS-Flat routine and is the most competitive option. 
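############################################################################
# The pre-processing vs. query trade-off described above is easy to probe
# outside of the benchmark harness; a minimal sketch with the KeOps IVF class,
# assuming a CUDA GPU (the data and parameter values below are illustrative
# placeholders, not the settings used by the benchmark):

import time
import torch
from pykeops.torch.nn.ivf import IVF

x_train_ = torch.randn(10 ** 6, 3).cuda()  # 1M points in dimension 3
x_test_ = torch.randn(10 ** 4, 3).cuda()  # 10k queries

knn_ = IVF(k=10)  # Euclidean metric by default
t_0 = time.time()
knn_.fit(x_train_, clusters=50, a=5)  # Pre-processing: K-Means + cluster graph
torch.cuda.synchronize()  # Wait for the GPU so that the timings are meaningful
t_1 = time.time()
neighbors_ = knn_.kneighbors(x_test_)  # Query: search restricted to nearby clusters
torch.cuda.synchronize()
t_2 = time.time()
print(f"IVF fit: {t_1 - t_0:.3f}s, IVF query: {t_2 - t_1:.3f}s")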
+ +run_KNN_benchmark("R^D b") + + +######################################## +# Large dataset of **1M points in dimension 10**, +# as can be typical in **low-dimensional machine learning**. +# In this setting, approximate strategies such as the IVF-Flat method +# are **most competitive** - and we would expect the IVF-PQ routines to perform +# even better! +# +# .. note:: +# We don't display CPU-based methods with pre-processing +# times longer than 60s, but stress that these routines can +# provide excellent performances in "offline" scenarios. + +run_KNN_benchmark("R^D c") + + +######################################## +# Large dataset of **1M points in dimension 100**, +# with **random Gaussian samples**. +# Crucially, when the dataset is high-dimensional and has +# little to no geometric structure, **bruteforce methods become relevant once again**: +# FAISS-Flat and KeOps provide the only two reasonable run times. +# As detailed in `our high-dimensional benchmarks `_, +# the cuBLAS-based routines of FAISS edge our KeOps implementation +# when the dimension of the ambient space D exceeds 50-100. +# +# One of our top priorities for early 2021 is to close this gap +# with improved CUDA schemes. Adding support for +# some of the new hardware features of Ampere GPUs (Tensor cores, +# quantized numerical types, etc.) should also help +# to improve performances across the board. + +run_KNN_benchmark("R^D d") + + +######################################## +# Random samples in other spaces +# ------------------------------------------------------- +# +# **Cosine similarity metric with 1M points in dimension 10**, +# as can be typical in low-dimensional machine learning. +# This metric is generally well-supported by standard libraries: +# using efficient matrix-matrix products, +# it is even easier to implement than the squared Euclidean distance. +# +# Unsurprisingly, run times follow closely the trends +# of the previous examples. +# In dimension 10, approximate IVF-like strategies provide +# the largest amount of queries per second. +# KeOps remains competitive among bruteforce methods, +# without any pre-processing time. + +run_KNN_benchmark("S^{D-1}") + + +######################################## +# The picture changes completely +# once we start working with less common formulas +# such as the **Manhattan-L1 metric**. +# In this scenario, neither cuBLAS nor FAISS can be used and +# KeOps remain the only competitive library for K-NN search on the GPU. +# This is true with **1M points in dimension 10**: +# + +run_KNN_benchmark("R^D f") + + +######################################## +# **1M point in dimension 100**, or any other dataset: + +run_KNN_benchmark("R^D g") + + +######################################## +# The same lesson holds in e.g. hyperbolic spaces. +# In the example below, we perform K-NN queries +# for the hyperbolic metric with **1M points in the Poincare half-plane of dimension 10**. +# The run times for KeOps remain in line with the "Euclidean" benchmarks +# and **orders of magnitude faster** than standard PyTorch and JAX implementations. + +run_KNN_benchmark("H^D") + + +######################################## +# Standard datasets +# -------------------------------------------------------- +# +# The benchmarks above were all performed on random Gaussian samples. +# These results provide an informative baseline... +# But in practice, most real-life datasets present a +# **geometric structure** that can be leveraged by clever algorithms. 
+# To measure the performances of bruteforce and IVF-like methods in
+# "realistic" machine learning scenarios, we now benchmark
+# our routines on several `standard datasets `_.
+#
+# First of all, on the well-known **MNIST collection of handwritten digits**:
+# a collection of 60k 28-by-28 images, encoded as vectors
+# of dimension 784 and endowed with the **Euclidean metric**.
+# This dataset is relatively **small** (less than 100k training samples)
+# but **high-dimensional** (D > 50) and highly **clustered** around
+# a dozen prototypes (the digits 0, 1, ..., 9 and their variants).
+# Unsurprisingly, it is handled much more efficiently by the FAISS routines
+# than by our bruteforce KeOps implementation.
+#

+run_KNN_benchmark("MNIST a")
+
+
+########################################
+# Note, however, that KeOps remains the only viable option
+# to work easily with less common metrics such as the Manhattan-L1 norm:

+run_KNN_benchmark("MNIST b")
+
+
+########################################
+# To conclude this benchmark, we evaluate our routines
+# on the `GloVe word embeddings `_
+# for natural language processing:
+# **1.2M words**, represented as vectors of **dimension 25-100** and
+# compared with each other using the **cosine similarity metric**.
+#
+# In dimension 25, KeOps performs on par with the FAISS-Flat bruteforce
+# routines. Both methods are slower than IVF-like algorithms
+# in terms of queries per second:

+run_KNN_benchmark("GloVe25")
+
+
+########################################
+# In dimension 100, the pre-processing times associated
+# with IVF-like methods increase significantly, while
+# the FAISS-Flat routine edges the KeOps engine
+# by a sizeable margin:

+run_KNN_benchmark("GloVe100")
+
+plt.show()
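As an aside, the accuracy figures quoted for the IVF-Flat routines above correspond to a recall measure that is straightforward to compute once the exact neighbors are known. A minimal sketch (the helper name is ours), assuming that ``exact`` and ``approx`` are two (N, K) integer arrays of neighbor indices and that the ordering within the K neighbors is ignored:

import numpy as np


def recall_at_k(exact, approx):
    # Fraction of the true K nearest neighbors that the approximate
    # method retrieves, averaged over the N queries:
    hits = sum(len(np.intersect1d(e, a)) for e, a in zip(exact, approx))
    return hits / exact.size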
Going forward, a major priority for KeOps +is to get closer to the reference run times of the FAISS library +in all settings. +We intend to provide a versatile, generic and pythonic code that is easy to +modify and integrate in other projects. +Hopefully, this will **stimulate research on non-Euclidean metrics**, +such as hyperbolic or discrete spaces. + +.. note:: + Note that timings are always subject to change: + libraries and hardware get better with time. + If you find a way of improving these benchmarks, please + `let us know `_! + +""" + + +############################################## +# Setup +# --------------------- +# +# First, we load some utility routines to run and display benchmarks: + +import numpy as np +import torch +from matplotlib import pyplot as plt +from functools import partial + +from benchmark_utils import ( + full_benchmark, + timer, + tensor, + int_tensor, + jax_tensor, + globalize, +) +from dataset_utils import generate_samples + +use_cuda = torch.cuda.is_available() + + +############################################## +# We then specify the values of K that we will inspect: + +Ks = [1, 10, 50, 100] # Numbers of neighbors to find + + +############################################## +# PyTorch bruteforce implementation +# ------------------------------------------ +# +# As a first baseline, we benchmark a PyTorch K-NN routine on the GPU. +# We implement a collection of distance-like matrices between +# point clouds :math:`(x_i)` and :math:`(y_j)`: +# +# - The squared **Euclidean** distance :math:`\|x-y\|^2 = \sum_k (x[k] - y[k])^2`. +# - The **Manhattan** distance :math:`\|x-y\|_{L^1} = \sum_k |x[k] - y[k]|`. +# - The **cosine similarity** :math:`\langle x, y\rangle = \sum_k (x[k] \cdot y[k])`. +# - The **hyperbolic** distance on the Poincare half-space :math:`\mathbb{H}` +# of vectors :math:`x` such that :math:`x[0] > 0`, +# :math:`\text{d}_{\mathbb{H}}(x, y)= \text{arcosh}(1+ \|x-y\|^2 / (2 \,x[0]y[0]))`. +# Since :math:`d \mapsto \text{arcosh}(1+d/2)` is increasing, +# we only compute the pseudo distance +# :math:`\|x-y\|^2 / x[0]y[0]`. +# +# .. note:: +# Expanding the squared norm :math:`\|x-y\|^2` as a sum +# :math:`\|x\|^2 - 2 \langle x, y \rangle + \|y\|^2` allows us +# to leverage the fast matrix-matrix product of the BLAS/cuBLAS +# libraries. We rely on this identity whenever possible. +# + + +def KNN_torch_fun(x_train, x_train_norm, x_test, K, metric): + + largest = False # Default behaviour is to look for the smallest values + + if metric == "euclidean": + x_test_norm = (x_test ** 2).sum(-1) + diss = ( + x_test_norm.view(-1, 1) + + x_train_norm.view(1, -1) + - 2 * x_test @ x_train.t() # Rely on cuBLAS for better performance! + ) + + elif metric == "manhattan": + diss = (x_test[:, None, :] - x_train[None, :, :]).abs().sum(dim=2) + + elif metric == "angular": + diss = x_test @ x_train.t() + largest = True + + elif metric == "hyperbolic": + x_test_norm = (x_test ** 2).sum(-1) + diss = ( + x_test_norm.view(-1, 1) + + x_train_norm.view(1, -1) + - 2 * x_test @ x_train.t() + ) + diss /= x_test[:, 0].view(-1, 1) * x_train[:, 0].view(1, -1) + else: + raise NotImplementedError(f"The '{metric}' distance is not supported.") + + return diss.topk(K, dim=1, largest=largest).indices + + +############################################################################ +# We rely on the **tensorized** +# implementation above to define a simple K-NN query operator. 
+# We follow the scikit-learn API with "train" and "test" methods: + + +def KNN_torch(K, metric="euclidean", **kwargs): + def fit(x_train): + # Setup the K-NN estimator: + x_train = tensor(x_train) + start = timer() + # The "training" time here should be negligible: + x_train_norm = (x_train ** 2).sum(-1) + elapsed = timer() - start + + def f(x_test): + x_test = tensor(x_test) + start = timer() + + # Actual K-NN query: + out = KNN_torch_fun(x_train, x_train_norm, x_test, K, metric) + + elapsed = timer() - start + indices = out.cpu().numpy() + return indices, elapsed + + return f, elapsed + + return fit + + +############################################################################# +# Unfortunately, the code above creates a full +# :math:`\mathrm{N}_{\text{queries}}\times \mathrm{N}_{\text{points}}` +# distance matrix that may not fit in the GPU memory. +# To work around this problem and avoid memory overflows, we benchmark a second implementation +# that works with small batches of queries at time: + + +def KNN_torch_batch_loop(K, metric="euclidean", **kwargs): + def fit(x_train): + # Setup the K-NN estimator: + x_train = tensor(x_train) + Ntrain, D = x_train.shape + start = timer() + # The "training" time here should be negligible: + x_train_norm = (x_train ** 2).sum(-1) + elapsed = timer() - start + + def f(x_test): + x_test = tensor(x_test) + + # Estimate the largest reasonable batch size: + Ntest = x_test.shape[0] + av_mem = int(5e8) # 500 Mb of GPU memory per batch + # Remember that a vector of D float32 number takes up 4*D bytes: + Ntest_loop = min(max(1, av_mem // (4 * D * Ntrain)), Ntest) + Nloop = (Ntest - 1) // Ntest_loop + 1 + out = int_tensor(Ntest, K) + + start = timer() + # Actual K-NN query: + for k in range(Nloop): + x_test_k = x_test[Ntest_loop * k : Ntest_loop * (k + 1), :] + out[Ntest_loop * k : Ntest_loop * (k + 1), :] = KNN_torch_fun( + x_train, x_train_norm, x_test_k, K, metric + ) + + # torch.cuda.empty_cache() + + elapsed = timer() - start + indices = out.cpu().numpy() + return indices, elapsed + + return f, elapsed + + return fit + + +############################################################################ +# JAX bruteforce implementation +# ------------------------------------------ +# +# We now re-implement the same method with JAX-XLA routines. +# +# Note that we run this script with the command line option +# ``XLA_PYTHON_CLIENT_ALLOCATOR=platform``: this prevents JAX +# from locking up GPU memory and allows +# us to benchmark JAX, FAISS, PyTorch and KeOps next to each other. +# This may impact performances - but as a general rule, +# we found JAX to be orders of magnitude slower than PyTorch +# and KeOps in these benchmarks, even with unrestrained access to the GPU device memory. +# Needless to say, this is subject to change with future releases: +# we stay tuned to keep this documentation up to date and welcome +# all suggestions! 
+ +from functools import partial +import jax +import jax.numpy as jnp + + +@partial(jax.jit, static_argnums=(2, 3)) +def knn_jax_fun(x_train, x_test, K, metric): + + if metric == "euclidean": + diss = ( + (x_test ** 2).sum(-1)[:, None] + + (x_train ** 2).sum(-1)[None, :] + - 2 * x_test @ x_train.T + ) + + elif metric == "manhattan": + diss = jax.lax.abs(x_test[:, None, :] - x_train[None, :, :]).sum(-1) + + elif metric == "angular": + diss = -x_test @ x_train.T + + elif metric == "hyperbolic": + diss = ( + (x_test ** 2).sum(-1)[:, None] + + (x_train ** 2).sum(-1)[None, :] + - 2 * x_test @ x_train.T + ) + diss = diss / (x_test[:, 0][:, None] * x_train[:, 0][None, :]) + + else: + raise NotImplementedError(f"The '{metric}' distance is not supported.") + + indices = jax.lax.top_k(-diss, K)[1] + return indices + + +############################################################################ +# Straightforward K-NN query, with a scikit-learn interface: + + +def KNN_JAX(K, metric="euclidean", **kwargs): + def fit(x_train): + + # Setup the K-NN estimator: + start = timer(use_torch=False) + x_train = jax_tensor(x_train) + elapsed = timer(use_torch=False) - start + + def f(x_test): + x_test = jax_tensor(x_test) + + # Actual K-NN query: + start = timer(use_torch=False) + indices = knn_jax_fun(x_train, x_test, K, metric) + indices = np.array(indices) + elapsed = timer(use_torch=False) - start + return indices, elapsed + + return f, elapsed + + return fit + + +############################################################################# +# Smarter routine, which relies on small batches to avoid memory overflows: + + +def KNN_JAX_batch_loop(K, metric="euclidean", **kwargs): + def fit(x_train): + + # Setup the K-NN estimator: + start = timer(use_torch=False) + x_train = jax_tensor(x_train) + elapsed = timer(use_torch=False) - start + + def f(x_test): + x_test = jax_tensor(x_test) + + # Estimate the largest reasonable batch size + av_mem = int(5e8) # 500 Mb + Ntrain, D = x_train.shape + Ntest = x_test.shape[0] + Ntest_loop = min(max(1, av_mem // (4 * D * Ntrain)), Ntest) + Nloop = (Ntest - 1) // Ntest_loop + 1 + indices = np.zeros((Ntest, K), dtype=int) + + start = timer(use_torch=False) + # Actual K-NN query: + for k in range(Nloop): + x_test_k = x_test[Ntest_loop * k : Ntest_loop * (k + 1), :] + indices[Ntest_loop * k : Ntest_loop * (k + 1), :] = knn_jax_fun( + x_train, x_test_k, K, metric + ) + elapsed = timer(use_torch=False) - start + return indices, elapsed + + return f, elapsed + + return fit + + +############################################################################ +# KeOps bruteforce implementation +# -------------------------------------- +# +# KeOps lets us implement a bruteforce K-NN search efficiently, +# **without having to worry about memory overflows**. +# We perform all the "symbolic" computations on the distance formulas +# ahead of time, using the advanced ``Vi`` and ``Vj`` helpers that are described +# in `this tutorial <../_auto_tutorials/a_LazyTensors/plot_lazytensors_c.html>`_. +# Note that we could also rely on the simpler ``LazyTensor`` syntax, +# at the cost of a small overhead that is negligible in most settings. 
+
+from pykeops.torch import Vi, Vj
+
+
+def KNN_KeOps(K, metric="euclidean", **kwargs):
+    def fit(x_train):
+        # Setup the K-NN estimator:
+        x_train = tensor(x_train)
+        start = timer()
+
+        # Encoding as KeOps LazyTensors:
+        D = x_train.shape[1]
+        X_i = Vi(0, D)  # Purely symbolic "i" variable, without any data array
+        X_j = Vj(1, D)  # Purely symbolic "j" variable, without any data array
+
+        # Symbolic distance matrix:
+        if metric == "euclidean":
+            D_ij = ((X_i - X_j) ** 2).sum(-1)
+        elif metric == "manhattan":
+            D_ij = (X_i - X_j).abs().sum(-1)
+        elif metric == "angular":
+            D_ij = -(X_i | X_j)
+        elif metric == "hyperbolic":
+            D_ij = ((X_i - X_j) ** 2).sum(-1) / (X_i[0] * X_j[0])
+        else:
+            raise NotImplementedError(f"The '{metric}' distance is not supported.")
+
+        # K-NN query operator:
+        KNN_fun = D_ij.argKmin(K, dim=1)
+
+        # N.B.: The "training" time here should be negligible.
+        elapsed = timer() - start
+
+        def f(x_test):
+            x_test = tensor(x_test)
+            start = timer()
+
+            # Actual K-NN query:
+            indices = KNN_fun(x_test, x_train)
+
+            elapsed = timer() - start
+
+            indices = indices.cpu().numpy()
+            return indices, elapsed
+
+        return f, elapsed
+
+    return fit
+
+
+############################################################################
+# KeOps IVF-Flat implementation
+# --------------------------------------
+#
+# KeOps IVF-Flat is an approximation method that leverages the KeOps engine.
+# It relies on the IVF-Flat algorithm, which comprises four steps:
+#
+# 1. split the training data into clusters using K-means;
+# 2. find the 'a' nearest clusters to each cluster;
+# 3. find the nearest cluster to each query point;
+# 4. perform the nearest neighbour search within this nearest cluster and
+#    its 'a' nearest clusters only.
+#
+# Steps (1) and (2) are performed when fitting, while steps (3) and (4) are
+# performed at query time. Steps (3) and (4) save time at query time by
+# reducing the number of pairwise distance computations.
+
+from pykeops.torch.nn.ivf import IVF
+
+
+def KNN_KeOps_ivf_flat(K, metric="euclidean", clusters=100, a=10, **kwargs):
+
+    # Setup the K-NN estimator:
+    if metric == "angular":
+        metric = "angular_full"
+    KNN = IVF(k=K, metric=metric)  # keep the default normalise=False: the datasets are already normalised
+
+    def fit(x_train):
+        x_train = tensor(x_train)
+        start = timer()
+        KNN.fit(x_train, clusters=clusters, a=a)
+        elapsed = timer() - start
+
+        def f(x_test):
+            x_test = tensor(x_test)
+            start = timer()
+            indices = KNN.kneighbors(x_test)
+            elapsed = timer() - start
+            indices = indices.cpu().numpy()
+
+            return indices, elapsed
+
+        return f, elapsed
+
+    return fit
+
+
+##################################################################
+# The time savings and accuracies achieved depend on the structure of the
+# underlying data, the number of clusters and the 'a' parameter.
+# The algorithm slows down noticeably for clusters > 200.
+# Reducing the proportion of clusters searched over (i.e. the a/clusters
+# ratio) speeds up the algorithm, but lowers its accuracy.
+# For structured data (e.g. MNIST), accuracies above 90% can be reached by
+# searching over just 10% of the clusters. However, for uniformly
+# distributed random data, over 80% of the clusters need to be searched
+# over to attain an accuracy above 90%.
+
+# Here, we propose 2 sets of parameters that work well on real data (e.g.
MNIST, GloVe): + +KNN_KeOps_gpu_IVFFlat_fast = partial(KNN_KeOps_ivf_flat, clusters=10, a=1) +KNN_KeOps_gpu_IVFFlat_slow = partial(KNN_KeOps_ivf_flat, clusters=200, a=40) + + +################################################################################ +# SciKit-Learn tree-based and bruteforce methods +# ----------------------------------------------------- +# +# As a standard baseline, we include the scikit-learn K-NN operators +# in our benchmark. Note that these routines only run on the CPU +# and don't perform well on high-dimensional datasets: + +from sklearn.neighbors import NearestNeighbors + + +def KNN_sklearn(K, metric="euclidean", algorithm=None, **kwargs): + + if metric in ["euclidean", "angular"]: + p = 2 + elif metric == "manhattan": + p = 1 + else: + raise NotImplementedError(f"The '{metric}' distance is not supported.") + + KNN_meth = NearestNeighbors(n_neighbors=K, algorithm=algorithm, p=p, n_jobs=-1) + + def fit(x_train): + # Setup the K-NN estimator: + start = timer() + KNN_fun = KNN_meth.fit(x_train).kneighbors + elapsed = timer() - start + + def f(x_test): + start = timer() + distances, indices = KNN_fun(x_test) + elapsed = timer() - start + + return indices, elapsed + + return f, elapsed + + return fit + + +KNN_sklearn_auto = partial(KNN_sklearn, algorithm="auto") +KNN_sklearn_ball_tree = partial(KNN_sklearn, algorithm="ball_tree") +KNN_sklearn_kd_tree = partial(KNN_sklearn, algorithm="kd_tree") +KNN_sklearn_brute = partial(KNN_sklearn, algorithm="brute") + + +######################################################## +# FAISS approximate and brute-force methods +# -------------------------------------------------- +# +# Finally, we include run times for the reference FAISS library: +# out of the many (excellent) packages that are showcased on the +# `ANN-benchmarks website `_, +# it is probably the most popular option and the package that provides the +# best GPU support. +# +# A first baseline method is given by the +# Hierarchical Navigable Small World graphs algorithm (**HNSW**), on the **CPU**. +# Note that the reference implementation provided by the +# `Non-Metric Space Library `_ +# would probably be even more efficient. +# + +import faiss + + +def KNN_faiss_HNSW(K, metric="euclidean", M=36, **kwargs): + def fit(x_train): + from benchmark_utils import timer + + D = x_train.shape[1] + + if metric in ["euclidean", "angular"]: + index = faiss.IndexHNSWFlat(D, M) + index.hnsw.efConstruction = 500 + else: + raise NotImplementedError(f"The '{metric}' distance is not supported.") + + # Pre-processing: + start = timer(use_torch=False) + index.add(x_train) + elapsed = timer(use_torch=False) - start + + # Return an operator for actual KNN queries: + def f(x_test, efSearch=10): + faiss.ParameterSpace().set_index_parameter(index, "efSearch", efSearch) + start = timer(use_torch=False) + distances, indices = index.search(x_test, K) + elapsed = timer(use_torch=False) - start + return indices, elapsed + + return f, elapsed + + return fit + + +################################################################## +# Choosing good parameter values for approximate nearest neighbors schemes +# is a non-trivial problem. 
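+# For instance, with the wrappers defined above, a single HNSW index can be
+# queried at several levels of precision through the ``efSearch`` parameter.
+# A quick sketch (assuming that ``x_train`` and ``x_test`` are float32 arrays):
+#
+# .. code-block:: python
+#
+#     f, _ = KNN_faiss_HNSW(K=10)(x_train)       # build the index once (slow)
+#     indices_fast, _ = f(x_test, efSearch=10)   # fast queries, lower recall
+#     indices_slow, _ = f(x_test, efSearch=200)  # slower queries, higher recall
+#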
+# To keep things simple, we stick to the guidelines of the
+# reference `ANN-Benchmarks website `_
+# and consider two configurations with an **increasing level of precision**,
+# but **slower run times**:
+#
+
+KNN_faiss_HNSW_fast = partial(KNN_faiss_HNSW, M=4)
+KNN_faiss_HNSW_slow = partial(KNN_faiss_HNSW, M=36)
+
+
+##############################################
+# We also benchmark two of the **fast GPU methods** provided by the FAISS library:
+#
+# - a **bruteforce "Flat"** method, with no parameters;
+# - the **approximate "IVF-Flat"** method, with two main parameters (`nlist` and `nprobe`).
+#
+# Crucially, we do **not** benchmark the most advanced schemes
+# provided by FAISS, such as the quantization-based
+# **IVF-PQ** algorithm. These methods are powerful and very efficient, but come with many
+# caveats and parameters to tune: we lack the expertise to
+# use them properly and leave them aside for the moment.
+#
+
+# Load FAISS on the GPU:
+# (The library pre-allocates a cache file of around 1Gb on the device.)
+res = faiss.StandardGpuResources()
+deviceId = 0
+res.initializeForDevice(deviceId)
+
+
+def KNN_faiss_gpu(
+    K,
+    metric,
+    algorithm="flat",
+    nlist=8192,
+    nprobe=100,
+    m=None,
+    use_float16=False,
+    **kwargs,
+):
+    def fit(x_train):
+
+        D = x_train.shape[1]
+
+        co = faiss.GpuClonerOptions()
+        co.useFloat16 = use_float16
+
+        if metric in ["euclidean", "angular"]:
+
+            if algorithm == "flat":
+                index = faiss.IndexFlatL2(D)  # May be used as quantizer
+                index = faiss.index_cpu_to_gpu(res, deviceId, index, co)
+
+            elif algorithm == "ivfflat":
+                quantizer = faiss.IndexFlatL2(D)  # coarse quantizer for the clustering
+                faiss_metric = (
+                    faiss.METRIC_L2
+                    if metric == "euclidean"
+                    else faiss.METRIC_INNER_PRODUCT
+                )
+                index = faiss.IndexIVFFlat(quantizer, D, nlist, faiss_metric)
+                index = faiss.index_cpu_to_gpu(res, deviceId, index, co)
+
+                assert not index.is_trained
+                index.train(x_train)  # train the coarse quantizer on the data
+                assert index.is_trained
+
+        else:
+            raise NotImplementedError(f"The '{metric}' distance is not supported.")
+
+        # Pre-processing:
+        start = timer(use_torch=False)
+        index.add(x_train)
+        index.nprobe = nprobe
+        elapsed = timer(use_torch=False) - start
+
+        # Return an operator for actual KNN queries:
+        def f(x_test):
+            start = timer(use_torch=False)
+            distances, indices = index.search(x_test, K)
+            elapsed = timer(use_torch=False) - start
+            return indices, elapsed
+
+        return f, elapsed
+
+    return fit
+
+
+##################################################################
+# Using the FAISS-Flat bruteforce routines is straightforward:
+#
+
+KNN_faiss_gpu_Flat = partial(KNN_faiss_gpu, algorithm="flat")
+
+#######################################
+# On the other hand, the FAISS-IVF-Flat method is a bit more complex.
+# Just as we did for the HNSW algorithm, we rely on the
+# `ANN-Benchmarks guidelines `_
+# and define two routines with **increasing levels of precision**:
+
+KNN_faiss_gpu_IVFFlat_fast = partial(
+    KNN_faiss_gpu, algorithm="ivfflat", nlist=400, nprobe=1
+)
+KNN_faiss_gpu_IVFFlat_slow = partial(
+    KNN_faiss_gpu, algorithm="ivfflat", nlist=4096, nprobe=40
+)
+
+
+##############################################
+# Benchmark parameters
+# --------------------------------------------------------
+#
+# Finally, we compare all our methods through a unified interface.
+#
+# .. note::
+#   Fitting KeOps, JAX, PyTorch and FAISS in a single script is not easy:
+#   all these libraries have different failure modes,
+#   with some of the C++ errors thrown by JAX and FAISS being
+#   very hard to "catch" in a proper Python structure.
+#   To keep things simple, we use environment variables
+#   to make a few "pre-compilation runs" prior to the
+#   final benchmark that is rendered on this website.
+
+import os
+
+getenv = lambda s: bool(os.getenv(s, "False").lower() in ["true", "1"])
+
+keops_only = getenv("KEOPS_DOC_PRECOMPILE")
+jax_only = getenv("KEOPS_DOC_PRECOMPILE_JAX")
+
+
+def run_KNN_benchmark(name, loops=[1]):
+
+    # Load the dataset and some info:
+    dataset = generate_samples(name)(1)
+    N_train, dimension = dataset["train"].shape
+    N_test, _ = dataset["test"].shape
+    metric = dataset["metric"]
+
+    # Routines to benchmark:
+    if keops_only:
+        routines = [(KNN_KeOps, "KeOps (GPU)", {})]
+    elif jax_only:
+        routines = [(KNN_JAX_batch_loop, "JAX (small batches, GPU)", {})]
+    else:
+        routines = [
+            (KNN_KeOps_gpu_IVFFlat_fast, "IVF-Flat KeOps (GPU, a=1)", {}),
+            (KNN_KeOps_gpu_IVFFlat_slow, "IVF-Flat KeOps (GPU, a=40)", {}),
+            (KNN_KeOps, "KeOps (GPU)", {}),
+            (KNN_faiss_gpu_Flat, "FAISS-Flat (GPU)", {}),
+            (KNN_faiss_gpu_IVFFlat_fast, "FAISS-IVF-Flat (GPU, nprobe=1)", {}),
+            (KNN_faiss_gpu_IVFFlat_slow, "FAISS-IVF-Flat (GPU, nprobe=40)", {}),
+            (KNN_torch, "PyTorch (GPU)", {}),
+            (KNN_torch_batch_loop, "PyTorch (small batches, GPU)", {}),
+            (KNN_JAX_batch_loop, "JAX (small batches, GPU)", {}),
+            (KNN_faiss_HNSW_fast, "FAISS-HNSW (CPU, M=4)", {}),
+            (KNN_faiss_HNSW_slow, "FAISS-HNSW (CPU, M=36)", {}),
+            (KNN_sklearn_ball_tree, "sklearn, Ball-tree (CPU)", {}),
+            (KNN_sklearn_kd_tree, "sklearn, KD-tree (CPU)", {}),
+            (KNN_sklearn_brute, "sklearn, bruteforce (CPU)", {}),
+        ]
+
+    # Actual run:
+    full_benchmark(
+        f"K-NN search on {name}: {N_test:,} queries on a dataset of {N_train:,} points\nin dimension {dimension:,} with a {metric} metric.",
+        routines,
+        generate_samples(name),
+        min_time=1e-4,
+        max_time=1 if (keops_only or jax_only) else 10,
+        loops=loops,
+        problem_sizes=Ks,
+        xlabel="Number of neighbours K",
+        frequency=True,
+        ylabel="Queries per second (Hz = 1/s)",
+        legend_location="upper right",
+        linestyles=[
+            "o-",
+            "s-",
+            "^:",
+            "<:",
+            "v-",
+            "x-",
+            "+-",
+            "*--",
+            "p--",
+            "s-.",
+            "^-.",
+            "<-.",
+        ],
+    )
+
+
+##############################################
+# Random samples in a Euclidean space
+# --------------------------------------------------------
+#
+# Small dataset of **10k points in dimension 3**, as is typical in
+# e.g. **shape analysis** and point cloud processing.
+# In this scenario, bruteforce approaches are most efficient:
+# **KeOps slightly edges FAISS-Flat**, with both methods outperforming
+# other routines by **an order of magnitude**.
+# Note that the HNSW, IVF-Flat and scikit-learn functions
+# incur a significant "training" pre-processing time,
+# detailed below the curves.
+#
+
+run_KNN_benchmark("R^D a", loops=[10, 1])
+
+########################################
+# Large dataset of **1M points in dimension 3**, as is typical
+# in **computer graphics**.
+# In this setting, taking some time to create a multiscale
+# index of the input dataset can be worthwhile:
+# the IVF-Flat and HNSW methods provide **faster queries** at the cost
+# of significant **pre-processing times**.
+# Among "on-the-fly" bruteforce methods, KeOps edges
+# the FAISS-Flat routine and is the most competitive option.
+
+run_KNN_benchmark("R^D b")
+
+
+########################################
+# Large dataset of **1M points in dimension 10**,
+# as can be typical in **low-dimensional machine learning**.
+# In this setting, approximate strategies such as the IVF-Flat method
+# are **most competitive** - and we would expect the IVF-PQ routines to perform
+# even better!
+#
+# .. note::
+#   We don't display CPU-based methods with pre-processing
+#   times longer than 60s, but stress that these routines can
+#   provide excellent performances in "offline" scenarios.
+
+run_KNN_benchmark("R^D c")
+
+
+########################################
+# Large dataset of **1M points in dimension 100**,
+# with **random Gaussian samples**.
+# Crucially, when the dataset is high-dimensional and has
+# little to no geometric structure, **bruteforce methods become relevant once again**:
+# FAISS-Flat and KeOps provide the only two reasonable run times.
+# As detailed in `our high-dimensional benchmarks `_,
+# the cuBLAS-based routines of FAISS edge our KeOps implementation
+# when the dimension of the ambient space D exceeds 50-100.
+#
+# One of our top priorities for early 2021 is to close this gap
+# with improved CUDA schemes. Adding support for
+# some of the new hardware features of Ampere GPUs (Tensor cores,
+# quantized numerical types, etc.) should also help
+# to improve performances across the board.
+
+run_KNN_benchmark("R^D d")
+
+
+########################################
+# Random samples in other spaces
+# -------------------------------------------------------
+#
+# **Cosine similarity metric with 1M points in dimension 10**,
+# as can be typical in low-dimensional machine learning.
+# This metric is generally well-supported by standard libraries:
+# using efficient matrix-matrix products,
+# it is even easier to implement than the squared Euclidean distance.
+#
+# Unsurprisingly, run times follow closely the trends
+# of the previous examples.
+# In dimension 10, approximate IVF-like strategies provide
+# the largest number of queries per second.
+# KeOps remains competitive among bruteforce methods,
+# without any pre-processing time.
+
+run_KNN_benchmark("S^{D-1}")
+
+
+########################################
+# The picture changes completely
+# once we start working with less common formulas
+# such as the **Manhattan-L1 metric**.
+# In this scenario, neither cuBLAS nor FAISS can be used and
+# KeOps remains the only competitive library for K-NN search on the GPU.
+# This is true with **1M points in dimension 10**:
+#
+
+run_KNN_benchmark("R^D f")
+
+
+########################################
+# **1M points in dimension 100**, or any other dataset:
+
+run_KNN_benchmark("R^D g")
+
+
+########################################
+# The same lesson holds in e.g. hyperbolic spaces.
+# In the example below, we perform K-NN queries
+# for the hyperbolic metric with **1M points in the Poincaré half-plane of dimension 10**.
+# The run times for KeOps remain in line with the "Euclidean" benchmarks
+# and **orders of magnitude faster** than standard PyTorch and JAX implementations.
+
+run_KNN_benchmark("H^D")
+
+
+########################################
+# Standard datasets
+# --------------------------------------------------------
+#
+# The benchmarks above were all performed on random Gaussian samples.
+# These results provide an informative baseline...
+# But in practice, most real-life datasets present a
+# **geometric structure** that can be leveraged by clever algorithms.
+# To measure the performances of bruteforce and IVF-like methods in +# "realistic" machine learning scenarios, we now benchmark +# our routines on several `standard datasets `_. +# +# First of all, on the well-known **MNIST collection of handwritten digits**: +# a collection of 60k 28-by-28 images, encoded as vectors +# of dimension 784 and endowed with the **Euclidean metric**. +# This dataset is relatively **small** (less than 100k training samples) +# but **high-dimensional** (D > 50) and highly **clustered** around +# a dozen of prototypes (the digits 0, 1, ..., 9 and their variants). +# Unsurprisingly, it is handled much more efficiently by the FAISS routines +# than by our bruteforce KeOps implementation. +# + +run_KNN_benchmark("MNIST a") + + +######################################## +# Note, however, that KeOps remains the only viable option +# to work easily with less common metrics such as the Manhattan-L1 norm: + +run_KNN_benchmark("MNIST b") + + +######################################## +# To conclude this benchmark, we evaluate our routines +# on the `GloVe word embeddings `_ +# for natural language processing: +# **1.2M words**, represented as vectors of **dimension 25-100** and +# compared with each other using the **cosine similarity metric**. +# +# In dimension 25, KeOps performs on par with the FAISS-Flat bruteforce +# routines. Both methods are slower than IVF-like algorithms +# in terms of queries per second: + +run_KNN_benchmark("GloVe25") + + +######################################## +# In dimension 100, the pre-processing times associated +# to IVF-like methods increase significantly while +# the FAISS-Flat routine edges the KeOps engine +# by a sizeable margin: + +run_KNN_benchmark("GloVe100") + +plt.show() From df92907df794e1ebbfa489fd80a212ad87afb843 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 13 Apr 2021 15:29:19 +0100 Subject: [PATCH 071/111] add ivf torch tut --- pykeops/tutorials/knn/plot_ivf_torch.ipynb | 585 +++++++++++++++++++++ 1 file changed, 585 insertions(+) create mode 100644 pykeops/tutorials/knn/plot_ivf_torch.ipynb diff --git a/pykeops/tutorials/knn/plot_ivf_torch.ipynb b/pykeops/tutorials/knn/plot_ivf_torch.ipynb new file mode 100644 index 000000000..ef2fe2862 --- /dev/null +++ b/pykeops/tutorials/knn/plot_ivf_torch.ipynb @@ -0,0 +1,585 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.8" + }, + "colab": { + "name": "plot_ivf_torch.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "vPa2u6sO753X" + }, + "source": [ + "%matplotlib inline" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gQpF1e7J753b" + }, + "source": [ + "\n", + "# IVF-Flat approximate nearest neighbors search - PyTorch API\n", + "\n", + "The :class:`pykeops.torch.IVF` class supported by KeOps allows us\n", + "to perform **approximate nearest neighbor search** with four lines of code.\n", + "It can thus be used to compute a **large-scale** nearest neighbors search **much faster**. 
The code is based on the IVF-Flat algorithm and uses KeOps' block-sparse reductions to speed up the search by reducing the search space.\n",
+    "\n",
+    "Euclidean, Manhattan, Angular and Hyperbolic metrics are supported along with custom metrics.\n",
+    "\n",
+    "<div class=\"alert alert-info\"><h4>Note</h4><p>Hyperbolic and custom metrics require the use of an approximation during the K-Means step to obtain the centroid locations, since a closed-form expression might not be readily available.</p></div>
\n", + "\n", + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TaH5GxTr753c" + }, + "source": [ + "## Setup\n", + "Standard imports:\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "eVVKpbGW753c" + }, + "source": [ + "import time\n", + "import torch\n", + "from matplotlib import pyplot as plt\n", + "#from pykeops.torch import IVF\n", + "\n", + "use_cuda = torch.cuda.is_available()\n", + "device = torch.device('cuda') if use_cuda else torch.device('cpu')\n", + "dtype = torch.float32 if use_cuda else torch.float64" + ], + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GtpiZ-q4753e" + }, + "source": [ + "## IVF nearest neighbour search with Euclidean metric\n", + "First experiment with N=$10^6$ points in dimension D=3 and 5 nearest neighbours\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "w_xRXXLn753f" + }, + "source": [ + "N, D, k = 10**6, 3, 5" + ], + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LkYe4YSQ753f" + }, + "source": [ + "Define our dataset:\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qFkqpF3H753g" + }, + "source": [ + "torch.manual_seed(1)\n", + "x = 0.7 * torch.randn(N, D, dtype=dtype, device=device) + 0.3\n", + "y = 0.7 * torch.randn(N, D, dtype=dtype, device=device) + 0.3" + ], + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MJnZEZlb753g" + }, + "source": [ + "Create the IVF class and fit the dataset:\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "r0uKEYTF753g", + "outputId": "dbcfef07-dd7c-4e1d-98cc-47edaa9ef303" + }, + "source": [ + "nn = IVF(k=k)\n", + "#set the number of clusters in K-Means to 50\n", + "#set the number of nearest clusters we search over during the final query search to 5\n", + "nn.fit(x, clusters = 50, a = 5)" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "<__main__.IVF at 0x7fc220cbc750>" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 9 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uNM5UD_x753h" + }, + "source": [ + "Query dataset search\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uYiiJxc_yCyU" + }, + "source": [ + "approx_nn = nn.kneighbors(y)" + ], + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "awldFVOs753j" + }, + "source": [ + "Now computing the true nearest neighbors with brute force search\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "XJ56FXzB753j" + }, + "source": [ + "true_nn = nn.brute_force(x, y, k=k)" + ], + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qdT8szO9753k" + }, + "source": [ + "Define the function to compute recall of the nearest neighbors\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hkoH4UaS753l" + }, + "source": [ + "def accuracy(indices_test, indices_truth):\n", + " '''\n", + " Compares the test and ground truth indices (rows = KNN for each point in dataset)\n", + " Returns accuracy: proportion of correct nearest neighbours\n", + " '''\n", + " N, k = indices_test.shape\n", + " \n", + " # Calculate number of correct nearest neighbours\n", + " accuracy = 0\n", + " for i in range(k):\n", + " accuracy += 
torch.sum(indices_test == indices_truth).float()/N\n", + " indices_truth = torch.roll(indices_truth, 1, -1) # Create a rolling window (index positions may not match)\n", + " accuracy = float(accuracy/k) # percentage accuracy\n", + "\n", + " return accuracy" + ], + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V_FLgW29753l" + }, + "source": [ + "Check the performance of our algorithm\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kXIoY57u753m", + "outputId": "88d14e97-0f56-4d17-af6f-be412943c964" + }, + "source": [ + "print('IVF Recall:', accuracy(approx_nn, true_nn))" + ], + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "text": [ + "IVF Recall: 0.9830819368362427\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mHWGG4yO4mN8" + }, + "source": [ + "Timing the algorithms to observe their performance" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rrumsJ3q4lef", + "outputId": "89e366cb-8d7b-463b-ed78-6cd2aec6af2a" + }, + "source": [ + "start=time.time()\n", + "iters=10\n", + "\n", + "#timing KeOps brute force\n", + "for _ in range(iters):\n", + " true_nn = nn.brute_force(x, y, k=k)\n", + "bf_time = time.time()-start\n", + "print('KeOps brute force timing for', N, 'points with', D, 'dimensions:', bf_time/iters)\n", + "\n", + "#timing IVF\n", + "nn = IVF(k=k)\n", + "nn.fit(x)\n", + "start = time.time()\n", + "for _ in range(iters):\n", + " approx_nn = nn.kneighbors(y)\n", + "ivf_time = time.time() - start\n", + "print('KeOps IVF-Flat timing for', N, 'points with', D, 'dimensions:', ivf_time/iters)\n" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "text": [ + "KeOps brute force timing for 1000000 points with 3 dimensions: 4.693825650215149\n", + "KeOps IVF-Flat timing for 1000000 points with 3 dimensions: 0.601522707939148\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j8joqj-r753m" + }, + "source": [ + "## IVF nearest neighbors search with angular metric\n", + "Second experiment with N=$10^6$ points in dimension D=3, with 5 nearest neighbors\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PdjTMuE1753n" + }, + "source": [ + "torch.manual_seed(1)\n", + "x = 0.7 * torch.randn(N, D, dtype=dtype, device=device) + 0.3\n", + "y = 0.7 * torch.randn(N, D, dtype=dtype, device=device) + 0.3\n", + "\n", + "#normalising the inputs to have norm of 1\n", + "x_norm=x/torch.linalg.norm(x,dim=1,keepdim=True)\n", + "y_norm=y/torch.linalg.norm(y,dim=1,keepdim=True)" + ], + "execution_count": 15, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "JyMhcbF6XR7k" + }, + "source": [ + "nn = IVF(metric = 'angular')\n", + "true_nn = nn.brute_force(x_norm, y_norm)" + ], + "execution_count": 16, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fWjVpEuCXWvB", + "outputId": "f1550d02-0ac6-451c-8760-ac8aa167202a" + }, + "source": [ + "nn = IVF(metric = 'angular')\n", + "nn.fit(x_norm)\n", + "approx_nn = nn.kneighbors(y_norm)\n", + "print('IVF Recall:', accuracy(approx_nn, true_nn))" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "text": [ + "IVF Recall: 0.998617947101593\n" + ], + "name": "stdout" + } + ] + }, + { + 
"cell_type": "markdown", + "metadata": { + "id": "AqYVOtAVYgmd" + }, + "source": [ + "The IVF class also has an option to automatically normalise all inputs" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OzE6F9EkYfNO", + "outputId": "dbe30d60-548a-4054-b40c-a79431182853" + }, + "source": [ + "nn = IVF(metric = 'angular', normalise = True)\n", + "nn.fit(x)\n", + "approx_nn = nn.kneighbors(y)\n", + "print('IVF Recall:', accuracy(approx_nn, true_nn))" + ], + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "text": [ + "IVF Recall: 0.9986152052879333\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YZgvkGCzYy2p" + }, + "source": [ + "There is also an option to use full angular metric \"angular_full\", which uses the full angular metric. \"angular\" simply uses the dot product." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qMr5jpn8ZCAO", + "outputId": "69b187bb-96e4-416e-e1cf-5852e79243aa" + }, + "source": [ + "nn = IVF(metric = 'angular_full')\n", + "nn.fit(x)\n", + "approx_nn = nn.kneighbors(y)\n", + "print('IVF Recall:', accuracy(approx_nn, true_nn))" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "text": [ + "IVF Recall: 0.9928072094917297\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8RsIuAPyZGwi" + }, + "source": [ + "## IVF nearest neighbors search with approximations for K-Means centroids\n", + "We run two experiment with N=$10^6$ points in dimension D=3, with 5 nearest neighbors. The first uses the hyperbolic metric while the second uses a custom metric." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-Yju_HG1ZPPU" + }, + "source": [ + "#hyperbolic data generation\n", + "torch.manual_seed(1)\n", + "x = 0.5 + torch.rand(N, D, dtype=dtype, device=device) \n", + "y = 0.5 + torch.rand(N, D, dtype=dtype, device=device) " + ], + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IqDycMpscBoH", + "outputId": "36cae4cb-7d2a-4b44-f9a3-42ef6d872918" + }, + "source": [ + "nn = IVF(metric = 'hyperbolic')\n", + "#set approx to True\n", + "#n is the number of times we run gradient descent steps for the approximation, default of 50\n", + "nn.fit(x, approx = True, n = 50)\n", + "approx_nn = nn.kneighbors(y)\n", + "true_nn = nn.brute_force(x, y)\n", + "print('IVF Recall:', accuracy(approx_nn, true_nn))" + ], + "execution_count": 21, + "outputs": [ + { + "output_type": "stream", + "text": [ + "IVF Recall: 0.9897241592407227\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Pg5kOKjacttb" + }, + "source": [ + "#define a custom metric\n", + "def minkowski(x, y, p = 3):\n", + " \"\"\"Returns the computation of a metric\n", + " Note the shape of the input tensors the function should accept\n", + "\n", + " Args:\n", + " x (tensor): Input dataset of size 1, N, D\n", + " y (tensor): Query dataset of size M, 1, D\n", + "\n", + " \"\"\" \n", + " return ((x - y).abs()**p).sum(-1)" + ], + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nIVYuN9Mdpgt", + "outputId": "4ca63cf6-3b37-4703-cf74-96339d7026fd" + }, + "source": [ + "#testing custom metric\n", + "nn = IVF(metric = minkowski)\n", + "nn.fit(x, approx = True)\n", + "approx_nn = nn.kneighbors(y)\n", + "true_nn = nn.brute_force(x, y)\n", + "print('IVF Recall:', accuracy(approx_nn, true_nn))" + ], + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "text": [ + "IVF Recall: 0.9897966384887695\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8Qoacr6Hk64h" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From a1a3b0f4e7934db2725d92664db63a9cc6a31ec5 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 13 Apr 2021 15:35:39 +0100 Subject: [PATCH 072/111] rearranging code to avoid conflict --- pykeops/benchmarks/plot_benchmark_KNN.py | 88 ++++++++++++------------ 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/pykeops/benchmarks/plot_benchmark_KNN.py b/pykeops/benchmarks/plot_benchmark_KNN.py index 4e0fb6434..53ec59601 100644 --- a/pykeops/benchmarks/plot_benchmark_KNN.py +++ b/pykeops/benchmarks/plot_benchmark_KNN.py @@ -68,6 +68,49 @@ Ks = [1, 10, 50, 100] # Numbers of neighbors to find +############################################################################ +# KeOps IVF-Flat implementation +# -------------------------------------- +# +# KeOps IVF-Flat is an approximation method that leverages the KeOps engine. It uses the IVF-Flat approximation algorithm comprising 4 steps: (1) split the training data into clusters using k-means, (2) find the 'a' nearest clusters to each cluster, (3) find the nearest cluster to each query point, and (4) perform the nearest neighbour search within only these nearest clusters, and the 'a' nearest clusters to each of these clusters. 
(1) and (2) are performed during fitting, while (3) and (4) are performed during query time. Steps (3) and (4) achieve time savings during query time by reducing the amount of pair-wise distance calculations. + +from pykeops.torch.nn.ivf import IVF + + +def KNN_KeOps_ivf_flat(K, metric="euclidean", clusters=100, a=10, **kwargs): + + # Setup the K-NN estimator: + if metric == "angular": + metric = "angular_full" + KNN = IVF(k=K, metric=metric) # normalise=False because dataset is normalised + + def fit(x_train): + x_train = tensor(x_train) + start = timer() + KNN.fit(x_train, clusters=clusters, a=a) + elapsed = timer() - start + + def f(x_test): + x_test = tensor(x_test) + start = timer() + indices = KNN.kneighbors(x_test) + elapsed = timer() - start + indices = indices.cpu().numpy() + + return indices, elapsed + + return f, elapsed + + return fit + + +################################################################## +# The time savings and accuracies achieved depend on the underlying data structure, the number of clusters chosen and the 'a' parameter. The algorithm speed suffers for clusters >200. Reducing the proportion of clusters searched over (i.e. the a/clusters value) increases the algorithm speed, but lowers its accuracy. For structured data (e.g. MNIST), high accuracies >90% can be reached by just searching over 10% of clusters. However, for uniformly distributed random data, over 80% of the clusters will need to be searched over to attain >90% accuracy. + +# Here, we propose 2 sets of parameters that work well on real data (e.g. MNIST, GloVe): + +KNN_KeOps_gpu_IVFFlat_fast = partial(KNN_KeOps_ivf_flat, clusters=10, a=1) +KNN_KeOps_gpu_IVFFlat_slow = partial(KNN_KeOps_ivf_flat, clusters=200, a=40) ############################################## # PyTorch bruteforce implementation @@ -386,51 +429,6 @@ def f(x_test): return fit -############################################################################ -# KeOps IVF-Flat implementation -# -------------------------------------- -# -# KeOps IVF-Flat is an approximation method that leverages the KeOps engine. It uses the IVF-Flat approximation algorithm comprising 4 steps: (1) split the training data into clusters using k-means, (2) find the 'a' nearest clusters to each cluster, (3) find the nearest cluster to each query point, and (4) perform the nearest neighbour search within only these nearest clusters, and the 'a' nearest clusters to each of these clusters. (1) and (2) are performed during fitting, while (3) and (4) are performed during query time. Steps (3) and (4) achieve time savings during query time by reducing the amount of pair-wise distance calculations. - -from pykeops.torch.nn.ivf import IVF - - -def KNN_KeOps_ivf_flat(K, metric="euclidean", clusters=100, a=10, **kwargs): - - # Setup the K-NN estimator: - if metric == "angular": - metric = "angular_full" - KNN = IVF(k=K, metric=metric) # normalise=False because dataset is normalised - - def fit(x_train): - x_train = tensor(x_train) - start = timer() - KNN.fit(x_train, clusters=clusters, a=a) - elapsed = timer() - start - - def f(x_test): - x_test = tensor(x_test) - start = timer() - indices = KNN.kneighbors(x_test) - elapsed = timer() - start - indices = indices.cpu().numpy() - - return indices, elapsed - - return f, elapsed - - return fit - - -################################################################## -# The time savings and accuracies achieved depend on the underlying data structure, the number of clusters chosen and the 'a' parameter. 
The algorithm speed suffers for clusters >200. Reducing the proportion of clusters searched over (i.e. the a/clusters value) increases the algorithm speed, but lowers its accuracy. For structured data (e.g. MNIST), high accuracies >90% can be reached by just searching over 10% of clusters. However, for uniformly distributed random data, over 80% of the clusters will need to be searched over to attain >90% accuracy.
-
-# Here, we propose 2 sets of parameters that work well on real data (e.g. MNIST, GloVe):
-
-KNN_KeOps_gpu_IVFFlat_fast = partial(KNN_KeOps_ivf_flat, clusters=10, a=1)
-KNN_KeOps_gpu_IVFFlat_slow = partial(KNN_KeOps_ivf_flat, clusters=200, a=40)
-
-
 ################################################################################
 # SciKit-Learn tree-based and bruteforce methods
 # -----------------------------------------------------

From 9ccc927596540c8a5708a191dd86847f53f5e580 Mon Sep 17 00:00:00 2001
From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com>
Date: Tue, 13 Apr 2021 16:12:49 +0100
Subject: [PATCH 073/111] add np tutorial for ivf

---
 pykeops/tutorials/knn/plot_ivf_numpy.ipynb | 467 +++++++++++++++++++++
 pykeops/tutorials/knn/plot_ivf_torch.ipynb |  54 +--
 2 files changed, 488 insertions(+), 33 deletions(-)
 create mode 100644 pykeops/tutorials/knn/plot_ivf_numpy.ipynb

diff --git a/pykeops/tutorials/knn/plot_ivf_numpy.ipynb b/pykeops/tutorials/knn/plot_ivf_numpy.ipynb
new file mode 100644
index 000000000..406f81c6f
--- /dev/null
+++ b/pykeops/tutorials/knn/plot_ivf_numpy.ipynb
@@ -0,0 +1,467 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.8"
+  },
+  "colab": {
+   "name": "plot_ivf_numpy.ipynb",
+   "provenance": [],
+   "collapsed_sections": []
+  },
+  "accelerator": "GPU"
+ },
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "gQpF1e7J753b"
+   },
+   "source": [
+    "\n",
+    "# IVF-Flat approximate nearest neighbors search - NumPy API\n",
+    "\n",
+    "The :class:`pykeops.numpy.IVF` class supported by KeOps allows us\n",
+    "to perform **approximate nearest neighbor search** with four lines of code.\n",
+    "It can thus be used to compute a **large-scale** nearest neighbors search **much faster**. The code is based on the IVF-Flat algorithm and uses KeOps' block-sparse reductions to speed up the search by reducing the search space.\n",
+    "\n",
+    "Euclidean, Manhattan and Angular metrics are supported.\n",
+    "\n",
+    "<div class=\"alert alert-info\"><h4>Note</h4><p>Hyperbolic and custom metrics are not supported in the NumPy API; please use the PyTorch API instead.</p></div>
\n", + "\n", + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TaH5GxTr753c" + }, + "source": [ + "## Setup\n", + "Standard imports:\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "eVVKpbGW753c" + }, + "source": [ + "import time\n", + "import numpy as np\n", + "from pykeops.numpy import IVF" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GtpiZ-q4753e" + }, + "source": [ + "## IVF nearest neighbour search with Euclidean metric\n", + "First experiment with N=$10^5$ points in dimension D=3 and 5 nearest neighbours\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "w_xRXXLn753f" + }, + "source": [ + "N, D, k = 10**5, 3, 5" + ], + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LkYe4YSQ753f" + }, + "source": [ + "Define our dataset:\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qFkqpF3H753g" + }, + "source": [ + "np.random.seed(1)\n", + "x = 0.7 * np.random.randn(N, D) + 0.3\n", + "y = 0.7 * np.random.randn(N, D) + 0.3" + ], + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MJnZEZlb753g" + }, + "source": [ + "Create the IVF class and fit the dataset:\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "r0uKEYTF753g", + "outputId": "034abd75-f6d5-4c4e-8a1d-d8a68cf9f19b" + }, + "source": [ + "nn = IVF(k=k)\n", + "#set the number of clusters in K-Means to 50\n", + "#set the number of nearest clusters we search over during the final query search to 5\n", + "nn.fit(x, clusters = 50, a = 5)" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "<__main__.IVF at 0x7f8657401110>" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 7 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uNM5UD_x753h" + }, + "source": [ + "Query dataset search\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uYiiJxc_yCyU" + }, + "source": [ + "approx_nn = nn.kneighbors(y)" + ], + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "awldFVOs753j" + }, + "source": [ + "Now computing the true nearest neighbors with brute force search\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "XJ56FXzB753j" + }, + "source": [ + "true_nn = nn.brute_force(x, y, k=k)" + ], + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qdT8szO9753k" + }, + "source": [ + "Define the function to compute recall of the nearest neighbors\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hkoH4UaS753l" + }, + "source": [ + "def accuracy(indices_test, indices_truth):\n", + " '''\n", + " Compares the test and ground truth indices (rows = KNN for each point in dataset)\n", + " Returns accuracy: proportion of correct nearest neighbours\n", + " '''\n", + " N, k = indices_test.shape\n", + " \n", + " # Calculate number of correct nearest neighbours\n", + " accuracy = 0\n", + " for i in range(k):\n", + " accuracy += float(np.sum(indices_test == indices_truth))/N\n", + " indices_truth = np.roll(indices_truth, 1, -1) # Create a rolling window (index positions may not match)\n", + " accuracy = float(accuracy/k) # percentage accuracy\n", + "\n", + " return accuracy" + ], + "execution_count": 10, 
+ "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V_FLgW29753l" + }, + "source": [ + "Check the performance of our algorithm\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kXIoY57u753m", + "outputId": "be568c38-661a-4639-b44f-e40a2b99114a" + }, + "source": [ + "print('IVF Recall:', accuracy(approx_nn, true_nn))" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "IVF Recall: 0.9652399999999999\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mHWGG4yO4mN8" + }, + "source": [ + "Timing the algorithms to observe their performance" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rrumsJ3q4lef", + "outputId": "629889c2-3cb7-43cf-c2c2-50e19c95a402" + }, + "source": [ + "start=time.time()\n", + "iters=10\n", + "\n", + "#timing KeOps brute force\n", + "for _ in range(iters):\n", + " true_nn = nn.brute_force(x, y, k=k)\n", + "bf_time = time.time()-start\n", + "print('KeOps brute force timing for', N, 'points with', D, 'dimensions:', bf_time/iters)\n", + "\n", + "#timing IVF\n", + "nn = IVF(k=k)\n", + "nn.fit(x)\n", + "start = time.time()\n", + "for _ in range(iters):\n", + " approx_nn = nn.kneighbors(y)\n", + "ivf_time = time.time() - start\n", + "print('KeOps IVF-Flat timing for', N, 'points with', D, 'dimensions:', ivf_time/iters)\n" + ], + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "text": [ + "KeOps brute force timing for 100000 points with 3 dimensions: 0.21520822048187255\n", + "KeOps IVF-Flat timing for 100000 points with 3 dimensions: 0.05834429264068604\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j8joqj-r753m" + }, + "source": [ + "## IVF nearest neighbors search with angular metric\n", + "Second experiment with N=$10^5$ points in dimension D=3, with 5 nearest neighbors\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PdjTMuE1753n" + }, + "source": [ + "np.random.seed(1)\n", + "x = 0.7 * np.random.randn(N, D) + 0.3\n", + "y = 0.7 * np.random.randn(N, D) + 0.3\n", + "\n", + "#normalising the inputs to have norm of 1\n", + "x_norm = x / np.linalg.norm(x, axis=1, keepdims=True)\n", + "y_norm = y / np.linalg.norm(x, axis=1, keepdims=True)" + ], + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "JyMhcbF6XR7k" + }, + "source": [ + "nn = IVF(metric = 'angular')\n", + "true_nn = nn.brute_force(x_norm, y_norm)" + ], + "execution_count": 14, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fWjVpEuCXWvB", + "outputId": "fda9d458-d497-4e67-ec22-36bef77a7f5f" + }, + "source": [ + "nn = IVF(metric = 'angular')\n", + "nn.fit(x_norm)\n", + "approx_nn = nn.kneighbors(y_norm)\n", + "print('IVF Recall:', accuracy(approx_nn, true_nn))" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "text": [ + "IVF Recall: 0.9958119999999999\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AqYVOtAVYgmd" + }, + "source": [ + "The IVF class also has an option to automatically normalise all inputs" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OzE6F9EkYfNO", + "outputId": 
"a50dba76-d121-40a6-fd3b-00e03b24585b" + }, + "source": [ + "nn = IVF(metric = 'angular', normalise = True)\n", + "nn.fit(x)\n", + "approx_nn = nn.kneighbors(y)\n", + "print('IVF Recall:', accuracy(approx_nn, true_nn))" + ], + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "text": [ + "IVF Recall: 0.9958119999999999\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YZgvkGCzYy2p" + }, + "source": [ + "There is also an option to use full angular metric \"angular_full\", which uses the full angular metric. \"angular\" simply uses the dot product." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qMr5jpn8ZCAO", + "outputId": "3fe0a152-eff7-490c-9f3b-b3707bf81571" + }, + "source": [ + "nn = IVF(metric = 'angular_full')\n", + "nn.fit(x)\n", + "approx_nn = nn.kneighbors(y)\n", + "print('IVF Recall:', accuracy(approx_nn, true_nn))" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "text": [ + "IVF Recall: 0.995626\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8Qoacr6Hk64h" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/pykeops/tutorials/knn/plot_ivf_torch.ipynb b/pykeops/tutorials/knn/plot_ivf_torch.ipynb index ef2fe2862..902ed3dfb 100644 --- a/pykeops/tutorials/knn/plot_ivf_torch.ipynb +++ b/pykeops/tutorials/knn/plot_ivf_torch.ipynb @@ -27,17 +27,6 @@ "accelerator": "GPU" }, "cells": [ - { - "cell_type": "code", - "metadata": { - "id": "vPa2u6sO753X" - }, - "source": [ - "%matplotlib inline" - ], - "execution_count": 1, - "outputs": [] - }, { "cell_type": "markdown", "metadata": { @@ -77,14 +66,13 @@ "source": [ "import time\n", "import torch\n", - "from matplotlib import pyplot as plt\n", - "#from pykeops.torch import IVF\n", + "from pykeops.torch import IVF\n", "\n", "use_cuda = torch.cuda.is_available()\n", "device = torch.device('cuda') if use_cuda else torch.device('cpu')\n", "dtype = torch.float32 if use_cuda else torch.float64" ], - "execution_count": 6, + "execution_count": null, "outputs": [] }, { @@ -107,7 +95,7 @@ "source": [ "N, D, k = 10**6, 3, 5" ], - "execution_count": 7, + "execution_count": null, "outputs": [] }, { @@ -130,7 +118,7 @@ "x = 0.7 * torch.randn(N, D, dtype=dtype, device=device) + 0.3\n", "y = 0.7 * torch.randn(N, D, dtype=dtype, device=device) + 0.3" ], - "execution_count": 8, + "execution_count": null, "outputs": [] }, { @@ -158,7 +146,7 @@ "#set the number of nearest clusters we search over during the final query search to 5\n", "nn.fit(x, clusters = 50, a = 5)" ], - "execution_count": 9, + "execution_count": null, "outputs": [ { "output_type": "execute_result", @@ -192,7 +180,7 @@ "source": [ "approx_nn = nn.kneighbors(y)" ], - "execution_count": 10, + "execution_count": null, "outputs": [] }, { @@ -213,7 +201,7 @@ "source": [ "true_nn = nn.brute_force(x, y, k=k)" ], - "execution_count": 11, + "execution_count": null, "outputs": [] }, { @@ -247,7 +235,7 @@ "\n", " return accuracy" ], - "execution_count": 12, + "execution_count": null, "outputs": [] }, { @@ -272,7 +260,7 @@ "source": [ "print('IVF Recall:', accuracy(approx_nn, true_nn))" ], - "execution_count": 13, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -320,7 +308,7 @@ "ivf_time = time.time() - start\n", "print('KeOps IVF-Flat timing for', N, 'points with', D, 'dimensions:', ivf_time/iters)\n" ], 
- "execution_count": 14, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -354,10 +342,10 @@ "y = 0.7 * torch.randn(N, D, dtype=dtype, device=device) + 0.3\n", "\n", "#normalising the inputs to have norm of 1\n", - "x_norm=x/torch.linalg.norm(x,dim=1,keepdim=True)\n", - "y_norm=y/torch.linalg.norm(y,dim=1,keepdim=True)" + "x_norm = x / torch.linalg.norm(x,dim=1,keepdim=True)\n", + "y_norm = y / torch.linalg.norm(y,dim=1,keepdim=True)" ], - "execution_count": 15, + "execution_count": null, "outputs": [] }, { @@ -369,7 +357,7 @@ "nn = IVF(metric = 'angular')\n", "true_nn = nn.brute_force(x_norm, y_norm)" ], - "execution_count": 16, + "execution_count": null, "outputs": [] }, { @@ -387,7 +375,7 @@ "approx_nn = nn.kneighbors(y_norm)\n", "print('IVF Recall:', accuracy(approx_nn, true_nn))" ], - "execution_count": 17, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -422,7 +410,7 @@ "approx_nn = nn.kneighbors(y)\n", "print('IVF Recall:', accuracy(approx_nn, true_nn))" ], - "execution_count": 18, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -457,7 +445,7 @@ "approx_nn = nn.kneighbors(y)\n", "print('IVF Recall:', accuracy(approx_nn, true_nn))" ], - "execution_count": 19, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -489,7 +477,7 @@ "x = 0.5 + torch.rand(N, D, dtype=dtype, device=device) \n", "y = 0.5 + torch.rand(N, D, dtype=dtype, device=device) " ], - "execution_count": 20, + "execution_count": null, "outputs": [] }, { @@ -510,7 +498,7 @@ "true_nn = nn.brute_force(x, y)\n", "print('IVF Recall:', accuracy(approx_nn, true_nn))" ], - "execution_count": 21, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -539,7 +527,7 @@ " \"\"\" \n", " return ((x - y).abs()**p).sum(-1)" ], - "execution_count": 22, + "execution_count": null, "outputs": [] }, { @@ -559,7 +547,7 @@ "true_nn = nn.brute_force(x, y)\n", "print('IVF Recall:', accuracy(approx_nn, true_nn))" ], - "execution_count": 23, + "execution_count": null, "outputs": [ { "output_type": "stream", From 59a7debec5e0a5737f2652c1b528ec37e4a7fa90 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 13 Apr 2021 19:30:56 +0100 Subject: [PATCH 074/111] add spaces --- pykeops/benchmarks/plot_benchmark_KNN.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pykeops/benchmarks/plot_benchmark_KNN.py b/pykeops/benchmarks/plot_benchmark_KNN.py index 2df476bb2..bd8cd0ae7 100644 --- a/pykeops/benchmarks/plot_benchmark_KNN.py +++ b/pykeops/benchmarks/plot_benchmark_KNN.py @@ -1,6 +1,7 @@ """ K-Nearest Neighbors search ========================================= + We compare the performances of PyTorch, JAX, KeOps, Scikit-Learn and FAISS (when applicable) for K-NN queries on random samples and standard datasets. A detailed discussion of these results can be found in Section 5.2 @@ -15,6 +16,7 @@ Crucially, KeOps is easy to use with **any metric**: it provides the only competitive run times in the many settings that are not supported by existing C++ libraries. + In this demo, we often use exact **bruteforce** computations (tensorized for PyTorch/JAX, on-the-fly for KeOps) and do not leverage any quantization scheme or multiscale @@ -27,6 +29,7 @@ modify and integrate in other projects. Hopefully, this will **stimulate research on non-Euclidean metrics**, such as hyperbolic or discrete spaces. + .. note:: Note that timings are always subject to change: libraries and hardware get better with time. 
From d3cf5564f55da1a59814a5d3587291926cdea7b4 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 13 Apr 2021 19:41:26 +0100 Subject: [PATCH 075/111] adding back new code for knn benchmark --- pykeops/benchmarks/plot_benchmark_KNN.py | 47 +++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/pykeops/benchmarks/plot_benchmark_KNN.py b/pykeops/benchmarks/plot_benchmark_KNN.py index bd8cd0ae7..fc08f6f09 100644 --- a/pykeops/benchmarks/plot_benchmark_KNN.py +++ b/pykeops/benchmarks/plot_benchmark_KNN.py @@ -62,6 +62,49 @@ use_cuda = torch.cuda.is_available() +############################################################################ +# KeOps IVF-Flat implementation +# -------------------------------------- +# +# KeOps IVF-Flat is an approximation method that leverages the KeOps engine. It uses the IVF-Flat approximation algorithm comprising 4 steps: (1) split the training data into clusters using k-means, (2) find the 'a' nearest clusters to each cluster, (3) find the nearest cluster to each query point, and (4) perform the nearest neighbour search within only these nearest clusters, and the 'a' nearest clusters to each of these clusters. (1) and (2) are performed during fitting, while (3) and (4) are performed during query time. Steps (3) and (4) achieve time savings during query time by reducing the amount of pair-wise distance calculations. + +from pykeops.torch.nn.ivf import IVF + + +def KNN_KeOps_ivf_flat(K, metric="euclidean", clusters=100, a=10, **kwargs): + + # Setup the K-NN estimator: + if metric == "angular": + metric = "angular_full" + KNN = IVF(k=K, metric=metric) # normalise=False because dataset is normalised + + def fit(x_train): + x_train = tensor(x_train) + start = timer() + KNN.fit(x_train, clusters=clusters, a=a) + elapsed = timer() - start + + def f(x_test): + x_test = tensor(x_test) + start = timer() + indices = KNN.kneighbors(x_test) + elapsed = timer() - start + indices = indices.cpu().numpy() + + return indices, elapsed + + return f, elapsed + + return fit + + +################################################################## +# The time savings and accuracies achieved depend on the underlying data structure, the number of clusters chosen and the 'a' parameter. The algorithm speed suffers for clusters >200. Reducing the proportion of clusters searched over (i.e. the a/clusters value) increases the algorithm speed, but lowers its accuracy. For structured data (e.g. MNIST), high accuracies >90% can be reached by just searching over 10% of clusters. However, for uniformly distributed random data, over 80% of the clusters will need to be searched over to attain >90% accuracy. + +# Here, we propose 2 sets of parameters that work well on real data (e.g. 
MNIST, GloVe): + +KNN_KeOps_gpu_IVFFlat_fast = partial(KNN_KeOps_ivf_flat, clusters=10, a=1) +KNN_KeOps_gpu_IVFFlat_slow = partial(KNN_KeOps_ivf_flat, clusters=200, a=40) ############################################## # We then specify the values of K that we will inspect: @@ -631,6 +674,8 @@ def run_KNN_benchmark(name, loops=[1]): routines = [(KNN_JAX_batch_loop, "JAX (small batches, GPU)", {})] else: routines = [ + (KNN_KeOps_gpu_IVFFlat_fast, "IVF-Flat Keops (GPU, nprobe=1)", {}), + (KNN_KeOps_gpu_IVFFlat_slow, "IVF-Flat Keops (GPU, nprobe=40)", {}), (KNN_KeOps, "KeOps (GPU)", {}), (KNN_faiss_gpu_Flat, "FAISS-Flat (GPU)", {}), (KNN_faiss_gpu_IVFFlat_fast, "FAISS-IVF-Flat (GPU, nprobe=1)", {}), @@ -840,4 +885,4 @@ def run_KNN_benchmark(name, loops=[1]): run_KNN_benchmark("GloVe100") -plt.show() \ No newline at end of file +plt.show() From 47da991a0f1ab696e3bdfb3e4a476f034ed1f920 Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Thu, 15 Apr 2021 15:56:50 +0100 Subject: [PATCH 076/111] NNDescent version with clusters --- pykeops/torch/nn/NNDescent.py | 295 ++++++++++++++++++++++++++++------ 1 file changed, 244 insertions(+), 51 deletions(-) diff --git a/pykeops/torch/nn/NNDescent.py b/pykeops/torch/nn/NNDescent.py index eea647803..55662ce6f 100644 --- a/pykeops/torch/nn/NNDescent.py +++ b/pykeops/torch/nn/NNDescent.py @@ -1,5 +1,14 @@ import torch import time +from pykeops.torch import LazyTensor +from pykeops.torch.cluster import cluster_ranges_centroids, from_matrix, sort_clusters + +use_cuda = torch.cuda.is_available() +if use_cuda: + torch.cuda.synchronize() + device = torch.device("cuda") +else: + device = torch.device("cpu") class NNDescent: @@ -10,9 +19,10 @@ def __init__( metric="euclidean", initialization_method="forest", num_trees=5, - leaf_multiplier=10, + leaf_multiplier=128, big_leaf_depth=5, verbose=False, + LT=False, ): """Initialize the NNDescent class. @@ -21,14 +31,20 @@ def __init__( Args: data ((N,d) Tensor): Dataset of N datapoints of dimensionality d. - k (int): The number of neighbors to which each node connects in the search graph. + k (int): The number of nearest neighbors which we want to find for each query point metric (string): Name of metric, either "euclidean" and "manhattan" initialization_method (string): The type of initialization to be used for - the search graph. Can be "random", "random_big" or "forest". + the search graph. Can be "random", "random_big", "forest" or "cluster". num_trees (int): Number of trees used in "random_big" or "forest" initializations. - leaf_multiplier (int): Parameter for the Tree class for tree-based initializations. big_leaf_depth (int): The depth at which the big leaves are taken to be used at the start of search. + verbose (boolean): Determines whether or not to print information while fitting. + LT (boolean): Determines if we want to use LazyTensors in cluster initialization. + + Arg not used when initialization_method = "cluster": + leaf_multiplier (int): Parameter for the Tree class for tree-based initializations. + when initialization_method = "cluster", this parameter is used to adjust the number + of clusters to be close to the value specified in the fit function. """ # Setting parameters @@ -39,30 +55,54 @@ def __init__( self.leaf_multiplier = leaf_multiplier self.big_leaf_depth = big_leaf_depth self.big_leaves = None + self.LT = LT # If data is provided, we call the fit function. 
if data is not None:
            self.fit(data, verbose=verbose)

     def distance(self, x, y):
-        # Square of euclidean distance. Skip the root for faster computation.
+        # Square of the Euclidean distance. Skip the square root for faster computation.
         if self.metric == "euclidean":
             return ((x - y) ** 2).sum(-1)
         elif self.metric == "manhattan":
             return ((x - y).abs()).sum(-1)
+        else:
+            raise ValueError("Metric not implemented!")

-    def fit(self, X, iter=20, verbose=False):
+    def fit(self, X, iter=20, verbose=False, clusters=32, a=10, queue=5):
         """Fits the NNDescent search graph to the data set X.

         Args:
             X ((N,d) Tensor): Dataset of N datapoints of dimensionality d.
+            iter (int): Maximum number of iterations for graph updates
+            verbose (boolean): Determines whether or not to print information while fitting.
+            queue (int): The number of neighbors to which each node connects in the search graph.
+
+            Used only when initialization_method = "cluster":
+            clusters (int): The minimum number of clusters that we want the data to be clustered into
+            a (int): The number of clusters we want to search over using the cluster method.
+        """
         self.data = X
+        self.queue = queue
+
+        if queue < self.k and self.init_method != "cluster":
+            self.queue = self.k
+            print(
+                "Warning: queue must be greater than or equal to k! Setting queue = k."
+            )
+        elif queue > a and self.init_method == "cluster":
+            raise ValueError("Value of queue must be smaller than value of a!")
+        elif clusters < 32:
+            raise ValueError("Minimum number of clusters is 32!")
+        elif a > clusters:
+            raise ValueError("Number of clusters must be larger than or equal to a!")

         # A 2D tensor representing a directed graph.
         # The value a = graph[i,j] represents an edge from point x_i to x_a.
         N = X.shape[0]
-        self.graph = torch.zeros(size=[N, self.k], dtype=torch.long)
+        self.graph = torch.zeros(size=[N, self.queue], dtype=torch.long)

         # Initialize graph
         if self.init_method == "random":
@@ -73,22 +113,29 @@ def fit(self, X, iter=20, verbose=False):
             self._initialize_graph_forest(
                 self.data, self.num_trees, self.leaf_multiplier, self.big_leaf_depth
             )
+        elif self.init_method == "cluster":
+            # Parameters used only for cluster search
+            self.a = a
+            self.num_clusters = clusters
+            self._initialize_graph_clusters(self.data)

         # A set of tuples (i,j) of indices for which the distance has already been calculated.
         self.explored_edges = set()

-        # A 2D tensor representing the distance between point x_i and x_graph[i,j]
-        self.k_distances = torch.zeros([N, self.k])
+        if self.init_method != "cluster":
+            # A 2D tensor representing the distance between point x_i and x_graph[i,j]
+            self.k_distances = torch.zeros([N, self.queue])

-        # Update the graph
-        self._calculate_all_distances()
-        self._update_graph(iter=iter, verbose=verbose)
+            # Update the graph
+            self._calculate_all_distances()
+            self._update_graph(iter=iter, verbose=verbose)

-    def _update_graph(self, iter=25, verbose=False):
+    def _update_graph(self, iter, verbose=False):
         """Updates the graph using algorithm: https://pynndescent.readthedocs.io/en/latest/how_pynndescent_works.html

         Args:
             iter (int): Number of iterations to use when updating search graph.
+            verbose (boolean): Printing information about iterations while searching.
         """
         # [STEP 1: Start with random graph.]
Iterate start = time.time() @@ -110,7 +157,7 @@ def _update_graph(self, iter=25, verbose=False): # Distances of current neighbors dist_current_neighbors = self.k_distances[i] - # [STEP 3: Measure distance from the node to the neighbors of its neighbors] + # [STEP 3: Measure distance from the node to the neighbors of its neighbors] # Find neighbors of neighbors potential_neighbors = { a.item() @@ -135,10 +182,10 @@ def _update_graph(self, iter=25, verbose=False): # [STEP 4: If any are closer, then update the graph accordingly, and only keep the k closest] dist_sorted, idx = torch.sort(cat_dist) - if torch.max(idx[: self.k]) >= self.k: + if torch.max(idx[: self.queue]) >= self.queue: has_changed = True - self.graph[i] = cat_idx[idx[: self.k]] - self.k_distances[i] = dist_sorted[: self.k] + self.graph[i] = cat_idx[idx[: self.queue]] + self.k_distances[i] = dist_sorted[: self.queue] # [STEP 5: If any changes were made, repeat iteration, otherwise stop] if not has_changed: @@ -152,9 +199,15 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): Our code is largely based on this algorithm: https://pynndescent.readthedocs.io/en/latest/how_pynndescent_works.html#Searching-using-a-nearest-neighbor-graph + If init_method = 'clusters', we first cluster the data. Each node in the graph then represents a cluster. + We then use the KeOps engine to perform the final nearest neighbours search over the nearest clusters to each query point + Args: X ((N,d) Tensor): A query set for which to find k neighbors. + K (int): How many neighbors to search for. Must be <=self.k for non-cluster methods. Default: self.k max_num_steps (int): The maximum number of steps to take during search. + tree_init (boolean): Determine whether or not to use big leaves from projection trees as the starting point of search. + verbose (boolean): Printing information about iterations while searching. Returns: The indices of the k nearest neighbors in the fitted data. @@ -162,7 +215,7 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): # N datapoints of dimension d N, d = X.shape - k = self.k + k = self.queue # Boolean mask to keep track of those points whose search is still ongoing is_active = torch.ones(N) == 1 @@ -176,6 +229,10 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): high=len(self.data), size=[N, k + 1], dtype=torch.long ) + if self.init_method == "cluster": + is_active = is_active.to(device) + candidate_idx = candidate_idx.to(device) + # Sort the candidates by distance from X distances = self.distance(self.data[candidate_idx], X.unsqueeze(1)) # distances = ((self.data[candidate_idx] - X.unsqueeze(1))**2).sum(-1) @@ -185,9 +242,12 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): candidate_idx = candidate_idx[:, : (k + 1)] # Track the nodes we have explored already, in N x num_explored tensor - num_explored = self.k * 2 + num_explored = k * 2 explored = torch.full(size=[N, num_explored], fill_value=-1) + if self.init_method == "cluster": + explored = explored.to(device) + start = time.time() # The initialization of candidates and explored set is done. Now we can search. count = 0 @@ -237,9 +297,15 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): # We remove repeated indices from consideration by adding float('inf') to them. 
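# (Editorial aside, hypothetical values: the deduplication below works by
# sorting each row of candidate indices and comparing every entry with its
# left neighbour, e.g.
#     sorted row:  [3, 3, 7, 9]
#     shifted row: [-1, 3, 3, 7]
#     duplicate:   [no, yes, no, no]
# The flagged entries then get float('inf') added to their distance so that
# they sink to the back of the queue at the next sort.)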
expanded_idx = torch.sort(expanded_idx)[0] + temp = torch.full((len(expanded_idx), 1), -1) + + if self.init_method == "cluster": + expanded_idx = expanded_idx.to(device) + temp = temp.to(device) + shift = torch.cat( ( - torch.full((len(expanded_idx), 1), -1), + temp, torch.sort(expanded_idx, dim=1)[0][:, :-1], ), dim=1, @@ -256,7 +322,7 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): expanded_idx = torch.gather(expanded_idx, dim=1, index=idx) # [5. Truncate to k+1 best] - candidate_idx[is_active] = expanded_idx[:, : (self.k + 1)] + candidate_idx[is_active] = expanded_idx[:, : (k + 1)] # [6. Return to step 2. If we have already tried all candidates in pool, we stop in the if not unexplored] count += 1 @@ -267,9 +333,14 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): "Graph search finished after", count, "steps. Finished for:", - 1 - torch.mean(1.0 * is_active).item(), + (1 - torch.mean(1.0 * is_active).item()) * 100, + "%.", ) - return candidate_idx[:, :-1] + + if self.init_method == "cluster": + return self.final_brute_force(candidate_idx[:, : self.k], X) + else: + return candidate_idx[:, : self.k] def _calculate_all_distances(self): """Updates the distances (self.k_distances) of the edges found in self.graph.""" @@ -285,7 +356,7 @@ def _calculate_all_distances(self): self.explored_edges.update(neighbor_indices) def _initialize_graph_randomly(self): - """Initializes self.graph with random values such that each point has k distinct neighbors""" + """Initializes self.graph with random values such that each point has 'queue' distinct neighbors""" N, k = self.graph.shape # Initialize graph randomly, removing self-loops self.graph = torch.randint(high=N - 1, size=[N, k], dtype=torch.long) @@ -321,12 +392,12 @@ def _initialize_graph_big_random(self, data, numtrees): temp_points = data[temp_row, :] # pick out elements from dataset distances = self.distance(temp_points, data[i]) # Euclidean distances indices = distances.topk( - k=self.k, largest=False + k=self.queue, largest=False ).indices # find indices of KNN self.graph[i] = temp_row[indices] # assign KNN to graph def _initialize_graph_forest(self, data, numtrees, leaf_multiplier, big_leaf_depth): - """Initializes self.graph with a forest of random trees, such that each point has k distinct neighbors""" + """Initializes self.graph with a forest of random trees, such that each point has 'queue' distinct neighbors""" N, k = self.graph.shape dim = data.shape[1] @@ -358,7 +429,7 @@ def _initialize_graph_forest(self, data, numtrees, leaf_multiplier, big_leaf_dep temp_graph = torch.cat((temp_graph, tree_graph), 1) # Add the first tree's big_leaves to the NNDescent's big_leaves - if j == 0: + if j == 0 and t.big_leaves: self.big_leaves = torch.LongTensor(t.big_leaves) warning_count = 0 # number of indices for which some neighbours are random @@ -370,9 +441,9 @@ def _initialize_graph_forest(self, data, numtrees, leaf_multiplier, big_leaf_dep temp_row = temp_row[temp_row != i] # remove self temp_points = data[temp_row, :] # pick out elements from dataset - d = ( - (data[i].reshape(1, dim).unsqueeze(1) - temp_points.unsqueeze(0)) ** 2 - ).sum(-1) + d = self.distance( + data[i].reshape(1, dim).unsqueeze(1), temp_points.unsqueeze(0) + ) distances, indices = torch.sort(d, dim=1) indices = indices.flatten()[:k] @@ -403,8 +474,112 @@ def _initialize_graph_forest(self, data, numtrees, leaf_multiplier, big_leaf_dep if warning_count: print("WARNING!", warning_count, " INDICES ARE RANDOM!") + def 
_initialize_graph_clusters(self, data): + """Initializes self.graph on cluster centroids, such that each cluster has 'a' distinct neighbors""" + N, dim = data.shape + k = self.k + a = self.a + LT = self.LT + leaf_multiplier = ( + N / self.num_clusters / k + ) # to get number of clusters ~ num_clusters + self.clusters = ( + torch.ones( + N, + ) + * -1 + ) + + data = data.to(device) + + # Create trees, obtain leaves + t = Tree( + data, k, self.big_leaf_depth, leaf_multiplier, LT + ) # TreeClusters(data, k, leaf_multiplier, LT) + + self.leaves = len(t.leaves) + + # Assign each point to a cluster, 1 cluster per tree in forest + for i, leaf in enumerate(t.leaves): + self.clusters[leaf] = i + self.data_orig = self.data.clone() # ADDED BY STEFAN + self.data = t.centroids.clone() # CHANGED TO self.centroids to self.data + + # Find nearest centroids + x_LT = LazyTensor(self.data.unsqueeze(1).to(device)) + y_LT = LazyTensor(self.data.unsqueeze(0).to(device)) + d = self.distance(x_LT, y_LT) + indices = d.argKmin(K=a + 1, dim=1).long() + self.centroids_neighbours = indices[:, 1:].long() + + self.num_clusters = self.centroids_neighbours.shape[0] + self.graph = self.centroids_neighbours + + # Assign big_leaves by searching for the correct cluster + self.big_leaves = torch.LongTensor(t.big_leaves) + for i, index in enumerate(self.big_leaves): + self.big_leaves[i] = self.clusters[index] + return + + def final_brute_force(self, nearest_clusters, query_pts, verbose=False): + """ Final brute force search over clusters in cluster method""" + if verbose: + print("Starting brute force search over clusters.") + return self._final_brute_force(nearest_clusters, query_pts) + + def _final_brute_force(self, nearest_clusters, query_pts): + """ Final brute force search over clusters in cluster method""" + if use_cuda: + torch.cuda.synchronize() + + k = self.k -class Tree: + x = self.data_orig.to(device) + x_labels = self.clusters.long() + y = query_pts.to(device) + y_labels = nearest_clusters[:, 0] + + x = x.contiguous() + y = y.contiguous() + x_labels = x_labels.to(device) + y_labels = y_labels.to(device) + + clusters, a = self.graph.shape + r = torch.arange(clusters).repeat(a, 1).T.reshape(-1).long() + keep = torch.zeros([clusters, clusters], dtype=torch.bool).to(device) + keep[r, self.graph.flatten()] = True + keep += torch.eye(clusters).bool().to(device) + + x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels) + y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels) + + x, x_labels = self.__sort_clusters(x, x_labels, store_x=True) + y, y_labels = self.__sort_clusters(y, y_labels, store_x=False) + + x_LT = LazyTensor(x.unsqueeze(0).to(device).contiguous()) + y_LT = LazyTensor(y.unsqueeze(1).to(device).contiguous()) + D_ij = self.distance(y_LT, x_LT) + + x_ranges = x_ranges.to(device) + y_ranges = y_ranges.to(device) + ranges_ij = from_matrix(y_ranges, x_ranges, keep) + D_ij.ranges = ranges_ij + nn = D_ij.argKmin(K=k, axis=1) + return self.__unsort(nn) + + def __sort_clusters(self, x, lab, store_x=True): + lab, perm = torch.sort(lab.view(-1)) + if store_x: + self.__x_perm = perm + else: + self.__y_perm = perm + return x[perm], lab + + def __unsort(self, nn): + return torch.index_select(self.__x_perm[nn], 0, self.__y_perm.argsort()) + + +class Tree: # NN clusters tree """ Random projection tree class that splits the data evenly per split Each split is performed by calculating the projection distance of each datapoint to a random unit vector @@ -412,39 +587,57 @@ class Tree: The indices of the 
datapoints are stored in tree.leaves, as a nested list """ - def __init__(self, x, k=5, big_leaf_depth=5): - self.min_size = 2 * k - 1 + def __init__(self, x, k=5, big_leaf_depth=5, leaf_multiplier=128, LT=False): + self.min_size = k * leaf_multiplier self.leaves = [] self.sizes = [] + self.centroids = torch.tensor(()).to(device) self.big_leaf_depth = big_leaf_depth self.big_leaves = [] # leaves at depth = 5 indices = torch.arange(x.shape[0]) - self.tree = self.make_tree(x, indices, depth=0) - def make_tree(self, x, indices, depth): - if depth == self.big_leaf_depth: # add to big_leaves if depth=5 + self.dim = x.shape[1] + self.data = x.to(device) + self.LT = LT # Boolean to choose LT or torch initialization + + self.tree = self.make_tree(indices, depth=0) + self.centroids = self.centroids.reshape(-1, x.shape[1]) + + def make_tree(self, indices, depth): + if depth == 5: # add to big_leaves if depth=5 self.big_leaves.append(int(indices[0])) - if x.shape[0] > self.min_size: - v = self.choose_rule(x) - distances = torch.tensordot( - x, v, dims=1 - ) # create list of projection distances + if indices.shape[0] > self.min_size: + v = self.choose_rule().to(device) + + if self.LT: + distances = self.dot_product( + self.data[indices], v + ) # create list of projection distances + else: + distances = torch.tensordot( + self.data[indices], v, dims=1 + ) # create list of projection distances + median = torch.median(distances) left_bool = ( distances <= median ) # create boolean array where entries are true if distance <= median - right_bool = ~left_bool # inverse of left_bool - left_indices = indices[left_bool] - right_indices = indices[right_bool] - self.make_tree(x[left_bool, :], left_indices, depth + 1) - self.make_tree(x[right_bool, :], right_indices, depth + 1) - elif x.shape[0] != 0: + self.make_tree(indices[left_bool], depth + 1) + self.make_tree(indices[~left_bool], depth + 1) + elif indices.shape[0] != 0: self.leaves.append(indices.tolist()) - self.sizes.append(x.shape[0]) + self.sizes.append(indices.shape[0]) + centroid = self.data[indices].mean(dim=0) # get centroid position + self.centroids = torch.cat((self.centroids, centroid)) return - def choose_rule(self, x): - dim = x.shape[1] - v = torch.rand(dim) # create random vector + def choose_rule(self): + v = torch.rand(self.dim) # create random vector v /= torch.norm(v) # normalize to unit vector return v + + def dot_product(self, x, v): + # Calculate dot product between matrix x and vector v using LazyTensors + v_LT = LazyTensor(v.view(1, 1, -1)) + x_LT = LazyTensor(x.unsqueeze(0)) + return (v_LT | x_LT).sum_reduction(axis=0).flatten() From a495a26dba04f53786748f5386cb06f2fa19edc3 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Sun, 18 Apr 2021 16:14:01 +0100 Subject: [PATCH 077/111] requested edits 1 --- pykeops/common/ivf.py | 85 ++++++++++++++++++++------ pykeops/numpy/__init__.py | 2 +- pykeops/numpy/{nn => knn}/__init__.py | 0 pykeops/numpy/{nn => knn}/ivf.py | 6 +- pykeops/torch/__init__.py | 2 +- pykeops/torch/{nn => knn}/NNDescent.py | 0 pykeops/torch/{nn => knn}/__init__.py | 0 pykeops/torch/{nn => knn}/ivf.py | 6 +- 8 files changed, 69 insertions(+), 32 deletions(-) rename pykeops/numpy/{nn => knn}/__init__.py (100%) rename pykeops/numpy/{nn => knn}/ivf.py (91%) rename pykeops/torch/{nn => knn}/NNDescent.py (100%) rename pykeops/torch/{nn => knn}/__init__.py (100%) rename pykeops/torch/{nn => knn}/ivf.py (92%) diff --git a/pykeops/common/ivf.py b/pykeops/common/ivf.py index 
e384cea17..fb3ed1809 100644 --- a/pykeops/common/ivf.py +++ b/pykeops/common/ivf.py @@ -5,15 +5,18 @@ class GenericIVF: """ - def __init__(self, k, metric, normalise, LazyTensor): + def __init__(self, k, metric, normalise, lazytensor): self.__k = k self.__normalise = normalise self.__update_metric(metric) - self.__LazyTensor = LazyTensor + self.__LazyTensor = lazytensor self.__c = None def __update_metric(self, metric): + """ + Update the metric used in the class + """ if isinstance(metric, str): self.__distance = self.tools.distance_function(metric) self.__metric = metric @@ -21,7 +24,9 @@ def __update_metric(self, metric): self.__distance = metric self.__metric = "custom" else: - raise ValueError("Unrecognised metric input type") + raise ValueError( + f"The 'metric' argument has type {type(metric)}, but only strings and functions are supported." + ) @property def metric(self): @@ -29,35 +34,46 @@ def metric(self): return self.__metric @property - def c(self): + def clusters(self): """Returns the clusters obtained through K-Means""" if self.__c is not None: return self.__c else: - raise ValueError("Run .fit() first!") + raise NotImplementedError("Run .fit() first!") def __get_tools(self): pass def __k_argmin(self, x, y, k=1): - x_LT = self.__LazyTensor( + """ + Compute the k nearest neighbors between x and y, for various k + """ + x_i = self.__LazyTensor( self.tools.to(self.tools.unsqueeze(x, 1), self.__device) ) - y_LT = self.__LazyTensor( + y_j = self.__LazyTensor( self.tools.to(self.tools.unsqueeze(y, 0), self.__device) ) - d = self.__distance(x_LT, y_LT) + D_ij = self.__distance(x_i, y_j) if not self.tools.is_tensor(x): if self.__backend: - d.backend = self.__backend + D_ij.backend = self.__backend if k == 1: - return self.tools.view(self.tools.long(d.argmin(dim=1)), -1) + return self.tools.view(self.tools.long(D_ij.argmin(dim=1)), -1) else: - return self.tools.long(d.argKmin(K=k, dim=1)) + return self.tools.long(D_ij.argKmin(K=k, dim=1)) def __sort_clusters(self, x, lab, store_x=True): + """ + Takes in a dataset and sorts according to its labels. + + Args: + x ((N, D) array): Input dataset of N points in dimension D. + lab ((N) array): Labels for each point in x + store_x (bool): Store the sort permutations for use later + """ lab, perm = self.tools.sort(self.tools.view(lab, -1)) if store_x: self.__x_perm = perm @@ -65,8 +81,15 @@ def __sort_clusters(self, x, lab, store_x=True): self.__y_perm = perm return x[perm], lab - def __unsort(self, nn): - return self.tools.index_select(self.__x_perm[nn], 0, self.__y_perm.argsort()) + def __unsort(self, indices): + """ + Given an input indices, undo and prior sorting operations. + First, select the true x indices with __x_perm[indices] + Then, use index_select to choose the indices in true x, for each true y. 
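To make the permutation bookkeeping above concrete, here is a minimal, self-contained sketch in plain torch; the data, labels, and dummy neighbour indices are illustrative and not part of the patch:

    import torch

    # Toy data: 6 points with cluster labels.
    x = torch.randn(6, 2)
    labels = torch.tensor([2, 0, 1, 0, 2, 1])

    # __sort_clusters: group points by label, remembering the permutation.
    labels_sorted, x_perm = torch.sort(labels)
    x_sorted = x[x_perm]

    # Suppose a search over x_sorted returns, for each *sorted* query, the
    # positions of its neighbours in x_sorted (dummy values here):
    nn = torch.tensor([[1], [0], [3], [2], [5], [4]])
    y_perm = x_perm  # pretend the queries were sorted the same way

    # __unsort: map sorted positions back to original indices, then reorder
    # the rows so that row i corresponds to the i-th original query again.
    original = torch.index_select(x_perm[nn], 0, y_perm.argsort())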
+ """ + return self.tools.index_select( + self.__x_perm[indices], 0, self.__y_perm.argsort() + ) def _fit( self, @@ -82,6 +105,8 @@ def _fit( """ Fits the main dataset """ + + # basic checks that the hyperparameters are as expected if type(clusters) != int: raise ValueError("Clusters must be an integer") if clusters >= len(x): @@ -94,6 +119,7 @@ def _fit( ) if len(x.shape) != 2: raise ValueError("Input must be a 2D array") + # normalise the input if selected if self.__normalise: x = x / self.tools.repeat(self.tools.norm(x, 2, -1), x.shape[1]).reshape( -1, x.shape[1] @@ -107,6 +133,7 @@ def _fit( self.__device = device self.__backend = backend + # perform K-Means cl, c = self.tools.kmeans( x, self.__distance, @@ -118,22 +145,34 @@ def _fit( ) self.__c = c + # perform one final cluster assignment, since K-Means ends on cluster update step cl = self.__assign(x) + # obtain the nearest clusters to each cluster ncl = self.__k_argmin(c, c, k=a) self.__x_ranges, _, _ = self.tools.cluster_ranges_centroids(x, cl) x, x_labels = self.__sort_clusters(x, cl, store_x=True) self.__x = x r = self.tools.repeat(self.tools.arange(clusters, device=self.__device), a) + # create a [clusters, clusters] sized boolean matrix self.__keep = self.tools.to( self.tools.zeros([clusters, clusters], dtype=bool), self.__device ) + # set the indices of the nearest clusters to each cluster to True self.__keep[r, ncl.flatten()] = True return self def __assign(self, x, c=None): + """ + Assigns nearest clusters to a dataset. + If no clusters are given, uses the clusters found through K-Means. + + Args: + x ((N, D) array): Input dataset of N points in dimension D. + c ((M, D) array): Cluster locations of M points in dimension D. + """ if c is None: c = self.__c return self.__k_argmin(x, c) @@ -155,25 +194,31 @@ def _kneighbors(self, y): -1, y.shape[1] ) y = self.tools.contiguous(y) + # assign y to the previously found clusters and get labels y_labels = self.__assign(y) + # obtain y_ranges y_ranges, _, _ = self.tools.cluster_ranges_centroids(y, y_labels) self.__y_ranges = y_ranges + + # sort y contiguous y, y_labels = self.__sort_clusters(y, y_labels, store_x=False) - x_LT = self.__LazyTensor(self.tools.unsqueeze(self.__x, 0)) - y_LT = self.__LazyTensor(self.tools.unsqueeze(y, 1)) - D_ij = self.__distance(y_LT, x_LT) + + # perform actual knn computation + x_i = self.__LazyTensor(self.tools.unsqueeze(self.__x, 0)) + y_j = self.__LazyTensor(self.tools.unsqueeze(y, 1)) + D_ij = self.__distance(y_j, x_i) ranges_ij = self.tools.from_matrix(y_ranges, self.__x_ranges, self.__keep) D_ij.ranges = ranges_ij - nn = D_ij.argKmin(K=self.__k, axis=1) - return self.__unsort(nn) + indices = D_ij.argKmin(K=self.__k, axis=1) + return self.__unsort(indices) def brute_force(self, x, y, k=5): """Performs a brute force search with KeOps Args: - x (array): Input dataset - y (array): Query dataset + x ((N, D) array): Input dataset of N points in dimension D. + y ((M, D) array): Query dataset of M points in dimension D. 
k (int): Number of nearest neighbors to obtain """ diff --git a/pykeops/numpy/__init__.py b/pykeops/numpy/__init__.py index e005c46e0..2df9564c1 100644 --- a/pykeops/numpy/__init__.py +++ b/pykeops/numpy/__init__.py @@ -6,7 +6,7 @@ # Import pyKeOps routines -from .nn.ivf import IVF +from .knn.ivf import IVF from .generic.generic_red import Genred from .operations import KernelSolve from .convolutions.radial_kernel import RadialKernelConv, RadialKernelGrad1conv diff --git a/pykeops/numpy/nn/__init__.py b/pykeops/numpy/knn/__init__.py similarity index 100% rename from pykeops/numpy/nn/__init__.py rename to pykeops/numpy/knn/__init__.py diff --git a/pykeops/numpy/nn/ivf.py b/pykeops/numpy/knn/ivf.py similarity index 91% rename from pykeops/numpy/nn/ivf.py rename to pykeops/numpy/knn/ivf.py index 6ecd4224c..7611cfb13 100644 --- a/pykeops/numpy/nn/ivf.py +++ b/pykeops/numpy/knn/ivf.py @@ -24,7 +24,7 @@ def __init__(self, k=5, metric="euclidean", normalise=False): from pykeops.numpy import LazyTensor self.__get_tools() - super().__init__(k=k, metric=metric, normalise=normalise, LazyTensor=LazyTensor) + super().__init__(k=k, metric=metric, normalise=normalise, lazytensor=LazyTensor) def __get_tools(self): from pykeops.numpy.utils import numpytools @@ -47,8 +47,6 @@ def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU", approx=False): """ if approx: raise ValueError("Approximation not supported for numpy") - if type(x) != np.ndarray: - raise ValueError("Input dataset must be np array") return self._fit(x, clusters=clusters, a=a, Niter=Niter, backend=backend) def kneighbors(self, y): @@ -57,6 +55,4 @@ def kneighbors(self, y): Args: y (np.ndarray): Input dataset to search over """ - if type(y) != np.ndarray: - raise ValueError("Query dataset must be a np array") return self._kneighbors(y) diff --git a/pykeops/torch/__init__.py b/pykeops/torch/__init__.py index 18e3407c4..6574a65f7 100644 --- a/pykeops/torch/__init__.py +++ b/pykeops/torch/__init__.py @@ -27,7 +27,7 @@ ########################################################## # Import pyKeOps routines -from .nn.ivf import IVF +from .knn.ivf import IVF from .generic.generic_red import Genred from .generic.generic_ops import ( generic_sum, diff --git a/pykeops/torch/nn/NNDescent.py b/pykeops/torch/knn/NNDescent.py similarity index 100% rename from pykeops/torch/nn/NNDescent.py rename to pykeops/torch/knn/NNDescent.py diff --git a/pykeops/torch/nn/__init__.py b/pykeops/torch/knn/__init__.py similarity index 100% rename from pykeops/torch/nn/__init__.py rename to pykeops/torch/knn/__init__.py diff --git a/pykeops/torch/nn/ivf.py b/pykeops/torch/knn/ivf.py similarity index 92% rename from pykeops/torch/nn/ivf.py rename to pykeops/torch/knn/ivf.py index 64d916411..c92e13859 100644 --- a/pykeops/torch/nn/ivf.py +++ b/pykeops/torch/knn/ivf.py @@ -25,7 +25,7 @@ def __init__(self, k=5, metric="euclidean", normalise=False): from pykeops.torch import LazyTensor self.__get_tools() - super().__init__(k=k, metric=metric, normalise=normalise, LazyTensor=LazyTensor) + super().__init__(k=k, metric=metric, normalise=normalise, lazytensor=LazyTensor) def __get_tools(self): from pykeops.torch.utils import torchtools @@ -52,8 +52,6 @@ def fit(self, x, clusters=50, a=5, Niter=15, approx=False, n=50): Lower values are faster while higher values give better accuracy in centroid location """ - if type(x) != torch.Tensor: - raise ValueError("Input dataset must be a torch tensor") return self._fit( x, clusters=clusters, a=a, Niter=Niter, device=x.device, approx=approx, 
n=n ) @@ -64,6 +62,4 @@ def kneighbors(self, y): Args: y (torch.Tensor): Input dataset to search over """ - if type(y) != torch.Tensor: - raise ValueError("Query dataset must be a torch tensor") return self._kneighbors(y) From af1d29b6cd7efeed305fdcfb2adb25c539cf8c81 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Sun, 18 Apr 2021 16:26:35 +0100 Subject: [PATCH 078/111] edit tests to reflect correct import structure --- pykeops/test/unit_tests_numpy.py | 35 +----------------------------- pykeops/test/unit_tests_pytorch.py | 35 +----------------------------- 2 files changed, 2 insertions(+), 68 deletions(-) diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index ac8d35d68..e7fb9d1b7 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -436,44 +436,11 @@ def test_LazyTensor_sum(self): for (res_keops, res_numpy) in zip(full_results[0], full_results[1]): self.assertTrue(res_keops.shape == res_numpy.shape) self.assertTrue(np.allclose(res_keops, res_numpy, atol=1e-3)) - - ############################################################ - def test_IVF(self): - ########################################################### - from pykeops.numpy.nn.ivf_np import ivf - import numpy as np - - np.random.seed(0) - N, D, K, k, a = 10**3, 3, 50, 5, 5 - - # Generate random datapoints x, y - x = 0.7 * np.random.normal(size=(N, D)) + 0.3 - y = 0.7 * np.random.normal(size=(N, D)) + 0.3 - - # Ground truth K nearest neighbours - truth = np.argsort(((np.expand_dims(y,1)-np.expand_dims(x,0))**2).sum(-1),axis=1) - truth = truth[:,:k] - - # IVF K nearest neighbours - IVF = ivf() - IVF.fit(x,a=a) - ivf_fit = IVF.kneighbors(y) - - # Calculate accuracy - accuracy = 0 - for i in range(k): - accuracy += float(np.sum(ivf_fit == truth))/N - truth = np.roll(truth, 1, -1) # Create a rolling window (index positions may not match) - # Record accuracies - accuracy = float(accuracy/k) - - print(a,accuracy) - self.assertTrue(accuracy >= 0.8, f'Failed at {a}, {accuracy}') ############################################################ def test_IVF(self): ########################################################### - from pykeops.numpy.nn.ivf import IVF + from pykeops.numpy import IVF import numpy as np np.random.seed(0) diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index 4df23896a..372b066ec 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -672,44 +672,11 @@ def invert_permutation_numpy(permutation): self.assertTrue( torch.allclose(grad_keops.flatten(), grad_torch.flatten(), rtol=1e-4) ) - - ############################################################ - def test_IVF(self): - ############################################################ - from pykeops.torch.nn.ivf_torch import ivf - import torch - - torch.manual_seed(0) - N, D, K, k, a = 10**3, 3, 50, 5, 5 - - # Generate random datapoints x, y - x = 0.7 * torch.randn(N, D) + 0.3 - y = 0.7 * torch.randn(N, D) + 0.3 - - # Ground truth K nearest neighbours - truth = torch.argsort(((y.unsqueeze(1)-x.unsqueeze(0))**2).sum(-1),dim=1) - truth = truth[:,:k] - - # IVF K nearest neighbours - IVF = ivf() - IVF.fit(x,a=a) - ivf_fit = IVF.kneighbors(y) - - # Calculate accuracy - accuracy = 0 - for i in range(k): - accuracy += torch.sum(ivf_fit == truth).float()/N - truth = torch.roll(truth, 1, -1) # Create a rolling window (index positions may not match) - # Record accuracies - accuracy = float(accuracy/k) - - 
print(a,accuracy) - self.assertTrue(accuracy >= 0.8, f'Failed at {a}, {accuracy}') ############################################################ def test_IVF(self): ############################################################ - from pykeops.torch.nn.ivf import IVF + from pykeops.torch import IVF import torch torch.manual_seed(0) From 3c9d1848c01330da686ca421ef8650c268a9c8f8 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Sun, 18 Apr 2021 16:33:07 +0100 Subject: [PATCH 079/111] full stops on generic ivf class --- pykeops/common/ivf.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pykeops/common/ivf.py b/pykeops/common/ivf.py index fb3ed1809..3c45056ca 100644 --- a/pykeops/common/ivf.py +++ b/pykeops/common/ivf.py @@ -1,7 +1,7 @@ class GenericIVF: - """Abstract class to compute IVF functions + """Abstract class to compute IVF functions. - End-users should use 'pykeops.numpy.ivf' or 'pykeops.torch.ivf' + End-users should use 'pykeops.numpy.ivf' or 'pykeops.torch.ivf'. """ @@ -15,7 +15,7 @@ def __init__(self, k, metric, normalise, lazytensor): def __update_metric(self, metric): """ - Update the metric used in the class + Update the metric used in the class. """ if isinstance(metric, str): self.__distance = self.tools.distance_function(metric) @@ -30,12 +30,12 @@ def __update_metric(self, metric): @property def metric(self): - """Returns the metric used in the search""" + """Returns the metric used in the search.""" return self.__metric @property def clusters(self): - """Returns the clusters obtained through K-Means""" + """Returns the clusters obtained through K-Means.""" if self.__c is not None: return self.__c else: @@ -46,7 +46,7 @@ def __get_tools(self): def __k_argmin(self, x, y, k=1): """ - Compute the k nearest neighbors between x and y, for various k + Compute the k nearest neighbors between x and y, for various k. """ x_i = self.__LazyTensor( self.tools.to(self.tools.unsqueeze(x, 1), self.__device) @@ -71,8 +71,8 @@ def __sort_clusters(self, x, lab, store_x=True): Args: x ((N, D) array): Input dataset of N points in dimension D. - lab ((N) array): Labels for each point in x - store_x (bool): Store the sort permutations for use later + lab ((N) array): Labels for each point in x. + store_x (bool): Store the sort permutations for use later. """ lab, perm = self.tools.sort(self.tools.view(lab, -1)) if store_x: @@ -84,7 +84,7 @@ def __sort_clusters(self, x, lab, store_x=True): def __unsort(self, indices): """ Given an input indices, undo and prior sorting operations. - First, select the true x indices with __x_perm[indices] + First, select the true x indices with __x_perm[indices]. Then, use index_select to choose the indices in true x, for each true y. """ return self.tools.index_select( @@ -179,7 +179,7 @@ def __assign(self, x, c=None): def _kneighbors(self, y): """ - Obtain the k nearest neighbors of the query dataset y + Obtain the k nearest neighbors of the query dataset y. """ if self.__x is None: raise ValueError("Input dataset not fitted yet! Call .fit() first!") @@ -219,7 +219,7 @@ def brute_force(self, x, y, k=5): Args: x ((N, D) array): Input dataset of N points in dimension D. y ((M, D) array): Query dataset of M points in dimension D. - k (int): Number of nearest neighbors to obtain + k (int): Number of nearest neighbors to obtain. 
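For reference, a minimal sketch of the brute-force pattern that this method wraps (assuming pykeops.torch; shapes and values are illustrative):

    import torch
    from pykeops.torch import LazyTensor

    x = torch.randn(10000, 3)  # input dataset
    y = torch.randn(100, 3)    # query dataset

    x_j = LazyTensor(x[None, :, :])  # (1, N, D) symbolic variable
    y_i = LazyTensor(y[:, None, :])  # (M, 1, D) symbolic variable
    D_ij = ((y_i - x_j) ** 2).sum(-1)  # (M, N) squared distances, never materialised

    indices = D_ij.argKmin(K=5, dim=1)  # (M, 5) indices of the nearest input points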
""" x_LT = self.__LazyTensor(self.tools.unsqueeze(x, 0)) From dd0d7021ea8b999c97a79b34e149ad0d293e90bd Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Sun, 18 Apr 2021 16:48:20 +0100 Subject: [PATCH 080/111] change doc strings for parent classes --- pykeops/numpy/knn/ivf.py | 45 ++++++++++++++++-------------- pykeops/torch/knn/ivf.py | 59 ++++++++++++++++++++++------------------ 2 files changed, 57 insertions(+), 47 deletions(-) diff --git a/pykeops/numpy/knn/ivf.py b/pykeops/numpy/knn/ivf.py index 7611cfb13..5a5814645 100644 --- a/pykeops/numpy/knn/ivf.py +++ b/pykeops/numpy/knn/ivf.py @@ -3,22 +3,27 @@ class IVF(GenericIVF): - """IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset.""" + """IVF-Flat is a KNN approximation algorithm. + + The original FAISS paper can found at https://arxiv.org/abs/1702.08734 . + """ def __init__(self, k=5, metric="euclidean", normalise=False): """Initialise the IVF-Flat class. - IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset. + IVF-Flat is a KNN approximation algorithm. + It first clusters the input data. + During query time, it searches only within the closest clusters. Args: - k (int): Number of nearest neighbours to obtain - metric (str,function): Metric to use - Currently, "euclidean", "manhattan" and "angular" are directly supported - Custom metrics are not supported in numpy, please use torch version instead - For more information, refer to the tutorial - normalise (bool): Whether or not to normalise all input data to norm 1 - This is used mainly for angular metric - In place of this, "angular_full" metric may be used instead + k (int): Number of nearest neighbours to obtain. + metric (str,function): Metric to use. + Currently, "euclidean", "manhattan" and "angular" are directly supported. + Custom metrics are not supported in numpy, please use torch version instead. + For more information, refer to the tutorial. + normalise (bool): Whether or not to normalise all input data to norm 1. + This is used mainly for angular metric. + In place of this, "angular_full" metric may be used instead. """ from pykeops.numpy import LazyTensor @@ -32,17 +37,17 @@ def __get_tools(self): self.tools = numpytools def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU", approx=False): - """Fits a dataset to perform the nearest neighbour search over + """Fits a dataset to perform the nearest neighbour search over. - K-Means is performed on the dataset to obtain clusters - Then the closest clusters to each cluster is stored for use during query time + K-Means is performed on the dataset to obtain clusters. + Then the closest clusters to each cluster is stored for use during query time. Args: - x (torch.Tensor): Torch tensor dataset of shape N, D - Where N is the number of points and D is the number of dimensions - clusters (int): Total number of clusters to create in K-Means - a (int): Number of clusters to search over, must be less than total number of clusters created - Niter (int): Number of iterations to run in K-Means algorithm + x ((N, D) array): Input dataset of N points in dimension D. + Where N is the number of points and D is the number of dimensions. + clusters (int): Total number of clusters to create in K-Means. + a (int): Number of clusters to search over, must be less than total number of clusters created. 
+ Niter (int): Number of iterations to run in K-Means algorithm. """ if approx: @@ -50,9 +55,9 @@ def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU", approx=False): return self._fit(x, clusters=clusters, a=a, Niter=Niter, backend=backend) def kneighbors(self, y): - """Obtains the nearest neighbors for an input dataset from the fitted dataset + """Obtains the nearest neighbors for an input dataset from the fitted dataset. Args: - y (np.ndarray): Input dataset to search over + y ((M, D) array): Query dataset of M points in dimension D. """ return self._kneighbors(y) diff --git a/pykeops/torch/knn/ivf.py b/pykeops/torch/knn/ivf.py index c92e13859..d4b77450d 100644 --- a/pykeops/torch/knn/ivf.py +++ b/pykeops/torch/knn/ivf.py @@ -3,23 +3,28 @@ class IVF(GenericIVF): - """IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset.""" + """IVF-Flat is a KNN approximation algorithm. + + The original FAISS paper can found at https://arxiv.org/abs/1702.08734 + """ def __init__(self, k=5, metric="euclidean", normalise=False): """Initialise the IVF-Flat class. - IVF-Flat is a KNN approximation algorithm that first clusters the data and then performs the query search on a subset of the input dataset. + IVF-Flat is a KNN approximation algorithm. + It first clusters the input data. + During query time, it searches only within the closest clusters. Args: - k (int): Number of nearest neighbours to obtain - metric (str,function): Metric to use - Currently, "euclidean", "manhattan", "angular" and "hyperbolic" are directly supported, apart from custom metrics - Hyperbolic metric requires the use of approx = True, during the fit() function later - Custom metrics should be in the form of a function with 2 inputs and returns their distance - For more information, refer to the tutorial - normalise (bool): Whether or not to normalise all input data to norm 1 - This is used mainly for angular metric - In place of this, "angular_full" metric may be used instead + k (int): Number of nearest neighbours to obtain. + metric (str,function): Metric to use. + Currently, "euclidean", "manhattan", "angular" and "hyperbolic" are directly supported, apart from custom metrics. + Hyperbolic metric requires the use of approx = True, during the fit() function later. + Custom metrics should be in the form of a function with 2 inputs and returns their distance. + For more information, refer to the tutorial. + normalise (bool): Whether or not to normalise all input data to norm 1. + This is used mainly for angular metric. + In place of this, "angular_full" metric may be used instead. """ from pykeops.torch import LazyTensor @@ -33,23 +38,23 @@ def __get_tools(self): self.tools = torchtools def fit(self, x, clusters=50, a=5, Niter=15, approx=False, n=50): - """Fits a dataset to perform the nearest neighbour search over + """Fits a dataset to perform the nearest neighbour search over. - K-Means is performed on the dataset to obtain clusters - Then the closest clusters to each cluster is stored for use during query time + K-Means is performed on the dataset to obtain clusters. + Then the closest clusters to each cluster is stored for use during query time. 
Args: - x (torch.Tensor): Torch tensor dataset of shape N, D - Where N is the number of points and D is the number of dimensions - clusters (int): Total number of clusters to create in K-Means - a (int): Number of clusters to search over, must be less than total number of clusters created - Niter (int): Number of iterations to run in K-Means algorithm - approx (bool): Whether or not to use an approximation step in K-Means - In hyperbolic metric and custom metric, this should be set to True - This is because the optimal cluster centroid may not have a simple closed form expression - n (int): Number of iterations to optimise the cluster centroid, when approx = True - A value of around 50 is recommended - Lower values are faster while higher values give better accuracy in centroid location + x ((N, D) array): Input dataset of N points in dimension D. + Where N is the number of points and D is the number of dimensions. + clusters (int): Total number of clusters to create in K-Means. + a (int): Number of clusters to search over, must be less than total number of clusters created. + Niter (int): Number of iterations to run in K-Means algorithm. + approx (bool): Whether or not to use an approximation step in K-Means. + In hyperbolic metric and custom metric, this should be set to True. + This is because the optimal cluster centroid may not have a simple closed form expression. + n (int): Number of iterations to optimise the cluster centroid, when approx = True. + A value of around 50 is recommended. + Lower values are faster while higher values give better accuracy in centroid location. """ return self._fit( @@ -57,9 +62,9 @@ def fit(self, x, clusters=50, a=5, Niter=15, approx=False, n=50): ) def kneighbors(self, y): - """Obtains the nearest neighbors for an input dataset from the fitted dataset + """Obtains the nearest neighbors for an input dataset from the fitted dataset. Args: - y (torch.Tensor): Input dataset to search over + y ((M, D) array): Query dataset of M points in dimension D. 
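Putting these docstrings together, a hedged end-to-end sketch of the torch API (mirroring the usage in the unit tests; the lambda metric is an illustrative assumption, and per the notes above a custom metric calls for approx=True):

    import torch
    from pykeops.torch import IVF

    x = torch.randn(10000, 3)  # dataset to index
    y = torch.randn(100, 3)    # query points

    # Default euclidean metric:
    knn = IVF(k=5)
    knn.fit(x, clusters=50, a=5)
    indices = knn.kneighbors(y)  # (100, 5) indices into x

    # Custom metric: any function of two inputs returning their distance.
    knn_custom = IVF(k=5, metric=lambda u, v: ((u - v) ** 2).sum(-1))
    knn_custom.fit(x, clusters=50, a=5, approx=True, n=50)
    indices_custom = knn_custom.kneighbors(y)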
""" return self._kneighbors(y) From 6982e8a3601a420207f852214390948e65cc10ca Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Sun, 18 Apr 2021 16:54:05 +0100 Subject: [PATCH 081/111] change numpy ivf approximation error message --- pykeops/numpy/knn/ivf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pykeops/numpy/knn/ivf.py b/pykeops/numpy/knn/ivf.py index 5a5814645..df0ff6b7a 100644 --- a/pykeops/numpy/knn/ivf.py +++ b/pykeops/numpy/knn/ivf.py @@ -51,7 +51,9 @@ def fit(self, x, clusters=50, a=5, Niter=15, backend="CPU", approx=False): """ if approx: - raise ValueError("Approximation not supported for numpy") + raise NotImplementedError( + "Approximation in K-Means not supported for numpy" + ) return self._fit(x, clusters=clusters, a=a, Niter=Niter, backend=backend) def kneighbors(self, y): From 8fc2640fba8d864d9589a46d12cb391a258e91f5 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Sun, 18 Apr 2021 17:05:32 +0100 Subject: [PATCH 082/111] update utils to add comments --- pykeops/numpy/utils.py | 10 +++++++--- pykeops/torch/utils.py | 9 ++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 982e25bb3..5179bb1d9 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -157,10 +157,12 @@ def angular_full(x, y): return angular_full elif metric == "hyperbolic": raise ValueError( - "Hyperbolic not supported for numpy, please use torch version with approximation" + "Hyperbolic not supported for IVF numpy, please use IVF torch instead" ) else: - raise ValueError("Unknown metric") + raise ValueError( + f"Unknown metric: {metric}. Supported values are euclidean, manhattan and angular" + ) @staticmethod def sort(x): @@ -189,6 +191,7 @@ def index_select(input, dim, index): @staticmethod def kmeans(x, distance=None, K=10, Niter=15, device="CPU", approx=False, n=0): + # default metric is euclidean if distance is None: distance = numpytools.distance_function("euclidean") if approx: @@ -202,7 +205,8 @@ def kmeans(x, distance=None, K=10, Niter=15, device="CPU", approx=False, n=0): c_j = LazyTensor(c[None, :, :]) D_ij = distance(x_i, c_j) D_ij.backend = device - cl = D_ij.argmin(axis=1).astype(int).reshape(N) + cl = D_ij.argmin(axis=1).astype(int).reshape(N) # cluster assignment + # cluster location update Ncl = np.bincount(cl).astype(dtype="float32") for d in range(D): c[:, d] = np.bincount(cl, weights=x[:, d]) / Ncl diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py index 9f3af64d9..d16057ca9 100644 --- a/pykeops/torch/utils.py +++ b/pykeops/torch/utils.py @@ -233,6 +233,7 @@ def kmeans(x, distance=None, K=10, Niter=15, device="cuda", approx=False, n=10): from pykeops.torch import LazyTensor + # default metric is euclidean if distance is None: distance = torchtools.distance_function("euclidean") @@ -243,8 +244,10 @@ def calc_centroid(x, c, cl, n=10): x1 = LazyTensor(x.unsqueeze(0)) op = torch.optim.Adam([c], lr=1 / n) scaling = 1 / torch.gather(torch.bincount(cl), 0, cl).view(-1, 1) + # get the counts of all the labels for use later scaling.requires_grad = False with torch.autograd.set_detect_anomaly(True): + # perform grad descent n times for _ in range(n): c.requires_grad = True op.zero_grad() @@ -255,7 +258,7 @@ def calc_centroid(x, c, cl, n=10): ).sum() # calculate distance to centroid for each datapoint, divide by total number of points in that cluster, and sum loss.backward(retain_graph=False) 
op.step() - return c.detach() + return c.detach() # return the optimised cluster centroids N, D = x.shape c = x[:K, :].clone() @@ -264,7 +267,7 @@ def calc_centroid(x, c, cl, n=10): for i in range(Niter): c_j = LazyTensor(c.view(1, K, D).to(device)) D_ij = distance(x_i, c_j) - cl = D_ij.argmin(dim=1).long().view(-1) + cl = D_ij.argmin(dim=1).long().view(-1) # cluster assignment # updating c: either with approximation or exact if approx: @@ -277,7 +280,7 @@ def calc_centroid(x, c, cl, n=10): c.scatter_add_(0, cl[:, None].repeat(1, D), x) Ncl = torch.bincount(cl, minlength=K).type_as(c).view(K, 1) c /= Ncl - + # check if NaN exists, may occur when metric is used incorrectly, eg angular metric with unnormalised data if torch.any(torch.isnan(c)): raise ValueError( "NaN detected in centroids during KMeans, please check metric is correct" From 301e8070663594a719d02ae6df9cc8bf1dd108b3 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Sun, 18 Apr 2021 17:15:54 +0100 Subject: [PATCH 083/111] nn descent code update --- pykeops/torch/knn/NNDescent.py | 216 ++++++++++++++++----------------- 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/pykeops/torch/knn/NNDescent.py b/pykeops/torch/knn/NNDescent.py index 55662ce6f..19e29ad25 100644 --- a/pykeops/torch/knn/NNDescent.py +++ b/pykeops/torch/knn/NNDescent.py @@ -2,13 +2,7 @@ import time from pykeops.torch import LazyTensor from pykeops.torch.cluster import cluster_ranges_centroids, from_matrix, sort_clusters - -use_cuda = torch.cuda.is_available() -if use_cuda: - torch.cuda.synchronize() - device = torch.device("cuda") -else: - device = torch.device("cpu") +from pykeops.torch.utils import torchtools class NNDescent: @@ -22,29 +16,35 @@ def __init__( leaf_multiplier=128, big_leaf_depth=5, verbose=False, - LT=False, + backend="torch", ): """Initialize the NNDescent class. Initializes the NNDescent class given all relevant parameters. If data is provided, it fits the NNDescent search graph to the data. + NNDescent is an approximation strategy for k-Nearest Neighbor search. It + constructs a k-NN graph on the dataset, which is then navigated with a + graph-based search algorithm during query time. + + The original paper on the method: https://www.cs.princeton.edu/cass/papers/www11.pdf + Our code was inspired by the PyNNDescent documentation: https://pynndescent.readthedocs.io/en/latest/how_pynndescent_works.html Args: - data ((N,d) Tensor): Dataset of N datapoints of dimensionality d. - k (int): The number of nearest neighbors which we want to find for each query point - metric (string): Name of metric, either "euclidean" and "manhattan" - initialization_method (string): The type of initialization to be used for - the search graph. Can be "random", "random_big", "forest" or "cluster". - num_trees (int): Number of trees used in "random_big" or "forest" initializations. - big_leaf_depth (int): The depth at which the big leaves are taken to be used at - the start of search. - verbose (boolean): Determines whether or not to print information while fitting. - LT (boolean): Determines if we want to use LazyTensors in cluster initialization. - - Arg not used when initialization_method = "cluster": - leaf_multiplier (int): Parameter for the Tree class for tree-based initializations. - when initialization_method = "cluster", this parameter is used to adjust the number - of clusters to be close to the value specified in the fit function. + data ((N,D) Tensor): Dataset of N datapoints of dimensionality D. 
+ k (int): The number of nearest neighbors which we want to find for each query point + metric (string): Name of metric, either "euclidean" and "manhattan" + initialization_method (string): The type of initialization to be used for + the search graph. Can be "random", "random_big", "forest" or "cluster". + num_trees (int): Number of trees used in "random_big" or "forest" initializations. + big_leaf_depth (int): The depth at which the big leaves are taken to be used at + the start of search. + verbose (boolean): Determines whether or not to print information while fitting. + backend (string): Either "torch" or "keops". Determines if we want to use LazyTensors in cluster initialization. + + Args not used when initialization_method = "cluster": + leaf_multiplier (int): Parameter for the Tree class for tree-based initializations. + when initialization_method = "cluster", this parameter is used to adjust the number + of clusters to be close to the value specified in the fit function. """ # Setting parameters @@ -55,36 +55,31 @@ def __init__( self.leaf_multiplier = leaf_multiplier self.big_leaf_depth = big_leaf_depth self.big_leaves = None - self.LT = LT + self.backend = backend + + # Distance function + self.distance = torchtools.distance_function(metric) # If data is provided, we call the fit function. if data is not None: self.fit(data, verbose=verbose) - def distance(self, x, y): - # Square of euclidian distance. Skip the root for faster computation. - if self.metric == "euclidean": - return ((x - y) ** 2).sum(-1) - elif self.metric == "manhattan": - return ((x - y).abs()).sum(-1) - else: - raise ValueError("Metric not implemented!") - def fit(self, X, iter=20, verbose=False, clusters=32, a=10, queue=5): """Fits the NNDescent search graph to the data set X. Args: - X ((N,d) Tensor): Dataset of N datapoints of dimensionality d. - iter (int): Maximum number of iterations for graph updates - verbose (boolean): Determines whether or not to print information while fitting. - queue (int): The number of neighbors to which each node connects in the search graph. + X ((N,D) Tensor): Dataset of N datapoints of dimensionality D. + iter (int): Maximum number of iterations for graph updates + verbose (boolean): Determines whether or not to print information while fitting. + queue (int): The number of neighbors to which each node connects in the search graph. Used only when initialization_method = "cluster": - clusters (int): The min no. of clusters that we want the data to be clustered into - a (int): The number of clusters we want to search over using the cluster method. + clusters (int): The min no. of clusters that we want the data to be clustered into + a (int): The number of clusters we want to search over using the cluster method. """ self.data = X + self.device = X.device self.queue = queue if queue < self.k and self.init_method is not "cluster": @@ -94,10 +89,13 @@ def fit(self, X, iter=20, verbose=False, clusters=32, a=10, queue=5): ) elif queue > a and self.init_method is "cluster": raise ValueError("Value of queue must be smaller than value of a!") - elif clusters < 32: - raise ValueError("Minimum number of clusters is 32!") + elif clusters < 2 ** self.big_leaf_depth: + # This is neccesary to use the more efficient initial points in the graph search. 
+ raise ValueError("Minimum number of clusters is 2^big_leaf_depth!") elif a > clusters: raise ValueError("Number of clusters must be larger than or equal to a!") + elif X.is_cuda and self.init_method is not "cluster": + raise ValueError("CUDA not supported for non-cluster version of NNDescent.") # A 2D tensor representing a directed graph. # The value a = graph[i,j] represents an edge from point x_i to x_a. @@ -131,24 +129,20 @@ def fit(self, X, iter=20, verbose=False, clusters=32, a=10, queue=5): self._update_graph(iter=iter, verbose=verbose) def _update_graph(self, iter, verbose=False): - """Updates the graph using algorithm: https://pynndescent.readthedocs.io/en/latest/how_pynndescent_works.html + """Updates the current estimate for the kNN-graph with the iterative NN-Descent algorithm + + See https://pynndescent.readthedocs.io/en/latest/how_pynndescent_works.html for detailed explanation. Args: - iter (int): Number of iterations to use when updating search graph. - verbose (boolean): Printing information about iterations while searching. + iter (int): Number of iterations to use when updating search graph. + verbose (boolean): Printing information about iterations while searching. """ # [STEP 1: Start with random graph.] Iterate start = time.time() for it in range(iter): if verbose: print( - "Iteration number", - it, - "with average distance of", - torch.mean(self.k_distances).item(), - "Took", - time.time() - start, - "seconds.", + f"Iteration number {it} with average distance of {torch.mean(self.k_distances).item()}. Took {time.time() - start} seconds." ) has_changed = False @@ -190,7 +184,7 @@ def _update_graph(self, iter, verbose=False): # [STEP 5: If any changes were made, repeat iteration, otherwise stop] if not has_changed: if verbose: - print("Fitting complete! Took", it, "iterations.") + print(f"Fitting complete! Took {it} iterations.") break def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): @@ -203,14 +197,14 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): We then use the KeOps engine to perform the final nearest neighbours search over the nearest clusters to each query point Args: - X ((N,d) Tensor): A query set for which to find k neighbors. - K (int): How many neighbors to search for. Must be <=self.k for non-cluster methods. Default: self.k - max_num_steps (int): The maximum number of steps to take during search. - tree_init (boolean): Determine whether or not to use big leaves from projection trees as the starting point of search. - verbose (boolean): Printing information about iterations while searching. + X ((N,D) Tensor): A query set for which to find k neighbors. + K (int): How many neighbors to search for. Must be <=self.k for non-cluster methods. Default: self.k + max_num_steps (int): The maximum number of steps to take during search. + tree_init (boolean): Determine whether or not to use big leaves from projection trees as the starting point of search. + verbose (boolean): Printing information about iterations while searching. Returns: - The indices of the k nearest neighbors in the fitted data. + The indices of the k nearest neighbors in the fitted data. """ # N datapoints of dimension d @@ -222,7 +216,9 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): # If graph was initialized using trees, we can use information from there to initialize in a diversed manner. 
if self.big_leaves is not None and tree_init: - candidate_idx = self.big_leaves.unsqueeze(0).repeat(N, 1) # Shape: (N,32) + candidate_idx = self.big_leaves.unsqueeze(0).repeat( + N, 1 + ) # Shape: (N,2**self.big_leaf_depth) else: # Random initialization for starting points of search. candidate_idx = torch.randint( @@ -230,8 +226,8 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): ) if self.init_method == "cluster": - is_active = is_active.to(device) - candidate_idx = candidate_idx.to(device) + is_active = is_active.to(self.device) + candidate_idx = candidate_idx.to(self.device) # Sort the candidates by distance from X distances = self.distance(self.data[candidate_idx], X.unsqueeze(1)) @@ -246,7 +242,7 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): explored = torch.full(size=[N, num_explored], fill_value=-1) if self.init_method == "cluster": - explored = explored.to(device) + explored = explored.to(self.device) start = time.time() # The initialization of candidates and explored set is done. Now we can search. @@ -254,13 +250,7 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): while count < max_num_steps: if verbose: print( - "Step", - count, - "- Search is completed for", - 1 - torch.mean(1.0 * is_active).item(), - "- this step took", - time.time() - start, - "s", + f"Step {count} - Search is completed for {1 - torch.mean(1.0 * is_active).item()} - this step took {time.time() - start} s" ) start = time.time() @@ -300,8 +290,8 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): temp = torch.full((len(expanded_idx), 1), -1) if self.init_method == "cluster": - expanded_idx = expanded_idx.to(device) - temp = temp.to(device) + expanded_idx = expanded_idx.to(self.device) + temp = temp.to(self.device) shift = torch.cat( ( @@ -330,15 +320,13 @@ def kneighbors(self, X, max_num_steps=100, tree_init=True, verbose=False): # Return the k candidates if verbose: print( - "Graph search finished after", - count, - "steps. Finished for:", - (1 - torch.mean(1.0 * is_active).item()) * 100, - "%.", + f"Graph search finished after {count} steps. Finished for: {(1 - torch.mean(1.0 * is_active).item()) * 100}%." ) if self.init_method == "cluster": - return self.final_brute_force(candidate_idx[:, : self.k], X) + return self.final_brute_force( + candidate_idx[:, : self.k], X, verbose=verbose + ) else: return candidate_idx[:, : self.k] @@ -403,8 +391,10 @@ def _initialize_graph_forest(self, data, numtrees, leaf_multiplier, big_leaf_dep temp_graph = torch.tensor(()) for j in range(numtrees): - # Create trees, obtain leaves - t = Tree(data, k=k * leaf_multiplier, big_leaf_depth=big_leaf_depth) + # Create trees, obtain leaves. RandomProjectionTree class is defined below. + t = RandomProjectionTree( + data, k=k * leaf_multiplier, big_leaf_depth=big_leaf_depth + ) # Create temporary graph, 1 for each tree # Leaves are of uneven size; select smallest leaf size as graph size @@ -472,14 +462,14 @@ def _initialize_graph_forest(self, data, numtrees, leaf_multiplier, big_leaf_dep warning_count += 1 if warning_count: - print("WARNING!", warning_count, " INDICES ARE RANDOM!") + print(f"WARNING! 
{warning_count} INDICES ARE RANDOM!") def _initialize_graph_clusters(self, data): """Initializes self.graph on cluster centroids, such that each cluster has 'a' distinct neighbors""" N, dim = data.shape k = self.k a = self.a - LT = self.LT + backend = self.backend leaf_multiplier = ( N / self.num_clusters / k ) # to get number of clusters ~ num_clusters @@ -490,24 +480,22 @@ def _initialize_graph_clusters(self, data): * -1 ) - data = data.to(device) + data = data.to(self.device) - # Create trees, obtain leaves - t = Tree( - data, k, self.big_leaf_depth, leaf_multiplier, LT - ) # TreeClusters(data, k, leaf_multiplier, LT) + # Create trees, obtain leaves. RandomProjectionTree class is defined below. + t = RandomProjectionTree(data, k, self.big_leaf_depth, leaf_multiplier, backend) self.leaves = len(t.leaves) # Assign each point to a cluster, 1 cluster per tree in forest for i, leaf in enumerate(t.leaves): self.clusters[leaf] = i - self.data_orig = self.data.clone() # ADDED BY STEFAN - self.data = t.centroids.clone() # CHANGED TO self.centroids to self.data + self.data_orig = self.data.clone() + self.data = t.centroids.clone() # Find nearest centroids - x_LT = LazyTensor(self.data.unsqueeze(1).to(device)) - y_LT = LazyTensor(self.data.unsqueeze(0).to(device)) + x_LT = LazyTensor(self.data.unsqueeze(1).to(self.device)) + y_LT = LazyTensor(self.data.unsqueeze(0).to(self.device)) d = self.distance(x_LT, y_LT) indices = d.argKmin(K=a + 1, dim=1).long() self.centroids_neighbours = indices[:, 1:].long() @@ -529,26 +517,26 @@ def final_brute_force(self, nearest_clusters, query_pts, verbose=False): def _final_brute_force(self, nearest_clusters, query_pts): """ Final brute force search over clusters in cluster method""" - if use_cuda: + if torch.cuda.is_available(): torch.cuda.synchronize() k = self.k - x = self.data_orig.to(device) + x = self.data_orig.to(self.device) x_labels = self.clusters.long() - y = query_pts.to(device) + y = query_pts.to(self.device) y_labels = nearest_clusters[:, 0] x = x.contiguous() y = y.contiguous() - x_labels = x_labels.to(device) - y_labels = y_labels.to(device) + x_labels = x_labels.to(self.device) + y_labels = y_labels.to(self.device) clusters, a = self.graph.shape r = torch.arange(clusters).repeat(a, 1).T.reshape(-1).long() - keep = torch.zeros([clusters, clusters], dtype=torch.bool).to(device) + keep = torch.zeros([clusters, clusters], dtype=torch.bool).to(self.device) keep[r, self.graph.flatten()] = True - keep += torch.eye(clusters).bool().to(device) + keep += torch.eye(clusters).bool().to(self.device) x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels) y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels) @@ -556,12 +544,12 @@ def _final_brute_force(self, nearest_clusters, query_pts): x, x_labels = self.__sort_clusters(x, x_labels, store_x=True) y, y_labels = self.__sort_clusters(y, y_labels, store_x=False) - x_LT = LazyTensor(x.unsqueeze(0).to(device).contiguous()) - y_LT = LazyTensor(y.unsqueeze(1).to(device).contiguous()) + x_LT = LazyTensor(x.unsqueeze(0).to(self.device).contiguous()) + y_LT = LazyTensor(y.unsqueeze(1).to(self.device).contiguous()) D_ij = self.distance(y_LT, x_LT) - x_ranges = x_ranges.to(device) - y_ranges = y_ranges.to(device) + x_ranges = x_ranges.to(self.device) + y_ranges = y_ranges.to(self.device) ranges_ij = from_matrix(y_ranges, x_ranges, keep) D_ij.ranges = ranges_ij nn = D_ij.argKmin(K=k, axis=1) @@ -579,7 +567,7 @@ def __unsort(self, nn): return torch.index_select(self.__x_perm[nn], 0, 
self.__y_perm.argsort()) -class Tree: # NN clusters tree +class RandomProjectionTree: """ Random projection tree class that splits the data evenly per split Each split is performed by calculating the projection distance of each datapoint to a random unit vector @@ -587,18 +575,30 @@ class Tree: # NN clusters tree The indices of the datapoints are stored in tree.leaves, as a nested list """ - def __init__(self, x, k=5, big_leaf_depth=5, leaf_multiplier=128, LT=False): + def __init__( + self, + x, + k=5, + big_leaf_depth=5, + leaf_multiplier=128, + backend="torch", + device=None, + ): self.min_size = k * leaf_multiplier self.leaves = [] self.sizes = [] - self.centroids = torch.tensor(()).to(device) + if device is None: + self.device = x.device + else: + self.device = device + self.centroids = torch.tensor(()).to(self.device) self.big_leaf_depth = big_leaf_depth self.big_leaves = [] # leaves at depth = 5 indices = torch.arange(x.shape[0]) self.dim = x.shape[1] - self.data = x.to(device) - self.LT = LT # Boolean to choose LT or torch initialization + self.data = x.to(self.device) + self.backend = backend # Boolean to choose LT or torch initialization self.tree = self.make_tree(indices, depth=0) self.centroids = self.centroids.reshape(-1, x.shape[1]) @@ -607,9 +607,9 @@ def make_tree(self, indices, depth): if depth == 5: # add to big_leaves if depth=5 self.big_leaves.append(int(indices[0])) if indices.shape[0] > self.min_size: - v = self.choose_rule().to(device) + v = self.choose_rule().to(self.device) - if self.LT: + if self.backend == "keops": distances = self.dot_product( self.data[indices], v ) # create list of projection distances From 1ec3c02952c8dce2cfaba7ef023e21fbf1b11cb9 Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Sun, 18 Apr 2021 17:43:44 +0100 Subject: [PATCH 084/111] updated as per jean's comments --- pykeops/benchmarks/plot_benchmark_KNN.py | 110 +++++++++++++---------- 1 file changed, 65 insertions(+), 45 deletions(-) diff --git a/pykeops/benchmarks/plot_benchmark_KNN.py b/pykeops/benchmarks/plot_benchmark_KNN.py index fcc30add1..6311c116d 100644 --- a/pykeops/benchmarks/plot_benchmark_KNN.py +++ b/pykeops/benchmarks/plot_benchmark_KNN.py @@ -62,51 +62,6 @@ use_cuda = torch.cuda.is_available() -############################################################################ -# KeOps IVF-Flat implementation -# -------------------------------------- -# -# KeOps IVF-Flat is an approximation method that leverages the KeOps engine. It uses the IVF-Flat approximation algorithm comprising 4 steps: (1) split the training data into clusters using k-means, (2) find the 'a' nearest clusters to each cluster, (3) find the nearest cluster to each query point, and (4) perform the nearest neighbour search within only these nearest clusters, and the 'a' nearest clusters to each of these clusters. (1) and (2) are performed during fitting, while (3) and (4) are performed during query time. Steps (3) and (4) achieve time savings during query time by reducing the amount of pair-wise distance calculations. 
-
-from pykeops.torch.nn.ivf import IVF
-
-
-def KNN_KeOps_ivf_flat(K, metric="euclidean", clusters=100, a=10, **kwargs):
-
-    # Setup the K-NN estimator:
-    if metric == "angular":
-        metric = "angular_full"
-    KNN = IVF(k=K, metric=metric) # normalise=False because dataset is normalised
-
-    def fit(x_train):
-        x_train = tensor(x_train)
-        start = timer()
-        KNN.fit(x_train, clusters=clusters, a=a)
-        elapsed = timer() - start
-
-        def f(x_test):
-            x_test = tensor(x_test)
-            start = timer()
-            indices = KNN.kneighbors(x_test)
-            elapsed = timer() - start
-            indices = indices.cpu().numpy()
-
-            return indices, elapsed
-
-        return f, elapsed
-
-    return fit
-
-
-##################################################################
-# The time savings and accuracies achieved depend on the underlying data structure, the number of clusters chosen and the 'a' parameter. The algorithm speed suffers for clusters >200. Reducing the proportion of clusters searched over (i.e. the a/clusters value) increases the algorithm speed, but lowers its accuracy. For structured data (e.g. MNIST), high accuracies >90% can be reached by just searching over 10% of clusters. However, for uniformly distributed random data, over 80% of the clusters will need to be searched over to attain >90% accuracy.
-
-# Here, we propose 2 sets of parameters that work well on real data (e.g. MNIST, GloVe):
-
-KNN_KeOps_gpu_IVFFlat_fast = partial(KNN_KeOps_ivf_flat, clusters=10, a=1)
-KNN_KeOps_gpu_IVFFlat_slow = partial(KNN_KeOps_ivf_flat, clusters=200, a=40)
-
-##############################################
 # We then specify the values of K that we will inspect:

 Ks = [1, 10, 50, 100]  # Numbers of neighbors to find
@@ -429,6 +384,69 @@ def f(x_test):
     return fit


+############################################################################
+# KeOps IVF-Flat implementation
+# --------------------------------------
+#
+# KeOps IVF-Flat is an approximation method that leverages the KeOps engine. It
+# uses the IVF-Flat approximation algorithm comprising 4 steps: (1) split the
+# training data into clusters using k-means, (2) find the 'a' nearest clusters
+# to each cluster, (3) find the nearest cluster to each query point, and (4)
+# perform the nearest neighbour search within only these nearest clusters, and
+# the 'a' nearest clusters to each of these clusters. (1) and (2) are performed
+# during fitting, while (3) and (4) are performed during query time. Steps (3)
+# and (4) achieve time savings during query time by reducing the amount of
+# pair-wise distance calculations.
+
+from pykeops.torch.knn import IVF
+
+
+def KNN_KeOps_ivf_flat(K, metric="euclidean", clusters=100, a=10, **kwargs):
+
+    # Setup the K-NN estimator:
+    if metric == "angular":
+        metric = "angular_full" # alternative metric for non-normalised data
+    KNN = IVF(k=K, metric=metric)
+
+    def fit(x_train):
+        x_train = tensor(x_train)
+        start = timer()
+        KNN.fit(x_train, clusters=clusters, a=a)
+        elapsed = timer() - start
+
+        def f(x_test):
+            x_test = tensor(x_test)
+            start = timer()
+            indices = KNN.kneighbors(x_test)
+            elapsed = timer() - start
+            indices = indices.cpu().numpy()
+
+            return indices, elapsed
+
+        return f, elapsed
+
+    return fit
+
+
+##################################################################
+# The time savings and accuracies achieved depend on the underlying data
+# structure, the number of clusters chosen and the 'a' parameter. The algorithm
+# speed suffers for clusters >200. Reducing the proportion of clusters searched
+# over (i.e.
the a/clusters value) increases the algorithm speed, but lowers its +# accuracy. For structured data (e.g. MNIST), high accuracies >90% can be +# reached by just searching over 10% of clusters. However, for uniformly +# distributed random data, over 80% of the clusters will need to be searched +# over to attain >90% accuracy. + +# Here, we propose 2 sets of parameters that work well on real data (e.g. +# MNIST, GloVe): + +KNN_KeOps_gpu_IVFFlat_fast = partial(KNN_KeOps_ivf_flat, clusters=10, a=1) +KNN_KeOps_gpu_IVFFlat_slow = partial(KNN_KeOps_ivf_flat, clusters=200, a=40) + +############################################## + + ################################################################################ # SciKit-Learn tree-based and bruteforce methods # ----------------------------------------------------- @@ -705,6 +723,8 @@ def run_KNN_benchmark(name, loops=[1]): legend_location="upper right", linestyles=[ "o-", + "+-.", + "x-.", "s-", "^:", "<:", From 95378e93b97534570364f25833fb9f5c87e661b0 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Sun, 18 Apr 2021 18:34:01 +0100 Subject: [PATCH 085/111] black --- pykeops/benchmarks/plot_benchmark_KNN.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pykeops/benchmarks/plot_benchmark_KNN.py b/pykeops/benchmarks/plot_benchmark_KNN.py index 6311c116d..087cf536d 100644 --- a/pykeops/benchmarks/plot_benchmark_KNN.py +++ b/pykeops/benchmarks/plot_benchmark_KNN.py @@ -405,7 +405,7 @@ def KNN_KeOps_ivf_flat(K, metric="euclidean", clusters=100, a=10, **kwargs): # Setup the K-NN estimator: if metric == "angular": - metric = "angular_full" # alternative metric for non-normalised data + metric = "angular_full" # alternative metric for non-normalised data KNN = IVF(k=K, metric=metric) def fit(x_train): From 8307208e1b17b54babec517d289c571e092d5199 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Mon, 19 Apr 2021 22:25:19 +0100 Subject: [PATCH 086/111] updated tutorials --- pykeops/tutorials/knn/plot_ivf_numpy.ipynb | 467 ----------------- pykeops/tutorials/knn/plot_ivf_numpy.py | 143 +++++ pykeops/tutorials/knn/plot_ivf_torch.ipynb | 573 --------------------- pykeops/tutorials/knn/plot_ivf_torch.py | 186 +++++++ pykeops/tutorials/knn/plot_nnd_torch.py | 175 +++++++ 5 files changed, 504 insertions(+), 1040 deletions(-) delete mode 100644 pykeops/tutorials/knn/plot_ivf_numpy.ipynb create mode 100644 pykeops/tutorials/knn/plot_ivf_numpy.py delete mode 100644 pykeops/tutorials/knn/plot_ivf_torch.ipynb create mode 100644 pykeops/tutorials/knn/plot_ivf_torch.py create mode 100644 pykeops/tutorials/knn/plot_nnd_torch.py diff --git a/pykeops/tutorials/knn/plot_ivf_numpy.ipynb b/pykeops/tutorials/knn/plot_ivf_numpy.ipynb deleted file mode 100644 index 406f81c6f..000000000 --- a/pykeops/tutorials/knn/plot_ivf_numpy.ipynb +++ /dev/null @@ -1,467 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.8" - }, - "colab": { - "name": "plot_ivf_numpy.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "accelerator": "GPU" - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "gQpF1e7J753b" 
- }, - "source": [ - "\n", - "# IVF-Flat approximate nearest neighbors search - Numpy API\n", - "\n", - "The :class:`pykeops.torch.IVF` class supported by KeOps allows us\n", - "to perform **approximate nearest neighbor search** with four lines of code.\n", - "It can thus be used to compute a **large-scale** nearest neighbors search **much faster**. The code is based on the IVF-Flat algorithm and uses KeOps' block-sparse reductions to speed up the search by reducing the search space.\n", - "\n", - "Euclidean, Manhattan and Angular metrics are supported.\n", - "\n", - "
<div class=\"alert alert-info\"><h4>Note</h4><p>Hyperbolic and custom metrics are not supported in the Numpy API, please use the PyTorch API instead.</p></div>\n",
- "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "V_FLgW29753l" - }, - "source": [ - "Check the performance of our algorithm\n", - "\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "kXIoY57u753m", - "outputId": "be568c38-661a-4639-b44f-e40a2b99114a" - }, - "source": [ - "print('IVF Recall:', accuracy(approx_nn, true_nn))" - ], - "execution_count": 11, - "outputs": [ - { - "output_type": "stream", - "text": [ - "IVF Recall: 0.9652399999999999\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mHWGG4yO4mN8" - }, - "source": [ - "Timing the algorithms to observe their performance" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "rrumsJ3q4lef", - "outputId": "629889c2-3cb7-43cf-c2c2-50e19c95a402" - }, - "source": [ - "start=time.time()\n", - "iters=10\n", - "\n", - "#timing KeOps brute force\n", - "for _ in range(iters):\n", - " true_nn = nn.brute_force(x, y, k=k)\n", - "bf_time = time.time()-start\n", - "print('KeOps brute force timing for', N, 'points with', D, 'dimensions:', bf_time/iters)\n", - "\n", - "#timing IVF\n", - "nn = IVF(k=k)\n", - "nn.fit(x)\n", - "start = time.time()\n", - "for _ in range(iters):\n", - " approx_nn = nn.kneighbors(y)\n", - "ivf_time = time.time() - start\n", - "print('KeOps IVF-Flat timing for', N, 'points with', D, 'dimensions:', ivf_time/iters)\n" - ], - "execution_count": 12, - "outputs": [ - { - "output_type": "stream", - "text": [ - "KeOps brute force timing for 100000 points with 3 dimensions: 0.21520822048187255\n", - "KeOps IVF-Flat timing for 100000 points with 3 dimensions: 0.05834429264068604\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j8joqj-r753m" - }, - "source": [ - "## IVF nearest neighbors search with angular metric\n", - "Second experiment with N=$10^5$ points in dimension D=3, with 5 nearest neighbors\n", - "\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "PdjTMuE1753n" - }, - "source": [ - "np.random.seed(1)\n", - "x = 0.7 * np.random.randn(N, D) + 0.3\n", - "y = 0.7 * np.random.randn(N, D) + 0.3\n", - "\n", - "#normalising the inputs to have norm of 1\n", - "x_norm = x / np.linalg.norm(x, axis=1, keepdims=True)\n", - "y_norm = y / np.linalg.norm(x, axis=1, keepdims=True)" - ], - "execution_count": 13, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "JyMhcbF6XR7k" - }, - "source": [ - "nn = IVF(metric = 'angular')\n", - "true_nn = nn.brute_force(x_norm, y_norm)" - ], - "execution_count": 14, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "fWjVpEuCXWvB", - "outputId": "fda9d458-d497-4e67-ec22-36bef77a7f5f" - }, - "source": [ - "nn = IVF(metric = 'angular')\n", - "nn.fit(x_norm)\n", - "approx_nn = nn.kneighbors(y_norm)\n", - "print('IVF Recall:', accuracy(approx_nn, true_nn))" - ], - "execution_count": 15, - "outputs": [ - { - "output_type": "stream", - "text": [ - "IVF Recall: 0.9958119999999999\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AqYVOtAVYgmd" - }, - "source": [ - "The IVF class also has an option to automatically normalise all inputs" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "OzE6F9EkYfNO", - "outputId": 
"a50dba76-d121-40a6-fd3b-00e03b24585b" - }, - "source": [ - "nn = IVF(metric = 'angular', normalise = True)\n", - "nn.fit(x)\n", - "approx_nn = nn.kneighbors(y)\n", - "print('IVF Recall:', accuracy(approx_nn, true_nn))" - ], - "execution_count": 16, - "outputs": [ - { - "output_type": "stream", - "text": [ - "IVF Recall: 0.9958119999999999\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YZgvkGCzYy2p" - }, - "source": [ - "There is also an option to use full angular metric \"angular_full\", which uses the full angular metric. \"angular\" simply uses the dot product." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "qMr5jpn8ZCAO", - "outputId": "3fe0a152-eff7-490c-9f3b-b3707bf81571" - }, - "source": [ - "nn = IVF(metric = 'angular_full')\n", - "nn.fit(x)\n", - "approx_nn = nn.kneighbors(y)\n", - "print('IVF Recall:', accuracy(approx_nn, true_nn))" - ], - "execution_count": 17, - "outputs": [ - { - "output_type": "stream", - "text": [ - "IVF Recall: 0.995626\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "8Qoacr6Hk64h" - }, - "source": [ - "" - ], - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file diff --git a/pykeops/tutorials/knn/plot_ivf_numpy.py b/pykeops/tutorials/knn/plot_ivf_numpy.py new file mode 100644 index 000000000..bd9a82dc8 --- /dev/null +++ b/pykeops/tutorials/knn/plot_ivf_numpy.py @@ -0,0 +1,143 @@ +""" +========================================================= +IVF-Flat approximate nearest neighbors search - Numpy API +========================================================= + +The :class:`pykeops.torch.IVF` class supported by KeOps allows us +to perform **approximate nearest neighbor search** with four lines of code. +It can thus be used to compute a **large-scale** nearest neighbors search **much faster**. +The code is based on the IVF-Flat algorithm and uses KeOps' block-sparse reductions to speed up the search by reducing the search space. + +Euclidean, Manhattan and Angular metrics are supported. + +.. note:: + Hyperbolic and custom metrics are not supported in the Numpy API, please use the PyTorch API instead. 
+ +""" + +######################################################################## +# Setup +# ----------------- +# Standard imports: + +import time +import numpy as np +from pykeops.numpy import IVF + +######################################################################## +# IVF nearest neighbour search with Euclidean metric +# First experiment with N=$10^5$ points in dimension D=3 and 5 nearest neighbours + +N, D, k = 10 ** 5, 3, 5 + +######################################################################## +# Define our dataset: + +np.random.seed(1) +x = 0.7 * np.random.randn(N, D) + 0.3 +y = 0.7 * np.random.randn(N, D) + 0.3 + +######################################################################## +# Create the IVF class and fit the dataset: + +nn = IVF(k=k) +# set the number of clusters in K-Means to 50 +# set the number of nearest clusters we search over during the final query search to 5 +nn.fit(x, clusters=50, a=5) + +######################################################################## +# Query dataset search + +approx_nn = nn.kneighbors(y) + +######################################################################## +# Now computing the true nearest neighbors with brute force search + +true_nn = nn.brute_force(x, y, k=k) + +######################################################################## +# Define the function to compute recall of the nearest neighbors + + +def accuracy(indices_test, indices_truth): + """ + Compares the test and ground truth indices (rows = KNN for each point in dataset) + Returns accuracy: proportion of correct nearest neighbours + """ + N, k = indices_test.shape + + # Calculate number of correct nearest neighbours + accuracy = 0 + for i in range(k): + accuracy += float(np.sum(indices_test == indices_truth)) / N + indices_truth = np.roll( + indices_truth, 1, -1 + ) # Create a rolling window (index positions may not match) + accuracy = float(accuracy / k) # percentage accuracy + + return accuracy + + +######################################################################## +# Check the performance of our algorithm + +print("IVF Recall:", accuracy(approx_nn, true_nn)) + +######################################################################## +# Timing the algorithms to observe their performance + +start = time.time() +iters = 10 + +# timing KeOps brute force +for _ in range(iters): + true_nn = nn.brute_force(x, y, k=k) +bf_time = time.time() - start +print( + "KeOps brute force timing for", N, "points with", D, "dimensions:", bf_time / iters +) + +# timing IVF +nn = IVF(k=k) +nn.fit(x) +start = time.time() +for _ in range(iters): + approx_nn = nn.kneighbors(y) +ivf_time = time.time() - start +print("KeOps IVF-Flat timing for", N, "points with", D, "dimensions:", ivf_time / iters) + +######################################################################## +# IVF nearest neighbors search with angular metric +# Second experiment with N=$10^5$ points in dimension D=3, with 5 nearest neighbors + +np.random.seed(1) +x = 0.7 * np.random.randn(N, D) + 0.3 +y = 0.7 * np.random.randn(N, D) + 0.3 + +# normalising the inputs to have norm of 1 +x_norm = x / np.linalg.norm(x, axis=1, keepdims=True) +y_norm = y / np.linalg.norm(x, axis=1, keepdims=True) + +nn = IVF(metric="angular") +true_nn = nn.brute_force(x_norm, y_norm) + +nn = IVF(metric="angular") +nn.fit(x_norm) +approx_nn = nn.kneighbors(y_norm) +print("IVF Recall:", accuracy(approx_nn, true_nn)) + +######################################################################## +# The IVF class also has an option 
to automatically normalise all inputs + +nn = IVF(metric="angular", normalise=True) +nn.fit(x) +approx_nn = nn.kneighbors(y) +print("IVF Recall:", accuracy(approx_nn, true_nn)) + +######################################################################## +# There is also an option to use full angular metric "angular_full", which uses the full angular metric. "angular" simply uses the dot product. + +nn = IVF(metric="angular_full") +nn.fit(x) +approx_nn = nn.kneighbors(y) +print("IVF Recall:", accuracy(approx_nn, true_nn)) diff --git a/pykeops/tutorials/knn/plot_ivf_torch.ipynb b/pykeops/tutorials/knn/plot_ivf_torch.ipynb deleted file mode 100644 index 902ed3dfb..000000000 --- a/pykeops/tutorials/knn/plot_ivf_torch.ipynb +++ /dev/null @@ -1,573 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.8" - }, - "colab": { - "name": "plot_ivf_torch.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "accelerator": "GPU" - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "gQpF1e7J753b" - }, - "source": [ - "\n", - "# IVF-Flat approximate nearest neighbors search - PyTorch API\n", - "\n", - "The :class:`pykeops.torch.IVF` class supported by KeOps allows us\n", - "to perform **approximate nearest neighbor search** with four lines of code.\n", - "It can thus be used to compute a **large-scale** nearest neighbors search **much faster**. The code is based on the IVF-Flat algorithm and uses KeOps' block-sparse reductions to speed up the search by reducing the search space.\n", - "\n", - "Euclidean, Manhattan, Angular and Hyperbolic metrics are supported along with custom metrics.\n", - "\n", - "
<div class=\"alert alert-info\"><h4>Note</h4><p>Hyperbolic and custom metrics require the use of an approximation during the K-Means step to obtain the centroid locations, since a closed-form expression might not be readily available.</p></div>\n",
indices_truth).float()/N\n", - " indices_truth = torch.roll(indices_truth, 1, -1) # Create a rolling window (index positions may not match)\n", - " accuracy = float(accuracy/k) # percentage accuracy\n", - "\n", - " return accuracy" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "V_FLgW29753l" - }, - "source": [ - "Check the performance of our algorithm\n", - "\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "kXIoY57u753m", - "outputId": "88d14e97-0f56-4d17-af6f-be412943c964" - }, - "source": [ - "print('IVF Recall:', accuracy(approx_nn, true_nn))" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "IVF Recall: 0.9830819368362427\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mHWGG4yO4mN8" - }, - "source": [ - "Timing the algorithms to observe their performance" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "rrumsJ3q4lef", - "outputId": "89e366cb-8d7b-463b-ed78-6cd2aec6af2a" - }, - "source": [ - "start=time.time()\n", - "iters=10\n", - "\n", - "#timing KeOps brute force\n", - "for _ in range(iters):\n", - " true_nn = nn.brute_force(x, y, k=k)\n", - "bf_time = time.time()-start\n", - "print('KeOps brute force timing for', N, 'points with', D, 'dimensions:', bf_time/iters)\n", - "\n", - "#timing IVF\n", - "nn = IVF(k=k)\n", - "nn.fit(x)\n", - "start = time.time()\n", - "for _ in range(iters):\n", - " approx_nn = nn.kneighbors(y)\n", - "ivf_time = time.time() - start\n", - "print('KeOps IVF-Flat timing for', N, 'points with', D, 'dimensions:', ivf_time/iters)\n" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "KeOps brute force timing for 1000000 points with 3 dimensions: 4.693825650215149\n", - "KeOps IVF-Flat timing for 1000000 points with 3 dimensions: 0.601522707939148\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j8joqj-r753m" - }, - "source": [ - "## IVF nearest neighbors search with angular metric\n", - "Second experiment with N=$10^6$ points in dimension D=3, with 5 nearest neighbors\n", - "\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "PdjTMuE1753n" - }, - "source": [ - "torch.manual_seed(1)\n", - "x = 0.7 * torch.randn(N, D, dtype=dtype, device=device) + 0.3\n", - "y = 0.7 * torch.randn(N, D, dtype=dtype, device=device) + 0.3\n", - "\n", - "#normalising the inputs to have norm of 1\n", - "x_norm = x / torch.linalg.norm(x,dim=1,keepdim=True)\n", - "y_norm = y / torch.linalg.norm(y,dim=1,keepdim=True)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "JyMhcbF6XR7k" - }, - "source": [ - "nn = IVF(metric = 'angular')\n", - "true_nn = nn.brute_force(x_norm, y_norm)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "fWjVpEuCXWvB", - "outputId": "f1550d02-0ac6-451c-8760-ac8aa167202a" - }, - "source": [ - "nn = IVF(metric = 'angular')\n", - "nn.fit(x_norm)\n", - "approx_nn = nn.kneighbors(y_norm)\n", - "print('IVF Recall:', accuracy(approx_nn, true_nn))" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "IVF Recall: 0.998617947101593\n" - ], - "name": "stdout" - } - ] - }, - { - 
"cell_type": "markdown", - "metadata": { - "id": "AqYVOtAVYgmd" - }, - "source": [ - "The IVF class also has an option to automatically normalise all inputs" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "OzE6F9EkYfNO", - "outputId": "dbe30d60-548a-4054-b40c-a79431182853" - }, - "source": [ - "nn = IVF(metric = 'angular', normalise = True)\n", - "nn.fit(x)\n", - "approx_nn = nn.kneighbors(y)\n", - "print('IVF Recall:', accuracy(approx_nn, true_nn))" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "IVF Recall: 0.9986152052879333\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YZgvkGCzYy2p" - }, - "source": [ - "There is also an option to use full angular metric \"angular_full\", which uses the full angular metric. \"angular\" simply uses the dot product." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "qMr5jpn8ZCAO", - "outputId": "69b187bb-96e4-416e-e1cf-5852e79243aa" - }, - "source": [ - "nn = IVF(metric = 'angular_full')\n", - "nn.fit(x)\n", - "approx_nn = nn.kneighbors(y)\n", - "print('IVF Recall:', accuracy(approx_nn, true_nn))" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "IVF Recall: 0.9928072094917297\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8RsIuAPyZGwi" - }, - "source": [ - "## IVF nearest neighbors search with approximations for K-Means centroids\n", - "We run two experiment with N=$10^6$ points in dimension D=3, with 5 nearest neighbors. The first uses the hyperbolic metric while the second uses a custom metric." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "-Yju_HG1ZPPU" - }, - "source": [ - "#hyperbolic data generation\n", - "torch.manual_seed(1)\n", - "x = 0.5 + torch.rand(N, D, dtype=dtype, device=device) \n", - "y = 0.5 + torch.rand(N, D, dtype=dtype, device=device) " - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "IqDycMpscBoH", - "outputId": "36cae4cb-7d2a-4b44-f9a3-42ef6d872918" - }, - "source": [ - "nn = IVF(metric = 'hyperbolic')\n", - "#set approx to True\n", - "#n is the number of times we run gradient descent steps for the approximation, default of 50\n", - "nn.fit(x, approx = True, n = 50)\n", - "approx_nn = nn.kneighbors(y)\n", - "true_nn = nn.brute_force(x, y)\n", - "print('IVF Recall:', accuracy(approx_nn, true_nn))" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "IVF Recall: 0.9897241592407227\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Pg5kOKjacttb" - }, - "source": [ - "#define a custom metric\n", - "def minkowski(x, y, p = 3):\n", - " \"\"\"Returns the computation of a metric\n", - " Note the shape of the input tensors the function should accept\n", - "\n", - " Args:\n", - " x (tensor): Input dataset of size 1, N, D\n", - " y (tensor): Query dataset of size M, 1, D\n", - "\n", - " \"\"\" \n", - " return ((x - y).abs()**p).sum(-1)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "nIVYuN9Mdpgt", - "outputId": "4ca63cf6-3b37-4703-cf74-96339d7026fd" - }, - "source": [ - "#testing custom metric\n", - "nn = IVF(metric = minkowski)\n", - "nn.fit(x, approx = True)\n", - "approx_nn = nn.kneighbors(y)\n", - "true_nn = nn.brute_force(x, y)\n", - "print('IVF Recall:', accuracy(approx_nn, true_nn))" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "IVF Recall: 0.9897966384887695\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "8Qoacr6Hk64h" - }, - "source": [ - "" - ], - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file diff --git a/pykeops/tutorials/knn/plot_ivf_torch.py b/pykeops/tutorials/knn/plot_ivf_torch.py new file mode 100644 index 000000000..b19bb7124 --- /dev/null +++ b/pykeops/tutorials/knn/plot_ivf_torch.py @@ -0,0 +1,186 @@ +""" +=========================================================== +IVF-Flat approximate nearest neighbors search - PyTorch API +=========================================================== + +The :class:`pykeops.torch.IVF` class supported by KeOps allows us +to perform **approximate nearest neighbor search** with four lines of code. +It can thus be used to compute a **large-scale** nearest neighbors search **much faster**. +The code is based on the IVF-Flat algorithm and uses KeOps' block-sparse reductions to speed up the search by reducing the search space. + +Euclidean, Manhattan, Angular and Hyperbolic metrics are supported along with custom metrics. + +.. note:: + Hyperbolic and custom metrics require the use of an approximation during the K-Means step. 
+ This is to obtain the centroid locations since a closed form expression might not be readily available +""" + +############################################################### +# Setup +# ----------------- +# Standard imports: + +import time +import torch +from pykeops.torch import IVF + +use_cuda = torch.cuda.is_available() +device = torch.device("cuda") if use_cuda else torch.device("cpu") +dtype = torch.float32 if use_cuda else torch.float64 + +############################################################### +# IVF nearest neighbour search with Euclidean metric +# -------------------------------------------------- +# First experiment with N=$10^6$ points in dimension D=3 and 5 nearest neighbours + + +N, D, k = 10 ** 6, 3, 5 + +############################################################### +# Define our dataset: + +torch.manual_seed(1) +x = 0.7 * torch.randn(N, D, dtype=dtype, device=device) + 0.3 +y = 0.7 * torch.randn(N, D, dtype=dtype, device=device) + 0.3 + +############################################################### +# Create the IVF class and fit the dataset: + +nn = IVF(k=k) +# set the number of clusters in K-Means to 50 +# set the number of nearest clusters we search over during the final query search to 5 +nn.fit(x, clusters=50, a=5) + +############################################################### +# Query dataset search + +approx_nn = nn.kneighbors(y) + +############################################################### +# Now computing the true nearest neighbors with brute force search + +true_nn = nn.brute_force(x, y, k=k) + +############################################################### +# Define the function to compute recall of the nearest neighbors + + +def accuracy(indices_test, indices_truth): + """ + Compares the test and ground truth indices (rows = KNN for each point in dataset) + Returns accuracy: proportion of correct nearest neighbours + """ + N, k = indices_test.shape + + # Calculate number of correct nearest neighbours + accuracy = 0 + for i in range(k): + accuracy += torch.sum(indices_test == indices_truth).float() / N + indices_truth = torch.roll( + indices_truth, 1, -1 + ) # Create a rolling window (index positions may not match) + accuracy = float(accuracy / k) # percentage accuracy + + return accuracy + + +############################################################### +# Check the performance of our algorithm + +print("IVF Recall:", accuracy(approx_nn, true_nn)) + +############################################################### +# Timing the algorithms to observe their performance + +start = time.time() +iters = 10 + +# timing KeOps brute force +for _ in range(iters): + true_nn = nn.brute_force(x, y, k=k) +bf_time = time.time() - start +print( + "KeOps brute force timing for", N, "points with", D, "dimensions:", bf_time / iters +) + +# timing IVF +nn = IVF(k=k) +nn.fit(x) +start = time.time() +for _ in range(iters): + approx_nn = nn.kneighbors(y) +ivf_time = time.time() - start +print("KeOps IVF-Flat timing for", N, "points with", D, "dimensions:", ivf_time / iters) + +############################################################### +# IVF nearest neighbors search with angular metric +# Second experiment with N=$10^6$ points in dimension D=3, with 5 nearest neighbors + +torch.manual_seed(1) +x = 0.7 * torch.randn(N, D, dtype=dtype, device=device) + 0.3 +y = 0.7 * torch.randn(N, D, dtype=dtype, device=device) + 0.3 + +# normalising the inputs to have norm of 1 +x_norm = x / torch.linalg.norm(x, dim=1, keepdim=True) +y_norm = y / torch.linalg.norm(y, 
dim=1, keepdim=True) + +nn = IVF(metric="angular") +true_nn = nn.brute_force(x_norm, y_norm) + +nn = IVF(metric="angular") +nn.fit(x_norm) +approx_nn = nn.kneighbors(y_norm) +print("IVF Recall:", accuracy(approx_nn, true_nn)) + +############################################################### +# The IVF class also has an option to automatically normalise all inputs + +nn = IVF(metric="angular", normalise=True) +nn.fit(x) +approx_nn = nn.kneighbors(y) +print("IVF Recall:", accuracy(approx_nn, true_nn)) + +############################################################### +# There is also an option to use full angular metric "angular_full", which uses the full angular metric. "angular" simply uses the dot product. + +nn = IVF(metric="angular_full") +nn.fit(x) +approx_nn = nn.kneighbors(y) +print("IVF Recall:", accuracy(approx_nn, true_nn)) + +############################################################### +# IVF nearest neighbors search with approximations for K-Means centroids +# We run two experiment with N=$10^6$ points in dimension D=3, with 5 nearest neighbors. The first uses the hyperbolic metric while the second uses a custom metric. + +# hyperbolic data generation +torch.manual_seed(1) +x = 0.5 + torch.rand(N, D, dtype=dtype, device=device) +y = 0.5 + torch.rand(N, D, dtype=dtype, device=device) + +nn = IVF(metric="hyperbolic") +# set approx to True +# n is the number of times we run gradient descent steps for the approximation, default of 50 +nn.fit(x, approx=True, n=50) +approx_nn = nn.kneighbors(y) +true_nn = nn.brute_force(x, y) +print("IVF Recall:", accuracy(approx_nn, true_nn)) + +# define a custom metric +def minkowski(x, y, p=3): + """Returns the computation of a metric + Note the shape of the input tensors the function should accept + + Args: + x (tensor): Input dataset of size 1, N, D + y (tensor): Query dataset of size M, 1, D + + """ + return ((x - y).abs() ** p).sum(-1) + + +# testing custom metric +nn = IVF(metric=minkowski) +nn.fit(x, approx=True) +approx_nn = nn.kneighbors(y) +true_nn = nn.brute_force(x, y) +print("IVF Recall:", accuracy(approx_nn, true_nn)) diff --git a/pykeops/tutorials/knn/plot_nnd_torch.py b/pykeops/tutorials/knn/plot_nnd_torch.py new file mode 100644 index 000000000..618071641 --- /dev/null +++ b/pykeops/tutorials/knn/plot_nnd_torch.py @@ -0,0 +1,175 @@ +""" +================================ + Nearest Neighbors Descent (NND) approximate nearest neighbors search - PyTorch API +================================ + +The :class:`pykeops.torch.NND` class supported by KeOps allows us +to perform **approximate nearest neighbor search** with four lines of code. + +Euclidean and Manhattan metrics are supported. + +.. note:: + NNDescent is not fully optimised and we recommend the use of IVF-Flat instead. + Nevertheless, we provide NNDescent as a means of benchmarking cutting edge nearest neighbor search algorithms + +""" + +######################################################################## +# Setup +# ----------------- +# Standard imports: + +import time +import torch +from pykeops.torch import NND + +use_cuda = torch.cuda.is_available() +device = torch.device("cuda") if use_cuda else torch.device("cpu") +dtype = torch.float32 if use_cuda else torch.float64 + +######################################################################## +# NNDescent search with Euclidean metric +# First experiment with N=$10^4$ points in dimension D=3 and 5 nearest neighbours, and default hyperparameters. 
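+#
+# Two small patch-ups before we start, stated as assumptions about the API in
+# this PR: the class is imported above as ``NND`` but instantiated below as
+# ``NNDescent``, so we alias one name to the other, and the brute-force
+# ground-truth checks below build ``LazyTensor`` reductions directly, so that
+# import is added here as well.
+
+from pykeops.torch import LazyTensor
+
+NNDescent = NND  # assumed to be the same class under its tutorial name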
+ +N, D, k = 10 ** 4, 3, 5 + +######################################################################## +# Define our dataset: + +torch.manual_seed(1) +x = 0.7 * torch.randn(N, D, dtype=dtype) + 0.3 +y = 0.7 * torch.randn(N, D, dtype=dtype) + 0.3 + +######################################################################## +# Create the NND class and fit the dataset: + +nn = NNDescent(k=k) +nn.fit(x, queue=8) + +######################################################################## +# Query dataset search + +approx_nn = nn.kneighbors(y) + +######################################################################## +# Now computing the true nearest neighbors with brute force search + +x_LT = LazyTensor(x.unsqueeze(0).to(device)) +y_LT = LazyTensor(y.unsqueeze(1).to(device)) +d = ((x_LT - y_LT) ** 2).sum(-1) +true_nn = d.argKmin(K=k, dim=1).long() + +######################################################################## +# Define the function to compute recall of the nearest neighbors + + +def accuracy(indices_test, indices_truth): + """ + Compares the test and ground truth indices (rows = KNN for each point in dataset) + Returns accuracy: proportion of correct nearest neighbours + """ + N, k = indices_test.shape + + # Calculate number of correct nearest neighbours + accuracy = 0 + for i in range(k): + accuracy += torch.sum(indices_test == indices_truth).float() / N + indices_truth = torch.roll( + indices_truth, 1, -1 + ) # Create a rolling window (index positions may not match) + accuracy = float(accuracy / k) # percentage accuracy + + return accuracy + + +######################################################################## +# Check the performance of our algorithm + +print("NND Recall:", accuracy(approx_nn.to(device), true_nn)) + +######################################################################## +# Timing the algorithms to observe their performance + +start = time.time() +iters = 10 + +# timing KeOps brute force +for _ in range(iters): + x_LT = LazyTensor(x.unsqueeze(0).to(device)) + y_LT = LazyTensor(y.unsqueeze(1).to(device)) + d = ((x_LT - y_LT) ** 2).sum(-1) + true_nn = d.argKmin(K=k, dim=1).long() +bf_time = time.time() - start +print( + "KeOps brute force timing for", N, "points with", D, "dimensions:", bf_time / iters +) + +# timing NNDescent +start = time.time() +for _ in range(iters): + approx_nn = nn.kneighbors(y) +nnd_time = time.time() - start +print("KeOps NND timing for", N, "points with", D, "dimensions:", nnd_time / iters) + +######################################################################## +# NNDescent search using clusters and Manhattan distance +# Second experiment with N=$10^6$ points in dimension D=3, with 5 nearest neighbors and manhattan distance. 
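+#
+# For reference, the Manhattan (L1) metric used in this second experiment is
+#
+# .. math::
+#    d(x, y) = \sum_i |x_i - y_i|,
+#
+# and the brute-force check below reproduces it directly with the reduction
+# ``((x_LT - y_LT).abs()).sum(-1)``.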
+ +N, D, k = 10 ** 6, 3, 5 + +######################################################################## +# Define our dataset: + +torch.manual_seed(1) +x = 0.7 * torch.randn(N, D, dtype=dtype) + 0.3 +x = x.to(device) +y = 0.7 * torch.randn(N, D, dtype=dtype) + 0.3 +y = y.to(device) + +######################################################################## +# Create the NNDescent class and fit the dataset: + +nn = NNDescent(k=k, metric="manhattan", initialization_method="cluster") +nn.fit(x, a=10, queue=5, clusters=64) + +######################################################################## +# Query dataset search + +approx_nn = nn.kneighbors(y) + +######################################################################## +# Now computing the true nearest neighbors with brute force search using Manhattan distance + +x_LT = LazyTensor(x.unsqueeze(0).to(device)) +y_LT = LazyTensor(y.unsqueeze(1).to(device)) +d = ((x_LT - y_LT).abs()).sum(-1) +true_nn = d.argKmin(K=k, dim=1).long() + +######################################################################## +# Check the performance of our algorithm + +print("NND Recall:", accuracy(approx_nn.to(device), true_nn)) + +######################################################################## +# Timing the algorithms to observe their performance + +start = time.time() +iters = 10 + +# timing KeOps brute force +for _ in range(iters): + x_LT = LazyTensor(x.unsqueeze(0).to(device)) + y_LT = LazyTensor(y.unsqueeze(1).to(device)) + d = ((x_LT - y_LT).abs()).sum(-1) + true_nn = d.argKmin(K=k, dim=1).long() +bf_time = time.time() - start +print( + "KeOps brute force timing for", N, "points with", D, "dimensions:", bf_time / iters +) + +# timing NNDescent +start = time.time() +for _ in range(iters): + approx_nn = nn.kneighbors(y) +nnd_time = time.time() - start +print("KeOps NND timing for", N, "points with", D, "dimensions:", nnd_time / iters) From 4a73bfdbc3e70442d3e4216382211dd5ef5e44f9 Mon Sep 17 00:00:00 2001 From: Anna Date: Tue, 20 Apr 2021 15:44:26 +0100 Subject: [PATCH 087/111] added nystrom scripts and unit tests --- pykeops/common/nystrom_generic.py | 286 +++++++++++++++++++++++++++++ pykeops/numpy/__init__.py | 4 +- pykeops/test/unit_tests_numpy.py | 21 ++- pykeops/test/unit_tests_pytorch.py | 27 ++- pykeops/torch/__init__.py | 3 +- pykeops/torch/nystrom/nystrom.py | 275 ++++++--------------------- 6 files changed, 382 insertions(+), 234 deletions(-) create mode 100644 pykeops/common/nystrom_generic.py diff --git a/pykeops/common/nystrom_generic.py b/pykeops/common/nystrom_generic.py new file mode 100644 index 000000000..99651c6b5 --- /dev/null +++ b/pykeops/common/nystrom_generic.py @@ -0,0 +1,286 @@ +import numpy as np +import pykeops +from typing import TypeVar, Union, Tuple +import warnings + +# Generic placeholder for numpy and torch variables. +generic_array = TypeVar("generic_array") +GenericLazyTensor = TypeVar("GenericLazyTensor") + + +class GenericNystrom: + """Super class defining the Nystrom operations. The end user should + use numpy.nystrom or torch.nystrom subclasses.""" + + def __init__( + self, + n_components: int = 100, + kernel: Union[str, callable] = "rbf", + sigma: float = None, + eps: float = 0.05, + mask_radius: float = None, + k_means: int = 10, + n_iter: int = 10, + inv_eps: float = None, + verbose: bool = False, + random_state: Union[None, int] = None, + tools=None, + ): + + """ + n_components = how many samples to select from data. + kernel = type of kernel to use. Current options = {rbf:Gaussian, + exp: exponential}. 
+ sigma = exponential constant for the RBF and exponential kernels. + eps = size for square bins in block-sparse preprocessing. + k_means = number of centroids for KMeans algorithm in block-sparse + preprocessing. + n_iter = number of iterations for KMeans. + dtype = type of data: np.float32 or np.float64 + inv_eps = additive invertibility constant for matrix decomposition. + verbose = set True to print details. + random_state = to set a random seed for the random sampling of the samples. + To be used when reproducibility is needed. + """ + self.n_components = n_components + self.kernel = kernel + self.sigma = sigma + self.eps = eps + self.mask_radius = mask_radius + self.k_means = k_means + self.n_iter = n_iter + self.dtype = None + self.verbose = verbose + self.random_state = random_state + self.tools = None + self.LazyTensor = None + + self.device = "cuda" if pykeops.config.gpu_available else "cpu" + + if inv_eps: + self.inv_eps = inv_eps + else: + self.inv_eps = 1e-8 + + def fit(self, x: generic_array) -> "GenericNystrom": + """ + Args: x = array or tensor of shape (n_samples, n_features) + Returns: Fitted instance of the class + """ + x = self._to_device(x) + self.dtype = x.dtype + + # Basic checks + assert self.tools.is_tensor( + x + ), "Input to fit(.) must be an array\ + if using numpy and tensor if using torch." + assert ( + x.shape[0] >= self.n_components + ), "The application needs\ + X.shape[0] >= n_components." + if self.kernel == "exp" and not (self.sigma is None): + assert self.sigma > 0, "Should be working with decaying exponential." + + # Set default sigma + # if self.sigma is None and self.kernel == 'rbf': + if self.sigma is None: + self.sigma = np.sqrt(x.shape[1]) + + if self.mask_radius is None: + if self.kernel == "rbf": + # TODO get mask_radius correct + self.mask_radius = 8 * self.sigma + elif self.kernel == "exp": + self.mask_radius = 8 * self.sigma + + # Update dtype + self._update_dtype(x) + # Number of samples + n_samples = x.shape[0] + # Define basis + rnd = self._check_random_state(self.random_state) + inds = rnd.permutation(n_samples) + basis_inds = inds[: self.n_components] + basis = x[basis_inds] + # Build smaller kernel + basis_kernel = self._pairwise_kernels(basis, dense=True) + # Decomposition is an abstract method that needs to be defined in each class + self.normalization_ = self._decomposition_and_norm(basis_kernel) + self.components_ = basis + self.component_indices_ = inds + + return self + + def _decomposition_and_norm(self, X: GenericLazyTensor): + """ + To be defined in the subclass + """ + raise NotImplementedError( + "Subclass must implement the method _decomposition_and_norm." + ) + + def transform(self, x: generic_array, dense=True) -> generic_array: + """ + Applies transform on the data mapping it to the feature space + which supports the approximated kernel. 
+ Args: + X = data to transform + Returns + X = data after transformation + """ + if type(x) == np.ndarray and not dense: + warnings.warn("For Numpy transform it is best to use dense=True") + + x = self._to_device(x) + K_nq = self._pairwise_kernels(x, self.components_, dense=dense) + x_new = K_nq @ self.normalization_ + return x_new + + def _pairwise_kernels(self, x: generic_array, y: generic_array = None, dense=False): + """Helper function to build kernel + Args: x[np.array or torch.tensor] = data + y[np.array or torch.tensor] = array/tensor + dense[bool] = False to work with lazy tensor reduction, + True to work with dense arrays/tensors + Returns: + K_ij[LazyTensor] if dense = False + K_ij[np.array or torch.tensor] if dense = True + """ + + if y is None: + y = x + x = x / self.sigma + y = y / self.sigma + + x_i, x_j = ( + self.tools.contiguous(self._to_device(x[:, None, :])), + self.tools.contiguous(self._to_device(y[None, :, :])), + ) + + if self.kernel == "rbf": + if dense: + D_ij = ((x_i - x_j) ** 2).sum(axis=2) + K_ij = self.tools.exp(-D_ij) + + else: + x_i, x_j = self.LazyTensor(x_i), self.LazyTensor(x_j) + D_ij = ((x_i - x_j) ** 2).sum(dim=2) + K_ij = (-D_ij).exp() + + # block-sparse reduction preprocess + K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) + elif self.kernel == "exp": + if dense: + K_ij = self.tools.exp( + -self.tools.sqrt((((x_i - x_j) ** 2).sum(axis=2))) + ) + + else: + x_i, x_j = self.LazyTensor(x_i), self.LazyTensor(x_j) + K_ij = (-(((x_i - x_j) ** 2).sum(-1)).sqrt()).exp() + + # block-sparse reduction preprocess + K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) + + # computation with custom kernel + else: + print("Please note that computations on custom kernels are dense-only.") + K_ij = self.kernel(x_i, x_j) + + return K_ij + + def _Gauss_block_sparse_pre( + self, x: generic_array, y: generic_array, K_ij: GenericLazyTensor + ): + """ + Helper function to preprocess data for block-sparse reduction + of the Gaussian kernel + Args: + x, y = arrays or tensors giving rise to Gaussian kernel K(x,y) + K_ij = symbolic representation of K(x,y) + eps[float] = size for square bins + Returns: + K_ij = symbolic representation of K(x,y) with + set sparse ranges + """ + x = self._to_device(x) + y = self._to_device(y) + # labels for low dimensions + if x.shape[1] < 4 or y.shape[1] < 4: + x_labels = self.tools.grid_cluster(x, self.eps) + y_labels = self.tools.grid_cluster(y, self.eps) + + # range and centroid per class + x_ranges, x_centroids, _ = self.tools.cluster_ranges_centroids(x, x_labels) + y_ranges, y_centroids, _ = self.tools.cluster_ranges_centroids(y, y_labels) + + else: + # labels for higher dimensions + x_labels, x_centroids = self._KMeans(x) + y_labels, y_centroids = self._KMeans(y) + # compute ranges + x_ranges = self.tools.cluster_ranges(x_labels) + y_ranges = self.tools.cluster_ranges(y_labels) + + # sort points + x, x_labels = self.tools.sort_clusters(x, x_labels) + y, y_labels = self.tools.sort_clusters(y, y_labels) + + # Compute a coarse Boolean mask: + if self.kernel == "rbf": + D = self.tools.arraysum( + (x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2 + ) + + elif self.kernel == "exp": + D = self.tools.sqrt( + self.tools.arraysum( + (x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2 + ) + ) + + keep = D < (self.mask_radius) ** 2 + # mask -> set of integer tensors + ranges_ij = self.tools.from_matrix(x_ranges, y_ranges, keep) + K_ij.ranges = ranges_ij # block-sparsity pattern + + return K_ij + + def _astype(self, data, type): + return 
data + + def _to_device(self, data): + return data + + def _update_dtype(self, x): + """Helper function that sets dtype to that of + the given data in the fitting step. + Args: + x [np.array or torch.tensor] = raw data to remap + Returns: + None + """ + self.dtype = x.dtype + self.inv_eps = np.array([self.inv_eps]).astype(self.dtype)[0] + + def _check_random_state(self, seed: Union[None, int]) -> None: + """Set/get np.random.RandomState instance for permutation + Args + seed[None, int] + Returns: + numpy random state + """ + + if seed is None: + return np.random.mtrand._rand + + elif type(seed) == int: + return np.random.RandomState(seed) + + raise ValueError(f"Seed {seed} must be None or an integer.") + + def _KMeans(self, x: generic_array) -> Tuple[generic_array]: + """K-means algorithm to find clusters for preprocessing""" + + raise NotImplementedError("Subclass must implement this method.") diff --git a/pykeops/numpy/__init__.py b/pykeops/numpy/__init__.py index 2df9564c1..061cc1e78 100644 --- a/pykeops/numpy/__init__.py +++ b/pykeops/numpy/__init__.py @@ -5,7 +5,7 @@ ########################################################## # Import pyKeOps routines - +from .nystrom.nystrom import Nystrom from .knn.ivf import IVF from .generic.generic_red import Genred from .operations import KernelSolve @@ -20,7 +20,7 @@ __all__ = sorted( [ - "IVF", + "Nystrom" "IVF", "Genred", "generic_sum", "generic_logsumexp", diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index e7fb9d1b7..f43c6518c 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -507,28 +507,35 @@ def test_IVF(self): ############################################################ def test_Nystrom_k_approx(self): ############################################################ - from pykeops.numpy.nystrom.Nystrom import Nystrom_NK + from pykeops.numpy.nystrom.nystrom import Nystrom + length = 100 num_sampling = 20 x = np.random.randint(1, 10, (100, 3)).astype(np.float32) kernels = ["rbf", "exp"] for kernel in kernels: - N_NK = Nystrom_NK( + # calculate the ground truth kernel + N_truth = Nystrom(n_components=length, kernel=kernel, random_state=0).fit(x) + x_truth = N_truth.transform(x) + K = x_truth @ x_truth.T + + # calculate an approximation + N_NK = Nystrom( n_components=num_sampling, kernel=kernel, random_state=0 ).fit(x) - K = N_NK.K_approx(x) x_new = N_NK.transform(x) + K_approx = x_new @ x_new.T - ML2_error = np.linalg.norm(x_new @ x_new.T - K) / K.size + error = np.linalg.norm(K - K_approx) / K.size - self.assertTrue(ML2_error < 0.01) + self.assertTrue(error < 0.01) ############################################################ def test_Nystrom_k_shape(self): ############################################################ - from pykeops.numpy.nystrom.Nystrom import Nystrom_NK + from pykeops.numpy.nystrom.nystrom import Nystrom length = 100 num_sampling = 20 @@ -537,7 +544,7 @@ def test_Nystrom_k_shape(self): kernels = ["rbf", "exp"] for kernel in kernels: - N_NK = Nystrom_NK( + N_NK = Nystrom( n_components=num_sampling, kernel=kernel, random_state=0 ).fit(x) diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index 372b066ec..3e7128903 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -751,41 +751,48 @@ def test_IVF(self): def test_Nystrom_K_approx(self): ############################################################ - from pykeops.torch.nystrom.nystrom import LazyNystrom_TK + from 
pykeops.torch.nystrom.nystrom import Nystrom import torch length = 100 - num_sampling = 20 - x = torch.rand(length, 3) * 100 + num_sampling = 40 + x = torch.rand((length, 3), dtype=torch.float32) * 10 kernels = ["rbf", "exp"] for kernel in kernels: - N_TK = LazyNystrom_TK( + # calculate the ground truth + N_truth = Nystrom(n_components=length, kernel=kernel, random_state=0).fit(x) + x_truth = N_truth.transform(x) + K = x_truth @ x_truth.T + + # calculate an approximation + N_TK = Nystrom( n_components=num_sampling, kernel=kernel, random_state=0 ).fit(x) - K = N_TK.K_approx(x) + x_new = N_TK.transform(x) + K_approx = x_new @ x_new.T - ML2_error = np.linalg.norm(x_new @ x_new.T - K) / K.shape[0] + error = torch.linalg.norm(K - K_approx) / (K.shape[0] * K.shape[1]) - self.assertTrue(ML2_error < 0.01) + self.assertTrue(error < 0.01) ############################################################ def test_Nystrom_K_shape(self): ############################################################ - from pykeops.torch.nystrom.nystrom import LazyNystrom_TK + from pykeops.torch.nystrom.nystrom import Nystrom import torch length = 100 - num_sampling = 20 + num_sampling = 40 x = torch.rand(length, 3) * 100 kernels = ["rbf", "exp"] for kernel in kernels: - N_NT = LazyNystrom_TK( + N_NT = Nystrom( n_components=num_sampling, kernel=kernel, random_state=0 ).fit(x) diff --git a/pykeops/torch/__init__.py b/pykeops/torch/__init__.py index 6574a65f7..87f15e020 100644 --- a/pykeops/torch/__init__.py +++ b/pykeops/torch/__init__.py @@ -27,6 +27,7 @@ ########################################################## # Import pyKeOps routines +from .nystrom.nystrom import Nystrom from .knn.ivf import IVF from .generic.generic_red import Genred from .generic.generic_ops import ( @@ -40,7 +41,7 @@ __all__ = sorted( [ - "IVF", + "Nystrom" "IVF", "Genred", "generic_sum", "generic_logsumexp", diff --git a/pykeops/torch/nystrom/nystrom.py b/pykeops/torch/nystrom/nystrom.py index 2bace7709..184a4d2c7 100644 --- a/pykeops/torch/nystrom/nystrom.py +++ b/pykeops/torch/nystrom/nystrom.py @@ -1,237 +1,78 @@ -# !pip install pykeops[full] > install.log -# colab for this code -# https://colab.research.google.com/drive/1vF2cOSddbRFM5PLqxkIzyZ9XkuzO5DKN?usp=sharing -import numpy as np import torch -import numbers -from pykeops.torch.cluster import grid_cluster -from pykeops.torch.cluster import from_matrix -from pykeops.torch.cluster import cluster_ranges_centroids, cluster_ranges -from pykeops.torch.cluster import sort_clusters +from pykeops.common.nystrom_generic import GenericNystrom +from pykeops.torch.utils import torchtools from pykeops.torch import LazyTensor -# Note: this is a function taken from Sklearn -def check_random_state(seed): - """Turn seed into a np.random.RandomState instance - Parameters - ---------- - seed : None, int or instance of RandomState - If seed is None, return the RandomState singleton used by np.random. - If seed is an int, return a new RandomState instance seeded with seed. - If seed is already a RandomState instance, return it. - Otherwise raise ValueError. 
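A side note on the __all__ edits above: Python concatenates adjacent string literals at compile time, so the missing comma in "Nystrom" "IVF" silently merges the two exports into one bogus name. A quick standalone check (illustrative values):

exports = sorted(["Nystrom" "IVF", "Genred"])
print(exports)  # ['Genred', 'NystromIVF'] -- one name, not two

exports = sorted(["Nystrom", "IVF", "Genred"])
print(exports)  # ['Genred', 'IVF', 'Nystrom'] -- the intended export list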
- """ - if seed is None or seed is np.random: - return np.random.mtrand._rand - if isinstance(seed, numbers.Integral): - return np.random.RandomState(seed) - if isinstance(seed, np.random.RandomState): - return seed - raise ValueError( - "%r cannot be used to seed a numpy.random.RandomState" " instance" % seed - ) - - -################################################################################ -# Same as LazyNystrom_T but written with pyKeOps - - -class LazyNystrom_TK: - """ - Class to implement Nystrom on torch LazyTensors. - This class works as an interface between lazy tensors and - the Nystrom algorithm in NumPy. - * The fit method computes K^{-1}_q. - * The transform method maps the data into the feature space underlying - the Nystrom-approximated kernel. - * The method K_approx directly computes the Nystrom approximation. - Parameters: - n_components [int] = how many samples to select from data. - kernel [str] = type of kernel to use. Current options = {linear, rbf}. - gamma [float] = exponential constant for the RBF kernel. - random_state=[None, float] = to set a random seed for the random - sampling of the samples. To be used when - reproducibility is needed. - """ +class Nystrom(GenericNystrom): def __init__( self, n_components=100, kernel="rbf", - sigma: float = 1.0, - exp_sigma: float = 1.0, + sigma: float = None, eps: float = 0.05, mask_radius: float = None, k_means=10, n_iter: int = 10, inv_eps: float = None, - dtype=np.float32, - backend="CPU", + verbose=False, random_state=None, + tools=None, ): + super().__init__( + n_components, + kernel, + sigma, + eps, + mask_radius, + k_means, + n_iter, + inv_eps, + verbose, + random_state, + ) + + self.tools = torchtools + self.verbose = verbose + self.LazyTensor = LazyTensor - self.n_components = n_components - self.kernel = kernel - self.random_state = random_state - self.sigma = sigma - self.exp_sigma = exp_sigma - self.eps = eps - self.mask_radius = mask_radius - self.k_means = k_means - self.n_iter = n_iter - self.dtype = dtype - self.backend = backend # conditional here - if inv_eps: - self.inv_eps = inv_eps - else: - if kernel == "linear": - self.inv_eps = 1e-4 - else: - self.inv_eps = 1e-8 - if not mask_radius: - if kernel == "rbf": - self.mask_radius = 2 * np.sqrt(2) * self.sigma - if kernel == "exp": - self.mask_radius = 8 * self.exp_sigma - - def fit(self, X: torch.tensor): - """ - Args: X = torch tensor with features of shape - (1, n_samples, n_features) - Returns: Fitted instance of the class - """ - - # Basic checks: we have a lazy tensor and n_components isn't too large - assert type(X) == torch.Tensor, "Input to fit(.) must be a Tensor." - assert ( - X.size(0) >= self.n_components - ), "The application needs X.shape[1] >= n_components." - # self._update_dtype(X) - # Number of samples - n_samples = X.size(0) - # Define basis - rnd = check_random_state(self.random_state) - inds = rnd.permutation(n_samples) - basis_inds = inds[: self.n_components] - basis = X[basis_inds] - # Build smaller kernel - basis_kernel = self._pairwise_kernels(basis, kernel=self.kernel) - # Get SVD - U, S, V = torch.svd(basis_kernel) - S = torch.max(S, torch.ones(S.size()) * 1e-12) - self.normalization_ = torch.mm(U / np.sqrt(S), V.t()) - self.components_ = basis - self.component_indices_ = inds + def _update_dtype(self, x): + pass - return self + def _to_device(self, x): + return x.to(self.device) - def transform(self, X: torch.tensor) -> torch.tensor: - """Applies transform on the data. 
+    def _decomposition_and_norm(self, basis_kernel):
+        """Function to return self.normalization_ used in fit(.) function
         Args:
-            X [LazyTensor] = data to transform
-        Returns
-            X [LazyTensor] = data after transformation
+            basis_kernel[torch LazyTensor] = subset of input data
+        Returns:
+            self.normalization_[torch.tensor] = K_q^{-1/2}, the inverse square root of the q x q kernel on the sampled rows
         """
-        K_nq = self._pairwise_kernels(X, self.components_, self.kernel)
-        return K_nq @ self.normalization_.t()
-
-    def K_approx(self, X: torch.tensor) -> torch.tensor:
+        basis_kernel = basis_kernel.to(
+            self.device
+        )  # dim: num_components x num_components
+        U, S, V = torch.linalg.svd(basis_kernel, full_matrices=False)
+        S = torch.maximum(S, torch.ones(S.size()).to(self.device) * 1e-12)
+        return torch.mm(U / torch.sqrt(S), V)  # dim: num_components x num_components
+
+    def K_approx(self, X: torch.tensor) -> "K_approx_operator":
         """Function to return Nystrom approximation to the kernel.
         Args:
-            X[torch.tensor] = data used in fit(.) function.
+            X = data used in fit(.) function.
         Returns
-            K[torch.tensor] = Nystrom approximation to kernel"""
+            K_approx = Nystrom approximation to kernel which can be applied
+            downstream as K_approx @ v for some 1d tensor v"""

-        K_nq = self._pairwise_kernels(X, self.components_, self.kernel)
-        K_approx = K_nq @ self.normalization_ @ K_nq.t()
+        K_nq = self._pairwise_kernels(X, self.components_, dense=False)
+        K_approx = K_approx_operator(K_nq, self.normalization_)

         return K_approx

-    def _pairwise_kernels(
-        self, x: torch.tensor, y: torch.tensor = None, kernel="rbf", sigma: float = 1.0
-    ) -> LazyTensor:
-        """Helper function to build kernel
-        Args: X = torch tensor of dimension 2.
-              K_type = type of Kernel to return
-        Returns:
-            K_ij[LazyTensor]
-        """
-        if y is None:
-            y = x
-        if kernel == "linear":
-            K_ij = x @ y.T
-        elif kernel == "rbf":
-            x /= sigma
-            y /= sigma
-
-            x_i, x_j = LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :])
-            K_ij = (-1 * ((x_i - x_j) ** 2).sum(-1)).exp()
-
-            # block-sparse reduction preprocess
-            K_ij = self._Gauss_block_sparse_pre(x, y, K_ij)
-
-        elif kernel == "exp":
-            x_i, x_j = LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :])
-            K_ij = (-1 * ((x_i - x_j) ** 2).sum().sqrt()).exp()
-            # block-sparse reduction preprocess
-            K_ij = self._Gauss_block_sparse_pre(x, y, K_ij)  # TODO
-
-        K_ij = K_ij @ torch.diag(torch.ones(K_ij.shape[1]))  # make 1 on diag only
-
-        K_ij.backend = self.backend
-        return K_ij
-
-    def _Gauss_block_sparse_pre(
-        self, x: torch.tensor, y: torch.tensor, K_ij: LazyTensor
-    ):
-        """
-        Helper function to preprocess data for block-sparse reduction
-        of the Gaussian kernel
-
-        Args:
-            x[np.array], y[np.array] = arrays giving rise to Gaussian kernel K(x,y)
-            K_ij[LazyTensor_n] = symbolic representation of K(x,y)
-            eps[float] = size for square bins
-        Returns:
-            K_ij[LazyTensor_n] = symbolic representation of K(x,y) with
-                                set sparse ranges
-        """
-        # labels for low dimensions
-
-        if x.shape[1] < 4 or y.shape[1] < 4:
-
-            x_labels = grid_cluster(x, self.eps)
-            y_labels = grid_cluster(y, self.eps)
-            # range and centroid per class
-            x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels)
-            y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels)
-        else:
-            # labels for higher dimensions
-
-            x_labels, x_centroids = self._KMeans(x)
-            y_labels, y_centroids = self._KMeans(y)
-            # compute ranges
-            x_ranges = cluster_ranges(x_labels)
-            y_ranges = cluster_ranges(y_labels)
-
-        # sort points
-        x, x_labels = sort_clusters(x, x_labels)
-        y, y_labels = 
sort_clusters(y, y_labels) - # Compute a coarse Boolean mask: - D = torch.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2) - keep = D < (self.mask_radius) ** 2 - # mask -> set of integer tensors - ranges_ij = from_matrix(x_ranges, y_ranges, keep) - K_ij.ranges = ranges_ij # block-sparsity pattern - - return K_ij - def _KMeans(self, x: torch.tensor): """KMeans with Pykeops to do binning of original data. Args: - x[np.array] = data - k_means[int] = number of bins to build - n_iter[int] = number iterations of KMeans loop + x = data Returns: labels[np.array] = class labels for each point in x clusters[np.array] = coordinates for each centroid @@ -252,14 +93,20 @@ def _KMeans(self, x: torch.tensor): return labels, clusters - def _update_dtype(self, x): - """Helper function that sets inv_eps to dtype to that of - the given data in the fitting step. - Args: - x [np.array] = raw data to remap - Returns: - nothing - """ - self.dtype = x.dtype - self.inv_eps = np.array([self.inv_eps]).astype(self.dtype)[0] +class K_approx_operator: + """Helper class to return K_approx as an object + compatible with @ symbol""" + + def __init__(self, K_nq, normalization): + + self.K_nq = K_nq # dim: number of samples x num_components + self.K_nq.backend = "GPU_2D" + self.normalization = normalization + + def __matmul__(self, x: torch.tensor) -> torch.tensor: + + x = self.K_nq.T @ x + x = self.normalization @ self.normalization.T @ x + x = self.K_nq @ x + return x From 13d52f737a181ada5388982dffcab7558489ffba Mon Sep 17 00:00:00 2001 From: Anna Date: Tue, 20 Apr 2021 16:01:51 +0100 Subject: [PATCH 088/111] updated imports in unit tests --- pykeops/numpy/__init__.py | 3 ++- pykeops/test/unit_tests_numpy.py | 2 +- pykeops/test/unit_tests_pytorch.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pykeops/numpy/__init__.py b/pykeops/numpy/__init__.py index 061cc1e78..61a0ebc61 100644 --- a/pykeops/numpy/__init__.py +++ b/pykeops/numpy/__init__.py @@ -20,7 +20,8 @@ __all__ = sorted( [ - "Nystrom" "IVF", + "Nystrom", + "IVF", "Genred", "generic_sum", "generic_logsumexp", diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index f43c6518c..ba0fda692 100644 --- a/pykeops/test/unit_tests_numpy.py +++ b/pykeops/test/unit_tests_numpy.py @@ -507,7 +507,7 @@ def test_IVF(self): ############################################################ def test_Nystrom_k_approx(self): ############################################################ - from pykeops.numpy.nystrom.nystrom import Nystrom + from pykeops.numpy import Nystrom length = 100 num_sampling = 20 diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index 3e7128903..b5addfab8 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -782,7 +782,7 @@ def test_Nystrom_K_approx(self): def test_Nystrom_K_shape(self): ############################################################ - from pykeops.torch.nystrom.nystrom import Nystrom + from pykeops.torch import Nystrom import torch length = 100 From c2b12d694477cb4f2c57a6a69f53e327196f8fc3 Mon Sep 17 00:00:00 2001 From: Anna Date: Tue, 20 Apr 2021 16:20:17 +0100 Subject: [PATCH 089/111] updated imports and added note to kmeans --- pykeops/test/unit_tests_numpy.py | 2 +- pykeops/test/unit_tests_pytorch.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pykeops/test/unit_tests_numpy.py b/pykeops/test/unit_tests_numpy.py index ba0fda692..249c17737 100644 --- a/pykeops/test/unit_tests_numpy.py 
+++ b/pykeops/test/unit_tests_numpy.py @@ -535,7 +535,7 @@ def test_Nystrom_k_approx(self): ############################################################ def test_Nystrom_k_shape(self): ############################################################ - from pykeops.numpy.nystrom.nystrom import Nystrom + from pykeops.numpy import Nystrom length = 100 num_sampling = 20 diff --git a/pykeops/test/unit_tests_pytorch.py b/pykeops/test/unit_tests_pytorch.py index b5addfab8..341ecd938 100644 --- a/pykeops/test/unit_tests_pytorch.py +++ b/pykeops/test/unit_tests_pytorch.py @@ -751,7 +751,7 @@ def test_IVF(self): def test_Nystrom_K_approx(self): ############################################################ - from pykeops.torch.nystrom.nystrom import Nystrom + from pykeops.torch import Nystrom import torch length = 100 From 3d204bae1c60016374e2a5177037b9c97129dfa8 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 20 Apr 2021 16:33:37 +0100 Subject: [PATCH 090/111] changed torch init --- pykeops/torch/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pykeops/torch/__init__.py b/pykeops/torch/__init__.py index 87f15e020..9ce0ea806 100644 --- a/pykeops/torch/__init__.py +++ b/pykeops/torch/__init__.py @@ -41,7 +41,8 @@ __all__ = sorted( [ - "Nystrom" "IVF", + "Nystrom", + "IVF", "Genred", "generic_sum", "generic_logsumexp", From 5d7f1bb16bc17c1c24ef6b9465a8257e1f0ea68e Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 20 Apr 2021 16:37:36 +0100 Subject: [PATCH 091/111] changed capitalisation --- pykeops/numpy/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pykeops/numpy/__init__.py b/pykeops/numpy/__init__.py index 61a0ebc61..77f4aba88 100644 --- a/pykeops/numpy/__init__.py +++ b/pykeops/numpy/__init__.py @@ -5,7 +5,7 @@ ########################################################## # Import pyKeOps routines -from .nystrom.nystrom import Nystrom +from .nystrom.Nystrom import Nystrom from .knn.ivf import IVF from .generic.generic_red import Genred from .operations import KernelSolve From 4621fd0c55649e7750a8fd1bfc2a075003a1087a Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 20 Apr 2021 16:45:07 +0100 Subject: [PATCH 092/111] updated nystrom --- pykeops/numpy/nystrom/Nystrom.py | 319 ++++++------------------------- 1 file changed, 60 insertions(+), 259 deletions(-) diff --git a/pykeops/numpy/nystrom/Nystrom.py b/pykeops/numpy/nystrom/Nystrom.py index 7068132fb..64c5b2440 100644 --- a/pykeops/numpy/nystrom/Nystrom.py +++ b/pykeops/numpy/nystrom/Nystrom.py @@ -1,281 +1,108 @@ import numpy as np -import pykeops -import numbers -from pykeops.numpy import LazyTensor as LazyTensor_n -from pykeops.numpy.cluster import grid_cluster -from pykeops.numpy.cluster import from_matrix -from pykeops.numpy.cluster import cluster_ranges_centroids, cluster_ranges -from pykeops.numpy.cluster import sort_clusters +from scipy.linalg import eigh +from scipy.sparse.linalg import aslinearoperator -from pykeops.torch import LazyTensor +from pykeops.common.nystrom_generic import GenericNystrom +from pykeops.numpy.utils import numpytools +from pykeops.numpy import LazyTensor -# For LinearOperator math -from scipy.sparse.linalg import aslinearoperator, eigsh -from scipy.sparse.linalg.interface import IdentityOperator +from typing import Tuple, List -class Nystrom_NK: - """ - Class to implement Nystrom using numpy and PyKeops. 
- * The fit method computes K^{-1}_q. - * The transform method maps the data into the feature space underlying - the Nystrom-approximated kernel. - * The method K_approx directly computes the Nystrom approximation. - Parameters: - n_components [int] = how many samples to select from data. - kernel [str] = type of kernel to use. Current options = {rbf:Gaussian, - exp: exponential}. - sigma [float] = exponential constant for the RBF kernel. - exp_sigma [float] = exponential constant for the exponential kernel. - eps[float] = size for square bins in block-sparse preprocessing. - k_means[int] = number of centroids for KMeans algorithm in block-sparse - preprocessing. - n_iter[int] = number of iterations for KMeans - dtype[type] = type of data: np.float32 or np.float64 - inv_eps[float] = additive invertibility constant for matrix decomposition. - backend[string] = "GPU" or "CPU" mode - verbose[boolean] = set True to print details - random_state=[None, float] = to set a random seed for the random - sampling of the samples. To be used when - reproducibility is needed. - """ +class Nystrom(GenericNystrom): + """Nystrom class to work with Numpy arrays""" def __init__( self, n_components=100, kernel="rbf", - sigma: float = 1.0, - exp_sigma: float = 1.0, + sigma: float = None, eps: float = 0.05, mask_radius: float = None, k_means=10, n_iter: int = 10, inv_eps: float = None, - dtype=np.float32, - backend=None, verbose=False, random_state=None, + eigvals: List[int] = None, ): - self.n_components = n_components - self.kernel = kernel - self.random_state = random_state - self.sigma = sigma - self.exp_sigma = exp_sigma - self.eps = eps - self.mask_radius = mask_radius - self.k_means = k_means - self.n_iter = n_iter - self.dtype = dtype - self.verbose = verbose - - if not backend: - self.backend = "GPU" if pykeops.config.gpu_available else "CPU" - else: - self.backend = backend - - if inv_eps: - self.inv_eps = inv_eps - else: - self.inv_eps = 1e-8 - - if not mask_radius: - if kernel == "rbf": - self.mask_radius = 2 * np.sqrt(2) * self.sigma - elif kernel == "exp": - self.mask_radius = 8 * self.exp_sigma - - def fit(self, x: np.ndarray): - """ - Args: x = numpy array of shape (n_samples, n_features) - Returns: Fitted instance of the class - """ - if self.verbose: - print(f"Working with backend = {self.backend}") - - # Basic checks - assert type(x) == np.ndarray, "Input to fit(.) must be an array." - assert ( - x.shape[0] >= self.n_components - ), "The application needs X.shape[0] >= n_components." - assert self.exp_sigma > 0, "Should be working with decaying exponential." - - # Update dtype - self._update_dtype(x) - # Number of samples - n_samples = x.shape[0] - # Define basis - rnd = self._check_random_state(self.random_state) - inds = rnd.permutation(n_samples) - basis_inds = inds[: self.n_components] - basis = x[basis_inds] - # Build smaller kernel - basis_kernel = self._pairwise_kernels(basis, dense=False) - # Spectral decomposition - S, U = self._spectral(basis_kernel) - S = np.maximum(S, 1e-12) - self.normalization_ = np.dot(U / np.sqrt(S), U.T) - self.components_ = basis - self.component_indices_ = inds - - return self - - def _spectral(self, X_i: LazyTensor): """ - Helper function to compute eigendecomposition of K_q. - Written using LinearOperators which are lazy - representations of sparse and/or structured data. 
Args: - X_i[numpy LazyTensor] - Returns - S[np.array] eigenvalues, - U[np.array] eigenvectors + eigvals = eigenvalues index interval [a,b] for constructed K_q, + where 0 <= a < b < length of K_q + """ - K_linear = aslinearoperator(X_i) - # K <- K + eps - K_linear = ( - K_linear + IdentityOperator(K_linear.shape, dtype=self.dtype) * self.inv_eps + super().__init__( + n_components, + kernel, + sigma, + eps, + mask_radius, + k_means, + n_iter, + inv_eps, + verbose, + random_state, ) - k = K_linear.shape[0] - 1 - S, U = eigsh(K_linear, k=k, which="LM") - return S, U + self.tools = numpytools + self.LazyTensor = LazyTensor + self.eigvals = eigvals - def transform(self, x: np.ndarray) -> np.array: - """Applies transform on the data. + if eigvals: + assert eigvals[0] < eigvals[1], "eigvals = [a,b] needs a < b" + assert ( + eigvals[1] < n_components + ), "max eigenvalue index needs to be less\ + than size of K_q = n_components" - Args: - X [np.array] = data to transform - Returns - X [np.array] = data after transformation - """ + def _decomposition_and_norm(self, X: np.array) -> np.array: + """Computes K_q^{-1/2}""" - K_nq = self._pairwise_kernels(x, self.components_, dense=True) - x_new = K_nq @ self.normalization_.T - return x_new + X = X + np.eye(X.shape[0], dtype=self.dtype) * self.inv_eps + S, U = eigh(X, eigvals=self.eigvals) + S = np.maximum(S, 1e-12) + + return np.dot(U / np.sqrt(S), U.T) - def K_approx(self, x: np.array) -> np.array: + def K_approx(self, x: np.array) -> "LinearOperator": """Function to return Nystrom approximation to the kernel. Args: - X[np.array] = data used in fit(.) function. + x = data used in fit(.) function. Returns - K[np.array] = Nystrom approximation to kernel""" - - K_nq = self._pairwise_kernels(x, self.components_, dense=True) - # For arrays: K_approx = K_nq @ K_q_inv @ K_nq.T - # But to use @ with lazy tensors we have: - K_q_inv = self.normalization_.T @ self.normalization_ - K_approx = K_nq @ (K_nq @ K_q_inv).T - return K_approx.T - - def _pairwise_kernels( - self, x: np.array, y: np.array = None, dense: bool = False - ) -> LazyTensor: - """Helper function to build kernel - - Args: x[np.array] = data - y[np.array] = array - dense[bool] = False to work with lazy tensor reduction, - True to work with dense arrays - Returns: - K_ij[LazyTensor] if dense = False - K_ij[np.array] if dense = True - - """ - if y is None: - y = x - if self.kernel == "rbf": - x /= self.sigma - y /= self.sigma - if dense: - x_i, x_j = x[:, None, :], y[None, :, :] - K_ij = np.exp(-(((x_i - x_j) ** 2).sum(axis=2))) - else: - x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :]) - K_ij = (-(((x_i - x_j) ** 2).sum(dim=2))).exp() - # block-sparse reduction preprocess - K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) - elif self.kernel == "exp": - x /= self.exp_sigma - y /= self.exp_sigma - if dense: - x_i, x_j = x[:, None, :], y[None, :, :] - K_ij = np.exp(-np.sqrt((((x_i - x_j) ** 2).sum(axis=2)))) - else: - x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :]) - K_ij = (-(((x_i - x_j) ** 2).sum(-1)).sqrt()).exp() - # block-sparse reduction preprocess - K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) # TODO - - if not dense: - K_ij.backend = self.backend - - return K_ij - - def _Gauss_block_sparse_pre(self, x: np.array, y: np.array, K_ij: LazyTensor): - """ - Helper function to preprocess data for block-sparse reduction - of the Gaussian kernel - - Args: - x[np.array], y[np.array] = arrays giving rise to Gaussian kernel K(x,y) - K_ij[LazyTensor_n] = symbolic 
representation of K(x,y) - eps[float] = size for square bins - Returns: - K_ij[LazyTensor_n] = symbolic representation of K(x,y) with - set sparse ranges - """ - # labels for low dimensions - if x.shape[1] < 4 or y.shape[1] < 4: - x_labels = grid_cluster(x, self.eps) - y_labels = grid_cluster(y, self.eps) - # range and centroid per class - x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels) - y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels) - else: - # labels for higher dimensions - x_labels, x_centroids = self._KMeans(x) - y_labels, y_centroids = self._KMeans(y) - # compute ranges - x_ranges = cluster_ranges(x_labels) - y_ranges = cluster_ranges(y_labels) + K = Nystrom approximation to kernel""" - # sort points - x, x_labels = sort_clusters(x, x_labels) - y, y_labels = sort_clusters(y, y_labels) - # Compute a coarse Boolean mask: - if self.kernel == "rbf": - D = np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2) - elif self.kernel == "exp": - D = np.sqrt( - np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2) - ) - keep = D < (self.mask_radius) ** 2 - # mask -> set of integer tensors - ranges_ij = from_matrix(x_ranges, y_ranges, keep) - K_ij.ranges = ranges_ij # block-sparsity pattern + K_nq = self._pairwise_kernels(x, self.components_, dense=False) + K_nq.backend = "GPU_2D" + K_nq = aslinearoperator(K_nq) + K_q_inv = aslinearoperator(self.normalization_).T @ aslinearoperator( + self.normalization_ + ) + K_approx = K_nq @ K_q_inv @ K_nq.T + return K_approx - return K_ij + def _astype(self, data, d_type): + return data.astype(d_type) - def _KMeans(self, x: np.array): + # Note: _KMeans will be imported from utils soon + def _KMeans(self, x: np.array) -> Tuple[np.array]: """KMeans with Pykeops to do binning of original data. Args: - x[np.array] = data - k_means[int] = number of bins to build - n_iter[int] = number iterations of KMeans loop + x = data Returns: - labels[np.array] = class labels for each point in x - clusters[np.array] = coordinates for each centroid + labels = class labels for each point in x + clusters = coordinates for each centroid """ N, D = x.shape clusters = np.copy(x[: self.k_means, :]) # initialization of clusters - x_i = LazyTensor_n(x[:, None, :]) + x_i = LazyTensor(x[:, None, :]) for i in range(self.n_iter): - clusters_j = LazyTensor_n(clusters[None, :, :]) + clusters_j = LazyTensor(clusters[None, :, :]) D_ij = ((x_i - clusters_j) ** 2).sum(-1) # points-clusters kernel labels = ( D_ij.argmin(axis=1).astype(int).reshape(N) @@ -285,29 +112,3 @@ def _KMeans(self, x: np.array): clusters[:, d] = np.bincount(labels, weights=x[:, d]) / Ncl return labels, clusters - - def _update_dtype(self, x): - """Helper function that sets dtype to that of - the given data in the fitting step. 
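Since K_approx now returns a composed scipy LinearOperator rather than a dense array, it is meant to be applied, not materialised. A hedged usage sketch, assuming a fitted numpy Nystrom instance N and the array x it was fitted on:

import numpy as np

K_op = N.K_approx(x)  # scipy.sparse.linalg LinearOperator

v = np.random.rand(x.shape[0]).astype(x.dtype)
Kv = K_op @ v  # evaluated as a chain of matrix-vector products

# Densify only for small problems, e.g. to inspect the approximation:
K_dense = K_op @ np.eye(x.shape[0], dtype=x.dtype)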
- - Args: - x [np.array] = raw data to remap - Returns: - nothing - """ - self.dtype = x.dtype - self.inv_eps = np.array([self.inv_eps]).astype(self.dtype)[0] - - def _check_random_state(self, seed): - """Set/get np.random.RandomState instance for permutation - - Args - seed[None, int] - Returns: - numpy random state - """ - if seed is None: - return np.random.mtrand._rand - elif type(seed) == int: - return np.random.RandomState(seed) - raise ValueError(f"Seed {seed} must be None or an integer.") From b6962bc7f758dfcfa956a328c7c2ca230fbe5d72 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Thu, 22 Apr 2021 12:12:33 +0100 Subject: [PATCH 093/111] testing updated import structure --- pykeops/numpy/nystrom/Nystrom.py | 2 +- pykeops/torch/nystrom/nystrom.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pykeops/numpy/nystrom/Nystrom.py b/pykeops/numpy/nystrom/Nystrom.py index 64c5b2440..c1ec3e3c2 100644 --- a/pykeops/numpy/nystrom/Nystrom.py +++ b/pykeops/numpy/nystrom/Nystrom.py @@ -4,7 +4,6 @@ from scipy.sparse.linalg import aslinearoperator from pykeops.common.nystrom_generic import GenericNystrom -from pykeops.numpy.utils import numpytools from pykeops.numpy import LazyTensor from typing import Tuple, List @@ -46,6 +45,7 @@ def __init__( verbose, random_state, ) + from pykeops.numpy.utils import numpytools self.tools = numpytools self.LazyTensor = LazyTensor diff --git a/pykeops/torch/nystrom/nystrom.py b/pykeops/torch/nystrom/nystrom.py index 184a4d2c7..1c4667a0f 100644 --- a/pykeops/torch/nystrom/nystrom.py +++ b/pykeops/torch/nystrom/nystrom.py @@ -1,7 +1,7 @@ import torch from pykeops.common.nystrom_generic import GenericNystrom -from pykeops.torch.utils import torchtools + from pykeops.torch import LazyTensor @@ -33,6 +33,8 @@ def __init__( random_state, ) + from pykeops.torch.utils import torchtools + self.tools = torchtools self.verbose = verbose self.LazyTensor = LazyTensor From d30e00faca1dc7a7dbe49f4e6df2796161432861 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Thu, 22 Apr 2021 15:19:27 +0100 Subject: [PATCH 094/111] moved imports around again --- pykeops/numpy/nystrom/Nystrom.py | 3 ++- pykeops/torch/nystrom/nystrom.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pykeops/numpy/nystrom/Nystrom.py b/pykeops/numpy/nystrom/Nystrom.py index c1ec3e3c2..3209cc91c 100644 --- a/pykeops/numpy/nystrom/Nystrom.py +++ b/pykeops/numpy/nystrom/Nystrom.py @@ -4,7 +4,6 @@ from scipy.sparse.linalg import aslinearoperator from pykeops.common.nystrom_generic import GenericNystrom -from pykeops.numpy import LazyTensor from typing import Tuple, List @@ -48,6 +47,8 @@ def __init__( from pykeops.numpy.utils import numpytools self.tools = numpytools + from pykeops.numpy import LazyTensor + self.LazyTensor = LazyTensor self.eigvals = eigvals diff --git a/pykeops/torch/nystrom/nystrom.py b/pykeops/torch/nystrom/nystrom.py index 1c4667a0f..34abd37ad 100644 --- a/pykeops/torch/nystrom/nystrom.py +++ b/pykeops/torch/nystrom/nystrom.py @@ -2,8 +2,6 @@ from pykeops.common.nystrom_generic import GenericNystrom -from pykeops.torch import LazyTensor - class Nystrom(GenericNystrom): def __init__( @@ -37,6 +35,8 @@ def __init__( self.tools = torchtools self.verbose = verbose + from pykeops.torch import LazyTensor + self.LazyTensor = LazyTensor def _update_dtype(self, x): From 129d216c92ea0d9ae9739c3173e85631adc979c6 Mon Sep 17 00:00:00 2001 From: Gantrithor_AI 
<78312524+Gantrithor-AI@users.noreply.github.com> Date: Fri, 23 Apr 2021 13:58:26 +0100 Subject: [PATCH 095/111] Update plot_nnd_torch.py --- pykeops/tutorials/knn/plot_nnd_torch.py | 41 +++++++++++++------------ 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/pykeops/tutorials/knn/plot_nnd_torch.py b/pykeops/tutorials/knn/plot_nnd_torch.py index 618071641..d70791a11 100644 --- a/pykeops/tutorials/knn/plot_nnd_torch.py +++ b/pykeops/tutorials/knn/plot_nnd_torch.py @@ -52,13 +52,20 @@ approx_nn = nn.kneighbors(y) ######################################################################## -# Now computing the true nearest neighbors with brute force search +# Define the function to compute the true nearest neighbors with brute force search -x_LT = LazyTensor(x.unsqueeze(0).to(device)) -y_LT = LazyTensor(y.unsqueeze(1).to(device)) -d = ((x_LT - y_LT) ** 2).sum(-1) -true_nn = d.argKmin(K=k, dim=1).long() +def brute_force(x, y, k, metric): + x_i = LazyTensor(x.unsqueeze(0).to(device)) + y_j = LazyTensor(y.unsqueeze(1).to(device)) + if metric == "euclidean": + D_ij = ((x_i - y_j) ** 2).sum(-1) + elif metric == "manhattan": + D_ij = ((x_i - y_j).abs()).sum(-1) + indices = D_ij.argKmin(K=k, dim=1).long() + return indices + + ######################################################################## # Define the function to compute recall of the nearest neighbors @@ -82,10 +89,15 @@ def accuracy(indices_test, indices_truth): return accuracy +######################################################################## +# Compute the true nearest neighbors with brute force search using Euclidean distance + +indices = brute_force(x=x, y=y, k=k, metric="euclidean") + ######################################################################## # Check the performance of our algorithm -print("NND Recall:", accuracy(approx_nn.to(device), true_nn)) +print("NND Recall:", accuracy(approx_nn.to(device), indices)) ######################################################################## # Timing the algorithms to observe their performance @@ -95,10 +107,7 @@ def accuracy(indices_test, indices_truth): # timing KeOps brute force for _ in range(iters): - x_LT = LazyTensor(x.unsqueeze(0).to(device)) - y_LT = LazyTensor(y.unsqueeze(1).to(device)) - d = ((x_LT - y_LT) ** 2).sum(-1) - true_nn = d.argKmin(K=k, dim=1).long() + indices = brute_force(x=x, y=y, k=k, metric="euclidean") bf_time = time.time() - start print( "KeOps brute force timing for", N, "points with", D, "dimensions:", bf_time / iters @@ -140,15 +149,12 @@ def accuracy(indices_test, indices_truth): ######################################################################## # Now computing the true nearest neighbors with brute force search using Manhattan distance -x_LT = LazyTensor(x.unsqueeze(0).to(device)) -y_LT = LazyTensor(y.unsqueeze(1).to(device)) -d = ((x_LT - y_LT).abs()).sum(-1) -true_nn = d.argKmin(K=k, dim=1).long() +indices = brute_force(x=x, y=y, k=k, metric="manhattan") ######################################################################## # Check the performance of our algorithm -print("NND Recall:", accuracy(approx_nn.to(device), true_nn)) +print("NND Recall:", accuracy(approx_nn.to(device), indices)) ######################################################################## # Timing the algorithms to observe their performance @@ -158,10 +164,7 @@ def accuracy(indices_test, indices_truth): # timing KeOps brute force for _ in range(iters): - x_LT = LazyTensor(x.unsqueeze(0).to(device)) - y_LT = LazyTensor(y.unsqueeze(1).to(device)) - d = 
((x_LT - y_LT).abs()).sum(-1) - true_nn = d.argKmin(K=k, dim=1).long() + indices = brute_force(x=x, y=y, k=k, metric="manhattan") bf_time = time.time() - start print( "KeOps brute force timing for", N, "points with", D, "dimensions:", bf_time / iters From ddbdefe1899322823ab379f950a4372f3f23eba0 Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Fri, 23 Apr 2021 14:04:05 +0100 Subject: [PATCH 096/111] Update utils.py added accuracy to torch tools --- pykeops/torch/utils.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py index d16057ca9..cb13a0d15 100644 --- a/pykeops/torch/utils.py +++ b/pykeops/torch/utils.py @@ -290,6 +290,25 @@ def calc_centroid(x, c, cl, n=10): @staticmethod def is_tensor(x): return isinstance(x, torch.Tensor) + + @staticmethod + def accuracy(indices_test, indices_truth): + """ + Compares the test and ground truth indices (rows = KNN for each point in dataset) + Returns accuracy: proportion of correct nearest neighbours + """ + N, k = indices_test.shape + + # Calculate number of correct nearest neighbours + accuracy = 0 + for i in range(k): + accuracy += float(np.sum(indices_test == indices_truth)) / N + indices_truth = np.roll( + indices_truth, 1, -1 + ) # Create a rolling window (index positions may not match) + accuracy = float(accuracy / k) # percentage accuracy + + return accuracy def squared_distances(x, y): From 02f0feec20597f49746cace63754e0bfc2720a3c Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Fri, 23 Apr 2021 14:05:48 +0100 Subject: [PATCH 097/111] Create utils.py added accuracy to torch tools --- pykeops/torch/utils.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py index cb13a0d15..3f80b4b2f 100644 --- a/pykeops/torch/utils.py +++ b/pykeops/torch/utils.py @@ -293,22 +293,22 @@ def is_tensor(x): @staticmethod def accuracy(indices_test, indices_truth): - """ - Compares the test and ground truth indices (rows = KNN for each point in dataset) - Returns accuracy: proportion of correct nearest neighbours - """ - N, k = indices_test.shape - - # Calculate number of correct nearest neighbours - accuracy = 0 - for i in range(k): - accuracy += float(np.sum(indices_test == indices_truth)) / N - indices_truth = np.roll( - indices_truth, 1, -1 - ) # Create a rolling window (index positions may not match) - accuracy = float(accuracy / k) # percentage accuracy - - return accuracy + """ + Compares the test and ground truth indices (rows = KNN for each point in dataset) + Returns accuracy: proportion of correct nearest neighbours + """ + N, k = indices_test.shape + + # Calculate number of correct nearest neighbours + accuracy = 0 + for i in range(k): + accuracy += torch.sum(indices_test == indices_truth).float() / N + indices_truth = torch.roll( + indices_truth, 1, -1 + ) # Create a rolling window (index positions may not match) + accuracy = float(accuracy / k) # percentage accuracy + + return accuracy def squared_distances(x, y): From 7c793e81cf9282ef0ef59d84d68d71503c9e4102 Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Fri, 23 Apr 2021 14:14:23 +0100 Subject: [PATCH 098/111] Update plot_nnd_torch.py shifted accuracy to torch.utils --- pykeops/tutorials/knn/plot_nnd_torch.py | 28 +++---------------------- 1 file changed, 3 insertions(+), 25 
deletions(-) diff --git a/pykeops/tutorials/knn/plot_nnd_torch.py b/pykeops/tutorials/knn/plot_nnd_torch.py index d70791a11..3a173784b 100644 --- a/pykeops/tutorials/knn/plot_nnd_torch.py +++ b/pykeops/tutorials/knn/plot_nnd_torch.py @@ -22,6 +22,7 @@ import time import torch from pykeops.torch import NND +from pykeops.torch.utils import torchtools use_cuda = torch.cuda.is_available() device = torch.device("cuda") if use_cuda else torch.device("cpu") @@ -65,29 +66,6 @@ def brute_force(x, y, k, metric): indices = D_ij.argKmin(K=k, dim=1).long() return indices - -######################################################################## -# Define the function to compute recall of the nearest neighbors - - -def accuracy(indices_test, indices_truth): - """ - Compares the test and ground truth indices (rows = KNN for each point in dataset) - Returns accuracy: proportion of correct nearest neighbours - """ - N, k = indices_test.shape - - # Calculate number of correct nearest neighbours - accuracy = 0 - for i in range(k): - accuracy += torch.sum(indices_test == indices_truth).float() / N - indices_truth = torch.roll( - indices_truth, 1, -1 - ) # Create a rolling window (index positions may not match) - accuracy = float(accuracy / k) # percentage accuracy - - return accuracy - ######################################################################## # Compute the true nearest neighbors with brute force search using Euclidean distance @@ -97,7 +75,7 @@ def accuracy(indices_test, indices_truth): ######################################################################## # Check the performance of our algorithm -print("NND Recall:", accuracy(approx_nn.to(device), indices)) +print("NND Recall:", torchtools.accuracy(approx_nn.to(device), indices)) ######################################################################## # Timing the algorithms to observe their performance @@ -154,7 +132,7 @@ def accuracy(indices_test, indices_truth): ######################################################################## # Check the performance of our algorithm -print("NND Recall:", accuracy(approx_nn.to(device), indices)) +print("NND Recall:", torchtools.accuracy(approx_nn.to(device), indices)) ######################################################################## # Timing the algorithms to observe their performance From 48959da25133bb75aad9ef328b185023abef3775 Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Fri, 23 Apr 2021 14:16:47 +0100 Subject: [PATCH 099/111] Update plot_ivf_torch.py shifted accuracy to torch.utils --- pykeops/tutorials/knn/plot_ivf_torch.py | 36 +++++-------------------- 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/pykeops/tutorials/knn/plot_ivf_torch.py b/pykeops/tutorials/knn/plot_ivf_torch.py index b19bb7124..ed7ddce41 100644 --- a/pykeops/tutorials/knn/plot_ivf_torch.py +++ b/pykeops/tutorials/knn/plot_ivf_torch.py @@ -23,6 +23,7 @@ import time import torch from pykeops.torch import IVF +from pykeops.torch.utils import torchtools use_cuda = torch.cuda.is_available() device = torch.device("cuda") if use_cuda else torch.device("cpu") @@ -61,33 +62,10 @@ true_nn = nn.brute_force(x, y, k=k) -############################################################### -# Define the function to compute recall of the nearest neighbors - - -def accuracy(indices_test, indices_truth): - """ - Compares the test and ground truth indices (rows = KNN for each point in dataset) - Returns accuracy: proportion of correct nearest neighbours - """ - N, k = 
indices_test.shape - - # Calculate number of correct nearest neighbours - accuracy = 0 - for i in range(k): - accuracy += torch.sum(indices_test == indices_truth).float() / N - indices_truth = torch.roll( - indices_truth, 1, -1 - ) # Create a rolling window (index positions may not match) - accuracy = float(accuracy / k) # percentage accuracy - - return accuracy - - ############################################################### # Check the performance of our algorithm -print("IVF Recall:", accuracy(approx_nn, true_nn)) +print("IVF Recall:", torchtools.accuracy(approx_nn, true_nn)) ############################################################### # Timing the algorithms to observe their performance @@ -130,7 +108,7 @@ def accuracy(indices_test, indices_truth): nn = IVF(metric="angular") nn.fit(x_norm) approx_nn = nn.kneighbors(y_norm) -print("IVF Recall:", accuracy(approx_nn, true_nn)) +print("IVF Recall:", torchtools.accuracy(approx_nn, true_nn)) ############################################################### # The IVF class also has an option to automatically normalise all inputs @@ -138,7 +116,7 @@ def accuracy(indices_test, indices_truth): nn = IVF(metric="angular", normalise=True) nn.fit(x) approx_nn = nn.kneighbors(y) -print("IVF Recall:", accuracy(approx_nn, true_nn)) +print("IVF Recall:", torchtools.accuracy(approx_nn, true_nn)) ############################################################### # There is also an option to use full angular metric "angular_full", which uses the full angular metric. "angular" simply uses the dot product. @@ -146,7 +124,7 @@ def accuracy(indices_test, indices_truth): nn = IVF(metric="angular_full") nn.fit(x) approx_nn = nn.kneighbors(y) -print("IVF Recall:", accuracy(approx_nn, true_nn)) +print("IVF Recall:", torchtools.accuracy(approx_nn, true_nn)) ############################################################### # IVF nearest neighbors search with approximations for K-Means centroids @@ -163,7 +141,7 @@ def accuracy(indices_test, indices_truth): nn.fit(x, approx=True, n=50) approx_nn = nn.kneighbors(y) true_nn = nn.brute_force(x, y) -print("IVF Recall:", accuracy(approx_nn, true_nn)) +print("IVF Recall:", torchtools.accuracy(approx_nn, true_nn)) # define a custom metric def minkowski(x, y, p=3): @@ -183,4 +161,4 @@ def minkowski(x, y, p=3): nn.fit(x, approx=True) approx_nn = nn.kneighbors(y) true_nn = nn.brute_force(x, y) -print("IVF Recall:", accuracy(approx_nn, true_nn)) +print("IVF Recall:", torchtools.accuracy(approx_nn, true_nn)) From b1c30953a1adcc1266334d189c4472ff6f036189 Mon Sep 17 00:00:00 2001 From: Gantrithor_AI <78312524+Gantrithor-AI@users.noreply.github.com> Date: Fri, 23 Apr 2021 14:37:51 +0100 Subject: [PATCH 100/111] Update plot_nnd_torch.py factorized timing function --- pykeops/tutorials/knn/plot_nnd_torch.py | 64 ++++++++++++------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/pykeops/tutorials/knn/plot_nnd_torch.py b/pykeops/tutorials/knn/plot_nnd_torch.py index 3a173784b..e1fc835de 100644 --- a/pykeops/tutorials/knn/plot_nnd_torch.py +++ b/pykeops/tutorials/knn/plot_nnd_torch.py @@ -78,27 +78,41 @@ def brute_force(x, y, k, metric): print("NND Recall:", torchtools.accuracy(approx_nn.to(device), indices)) ######################################################################## -# Timing the algorithms to observe their performance +# Define function to time the algorithms to observe their performance + + +def timing(x, y, k, N, D, metric): + start = time.time() + iters = 10 + + # timing KeOps brute force + for _ 
in range(iters): + indices = brute_force(x=x, y=y, k=k, metric=metric) + bf_time = time.time() - start + print( + "KeOps brute force timing for", + N, + "points with", + D, + "dimensions:", + bf_time / iters, + ) -start = time.time() -iters = 10 + # timing NNDescent + start = time.time() + for _ in range(iters): + approx_nn = nn.kneighbors(y) + nnd_time = time.time() - start + print("KeOps NND timing for", N, "points with", D, "dimensions:", nnd_time / iters) -# timing KeOps brute force -for _ in range(iters): - indices = brute_force(x=x, y=y, k=k, metric="euclidean") -bf_time = time.time() - start -print( - "KeOps brute force timing for", N, "points with", D, "dimensions:", bf_time / iters -) -# timing NNDescent -start = time.time() -for _ in range(iters): - approx_nn = nn.kneighbors(y) -nnd_time = time.time() - start -print("KeOps NND timing for", N, "points with", D, "dimensions:", nnd_time / iters) +######################################################################## +# Timing the algorithms to observe their performance + +timing(x=x, y=y, k=k, N=N, D=D, metric="euclidean") ######################################################################## + # NNDescent search using clusters and Manhattan distance # Second experiment with N=$10^6$ points in dimension D=3, with 5 nearest neighbors and manhattan distance. @@ -137,20 +151,4 @@ def brute_force(x, y, k, metric): ######################################################################## # Timing the algorithms to observe their performance -start = time.time() -iters = 10 - -# timing KeOps brute force -for _ in range(iters): - indices = brute_force(x=x, y=y, k=k, metric="manhattan") -bf_time = time.time() - start -print( - "KeOps brute force timing for", N, "points with", D, "dimensions:", bf_time / iters -) - -# timing NNDescent -start = time.time() -for _ in range(iters): - approx_nn = nn.kneighbors(y) -nnd_time = time.time() - start -print("KeOps NND timing for", N, "points with", D, "dimensions:", nnd_time / iters) +timing(x=x, y=y, k=k, N=N, D=D, metric="manhattan") From f3ccd592c80c66b469d59f12129809cf2c89461b Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Fri, 23 Apr 2021 14:45:41 +0100 Subject: [PATCH 101/111] reorganised accuracy computations --- pykeops/numpy/utils.py | 22 +++++++++++++++++ pykeops/torch/utils.py | 10 +++++--- pykeops/tutorials/knn/plot_ivf_numpy.py | 32 ++++--------------------- pykeops/tutorials/knn/plot_ivf_torch.py | 12 +++++----- pykeops/tutorials/knn/plot_nnd_torch.py | 4 ++-- 5 files changed, 42 insertions(+), 38 deletions(-) diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 5179bb1d9..015191640 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -212,6 +212,28 @@ def kmeans(x, distance=None, K=10, Niter=15, device="CPU", approx=False, n=0): c[:, d] = np.bincount(cl, weights=x[:, d]) / Ncl return cl, c + @staticmethod + def knn_accuracy(indices_test, indices_truth): + """ + Compares the test and ground truth indices (rows = KNN for each point in dataset) + Returns the accuracy or proportion of correct nearest neighbours + + Args: + indices_test ((N, K) array): K indices obtained from approximate NN search + indices_truth ((N, K) array): K indices obtained from exact NN search + """ + N, k = indices_test.shape + + # Calculate number of correct nearest neighbours + accuracy = 0 + for i in range(k): + accuracy += float(np.sum(indices_test == indices_truth)) / N + indices_truth = np.roll( + indices_truth, 1, -1 + ) # Create a 
rolling window (index positions may not match)
+        accuracy = float(accuracy / k)  # mean fraction of recovered neighbours
+
+        return accuracy

 def squared_distances(x, y):
     x_norm = (x ** 2).sum(1).reshape(-1, 1)
diff --git a/pykeops/torch/utils.py b/pykeops/torch/utils.py
index 3f80b4b2f..f501bd429 100644
--- a/pykeops/torch/utils.py
+++ b/pykeops/torch/utils.py
@@ -290,12 +290,16 @@ def calc_centroid(x, c, cl, n=10):
     @staticmethod
     def is_tensor(x):
         return isinstance(x, torch.Tensor)
-
+
     @staticmethod
-    def accuracy(indices_test, indices_truth):
+    def knn_accuracy(indices_test, indices_truth):
         """
         Compares the test and ground truth indices (rows = KNN for each point in dataset)
-        Returns accuracy: proportion of correct nearest neighbours
+        Returns the accuracy or proportion of correct nearest neighbours
+
+        Args:
+            indices_test ((N, K) array): K indices obtained from approximate NN search
+            indices_truth ((N, K) array): K indices obtained from exact NN search
         """
         N, k = indices_test.shape

diff --git a/pykeops/tutorials/knn/plot_ivf_numpy.py b/pykeops/tutorials/knn/plot_ivf_numpy.py
index bd9a82dc8..8bbe728d5 100644
--- a/pykeops/tutorials/knn/plot_ivf_numpy.py
+++ b/pykeops/tutorials/knn/plot_ivf_numpy.py
@@ -23,6 +23,7 @@
 import time
 import numpy as np
 from pykeops.numpy import IVF
+from pykeops.numpy.utils import numpytools

########################################################################
# IVF nearest neighbour search with Euclidean metric
@@ -55,33 +56,10 @@

 true_nn = nn.brute_force(x, y, k=k)

-########################################################################
-# Define the function to compute recall of the nearest neighbors
-
-
-def accuracy(indices_test, indices_truth):
-    """
-    Compares the test and ground truth indices (rows = KNN for each point in dataset)
-    Returns accuracy: proportion of correct nearest neighbours
-    """
-    N, k = indices_test.shape
-
-    # Calculate number of correct nearest neighbours
-    accuracy = 0
-    for i in range(k):
-        accuracy += float(np.sum(indices_test == indices_truth)) / N
-        indices_truth = np.roll(
-            indices_truth, 1, -1
-        )  # Create a rolling window (index positions may not match)
-    accuracy = float(accuracy / k)  # percentage accuracy
-
-    return accuracy
-
-
########################################################################
# Check the performance of our algorithm

-print("IVF Recall:", accuracy(approx_nn, true_nn))
+print("IVF Recall:", numpytools.knn_accuracy(approx_nn, true_nn))

########################################################################
# Timing the algorithms to observe their performance
@@ -124,7 +102,7 @@ def accuracy(indices_test, indices_truth):
 nn = IVF(metric="angular")
 nn.fit(x_norm)
 approx_nn = nn.kneighbors(y_norm)
-print("IVF Recall:", accuracy(approx_nn, true_nn))
+print("IVF Recall:", numpytools.knn_accuracy(approx_nn, true_nn))

########################################################################
# The IVF class also has an option to automatically normalise all inputs

 nn = IVF(metric="angular", normalise=True)
 nn.fit(x)
 approx_nn = nn.kneighbors(y)
-print("IVF Recall:", accuracy(approx_nn, true_nn))
+print("IVF Recall:", numpytools.knn_accuracy(approx_nn, true_nn))

########################################################################
# There is also an option to use the full angular metric "angular_full"; "angular" simply uses the dot product. 
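A small worked check of knn_accuracy (hand-picked values, for illustration): the rolling comparison credits neighbours that appear in the other row at a shifted position, so a permuted but correct neighbour set is not penalised.

import numpy as np
from pykeops.numpy.utils import numpytools

test = np.array([[3, 7]])  # one query, two approximate neighbours
truth = np.array([[7, 3]])  # same two neighbours, opposite order

# Pass 0 matches nothing; after np.roll the rows align and both entries
# match, so the score is (0 + 2) / (N * k) = 1.0 despite the permutation.
print(numpytools.knn_accuracy(test, truth))  # 1.0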
@@ -140,4 +118,4 @@ def accuracy(indices_test, indices_truth): nn = IVF(metric="angular_full") nn.fit(x) approx_nn = nn.kneighbors(y) -print("IVF Recall:", accuracy(approx_nn, true_nn)) +print("IVF Recall:", numpytools.knn_accuracy(approx_nn, true_nn)) diff --git a/pykeops/tutorials/knn/plot_ivf_torch.py b/pykeops/tutorials/knn/plot_ivf_torch.py index ed7ddce41..d6e6f3c0a 100644 --- a/pykeops/tutorials/knn/plot_ivf_torch.py +++ b/pykeops/tutorials/knn/plot_ivf_torch.py @@ -65,7 +65,7 @@ ############################################################### # Check the performance of our algorithm -print("IVF Recall:", torchtools.accuracy(approx_nn, true_nn)) +print("IVF Recall:", torchtools.knn_accuracy(approx_nn, true_nn)) ############################################################### # Timing the algorithms to observe their performance @@ -108,7 +108,7 @@ nn = IVF(metric="angular") nn.fit(x_norm) approx_nn = nn.kneighbors(y_norm) -print("IVF Recall:", torchtools.accuracy(approx_nn, true_nn)) +print("IVF Recall:", torchtools.knn_accuracy(approx_nn, true_nn)) ############################################################### # The IVF class also has an option to automatically normalise all inputs @@ -116,7 +116,7 @@ nn = IVF(metric="angular", normalise=True) nn.fit(x) approx_nn = nn.kneighbors(y) -print("IVF Recall:", torchtools.accuracy(approx_nn, true_nn)) +print("IVF Recall:", torchtools.knn_accuracy(approx_nn, true_nn)) ############################################################### # There is also an option to use full angular metric "angular_full", which uses the full angular metric. "angular" simply uses the dot product. @@ -124,7 +124,7 @@ nn = IVF(metric="angular_full") nn.fit(x) approx_nn = nn.kneighbors(y) -print("IVF Recall:", torchtools.accuracy(approx_nn, true_nn)) +print("IVF Recall:", torchtools.knn_accuracy(approx_nn, true_nn)) ############################################################### # IVF nearest neighbors search with approximations for K-Means centroids @@ -141,7 +141,7 @@ nn.fit(x, approx=True, n=50) approx_nn = nn.kneighbors(y) true_nn = nn.brute_force(x, y) -print("IVF Recall:", torchtools.accuracy(approx_nn, true_nn)) +print("IVF Recall:", torchtools.knn_accuracy(approx_nn, true_nn)) # define a custom metric def minkowski(x, y, p=3): @@ -161,4 +161,4 @@ def minkowski(x, y, p=3): nn.fit(x, approx=True) approx_nn = nn.kneighbors(y) true_nn = nn.brute_force(x, y) -print("IVF Recall:", torchtools.accuracy(approx_nn, true_nn)) +print("IVF Recall:", torchtools.knn_accuracy(approx_nn, true_nn)) diff --git a/pykeops/tutorials/knn/plot_nnd_torch.py b/pykeops/tutorials/knn/plot_nnd_torch.py index e1fc835de..1e39c2ba0 100644 --- a/pykeops/tutorials/knn/plot_nnd_torch.py +++ b/pykeops/tutorials/knn/plot_nnd_torch.py @@ -75,7 +75,7 @@ def brute_force(x, y, k, metric): ######################################################################## # Check the performance of our algorithm -print("NND Recall:", torchtools.accuracy(approx_nn.to(device), indices)) +print("NND Recall:", torchtools.knn_accuracy(approx_nn.to(device), indices)) ######################################################################## # Define function to time the algorithms to observe their performance @@ -146,7 +146,7 @@ def timing(x, y, k, N, D, metric): ######################################################################## # Check the performance of our algorithm -print("NND Recall:", torchtools.accuracy(approx_nn.to(device), indices)) +print("NND Recall:", torchtools.knn_accuracy(approx_nn.to(device), 
indices))
 
 ########################################################################
 # Timing the algorithms to observe their performance
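The helper moved into numpytools/torchtools above is shared by all the KNN tutorials. As a minimal sketch of the intended call pattern, using the IVF interface that already appears in this series (the random data and shapes below are illustrative only, not part of the patch):

import numpy as np
from pykeops.numpy import IVF
from pykeops.numpy.utils import numpytools

x = np.random.randn(10 ** 4, 3).astype(np.float32)  # database points
y = np.random.randn(10 ** 2, 3).astype(np.float32)  # query points

nn = IVF()  # inverted-file index, default Euclidean metric
nn.fit(x)
approx_nn = nn.kneighbors(y)  # approximate nearest neighbours of y in x
true_nn = nn.brute_force(x, y)  # exact nearest neighbours for reference

# proportion of approximate neighbours that match the exact ones
print("IVF Recall:", numpytools.knn_accuracy(approx_nn, true_nn))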

From 5390fcc9fe568e2156afed8ba1b9e284eb2521f3 Mon Sep 17 00:00:00 2001
From: Anna
Date: Sun, 25 Apr 2021 23:07:29 +0100
Subject: [PATCH 102/111] added in updates for Nystroem

---
 pykeops/common/nystroem_generic.py | 222 +++++++++++++++++++++++++++++
 pykeops/numpy/nystrom/nystroem.py  | 103 +++++++++++++
 pykeops/torch/nystrom/nystroem.py  | 112 +++++++++++++++
 3 files changed, 437 insertions(+)
 create mode 100644 pykeops/common/nystroem_generic.py
 create mode 100644 pykeops/numpy/nystrom/nystroem.py
 create mode 100644 pykeops/torch/nystrom/nystroem.py

diff --git a/pykeops/common/nystroem_generic.py b/pykeops/common/nystroem_generic.py
new file mode 100644
index 000000000..bcd84fd33
--- /dev/null
+++ b/pykeops/common/nystroem_generic.py
@@ -0,0 +1,222 @@
+import numpy as np
+import pykeops
+from typing import TypeVar, Union
+import warnings
+
+# Generic placeholder for numpy and torch variables.
+generic_array = TypeVar("generic_array")
+GenericLazyTensor = TypeVar("GenericLazyTensor")
+
+
+class GenericNystroem:
+    """
+    Superclass defining the Nystrom operations. The end user should
+    use the numpy.nystrom or torch.nystrom subclasses.
+    """
+
+    def __init__(
+        self,
+        n_components: int = 100,
+        kernel: Union[str, callable] = "rbf",
+        sigma: float = None,
+        inv_eps: float = None,
+        verbose: bool = False,
+        random_state: Union[None, int] = None,
+    ):
+
+        """
+        Args:
+            n_components: int: how many samples to select from data.
+            kernel: str: type of kernel to use. Current options = {rbf: Gaussian,
+                exp: exponential}.
+            sigma: float: exponential constant for the RBF and exponential kernels.
+            inv_eps: float: additive invertibility constant for matrix decomposition.
+            verbose: boolean: set True to print details.
+            random_state: int: to set a random seed for the random sampling of the
+                samples. To be used when reproducibility is needed.
+        """
+        self.n_components = n_components
+        self.kernel = kernel
+        self.sigma = sigma
+        self.dtype = None
+        self.verbose = verbose
+        self.random_state = random_state
+        self.tools = None
+        self.lazy_tensor = None
+
+        if inv_eps:
+            self.inv_eps = inv_eps
+        else:
+            self.inv_eps = 1e-8
+
+    def fit(self, x: generic_array) -> "GenericNystroem":
+        """
+        Args:
+            x: generic_array: array or tensor of shape (n_samples, n_features)
+        Returns:
+            Fitted instance of the class
+        """
+        self.dtype = x.dtype
+
+        # Basic checks
+        assert self.tools.is_tensor(
+            x
+        ), "Input to fit(.) must be a numpy array or a torch tensor."
+        assert (
+            x.shape[0] >= self.n_components
+        ), "Nystrom needs x.shape[0] >= n_components."
+        if self.kernel == "exp" and self.sigma is not None:
+            assert self.sigma > 0, "The exponential kernel needs a positive sigma."
+
+        # Set default sigma
+        if self.sigma is None:
+            self.sigma = np.sqrt(x.shape[1]) / np.sqrt(2)
+
+        # Update dtype
+        self._update_dtype()
+        # Number of samples
+        n_samples = x.shape[0]
+
+        # Define basis
+        rnd = self._check_random_state(self.random_state)
+        inds = rnd.permutation(n_samples)
+        basis_inds = inds[: self.n_components]
+        basis = x[basis_inds]
+
+        # Build smaller kernel
+        basis_kernel = self._pairwise_kernels(basis, dense=True)
+
+        # Decomposition is an abstract method that needs to be defined in each subclass
+        self.normalization = self._decomposition_and_norm(basis_kernel)
+        self.components_ = basis
+        self.component_indices_ = inds
+
+        return self
+
+    def _decomposition_and_norm(self, X: GenericLazyTensor):
+        """
+        To be defined in the subclass.
+
+        Args:
+            X: GenericLazyTensor
+        """
+        raise NotImplementedError(
+            "Subclass must implement the method _decomposition_and_norm."
+        )
+
+    def _get_kernel(self, x: generic_array, y: generic_array, kernel=None) -> generic_array:
+        """
+        To be implemented in the subclass.
+
+        Args:
+            x: generic_array
+            y: generic_array
+
+        Returns:
+            K: generic_array: dense kernel array
+        """
+        raise NotImplementedError("Subclass must implement the method _get_kernel.")
+
+    def transform(self, x: generic_array, dense=True) -> generic_array:
+        """Applies the transform to the data, mapping it to the feature space
+        which supports the approximated kernel.
+
+        Args:
+            x: generic_array: data to transform, shape (n_samples, n_features)
+            dense: boolean: (Default value = True)
+
+        Returns:
+            x_new: generic_array: transformed data, shape (n_samples, n_components)
+        """
+        if type(x) == np.ndarray and not dense:
+            warnings.warn("For the Numpy transform it is best to use dense=True")
+
+        K_nq = self._pairwise_kernels(x, self.components_, dense=dense)
+        x_new = K_nq @ self.normalization
+        return x_new
+
+    def _pairwise_kernels(self, x: generic_array, y: generic_array = None, dense=False):
+        """Helper function to build the kernel.
+
+        Args:
+            x: generic_array: data, shape (N, D)
+            y: generic_array: (Default value = None), if given, an (M, D) array
+            dense: boolean: (Default value = False). Use False to return a
+                symbolic LazyTensor reduction, True to return a dense
+                array/tensor.
+
+        Returns:
+            LazyTensor if dense == False,
+            dense array/tensor if dense == True
+        """
+
+        if y is None:
+            y = x
+        x = x / (np.sqrt(2) * self.sigma)
+        y = y / (np.sqrt(2) * self.sigma)
+
+        x_i, x_j = self.tools.contiguous(x[:, None, :]), self.tools.contiguous(
+            y[None, :, :]
+        )  # (N, 1, D), (1, M, D)
+
+        if self.kernel == "rbf":
+            if dense:
+                K_ij = self._get_kernel(x, y)
+
+            else:
+                x_i, x_j = self.lazy_tensor(x_i), self.lazy_tensor(x_j)
+                D_ij = ((x_i - x_j) ** 2).sum(dim=2)
+                K_ij = (-D_ij).exp()
+
+        elif self.kernel == "exp":
+            if dense:
+                K_ij = self._get_kernel(x, y, kernel="exp")
+
+            else:
+                x_i, x_j = self.lazy_tensor(x_i), self.lazy_tensor(x_j)
+                K_ij = (-(((x_i - x_j) ** 2).sum(-1)).sqrt()).exp()
+
+        # computation with a custom kernel
+        else:
+            print("Please note that computations on custom kernels are dense-only.")
+            K_ij = self.kernel(x_i, x_j)
+
+        return K_ij  # (N, M)
+
+    def _update_dtype(self) -> None:
+        """Helper function that casts inv_eps to the dtype of the
+        data seen in the fitting step.
+        """
+        self.inv_eps = np.array([self.inv_eps]).astype(self.dtype)[0]
+
+    def _check_random_state(self, seed: Union[None, int]):
+        """
+        Set/get a np.random.RandomState instance for the permutation.
+
+        Args:
+            seed: Union[None, int]
+
+        Returns:
+            numpy random state
+        """
+
+        if seed is None:
+            return np.random.mtrand._rand
+
+        elif type(seed) == int:
+            return np.random.RandomState(seed)
+
+        raise ValueError(f"Seed {seed} must be None or an integer.")
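To make the algebra above concrete: fit() stores normalization = K_q^{-1/2}, transform() maps x to Phi(x) = K_nq @ K_q^{-1/2}, and therefore Phi(x) @ Phi(x).T = K_nq @ K_q^{-1} @ K_qn, the standard Nystrom estimate of the full kernel. A dense, pure-numpy sketch of this identity, independent of the patch itself (toy sizes and a hand-rolled RBF kernel):

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(500, 4)
q = x[:100]  # the 100 sampled "landmark" points

def rbf(a, b, sigma=2.0):
    # exp(-|a - b|^2 / (2 * sigma^2)), the same convention as _pairwise_kernels
    d2 = ((a[:, None, :] - b[None, :, :]) ** 2).sum(-1)
    return np.exp(-d2 / (2 * sigma ** 2))

K_q = rbf(q, q) + 1e-8 * np.eye(100)  # small landmark kernel, kept invertible
S, U = np.linalg.eigh(K_q)
normalization = (U / np.sqrt(np.maximum(S, 1e-12))) @ U.T  # K_q^{-1/2}

phi = rbf(x, q) @ normalization  # the feature map computed by transform(), (N, Q)
K_est = phi @ phi.T  # equals K_nq @ K_q^{-1} @ K_qn
print(np.abs(K_est - rbf(x, x)).max())  # small when the landmarks cover the data well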
+ """ + self.inv_eps = np.array([self.inv_eps]).astype(self.dtype)[0] + + def _check_random_state(self, seed: Union[None, int]) -> None: + """ + Set/get np.random.RandomState instance for permutation. + + Args: + seed: Union[None: int]: + + Returns: + numpy random state + """ + + if seed is None: + return np.random.mtrand._rand + + elif type(seed) == int: + return np.random.RandomState(seed) + + raise ValueError(f"Seed {seed} must be None or an integer.") diff --git a/pykeops/numpy/nystrom/nystroem.py b/pykeops/numpy/nystrom/nystroem.py new file mode 100644 index 000000000..f90ce599f --- /dev/null +++ b/pykeops/numpy/nystrom/nystroem.py @@ -0,0 +1,103 @@ +import numpy as np + +from scipy.linalg import eigh +from scipy.sparse.linalg import aslinearoperator + +from pykeops.common.nystrom_generic import GenericNystroem + +from typing import List + + +class Nystroem(GenericNystroem): + """ + Nystroem class to work with Numpy arrays. + """ + + def __init__( + self, + n_components=100, + kernel="rbf", + sigma: float = None, + inv_eps: float = None, + verbose=False, + random_state=None, + eigvals: List[int] = None, + ): + + """ + Args: + n_components: int: how many samples to select from data. + kernel: str: type of kernel to use. Current options = {rbf:Gaussian, + exp: exponential}. + sigma: float: exponential constant for the RBF and exponential kernels. + inv_eps: float: additive invertibility constant for matrix decomposition. + verbose: boolean: set True to print details. + random_state: int: to set a random seed for the random sampling of the + samples. To be used when reproducibility is needed. + eigvals: eigenvalues index interval [a,b] for constructed K_q, + where 0 <= a < b < length of K_q + + """ + super().__init__(n_components, kernel, sigma, inv_eps, verbose, random_state) + from pykeops.numpy.utils import numpytools + from pykeops.numpy import LazyTensor + + self.tools = numpytools + self.lazy_tensor = LazyTensor + self.eigvals = eigvals + + if eigvals: + assert eigvals[0] < eigvals[1], "eigvals = [a,b] needs a < b" + assert ( + eigvals[1] < n_components + ), "max eigenvalue index needs to be less\ + than size of K_q = n_components" + + def _decomposition_and_norm(self, X: np.array) -> np.array: + """ + Computes K_q^{-1/2}. + + Returns: + K_q^{-1/2}: np.array + """ + + X = ( + X + np.eye(X.shape[0], dtype=self.dtype) * self.inv_eps + ) # (Q,Q) Q - num_components + S, U = eigh(X, eigvals=self.eigvals) # (Q,), (Q,Q) + S = np.maximum(S, 1e-12) + + return np.dot(U / np.sqrt(S), U.T) # (Q,Q) + + def _get_kernel(self, x: np.array, y: np.array, kernel=None) -> np.array: + + D_xx = np.sum((x ** 2), axis=-1)[:, None] # (N,1) + D_xy = x @ y.T # (N,D) @ (D,M) = (N,M) + D_yy = np.sum((y ** 2), axis=-1)[None, :] # (1,M) + D_xy = D_xx - 2 * D_xy + D_yy # (N,M) + if kernel == "exp": + D_xy = np.sqrt(D_xy) + return np.exp(-D_xy) # (N,M) + + def K_approx(self, x: np.array) -> "LinearOperator": + """ + Method to return Nystrom approximation to the kernel. + + Args: + x: np.array: data used in fit(.) function. 
diff --git a/pykeops/torch/nystrom/nystroem.py b/pykeops/torch/nystrom/nystroem.py
new file mode 100644
index 000000000..b4ece0861
--- /dev/null
+++ b/pykeops/torch/nystrom/nystroem.py
@@ -0,0 +1,112 @@
+import torch
+
+from pykeops.common.nystrom_generic import GenericNystroem
+
+
+class Nystroem(GenericNystroem):
+    """
+    Nystroem class to work with Pytorch tensors.
+    """
+
+    def __init__(
+        self,
+        n_components=100,
+        kernel="rbf",
+        sigma: float = None,
+        inv_eps: float = None,
+        verbose=False,
+        random_state=None,
+    ):
+
+        """
+        Args:
+            n_components: int: how many samples to select from data.
+            kernel: str: type of kernel to use. Current options = {rbf: Gaussian,
+                exp: exponential}.
+            sigma: float: exponential constant for the RBF and exponential kernels.
+            inv_eps: float: additive invertibility constant for matrix decomposition.
+            verbose: boolean: set True to print details.
+            random_state: int: to set a random seed for the random sampling of the
+                samples. To be used when reproducibility is needed.
+        """
+
+        super().__init__(n_components, kernel, sigma, inv_eps, verbose, random_state)
+        from pykeops.torch.utils import torchtools
+        from pykeops.torch import LazyTensor
+
+        self.tools = torchtools
+        self.verbose = verbose
+        self.lazy_tensor = LazyTensor
+
+    def _decomposition_and_norm(self, basis_kernel) -> torch.tensor:
+        """
+        Computes K_q^{-1/2}, stored as self.normalization in the fit(.) function.
+
+        Args:
+            basis_kernel: torch.tensor: K_q, the smaller sampled kernel
+
+        Returns:
+            K_q^{-1/2}: torch.tensor
+        """
+
+        U, S, V = torch.linalg.svd(
+            basis_kernel, full_matrices=False
+        )  # (Q,Q), (Q,), (Q,Q)
+        S = torch.maximum(S, torch.ones_like(S) * 1e-12)  # clamp away from zero, on the right device
+        return torch.mm(U / torch.sqrt(S), V)  # (Q,Q)
+
+    def _get_kernel(self, x: torch.tensor, y: torch.tensor, kernel=None):
+        """
+        Constructs the dense kernel.
+
+        Returns:
+            K: torch.tensor: dense kernel
+        """
+        D_xx = (x * x).sum(-1).unsqueeze(1)  # (N,1)
+        D_xy = torch.matmul(x, y.permute(1, 0))  # (N,D) @ (D,M) = (N,M)
+        D_yy = (y * y).sum(-1).unsqueeze(0)  # (1,M)
+        D_xy = D_xx - 2 * D_xy + D_yy  # (N,M)
+        if kernel == "exp":
+            D_xy = torch.sqrt(D_xy)
+        return (-D_xy).exp()  # (N,M)
+
+    def _update_dtype(self):
+        "Overridden as a no-op in this subclass."
+        pass
+
+    def K_approx(self, X: torch.tensor) -> "K_approx_operator":
+        """Method to return the Nystrom approximation to the kernel.
+
+        Args:
+            X: torch.tensor: data used in the fit(.) function.
+
+        Returns:
+            K_approx: K_approx_operator: Nystrom approximation to the kernel,
+                which can be applied downstream as K_approx @ v for a tensor v
+                of shape (N,) or (N, B)
+        """
+
+        K_nq = self._pairwise_kernels(X, self.components_, dense=False)  # (N, Q)
+        K_approx = K_approx_operator(K_nq, self.normalization)  # acts on v of shape (N, B)
+        return K_approx
+
+
+class K_approx_operator:
+    """Helper class wrapping K_approx as an object
+    compatible with the @ operator.
+    """
+
+    def __init__(self, K_nq, normalization):
+
+        self.K_nq = K_nq  # shape: n_samples x n_components
+        self.normalization = normalization
+
+    def __matmul__(self, v: torch.tensor) -> torch.tensor:
+
+        K_qn = self.K_nq.T
+        self.K_nq.backend = "GPU_2D"
+
+        x = K_qn @ v  # (Q,N) @ (N,B) = (Q,B)
+        x = self.normalization @ self.normalization.T @ x  # (Q,Q) @ (Q,Q) @ (Q,B)
+        x = self.K_nq @ x  # (N,Q) @ (Q,B) = (N,B)
+        return x  # (N,B)

From 1e23100c50518a04ca87725f09114eec5deb97d6 Mon Sep 17 00:00:00 2001
From: hl-anna <47156013+hl-anna@users.noreply.github.com>
Date: Sun, 25 Apr 2021 23:08:39 +0100
Subject: [PATCH 103/111] Delete nystrom_generic.py

---
 pykeops/common/nystrom_generic.py | 286 ------------------------------
 1 file changed, 286 deletions(-)
 delete mode 100644 pykeops/common/nystrom_generic.py

diff --git a/pykeops/common/nystrom_generic.py b/pykeops/common/nystrom_generic.py
deleted file mode 100644
index 99651c6b5..000000000
--- a/pykeops/common/nystrom_generic.py
+++ /dev/null
@@ -1,286 +0,0 @@
-import numpy as np
-import pykeops
-from typing import TypeVar, Union, Tuple
-import warnings
-
-# Generic placeholder for numpy and torch variables.
-generic_array = TypeVar("generic_array")
-GenericLazyTensor = TypeVar("GenericLazyTensor")
-
-
-class GenericNystrom:
-    """Super class defining the Nystrom operations. The end user should
-    use numpy.nystrom or torch.nystrom subclasses."""
-
-    def __init__(
-        self,
-        n_components: int = 100,
-        kernel: Union[str, callable] = "rbf",
-        sigma: float = None,
-        eps: float = 0.05,
-        mask_radius: float = None,
-        k_means: int = 10,
-        n_iter: int = 10,
-        inv_eps: float = None,
-        verbose: bool = False,
-        random_state: Union[None, int] = None,
-        tools=None,
-    ):
-
-        """
-        n_components = how many samples to select from data.
-        kernel = type of kernel to use. Current options = {rbf:Gaussian,
-        exp: exponential}.
-        sigma = exponential constant for the RBF and exponential kernels.
-        eps = size for square bins in block-sparse preprocessing.
-        k_means = number of centroids for KMeans algorithm in block-sparse
-        preprocessing.
-        n_iter = number of iterations for KMeans.
-        dtype = type of data: np.float32 or np.float64
-        inv_eps = additive invertibility constant for matrix decomposition.
-        verbose = set True to print details.
-        random_state = to set a random seed for the random sampling of the samples.
-        To be used when reproducibility is needed.
- """ - self.n_components = n_components - self.kernel = kernel - self.sigma = sigma - self.eps = eps - self.mask_radius = mask_radius - self.k_means = k_means - self.n_iter = n_iter - self.dtype = None - self.verbose = verbose - self.random_state = random_state - self.tools = None - self.LazyTensor = None - - self.device = "cuda" if pykeops.config.gpu_available else "cpu" - - if inv_eps: - self.inv_eps = inv_eps - else: - self.inv_eps = 1e-8 - - def fit(self, x: generic_array) -> "GenericNystrom": - """ - Args: x = array or tensor of shape (n_samples, n_features) - Returns: Fitted instance of the class - """ - x = self._to_device(x) - self.dtype = x.dtype - - # Basic checks - assert self.tools.is_tensor( - x - ), "Input to fit(.) must be an array\ - if using numpy and tensor if using torch." - assert ( - x.shape[0] >= self.n_components - ), "The application needs\ - X.shape[0] >= n_components." - if self.kernel == "exp" and not (self.sigma is None): - assert self.sigma > 0, "Should be working with decaying exponential." - - # Set default sigma - # if self.sigma is None and self.kernel == 'rbf': - if self.sigma is None: - self.sigma = np.sqrt(x.shape[1]) - - if self.mask_radius is None: - if self.kernel == "rbf": - # TODO get mask_radius correct - self.mask_radius = 8 * self.sigma - elif self.kernel == "exp": - self.mask_radius = 8 * self.sigma - - # Update dtype - self._update_dtype(x) - # Number of samples - n_samples = x.shape[0] - # Define basis - rnd = self._check_random_state(self.random_state) - inds = rnd.permutation(n_samples) - basis_inds = inds[: self.n_components] - basis = x[basis_inds] - # Build smaller kernel - basis_kernel = self._pairwise_kernels(basis, dense=True) - # Decomposition is an abstract method that needs to be defined in each class - self.normalization_ = self._decomposition_and_norm(basis_kernel) - self.components_ = basis - self.component_indices_ = inds - - return self - - def _decomposition_and_norm(self, X: GenericLazyTensor): - """ - To be defined in the subclass - """ - raise NotImplementedError( - "Subclass must implement the method _decomposition_and_norm." - ) - - def transform(self, x: generic_array, dense=True) -> generic_array: - """ - Applies transform on the data mapping it to the feature space - which supports the approximated kernel. 
- Args: - X = data to transform - Returns - X = data after transformation - """ - if type(x) == np.ndarray and not dense: - warnings.warn("For Numpy transform it is best to use dense=True") - - x = self._to_device(x) - K_nq = self._pairwise_kernels(x, self.components_, dense=dense) - x_new = K_nq @ self.normalization_ - return x_new - - def _pairwise_kernels(self, x: generic_array, y: generic_array = None, dense=False): - """Helper function to build kernel - Args: x[np.array or torch.tensor] = data - y[np.array or torch.tensor] = array/tensor - dense[bool] = False to work with lazy tensor reduction, - True to work with dense arrays/tensors - Returns: - K_ij[LazyTensor] if dense = False - K_ij[np.array or torch.tensor] if dense = True - """ - - if y is None: - y = x - x = x / self.sigma - y = y / self.sigma - - x_i, x_j = ( - self.tools.contiguous(self._to_device(x[:, None, :])), - self.tools.contiguous(self._to_device(y[None, :, :])), - ) - - if self.kernel == "rbf": - if dense: - D_ij = ((x_i - x_j) ** 2).sum(axis=2) - K_ij = self.tools.exp(-D_ij) - - else: - x_i, x_j = self.LazyTensor(x_i), self.LazyTensor(x_j) - D_ij = ((x_i - x_j) ** 2).sum(dim=2) - K_ij = (-D_ij).exp() - - # block-sparse reduction preprocess - K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) - elif self.kernel == "exp": - if dense: - K_ij = self.tools.exp( - -self.tools.sqrt((((x_i - x_j) ** 2).sum(axis=2))) - ) - - else: - x_i, x_j = self.LazyTensor(x_i), self.LazyTensor(x_j) - K_ij = (-(((x_i - x_j) ** 2).sum(-1)).sqrt()).exp() - - # block-sparse reduction preprocess - K_ij = self._Gauss_block_sparse_pre(x, y, K_ij) - - # computation with custom kernel - else: - print("Please note that computations on custom kernels are dense-only.") - K_ij = self.kernel(x_i, x_j) - - return K_ij - - def _Gauss_block_sparse_pre( - self, x: generic_array, y: generic_array, K_ij: GenericLazyTensor - ): - """ - Helper function to preprocess data for block-sparse reduction - of the Gaussian kernel - Args: - x, y = arrays or tensors giving rise to Gaussian kernel K(x,y) - K_ij = symbolic representation of K(x,y) - eps[float] = size for square bins - Returns: - K_ij = symbolic representation of K(x,y) with - set sparse ranges - """ - x = self._to_device(x) - y = self._to_device(y) - # labels for low dimensions - if x.shape[1] < 4 or y.shape[1] < 4: - x_labels = self.tools.grid_cluster(x, self.eps) - y_labels = self.tools.grid_cluster(y, self.eps) - - # range and centroid per class - x_ranges, x_centroids, _ = self.tools.cluster_ranges_centroids(x, x_labels) - y_ranges, y_centroids, _ = self.tools.cluster_ranges_centroids(y, y_labels) - - else: - # labels for higher dimensions - x_labels, x_centroids = self._KMeans(x) - y_labels, y_centroids = self._KMeans(y) - # compute ranges - x_ranges = self.tools.cluster_ranges(x_labels) - y_ranges = self.tools.cluster_ranges(y_labels) - - # sort points - x, x_labels = self.tools.sort_clusters(x, x_labels) - y, y_labels = self.tools.sort_clusters(y, y_labels) - - # Compute a coarse Boolean mask: - if self.kernel == "rbf": - D = self.tools.arraysum( - (x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2 - ) - - elif self.kernel == "exp": - D = self.tools.sqrt( - self.tools.arraysum( - (x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2 - ) - ) - - keep = D < (self.mask_radius) ** 2 - # mask -> set of integer tensors - ranges_ij = self.tools.from_matrix(x_ranges, y_ranges, keep) - K_ij.ranges = ranges_ij # block-sparsity pattern - - return K_ij - - def _astype(self, data, type): - return 
data - - def _to_device(self, data): - return data - - def _update_dtype(self, x): - """Helper function that sets dtype to that of - the given data in the fitting step. - Args: - x [np.array or torch.tensor] = raw data to remap - Returns: - None - """ - self.dtype = x.dtype - self.inv_eps = np.array([self.inv_eps]).astype(self.dtype)[0] - - def _check_random_state(self, seed: Union[None, int]) -> None: - """Set/get np.random.RandomState instance for permutation - Args - seed[None, int] - Returns: - numpy random state - """ - - if seed is None: - return np.random.mtrand._rand - - elif type(seed) == int: - return np.random.RandomState(seed) - - raise ValueError(f"Seed {seed} must be None or an integer.") - - def _KMeans(self, x: generic_array) -> Tuple[generic_array]: - """K-means algorithm to find clusters for preprocessing""" - - raise NotImplementedError("Subclass must implement this method.") From ce55836280dd48fa77e1750f397f9220473468ea Mon Sep 17 00:00:00 2001 From: hl-anna <47156013+hl-anna@users.noreply.github.com> Date: Sun, 25 Apr 2021 23:08:58 +0100 Subject: [PATCH 104/111] Delete nystrom.py --- pykeops/torch/nystrom/nystrom.py | 114 ------------------------------- 1 file changed, 114 deletions(-) delete mode 100644 pykeops/torch/nystrom/nystrom.py diff --git a/pykeops/torch/nystrom/nystrom.py b/pykeops/torch/nystrom/nystrom.py deleted file mode 100644 index 34abd37ad..000000000 --- a/pykeops/torch/nystrom/nystrom.py +++ /dev/null @@ -1,114 +0,0 @@ -import torch - -from pykeops.common.nystrom_generic import GenericNystrom - - -class Nystrom(GenericNystrom): - def __init__( - self, - n_components=100, - kernel="rbf", - sigma: float = None, - eps: float = 0.05, - mask_radius: float = None, - k_means=10, - n_iter: int = 10, - inv_eps: float = None, - verbose=False, - random_state=None, - tools=None, - ): - super().__init__( - n_components, - kernel, - sigma, - eps, - mask_radius, - k_means, - n_iter, - inv_eps, - verbose, - random_state, - ) - - from pykeops.torch.utils import torchtools - - self.tools = torchtools - self.verbose = verbose - from pykeops.torch import LazyTensor - - self.LazyTensor = LazyTensor - - def _update_dtype(self, x): - pass - - def _to_device(self, x): - return x.to(self.device) - - def _decomposition_and_norm(self, basis_kernel): - """Function to return self.nomalization_ used in fit(.) function - Args: - basis_kernel[torch LazyTensor] = subset of input data - Returns: - self.normalization_[torch.tensor] X_q is the q x D-dimensional sub matrix of matrix X - """ - basis_kernel = basis_kernel.to( - self.device - ) # dim: num_components x num_components - U, S, V = torch.linalg.svd(basis_kernel, full_matrices=False) - S = torch.maximum(S, torch.ones(S.size()).to(self.device) * 1e-12) - return torch.mm(U / torch.sqrt(S), V) # dim: num_components x num_components - - def K_approx(self, X: torch.tensor) -> "K_approx operator": - """Function to return Nystrom approximation to the kernel. - Args: - X = data used in fit(.) function. - Returns - K_approx = Nystrom approximation to kernel which can be applied - downstream as K_approx @ v for some 1d tensor v""" - - K_nq = self._pairwise_kernels(X, self.components_, dense=False) - K_approx = K_approx_operator(K_nq, self.normalization_) - return K_approx - - def _KMeans(self, x: torch.tensor): - """KMeans with Pykeops to do binning of original data. 
- Args: - x = data - Returns: - labels[np.array] = class labels for each point in x - clusters[np.array] = coordinates for each centroid - """ - - N, D = x.shape - clusters = torch.clone(x[: self.k_means, :]) # initialization of clusters - x_i = LazyTensor(x[:, None, :]) - - for i in range(self.n_iter): - - clusters_j = LazyTensor(clusters[None, :, :]) - D_ij = ((x_i - clusters_j) ** 2).sum(-1) # points-clusters kernel - labels = D_ij.argmin(axis=1).reshape(N) # Points -> Nearest cluster - Ncl = torch.bincount(labels) # Class weights - for d in range(D): # Compute the cluster centroids with np.bincount: - clusters[:, d] = torch.bincount(labels, weights=x[:, d]) / Ncl - - return labels, clusters - - -class K_approx_operator: - """Helper class to return K_approx as an object - compatible with @ symbol""" - - def __init__(self, K_nq, normalization): - - self.K_nq = K_nq # dim: number of samples x num_components - self.K_nq.backend = "GPU_2D" - self.normalization = normalization - - def __matmul__(self, x: torch.tensor) -> torch.tensor: - - x = self.K_nq.T @ x - x = self.normalization @ self.normalization.T @ x - x = self.K_nq @ x - return x From e3b2f0798b8dd0f76976bf44fdab0d18514c5c36 Mon Sep 17 00:00:00 2001 From: hl-anna <47156013+hl-anna@users.noreply.github.com> Date: Sun, 25 Apr 2021 23:09:31 +0100 Subject: [PATCH 105/111] Delete Nystrom.py --- pykeops/numpy/nystrom/Nystrom.py | 115 ------------------------------- 1 file changed, 115 deletions(-) delete mode 100644 pykeops/numpy/nystrom/Nystrom.py diff --git a/pykeops/numpy/nystrom/Nystrom.py b/pykeops/numpy/nystrom/Nystrom.py deleted file mode 100644 index 3209cc91c..000000000 --- a/pykeops/numpy/nystrom/Nystrom.py +++ /dev/null @@ -1,115 +0,0 @@ -import numpy as np - -from scipy.linalg import eigh -from scipy.sparse.linalg import aslinearoperator - -from pykeops.common.nystrom_generic import GenericNystrom - -from typing import Tuple, List - - -class Nystrom(GenericNystrom): - """Nystrom class to work with Numpy arrays""" - - def __init__( - self, - n_components=100, - kernel="rbf", - sigma: float = None, - eps: float = 0.05, - mask_radius: float = None, - k_means=10, - n_iter: int = 10, - inv_eps: float = None, - verbose=False, - random_state=None, - eigvals: List[int] = None, - ): - - """ - Args: - eigvals = eigenvalues index interval [a,b] for constructed K_q, - where 0 <= a < b < length of K_q - - """ - super().__init__( - n_components, - kernel, - sigma, - eps, - mask_radius, - k_means, - n_iter, - inv_eps, - verbose, - random_state, - ) - from pykeops.numpy.utils import numpytools - - self.tools = numpytools - from pykeops.numpy import LazyTensor - - self.LazyTensor = LazyTensor - self.eigvals = eigvals - - if eigvals: - assert eigvals[0] < eigvals[1], "eigvals = [a,b] needs a < b" - assert ( - eigvals[1] < n_components - ), "max eigenvalue index needs to be less\ - than size of K_q = n_components" - - def _decomposition_and_norm(self, X: np.array) -> np.array: - """Computes K_q^{-1/2}""" - - X = X + np.eye(X.shape[0], dtype=self.dtype) * self.inv_eps - S, U = eigh(X, eigvals=self.eigvals) - S = np.maximum(S, 1e-12) - - return np.dot(U / np.sqrt(S), U.T) - - def K_approx(self, x: np.array) -> "LinearOperator": - """Function to return Nystrom approximation to the kernel. - - Args: - x = data used in fit(.) function. 
- Returns - K = Nystrom approximation to kernel""" - - K_nq = self._pairwise_kernels(x, self.components_, dense=False) - K_nq.backend = "GPU_2D" - K_nq = aslinearoperator(K_nq) - K_q_inv = aslinearoperator(self.normalization_).T @ aslinearoperator( - self.normalization_ - ) - K_approx = K_nq @ K_q_inv @ K_nq.T - return K_approx - - def _astype(self, data, d_type): - return data.astype(d_type) - - # Note: _KMeans will be imported from utils soon - def _KMeans(self, x: np.array) -> Tuple[np.array]: - """KMeans with Pykeops to do binning of original data. - Args: - x = data - Returns: - labels = class labels for each point in x - clusters = coordinates for each centroid - """ - N, D = x.shape - clusters = np.copy(x[: self.k_means, :]) # initialization of clusters - x_i = LazyTensor(x[:, None, :]) - - for i in range(self.n_iter): - - clusters_j = LazyTensor(clusters[None, :, :]) - D_ij = ((x_i - clusters_j) ** 2).sum(-1) # points-clusters kernel - labels = ( - D_ij.argmin(axis=1).astype(int).reshape(N) - ) # Points -> Nearest cluster - Ncl = np.bincount(labels).astype(self.dtype) # Class weights - for d in range(D): # Compute the cluster centroids with np.bincount: - clusters[:, d] = np.bincount(labels, weights=x[:, d]) / Ncl - - return labels, clusters From 0d2eba7f2f8737a02a5da7aa33d0e0e092aff274 Mon Sep 17 00:00:00 2001 From: hl-anna <47156013+hl-anna@users.noreply.github.com> Date: Sun, 25 Apr 2021 23:52:55 +0100 Subject: [PATCH 106/111] Rename nystroem.py to nystrom.py --- pykeops/numpy/nystrom/{nystroem.py => nystrom.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pykeops/numpy/nystrom/{nystroem.py => nystrom.py} (100%) diff --git a/pykeops/numpy/nystrom/nystroem.py b/pykeops/numpy/nystrom/nystrom.py similarity index 100% rename from pykeops/numpy/nystrom/nystroem.py rename to pykeops/numpy/nystrom/nystrom.py From cd3fb2dc3eb48faa87879c6bb8156ee7ab95a258 Mon Sep 17 00:00:00 2001 From: hl-anna <47156013+hl-anna@users.noreply.github.com> Date: Sun, 25 Apr 2021 23:53:32 +0100 Subject: [PATCH 107/111] Rename nystroem.py to nystrom.py --- pykeops/torch/nystrom/{nystroem.py => nystrom.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pykeops/torch/nystrom/{nystroem.py => nystrom.py} (100%) diff --git a/pykeops/torch/nystrom/nystroem.py b/pykeops/torch/nystrom/nystrom.py similarity index 100% rename from pykeops/torch/nystrom/nystroem.py rename to pykeops/torch/nystrom/nystrom.py From 8c4e079bf48276c910d16c67fc6c554c6a93d7b9 Mon Sep 17 00:00:00 2001 From: hl-anna <47156013+hl-anna@users.noreply.github.com> Date: Sun, 25 Apr 2021 23:53:50 +0100 Subject: [PATCH 108/111] Rename nystroem_generic.py to nystrom_generic.py --- pykeops/common/{nystroem_generic.py => nystrom_generic.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pykeops/common/{nystroem_generic.py => nystrom_generic.py} (100%) diff --git a/pykeops/common/nystroem_generic.py b/pykeops/common/nystrom_generic.py similarity index 100% rename from pykeops/common/nystroem_generic.py rename to pykeops/common/nystrom_generic.py From bd4bf4326cf7fdc7b3e82c8fa19c8b7d94e3206e Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Tue, 27 Apr 2021 15:39:09 +0100 Subject: [PATCH 109/111] rename --- pykeops/numpy/__init__.py | 2 +- pykeops/numpy/nystrom/nystrom.py | 2 +- pykeops/numpy/utils.py | 41 ++++++++++++++++---------------- pykeops/torch/nystrom/nystrom.py | 2 +- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/pykeops/numpy/__init__.py 
b/pykeops/numpy/__init__.py index 77f4aba88..61a0ebc61 100644 --- a/pykeops/numpy/__init__.py +++ b/pykeops/numpy/__init__.py @@ -5,7 +5,7 @@ ########################################################## # Import pyKeOps routines -from .nystrom.Nystrom import Nystrom +from .nystrom.nystrom import Nystrom from .knn.ivf import IVF from .generic.generic_red import Genred from .operations import KernelSolve diff --git a/pykeops/numpy/nystrom/nystrom.py b/pykeops/numpy/nystrom/nystrom.py index f90ce599f..f4f0acbc6 100644 --- a/pykeops/numpy/nystrom/nystrom.py +++ b/pykeops/numpy/nystrom/nystrom.py @@ -8,7 +8,7 @@ from typing import List -class Nystroem(GenericNystroem): +class Nystrom(GenericNystroem): """ Nystroem class to work with Numpy arrays. """ diff --git a/pykeops/numpy/utils.py b/pykeops/numpy/utils.py index 015191640..05efe2c98 100644 --- a/pykeops/numpy/utils.py +++ b/pykeops/numpy/utils.py @@ -214,26 +214,27 @@ def kmeans(x, distance=None, K=10, Niter=15, device="CPU", approx=False, n=0): @staticmethod def knn_accuracy(indices_test, indices_truth): - """ - Compares the test and ground truth indices (rows = KNN for each point in dataset) - Returns the accuracy or proportion of correct nearest neighbours - - Args: - indices_test ((N, K) array): K indices obtained from approximate NN search - indices_truth ((N, K) array): K indices obtained from exact NN search - """ - N, k = indices_test.shape - - # Calculate number of correct nearest neighbours - accuracy = 0 - for i in range(k): - accuracy += float(np.sum(indices_test == indices_truth)) / N - indices_truth = np.roll( - indices_truth, 1, -1 - ) # Create a rolling window (index positions may not match) - accuracy = float(accuracy / k) # percentage accuracy - - return accuracy + """ + Compares the test and ground truth indices (rows = KNN for each point in dataset) + Returns the accuracy or proportion of correct nearest neighbours + + Args: + indices_test ((N, K) array): K indices obtained from approximate NN search + indices_truth ((N, K) array): K indices obtained from exact NN search + """ + N, k = indices_test.shape + + # Calculate number of correct nearest neighbours + accuracy = 0 + for i in range(k): + accuracy += float(np.sum(indices_test == indices_truth)) / N + indices_truth = np.roll( + indices_truth, 1, -1 + ) # Create a rolling window (index positions may not match) + accuracy = float(accuracy / k) # percentage accuracy + + return accuracy + def squared_distances(x, y): x_norm = (x ** 2).sum(1).reshape(-1, 1) diff --git a/pykeops/torch/nystrom/nystrom.py b/pykeops/torch/nystrom/nystrom.py index b4ece0861..d75de024f 100644 --- a/pykeops/torch/nystrom/nystrom.py +++ b/pykeops/torch/nystrom/nystrom.py @@ -3,7 +3,7 @@ from pykeops.common.nystrom_generic import GenericNystroem -class Nystroem(GenericNystroem): +class Nystrom(GenericNystroem): """ Nystroem class to work with Pytorch tensors. 
""" From 8778cbee70f823f691b0b0ccca9484dd28a29221 Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Wed, 28 Apr 2021 16:10:19 +0100 Subject: [PATCH 110/111] shifting import --- pykeops/torch/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pykeops/torch/__init__.py b/pykeops/torch/__init__.py index 9ce0ea806..6262857e2 100644 --- a/pykeops/torch/__init__.py +++ b/pykeops/torch/__init__.py @@ -27,8 +27,8 @@ ########################################################## # Import pyKeOps routines -from .nystrom.nystrom import Nystrom from .knn.ivf import IVF +from .nystrom.nystrom import Nystrom from .generic.generic_red import Genred from .generic.generic_ops import ( generic_sum, @@ -41,8 +41,8 @@ __all__ = sorted( [ - "Nystrom", "IVF", + "Nystrom", "Genred", "generic_sum", "generic_logsumexp", From ce368287e8577e83c56e524a8569ee7cda9da57a Mon Sep 17 00:00:00 2001 From: Hudson Yeo <44338416+huddyyeo@users.noreply.github.com> Date: Wed, 28 Apr 2021 17:17:29 +0100 Subject: [PATCH 111/111] add packages --- setup.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup.py b/setup.py index 55597515f..936610406 100644 --- a/setup.py +++ b/setup.py @@ -103,12 +103,16 @@ def import_files(dirname, ext=["h", "hpp"]): "pykeops.numpy.generic", "pykeops.numpy.lazytensor", "pykeops.numpy.shape_distance", + "pykeops.numpy.knn", + "pykeops.numpy.nystrom", "pykeops.test", "pykeops.torch", "pykeops.torch.cluster", "pykeops.torch.generic", "pykeops.torch.lazytensor", "pykeops.torch.kernel_product", + "pykeops.torch.knn", + "pykeops.torch.nystrom", ], package_data={ "pykeops": [