diff --git a/qstack/regression/local_kernels.py b/qstack/regression/local_kernels.py
index bb076b2..f1d746d 100644
--- a/qstack/regression/local_kernels.py
+++ b/qstack/regression/local_kernels.py
@@ -2,17 +2,83 @@
 
 Provides:
     local_kernels_dict: Dictionary mapping kernel names to their implementations.
+    RAM_BATCHING_SIZE: Max. RAM (in bytes) that can be used for batched computation
+        of Manhattan distance matrix for L_custom_py kernel. Can be modified before call
+        using `local_kernels.RAM_BATCHING_SIZE = ...`.
 """
 
 import os
 import ctypes
 import sysconfig
 import warnings
+import itertools
 import numpy as np
 import sklearn.metrics.pairwise as _SKLEARN_PAIRWISE
 from qstack.regression import __path__ as REGMODULE_PATH
 
 
+RAM_BATCHING_SIZE = 1024**3 * 5 # 5GiB
+
+
+def compute_distance_matrix(R1, R2):
+    """Compute the Manhattan-distance matrix.
+
+    This computes (||r_1 - r_2||_1) between the samples of R1 and R2,
+    using a batched python/numpy implementation,
+    designed to be more memory-efficient than a single numpy call and faster than a simple python for loop.
+
+    This function is a batched-over-R1 implementation of the following code:
+    `return np.sum(np.abs(R1[:,None, ...]-R2[None,:, ...]), axis=tuple(range(2, R1.ndim)))`
+
+    Args:
+        R1 (numpy ndarray): First set of samples (can be multi-dimensional).
+        R2 (numpy ndarray): Second set of samples.
+
+    Returns:
+        numpy ndarray: Manhattan-distance matrix of shape (len(R1), len(R2)).
+
+    Raises:
+        RuntimeError: If R1 and R2 have incompatible shapes.
+    """
+    if R1.ndim != R2.ndim or R1.shape[1:] != R2.shape[1:]:
+        raise RuntimeError(f'incompatible shapes for R1 ({R1.shape!r}) and R2 ({R2.shape!r})')
+
+    # determine batch size (batch should divide the larger dimension)
+    if R1.shape[0] < R2.shape[0]:
+        transpose_flag = True
+        R2,R1 = R1,R2
+    else:
+        transpose_flag = False
+    dtype=np.result_type(R1,R2)
+    out = np.zeros((R1.shape[0], R2.shape[0]), dtype=dtype)
+
+    # possible weirdness: how is the layout of dtype done if dtype.alignment != dtype.itemsize?
+    batch_size = int(np.floor(RAM_BATCHING_SIZE/ (dtype.itemsize * np.prod(R2.shape))))
+
+    if batch_size == 0:
+        batch_size = 1
+
+    if min(R1.shape[0],R2.shape[0]) == 0 or batch_size >= R1.shape[0]:
+        dists = R1[:,None]-R2[None,:]
+        #np.power(dists, 2, out=dists) # For (squared) Euclidean distance
+        np.abs(dists, out=dists)
+        np.sum(dists, out=out, axis=tuple(range(2,dists.ndim)))
+    else:
+        dists = np.zeros((batch_size, *R2.shape), dtype=dtype)
+        batch_limits = np.minimum(np.arange(0, R1.shape[0]+batch_size, step=batch_size), R1.shape[0])
+        for batch_start, batch_end in itertools.pairwise(batch_limits):
+            dists_view = dists[:batch_end-batch_start]
+            R1_view = R1[batch_start:batch_end, None, ...]
+            np.subtract(R1_view, R2[None,:], out=dists_view)
+            #np.power(dists_view, 2, out=dists_view) # For (squared) Euclidean distance
+            np.abs(dists_view, out=dists_view)
+            np.sum(dists_view, out=out[batch_start:batch_end], axis=tuple(range(2,dists.ndim)))
+
+    if transpose_flag:
+        out = out.T
+    return out
+
+
 def custom_laplacian_kernel(X, Y, gamma):
     """Compute Laplacian kernel between X and Y using Python implementation.
 
@@ -31,15 +97,7 @@ def custom_laplacian_kernel(X, Y, gamma):
     """
     if X.shape[1:] != Y.shape[1:]:
         raise RuntimeError(f"Incompatible shapes {X.shape} and {Y.shape}")
-    def cdist(X, Y):
-        K = np.zeros((len(X),len(Y)))
-        for i,x in enumerate(X):
-            x = np.array([x] * len(Y))
-            d = np.abs(x-Y)
-            d = np.sum(d, axis=tuple(range(1, len(d.shape))))
-            K[i,:] = d
-        return K
-    K = -gamma * cdist(X, Y)
+    K = -gamma * compute_distance_matrix(X,Y)
     np.exp(K, out=K)
     return K
 
diff --git a/setup.py b/setup.py
index 5f97a4b..0d090ec 100644
--- a/setup.py
+++ b/setup.py
@@ -1,15 +1,22 @@
-from setuptools import setup, find_packages, Extension
+from setuptools import setup, Extension
 import subprocess
-import os, tempfile, shutil
+import os
+import tempfile
+import shutil
+
+# ruff: noqa: S607 # look, if people are installing qstack while using a borked PATH, that's on them
+# ruff: noqa: D100 # this is a setup.py file, no docstring is needed
 
 VERSION="0.0.1"
 
 def get_git_version_hash():
     """Get tag/hash of the latest commit.
-    Thanks to https://gist.github.com/nloadholtes/07a1716a89b53119021592a1d2b56db8"""
+
+    Thanks to https://gist.github.com/nloadholtes/07a1716a89b53119021592a1d2b56db8
+    """
     try:
         p = subprocess.Popen(["git", "describe", "--tags", "--dirty", "--always"], stdout=subprocess.PIPE)
-    except EnvironmentError:
+    except OSError:
        return VERSION + "+unknown"
     version = p.communicate()[0]
     print(version)
@@ -17,8 +24,10 @@ def get_git_version_hash():
 
 
 def check_for_openmp():
-    """Check if there is OpenMP available
-    Thanks to https://stackoverflow.com/questions/16549893/programatically-testing-for-openmp-support-from-a-python-setup-script"""
+    """Check if there is OpenMP available.
+
+    Thanks to https://stackoverflow.com/questions/16549893/programatically-testing-for-openmp-support-from-a-python-setup-script
+    """
     omp_test = 'void main() { }'
     tmpdir = tempfile.mkdtemp()
     curdir = os.getcwd()
@@ -27,7 +36,7 @@ def check_for_openmp():
     with open(filename, 'w') as file:
         file.write(omp_test)
     with open(os.devnull, 'w') as fnull:
-        result = subprocess.call(['cc', '-fopenmp', '-lgomp', filename], stdout=fnull, stderr=fnull)
+        result = subprocess.call(['cc', '-fopenmp', '-lgomp', filename], stdout=fnull, stderr=fnull)  # noqa: S603 (filename is hard-coded earlier in this function)
     os.chdir(curdir)
     shutil.rmtree(tmpdir)
     return not result
@@ -41,6 +50,6 @@
     ext_modules=[Extension('qstack.regression.lib.manh',
                            ['qstack/regression/lib/manh.c'],
                            extra_compile_args=['-fopenmp', '-std=gnu11'] if openmp_enabled else ['-std=gnu11'],
-                           extra_link_args=['-lgomp'] if openmp_enabled else [])
+                           extra_link_args=['-lgomp'] if openmp_enabled else []),
                ],
     )
diff --git a/tests/test_kernels.py b/tests/test_kernels.py
index 69d546f..397fe97 100755
--- a/tests/test_kernels.py
+++ b/tests/test_kernels.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 import numpy as np
-from qstack.regression import kernel
+from qstack.regression import kernel, local_kernels
 
 
 def test_local_kernels():
@@ -44,5 +44,29 @@ def test_local_kernels():
     assert np.allclose(K, K_cos_good)
 
 
+def test_batched_local_kernels():
+    X = np.array([[0.70043712, 0.84418664, 0.67651434, 0.72785806], [0.95145796, 0.0127032 , 0.4135877 , 0.04881279]])
+    Y = np.array([[0.09992856, 0.50806631, 0.20024754, 0.74415417], [0.192892 , 0.70084475, 0.29322811, 0.77447945]])
+    K_L_good = np.array([[0.48938983, 0.58251676], [0.32374891, 0.31778924]])
+
+    X_huge = np.tile(X, (1000,1000))
+    Y_huge = np.tile(Y, (50,1000))
+    K_L_good_huge = np.tile(K_L_good, (1000,50))
+
+    # shrink the RAM budget so the batched code path is actually exercised,
+    # and restore it afterwards so later tests see the module default again
+    saved_batching_size = local_kernels.RAM_BATCHING_SIZE
+    local_kernels.RAM_BATCHING_SIZE = 1024**2 * 50 # 50MiB
+    try:
+        K = kernel.kernel(X_huge, Y_huge, akernel='L_custom_py', sigma=2.0*1000)
+        assert np.allclose(K, K_L_good_huge)
+
+        K = kernel.kernel(X_huge.reshape((-1, 50, 80)), Y_huge.reshape((-1, 50, 80)), akernel='L_custom_py', sigma=2.0*1000)
+        assert np.allclose(K, K_L_good_huge)
+    finally:
+        local_kernels.RAM_BATCHING_SIZE = saved_batching_size
+
+
 if __name__ == '__main__':
     test_local_kernels()
+    test_batched_local_kernels()