diff --git a/README.md b/README.md
index 2482ddb..7f5c5ef 100644
--- a/README.md
+++ b/README.md
@@ -18,8 +18,14 @@ R = np.array([
     [0, 1, 5, 4],
 ])
 
+
+# Because there are many dot products to compute during training and prediction, the GPU can be used for this task.
+# use_gpu = False/True: set to True to run the dot products on the GPU.
+
+
+
 # Perform training and obtain the user and item matrices
-mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=20)
+mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=20, use_gpu=False)
 training_process = mf.train()
 print(mf.P)
 print(mf.Q)
@@ -43,5 +49,33 @@ print(mf.full_matrix())
 [ 1.0064803   1.00498576  2.37696737  4.98530109]
 [ 1.00999456  0.59175173  2.58437035  3.99597255]
 [ 2.26471556  1.01985428  4.9871617   3.9942251 ]]
+
 '''
+
+
+
+# Perform training and obtain the user and item matrices, using the GPU for the multiplications
+mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=20, use_gpu=True)
+
+
+# Prints the following:
+'''
+
+[[-1.45063442 -0.61549057]
+ [-1.18644282 -0.25861855]
+ [ 0.98953876  0.41593886]
+ [ 0.83830212 -0.55288053]
+ [-0.50317654  1.14994057]]
+
+[[-1.37430118  0.245039  ]
+ [ 0.02231156 -1.12899481]
+ [-0.98101813  0.33079953]
+ [ 1.65238654  0.79608774]]
+
+[[4.993465   2.98861347 5.3236645  1.01310464]
+ [3.99121154 1.86497915 4.45591865 1.00713051]
+ [1.01303675 0.99894327 2.39137093 4.98668646]
+ [1.01104568 2.11692377 2.24681459 3.99308259]
+ [4.1289681  1.02158625 4.98319103 3.98910547]]
+'''
 ```
diff --git a/mf.py b/mf.py
index bddd9ce..56bff67 100644
--- a/mf.py
+++ b/mf.py
@@ -1,9 +1,15 @@
+import pycuda.autoinit
+import pycuda.gpuarray as gpuarray
+import skcuda.linalg as linalg
 import numpy as np
+import math
+import skcuda.misc as misc
+
 
 
 class MF():
 
-    def __init__(self, R, K, alpha, beta, iterations):
+    def __init__(self, R, K, alpha, beta, iterations, use_gpu):
         """
         Perform matrix factorization to predict empty entries in a matrix.
@@ -13,8 +19,10 @@ def __init__(self, R, K, alpha, beta, iterations):
         - K (int)       : number of latent dimensions
         - alpha (float) : learning rate
         - beta (float)  : regularization parameter
+        - use_gpu (bool): whether to run the dot products on the GPU
         """
 
+        self.use_gpu = use_gpu
         self.R = R
         self.num_users, self.num_items = R.shape
         self.K = K
@@ -85,13 +93,47 @@ def sgd(self):
 
     def get_rating(self, i, j):
         """
-        Get the predicted rating of user i and item j
+        Get the predicted rating of user i and item j; depending on the value of 'use_gpu', the dot product is computed on the CPU or the GPU
         """
-        prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
+
+        if self.use_gpu:
+
+            # initialize the scikit-cuda linear algebra backend
+            linalg.init()
+
+            # copy the factor matrices to the GPU
+            p_gpu = gpuarray.to_gpu(self.P)
+            q_gpu = gpuarray.to_gpu(self.Q)
+
+            # for 1-D operands, linalg.dot computes the inner product of the two row vectors
+            prediction = self.b + self.b_u[i] + self.b_i[j] + linalg.dot(p_gpu[i, :], q_gpu[j, :])
+
+        else:
+
+            prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
+
         return prediction
 
     def full_matrix(self):
         """
         Computer the full matrix using the resultant biases, P and Q
         """
-        return self.b + self.b_u[:,np.newaxis] + self.b_i[np.newaxis:,] + self.P.dot(self.Q.T)
+
+        if self.use_gpu:
+
+            # initialize the scikit-cuda linear algebra backend
+            linalg.init()
+
+            # copy the factor matrices to the GPU
+            p_gpu = gpuarray.to_gpu(self.P)
+            q_gpu = gpuarray.to_gpu(self.Q)
+
+            # transb='T' tells linalg.dot to use the transpose of the second argument, q_gpu
+            fullMatrix = self.b + self.b_u[:, np.newaxis] + self.b_i[np.newaxis, :] + linalg.dot(p_gpu, q_gpu, transb='T').get()
+
+        else:
+
+            fullMatrix = self.b + self.b_u[:, np.newaxis] + self.b_i[np.newaxis, :] + self.P.dot(self.Q.T)
+
+        return fullMatrix
diff --git a/requirements.md b/requirements.md
new file mode 100644
index 0000000..c1d1299
--- /dev/null
+++ b/requirements.md
@@ -0,0 +1,53 @@
+Prerequisite libraries:
+
+
+----> math (part of the Python standard library, no installation required)
+
+    import math
+
+
+----> numpy
+
+    pip: pip install numpy
+
+    anaconda: conda install -c anaconda numpy
+
+
+----> scikit-cuda (skcuda)
+
+    pip: pip install scikit-cuda
+
+    anaconda: conda install -c lukepfister scikits.cuda
+
+    Dependencies:
+
+    - Python 2.7 or 3.4.
+
+    - Setuptools 0.6c10 or later.
+
+    - Mako 1.0.1 or later.
+
+    - NumPy 1.2.0 or later.
+
+    - PyCUDA 2016.1 or later (some parts of scikit-cuda might not work properly with earlier versions).
+
+    - NVIDIA CUDA Toolkit 5.0 or later.
+
+
+----> pycuda
+
+    pip: pip install pycuda
+
+    anaconda: conda install -c lukepfister pycuda
+
+    Dependencies:
+
+    - NVIDIA's CUDA Toolkit. PyCUDA was developed against version 2.0 beta; it may work with other versions, too.
+
+    - A C++ compiler, preferably a version 4.x gcc.
+
+    - A working Python installation, version 2.4 or newer.
+
+
+Additional information can be found at: https://wiki.tiker.net/PyCuda/Installation
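
Both `get_rating()` and `full_matrix()` follow the same pattern in their GPU branch: call `skcuda.linalg.init()`, move `P` and `Q` to the device with `pycuda.gpuarray.to_gpu`, and replace NumPy's `dot` with `skcuda.linalg.dot`. Below is a minimal sketch of that pattern; it is not part of the patch, it assumes a CUDA-capable GPU plus the libraries listed in requirements.md, and the 5x2 / 4x2 shapes are only chosen to match `P` and `Q` for the example `R` with `K=2`.

```
# Sanity-check sketch (not part of the patch): compare the CPU product P.dot(Q.T)
# with the GPU product linalg.dot(p_gpu, q_gpu, transb='T') used in full_matrix().
# Assumes a CUDA-capable GPU and the pycuda / scikit-cuda installs from requirements.md.
import numpy as np
import pycuda.autoinit              # creates a CUDA context on import
import pycuda.gpuarray as gpuarray
import skcuda.linalg as linalg

linalg.init()

# Random factors with the same shapes as P (num_users x K) and Q (num_items x K) above.
P = np.random.normal(size=(5, 2))
Q = np.random.normal(size=(4, 2))

# CPU reference, as in the use_gpu=False branch.
cpu_result = P.dot(Q.T)

# GPU version, as in the use_gpu=True branch: transfer the factors, multiply with
# the second operand transposed, then copy the result back to the host.
p_gpu = gpuarray.to_gpu(P)
q_gpu = gpuarray.to_gpu(Q)
gpu_result = linalg.dot(p_gpu, q_gpu, transb='T').get()

print(np.allclose(cpu_result, gpu_result))   # expected: True
```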