diff --git a/README.md b/README.md
index 2482ddb..7f5c5ef 100644
--- a/README.md
+++ b/README.md
@@ -18,8 +18,14 @@ R = np.array([
     [0, 1, 5, 4],
 ])
 
+
+# Because there are many dot products to compute during training and prediction, the GPU can be used for this task.
+# use_gpu = False/True: set to True to run the dot products on the GPU.
+
+
+
 # Perform training and obtain the user and item matrices
-mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=20)
+mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=20, use_gpu=False)
 training_process = mf.train()
 print(mf.P)
 print(mf.Q)
@@ -43,5 +49,33 @@ print(mf.full_matrix())
 [ 1.0064803   1.00498576  2.37696737  4.98530109]
 [ 1.00999456  0.59175173  2.58437035  3.99597255]
 [ 2.26471556  1.01985428  4.9871617   3.9942251 ]]
+
 '''
+
+
+
+# Perform training and obtain the user and item matrices, using the GPU for the multiplications
+mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=20, use_gpu=True)
+
+
+# Prints the following:
+'''
+
+[[-1.45063442 -0.61549057]
+ [-1.18644282 -0.25861855]
+ [ 0.98953876  0.41593886]
+ [ 0.83830212 -0.55288053]
+ [-0.50317654  1.14994057]]
+
+[[-1.37430118  0.245039  ]
+ [ 0.02231156 -1.12899481]
+ [-0.98101813  0.33079953]
+ [ 1.65238654  0.79608774]]
+
+[[4.993465   2.98861347 5.3236645  1.01310464]
+ [3.99121154 1.86497915 4.45591865 1.00713051]
+ [1.01303675 0.99894327 2.39137093 4.98668646]
+ [1.01104568 2.11692377 2.24681459 3.99308259]
+ [4.1289681  1.02158625 4.98319103 3.98910547]]
+'''
 ```
diff --git a/mf.py b/mf.py
index bddd9ce..56bff67 100644
--- a/mf.py
+++ b/mf.py
@@ -1,9 +1,15 @@
+import pycuda.autoinit
+import pycuda.gpuarray as gpuarray
+import skcuda.linalg as linalg
 import numpy as np
+import math
+import skcuda.misc as misc
+
 
 
 class MF():
 
-    def __init__(self, R, K, alpha, beta, iterations):
+    def __init__(self, R, K, alpha, beta, iterations, use_gpu):
         """
         Perform matrix factorization to predict empty entries in a matrix.
@@ -13,8 +19,10 @@ def __init__(self, R, K, alpha, beta, iterations):
         - K (int)       : number of latent dimensions
         - alpha (float) : learning rate
         - beta (float)  : regularization parameter
+        - use_gpu (bool): whether to run the dot products on the GPU
         """
 
+        self.use_gpu = use_gpu
         self.R = R
         self.num_users, self.num_items = R.shape
         self.K = K
@@ -85,13 +93,47 @@ def sgd(self):
 
     def get_rating(self, i, j):
         """
-        Get the predicted rating of user i and item j
+        Get the predicted rating of user i and item j; depending on the value of 'use_gpu', the dot product is computed on the CPU or the GPU
         """
-        prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
+
+        if self.use_gpu:
+
+            # initialize the scikit-cuda linear algebra backend
+            linalg.init()
+
+            # copy the factor matrices to the GPU
+            p_gpu = gpuarray.to_gpu(self.P)
+            q_gpu = gpuarray.to_gpu(self.Q)
+
+            # for 1-D operands, linalg.dot computes the inner product of the two row vectors
+            prediction = self.b + self.b_u[i] + self.b_i[j] + linalg.dot(p_gpu[i, :], q_gpu[j, :])
+
+        else:
+
+            prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
+
         return prediction
 
     def full_matrix(self):
         """
         Computer the full matrix using the resultant biases, P and Q
         """
-        return self.b + self.b_u[:,np.newaxis] + self.b_i[np.newaxis:,] + self.P.dot(self.Q.T)
+
+        if self.use_gpu:
+
+            # initialize the scikit-cuda linear algebra backend
+            linalg.init()
+
+            # copy the factor matrices to the GPU
+            p_gpu = gpuarray.to_gpu(self.P)
+            q_gpu = gpuarray.to_gpu(self.Q)
+
+            # transb='T' tells linalg.dot to use the transpose of the second argument, q_gpu
+            fullMatrix = self.b + self.b_u[:, np.newaxis] + self.b_i[np.newaxis, :] + linalg.dot(p_gpu, q_gpu, transb='T').get()
+
+        else:
+
+            fullMatrix = self.b + self.b_u[:, np.newaxis] + self.b_i[np.newaxis, :] + self.P.dot(self.Q.T)
+
+        return fullMatrix
diff --git a/requirements.md b/requirements.md
new file mode 100644
index 0000000..c1d1299
--- /dev/null
+++ b/requirements.md
@@ -0,0 +1,53 @@
+Prerequisite libraries:
+
+
+----> math (part of the Python standard library, no installation required)
+
+    import math
+
+
+----> numpy
+
+    pip: pip install numpy
+
+    anaconda: conda install -c anaconda numpy
+
+
+----> scikit-cuda (skcuda)
+
+    pip: pip install scikit-cuda
+
+    anaconda: conda install -c lukepfister scikits.cuda
+
+    Dependencies:
+
+    - Python 2.7 or 3.4.
+
+    - Setuptools 0.6c10 or later.
+
+    - Mako 1.0.1 or later.
+
+    - NumPy 1.2.0 or later.
+
+    - PyCUDA 2016.1 or later (some parts of scikit-cuda might not work properly with earlier versions).
+
+    - NVIDIA CUDA Toolkit 5.0 or later.
+
+
+----> pycuda
+
+    pip: pip install pycuda
+
+    anaconda: conda install -c lukepfister pycuda
+
+    Dependencies:
+
+    - NVIDIA's CUDA Toolkit. PyCUDA was developed against version 2.0 beta; it may work with other versions, too.
+
+    - A C++ compiler, preferably a version 4.x gcc.
+
+    - A working Python installation, version 2.4 or newer.
+
+
+Additional information can be found at: https://wiki.tiker.net/PyCuda/Installation
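
Both `get_rating()` and `full_matrix()` follow the same pattern in their GPU branch: call `skcuda.linalg.init()`, move `P` and `Q` to the device with `pycuda.gpuarray.to_gpu`, and replace NumPy's `dot` with `skcuda.linalg.dot`. Below is a minimal sketch of that pattern; it is not part of the patch, it assumes a CUDA-capable GPU plus the libraries listed in requirements.md, and the 5x2 / 4x2 shapes are only chosen to match `P` and `Q` for the example `R` with `K=2`.

```
# Sanity-check sketch (not part of the patch): compare the CPU product P.dot(Q.T)
# with the GPU product linalg.dot(p_gpu, q_gpu, transb='T') used in full_matrix().
# Assumes a CUDA-capable GPU and the pycuda / scikit-cuda installs from requirements.md.
import numpy as np
import pycuda.autoinit              # creates a CUDA context on import
import pycuda.gpuarray as gpuarray
import skcuda.linalg as linalg

linalg.init()

# Random factors with the same shapes as P (num_users x K) and Q (num_items x K) above.
P = np.random.normal(size=(5, 2))
Q = np.random.normal(size=(4, 2))

# CPU reference, as in the use_gpu=False branch.
cpu_result = P.dot(Q.T)

# GPU version, as in the use_gpu=True branch: transfer the factors, multiply with
# the second operand transposed, then copy the result back to the host.
p_gpu = gpuarray.to_gpu(P)
q_gpu = gpuarray.to_gpu(Q)
gpu_result = linalg.dot(p_gpu, q_gpu, transb='T').get()

print(np.allclose(cpu_result, gpu_result))   # expected: True
```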