add function to generate wrapper for C++ functions

benvanwerkhoven · benvanwerkhoven · commit f231fa29482f · 2018-02-21T11:28:16.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,12 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+### Changed
+- bugfix for C backend for byte array arguments
+- argument type mismatches throw warning instead of exception
+
+### Added
+- wrapper functionality to wrap C++ functions
 
 ## [0.1.8] - 2017-11-23
 ### Changed
diff --git a/doc/source/user-api.rst b/doc/source/user-api.rst
@@ -11,3 +11,5 @@ This file provides all the details you need about how to call the Kernel Tuner's
 .. autofunction:: kernel_tuner.tune_kernel
 
 .. autofunction:: kernel_tuner.run_kernel
+
+.. autofunction:: kernel_tuner.wrappers.cpp
diff --git a/examples/c/matrix_multiply.cpp b/examples/c/matrix_multiply.cpp
@@ -0,0 +1,17 @@
+
+template<typename T, int sz>
+void multiply_matrix(T (&output)[sz], const T (&a)[sz], const T (&b)[sz], int s) {
+    // Matrix product output = a * b of two square s-by-s matrices, each stored
+    // as a flat array of sz elements; element (i, j) lives at index i*s + j.
+    for (int idx = 0; idx < sz; ++idx) {
+        output[idx] = 0;    // clear every element before accumulating into it
+    }
+    for (int row = 0; row < s; ++row) {
+        for (int col = 0; col < s; ++col) {
+            for (int inner = 0; inner < s; ++inner) {
+                output[row*s + col] += a[row*s + inner] * b[inner*s + col];
+            }
+        }
+    }
+}
+
diff --git a/examples/c/matrix_multiply.py b/examples/c/matrix_multiply.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+""" Example to show how to use the C++ wrapper
+
+This example shows how to use Kernel Tuner's wrapper
+functionality to also call (primitive-typed) C++
+functions from Python.
+"""
+
+from kernel_tuner import run_kernel
+from kernel_tuner import wrappers
+
+import numpy as np
+
+def test_multiply_matrix():
+
+    function_name = "multiply_matrix"
+
+    with open('matrix_multiply.cpp', 'r') as f:
+        kernel_string = f.read()
+
+    a = np.random.randn(9).astype(np.float64)
+    b = np.random.randn(9).astype(np.float64)
+    c = np.zeros_like(a)
+
+    args = [c, a, b, np.int32(3)]
+    convert = [True for _ in args]
+    convert[-1] = False
+
+    #generate a wrapper function with "extern C" binding that can be called from Python
+    kernel_string = wrappers.cpp(function_name, kernel_string, args, convert_to_array=convert)
+
+    answer = run_kernel(function_name + "_wrapper", kernel_string, 1, args, {},
+               lang="C")
+
+    #compute expected answer of matrix multiplication with Numpy
+    expected = a.reshape(3,3).dot(b.reshape(3,3))
+
+    assert np.allclose(answer[0].reshape(3,3), expected)
diff --git a/kernel_tuner/wrappers.py b/kernel_tuner/wrappers.py
@@ -0,0 +1,121 @@
+""" Module for wrapper functions
+
+This module contains functions that generate wrappers for functions,
+allowing them to be compiled and run using Kernel Tuner.
+
+The first function in this module generates a wrapper for
+primitive-typed (templated) C++ functions, allowing them to be
+compiled and executed using Kernel Tuner. The plan is to later add
+functionality to also wrap device functions.
+
+"""
+
+import numpy as np
+
+from kernel_tuner import util
+
+
+def cpp(function_name, kernel_source, args, convert_to_array=None):
+    """ Generate a wrapper to call C++ functions from Python
+
+    This function allows Kernel Tuner to call templated C++ functions
+    that use primitive data types (double, float, int, ...).
+
+    There is support to convert function arguments from plain pointers
+    to array references. If this is needed, there should be a True value
+    in convert_to_array in the location corresponding to the location in
+    the args array.
+
+    For example, a Numpy array argument of type float64 and length 10
+    will be cast using:
+    ``*reinterpret_cast<double(*)[10]>(arg)``
+    which allows it to be used to call a C++ that is defined as:
+    ``template<typename T, int s>void my_function(T (&arg)[s], ...)``
+
+    Arrays of size 1 will be converted to simple non-array references.
+    False indicates that no conversion is performed. Conversion
+    is only support for numpy array arguments. If convert_to_array is
+    passed it should have the same length as the args array.
+
+    :param function_name: A string containing the name of the C++ function
+        to be wrapped
+    :type function_name: string
+
+    :param kernel_source: One of the sources for the kernel, could be a
+        function that generates the kernel code, a string containing a filename
+        that points to the kernel source, or just a string that contains the code.
+    :type kernel_source: string or callable
+
+    :param args: A list of kernel arguments, use numpy arrays for
+        arrays, use numpy.int32 or numpy.float32 for scalars.
+    :type args: list
+
+    :param convert_to_array: A list of same length as args, containing
+        True or False values indicating whether the corresponding argument
+        in args should be cast to a reference to an array or not.
+    :type convert_to_array: list (True or False)
+
+    :returns: A string containing the orignal code extended with the wrapper
+        function. The wrapper has "extern C" binding and can be passed to
+        other Kernel Tuner functions, for example run_kernel with lang="C".
+        The name of the wrapper function will be the name of the function with
+        a "_wrapper" postfix.
+    :rtype: string
+
+    """
+
+    if convert_to_array and len(args) != len(convert_to_array):
+        raise ValueError("convert_to_array length should be same as args")
+
+    type_map = {"int8": "char",
+                "int16": "short",
+                "int32": "int",
+                "float32": "float",
+                "float64": "double"}
+
+    def type_str(arg):
+        if not str(arg.dtype) in type_map:
+            raise Value("only primitive data types are supported by the C++ wrapper")
+        typestring = type_map[str(arg.dtype)]
+        if isinstance(arg, np.ndarray):
+            typestring += " *"
+        return typestring + " "
+
+    signature = ",".join([type_str(arg) + "arg" + str(i) for i, arg in enumerate(args)])
+
+    if not convert_to_array:
+        call_args = ",".join(["arg" + str(i) for i in range(len(args))])
+    else:
+        call_args = []
+        for i, arg in enumerate(args):
+            if convert_to_array[i]:
+                if not isinstance(arg, np.ndarray):
+                    ValueError("conversion to array reference only supported for arguments that are numpy arrays, use length-1 numpy array to pass a scalar by reference")
+                if np.prod(arg.shape) > 1:
+                    #convert pointer to a reference to an array
+                    arg_shape = "".join("[%d]" % i for i in arg.shape)
+                    arg_str = "*reinterpret_cast<" + type_map[str(arg.dtype)] + "(*)" + arg_shape + ">(arg" + str(i) + ")"
+                else:
+                    #a reference is accepted rather than a pointer, just dereference
+                    arg_str = "*arg" + str(i)
+                call_args.append(arg_str)
+                #call_args = ",".join(["*reinterpret_cast<double(*)[9]>(arg" + str(i) + ")" for i in range(len(args))])
+            else:
+                call_args.append("arg" + str(i))
+        call_args_str = ",".join(call_args)
+
+    kernel_string = util.get_kernel_string(kernel_source)
+
+    return """
+
+    %s
+
+    extern "C"
+    float %s_wrapper(%s) {
+
+        %s(%s);
+
+        return 0.0f;
+    }""" % (kernel_string, function_name, signature, function_name, call_args_str)
+
+
diff --git a/roadmap.md b/roadmap.md
@@ -9,7 +9,7 @@ priorities grow and shift.
 This is the list of features that we want to have implemented by the next version.
 
  * A test_kernel function to perform parameterized testing without tuning
- * Option to set dynamically allocated shared memory for CUDA backend
+ * Function to generate wrapper kernels for device functions
  
 ### version 1.0.0
 
@@ -25,6 +25,7 @@ implemented in earlier versions.
 These are the things that we would like to implement, but we currently have no
 demand for it. If you are interested in any of these, let us know!
 
+ * Option to set dynamically allocated shared memory for CUDA backend
  * Option to set function that computes search space restriction, instead of a list of strings
  * Option to set function that computes grid dimensions instead of grid divisor lists
  * Provide API for analysis of tuning results