Skip to content

Commit f231fa2

Browse files
add function to generate wrapper for C++ functions
1 parent 64b6018 commit f231fa2

File tree

6 files changed

+186
-1
lines changed

6 files changed

+186
-1
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ This project adheres to [Semantic Versioning](http://semver.org/).
44

55
## Unreleased
66

7+
### Changed
8+
- bugfix for C backend for byte array arguments
9+
- argument type mismatches throw warning instead of exception
10+
11+
### Added
12+
- wrapper functionality to wrap C++ functions
713

814
## [0.1.8] - 2017-11-23
915
### Changed

doc/source/user-api.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,5 @@ This file provides all the details you need about how to call the Kernel Tuner's
1111
.. autofunction:: kernel_tuner.tune_kernel
1212

1313
.. autofunction:: kernel_tuner.run_kernel
14+
15+
.. autofunction:: kernel_tuner.wrappers.cpp

examples/c/matrix_multiply.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
2+
// Computes the matrix product output = a * b for two square s-by-s matrices
// stored as flat, row-major arrays of sz elements each.
//
// All sz elements of output are cleared first; the product is then accumulated
// into the leading s*s entries. The caller is expected to pass s such that
// s*s does not exceed sz -- this is not checked here.
template<typename T, int sz>
void multiply_matrix(T (&output)[sz], const T (&a)[sz], const T (&b)[sz], int s) {
    // reset the full output array before accumulating
    for (int idx = 0; idx < sz; ++idx) {
        output[idx] = 0;
    }

    for (int row = 0; row < s; ++row) {
        const int row_base = row * s;   // offset of the current row in a and output
        for (int col = 0; col < s; ++col) {
            for (int inner = 0; inner < s; ++inner) {
                output[row_base + col] += a[row_base + inner] * b[inner * s + col];
            }
        }
    }
}
17+

examples/c/matrix_multiply.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/usr/bin/env python
2+
""" Example to show how to use the C++ wrapper
3+
4+
This example shows how to use Kernel Tuner's wrapper
5+
functionality to also call (primitive-typed) C++
6+
functions from Python.
7+
"""
8+
9+
from kernel_tuner import run_kernel
10+
from kernel_tuner import wrappers
11+
12+
import numpy as np
13+
14+
def test_multiply_matrix():
    """ Run the templated C++ matrix multiply through a generated wrapper

    Reads matrix_multiply.cpp from the current working directory, generates
    an extern "C" wrapper for ``multiply_matrix`` and runs it with
    run_kernel using lang="C". The result is compared against the product
    computed by Numpy.
    """
    function_name = "multiply_matrix"

    with open('matrix_multiply.cpp', 'r') as source_file:
        kernel_string = source_file.read()

    # two random 3x3 input matrices stored as flat arrays, plus the output
    a = np.random.randn(9).astype(np.float64)
    b = np.random.randn(9).astype(np.float64)
    c = np.zeros_like(a)

    args = [c, a, b, np.int32(3)]

    # every argument except the trailing scalar is passed as an array reference
    convert = [True] * (len(args) - 1) + [False]

    #generate a wrapper function with "extern C" binding that can be called from Python
    kernel_string = wrappers.cpp(function_name, kernel_string, args, convert_to_array=convert)

    wrapper_name = function_name + "_wrapper"
    answer = run_kernel(wrapper_name, kernel_string, 1, args, {}, lang="C")

    #compute expected answer of matrix multiplication with Numpy
    expected = np.dot(a.reshape(3, 3), b.reshape(3, 3))
    result = answer[0].reshape(3, 3)

    assert np.allclose(result, expected)

kernel_tuner/wrappers.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
""" Module for wrapper functions
2+
3+
This module contains functions that generate wrappers for functions,
4+
allowing them to be compiled and run using Kernel Tuner.
5+
6+
The first function in this module generates a wrapper for
7+
primitive-typed (templated) C++ functions, allowing them to be
8+
compiled and executed using Kernel Tuner. The plan is to later add
9+
functionality to also wrap device functions.
10+
11+
"""
12+
13+
import numpy as np
14+
15+
from kernel_tuner import util
16+
17+
18+
def cpp(function_name, kernel_source, args, convert_to_array=None):
    """ Generate a wrapper to call C++ functions from Python

    This function allows Kernel Tuner to call templated C++ functions
    that use primitive data types (double, float, int, ...).

    There is support to convert function arguments from plain pointers
    to array references. If this is needed, there should be a True value
    in convert_to_array in the location corresponding to the location in
    the args array.

    For example, a Numpy array argument of type float64 and length 10
    will be cast using:
    ``*reinterpret_cast<double(*)[10]>(arg)``
    which allows it to be used to call a C++ function that is defined as:
    ``template<typename T, int s>void my_function(T (&arg)[s], ...)``

    Arrays of size 1 will be converted to simple non-array references.
    False indicates that no conversion is performed. Conversion
    is only supported for numpy array arguments. If convert_to_array is
    passed it should have the same length as the args array.

    :param function_name: A string containing the name of the C++ function
        to be wrapped
    :type function_name: string

    :param kernel_source: One of the sources for the kernel, could be a
        function that generates the kernel code, a string containing a filename
        that points to the kernel source, or just a string that contains the code.
    :type kernel_source: string or callable

    :param args: A list of kernel arguments, use numpy arrays for
        arrays, use numpy.int32 or numpy.float32 for scalars.
    :type args: list

    :param convert_to_array: A list of same length as args, containing
        True or False values indicating whether the corresponding argument
        in args should be cast to a reference to an array or not.
    :type convert_to_array: list (True or False)

    :returns: A string containing the original code extended with the wrapper
        function. The wrapper has "extern C" binding and can be passed to
        other Kernel Tuner functions, for example run_kernel with lang="C".
        The name of the wrapper function will be the name of the function with
        a "_wrapper" postfix.
    :rtype: string

    :raises ValueError: If convert_to_array does not have the same length
        as args, if an argument has a data type that is not supported, or
        if conversion to an array reference is requested for an argument
        that is not a numpy array.

    """

    if convert_to_array and len(args) != len(convert_to_array):
        raise ValueError("convert_to_array length should be same as args")

    type_map = {"int8": "char",
                "int16": "short",
                "int32": "int",
                "float32": "float",
                "float64": "double"}

    def type_str(arg):
        # arguments without a dtype (e.g. plain Python numbers) are rejected
        # with the same error as unsupported numpy types
        dtype_name = str(getattr(arg, "dtype", None))
        if dtype_name not in type_map:
            raise ValueError("only primitive data types are supported by the C++ wrapper")
        typestring = type_map[dtype_name]
        if isinstance(arg, np.ndarray):
            typestring += " *"
        return typestring + " "

    signature = ",".join(type_str(arg) + "arg" + str(i) for i, arg in enumerate(args))

    # build the expressions that are passed to the wrapped function; without
    # convert_to_array every argument is simply forwarded unchanged
    call_args = []
    for i, arg in enumerate(args):
        arg_str = "arg" + str(i)
        if convert_to_array and convert_to_array[i]:
            if not isinstance(arg, np.ndarray):
                raise ValueError("conversion to array reference only supported for arguments that are numpy arrays, use length-1 numpy array to pass a scalar by reference")
            if np.prod(arg.shape) > 1:
                #convert pointer to a reference to an array
                arg_shape = "".join("[%d]" % dim for dim in arg.shape)
                arg_str = "*reinterpret_cast<" + type_map[str(arg.dtype)] + "(*)" + arg_shape + ">(" + arg_str + ")"
            else:
                #a reference is accepted rather than a pointer, just dereference
                arg_str = "*" + arg_str
        call_args.append(arg_str)
    call_args_str = ",".join(call_args)

    kernel_string = util.get_kernel_string(kernel_source)

    return """

%s

extern "C"
float %s_wrapper(%s) {

    %s(%s);

    return 0.0f;
}""" % (kernel_string, function_name, signature, function_name, call_args_str)
120+
121+

roadmap.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ priorities grow and shift.
99
This is the list of features that we want to have implemented by the next version.
1010

1111
* A test_kernel function to perform parameterized testing without tuning
12-
* Option to set dynamically allocated shared memory for CUDA backend
12+
* Function to generate wrapper kernels for device functions
1313

1414
### version 1.0.0
1515

@@ -25,6 +25,7 @@ implemented in earlier versions.
2525
These are the things that we would like to implement, but we currently have no
2626
demand for it. If you are interested in any of these, let us know!
2727

28+
* Option to set dynamically allocated shared memory for CUDA backend
2829
* Option to set function that computes search space restriction, instead of a list of strings
2930
* Option to set function that computes grid dimensions instead of grid divisor lists
3031
* Provide API for analysis of tuning results

0 commit comments

Comments
 (0)