Skip to content

Commit b16e0b8

Browse files
authored
Merge pull request #717 from jszuppe/pr_subgroup_info
Add get_sub_group_info() to kernel class
2 parents a965a8d + 499689d commit b16e0b8

File tree

10 files changed

+357
-9
lines changed

10 files changed

+357
-9
lines changed

.appveyor.yml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,19 @@ environment:
1818
global:
1919
INTELOCLSDKROOT: C:\Program Files (x86)\Intel\OpenCL SDK\
2020
BOOST_COMPUTE_DEFAULT_PLATFORM: Intel(R) OpenCL
21+
OPENCL_INCLUDE_DIR: C:\opencl\include
22+
# Downloaded OpenCL headers version (2.1)
23+
OPENCL_HEADERS_VER: 21
24+
# OpenCL version used in tests (2.0)
25+
OPENCL_VERSION: 200
26+
CXXFLAGS: -DBOOST_COMPUTE_MAX_CL_VERSION=%OPENCL_VERSION%
2127
matrix:
2228
- VS_VER: 2015
29+
CMAKE_GENERATOR: Visual Studio 14 2015 Win64
2330
BOOST_ROOT: C:\Libraries\boost_1_59_0
2431
BOOST_LIBRARYDIR: C:\Libraries\boost_1_59_0\lib64-msvc-14.0
2532
- VS_VER: 2013
33+
CMAKE_GENERATOR: Visual Studio 12 2013 Win64
2634
BOOST_ROOT: C:\Libraries\boost_1_58_0
2735
BOOST_LIBRARYDIR: C:\Libraries\boost_1_58_0\lib64-msvc-12.0
2836

@@ -42,11 +50,11 @@ before_build:
4250
- appveyor DownloadFile "http://registrationcenter-download.intel.com/akdlm/irc_nas/9022/opencl_runtime_16.1.1_x64_setup.msi"
4351
- start /wait msiexec /i opencl_runtime_16.1.1_x64_setup.msi /qn /l*v msiexec2.log
4452
#- type msiexec2.log
53+
- git clone --depth 1 https://github.com/KhronosGroup/OpenCL-Headers.git -b opencl%OPENCL_HEADERS_VER% %OPENCL_INCLUDE_DIR%\CL
4554

4655
build_script:
4756
- mkdir build && cd build
48-
- if "%VS_VER%" == "2015" cmake -G"Visual Studio 14 2015 Win64" -DBOOST_COMPUTE_BUILD_TESTS=ON -DBOOST_COMPUTE_BUILD_EXAMPLES=ON ..
49-
- if "%VS_VER%" == "2013" cmake -G"Visual Studio 12 2013 Win64" -DBOOST_COMPUTE_BUILD_TESTS=ON -DBOOST_COMPUTE_BUILD_EXAMPLES=ON ..
57+
- cmake -G"%CMAKE_GENERATOR%" -DBOOST_COMPUTE_BUILD_TESTS=ON -DBOOST_COMPUTE_BUILD_EXAMPLES=ON -DOpenCL_INCLUDE_DIR=%OPENCL_INCLUDE_DIR% ..
5058
- cmake --build . --config Debug
5159

5260
test_script:

example/hello_world.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@ int main()
2121
// get the default device
2222
compute::device device = compute::system::default_device();
2323

24-
// print the device's name
25-
std::cout << "hello from " << device.name() << std::endl;
24+
// print the device's name and platform
25+
std::cout << "hello from " << device.name();
26+
std::cout << " (platform: " << device.platform().name() << ")" << std::endl;
2627

2728
return 0;
2829
}

include/boost/compute/cl_ext.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,24 @@
1111
#ifndef BOOST_COMPUTE_CL_EXT_HPP
1212
#define BOOST_COMPUTE_CL_EXT_HPP
1313

14+
#if defined(BOOST_COMPUTE_MAX_CL_VERSION)
15+
# if !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 202
16+
# define CL_USE_DEPRECATED_OPENCL_2_1_APIS
17+
# endif
18+
# if !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 201
19+
# define CL_USE_DEPRECATED_OPENCL_2_0_APIS
20+
# endif
21+
# if !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 200
22+
# define CL_USE_DEPRECATED_OPENCL_1_2_APIS
23+
# endif
24+
# if !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 102
25+
# define CL_USE_DEPRECATED_OPENCL_1_1_APIS
26+
# endif
27+
# if !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 101
28+
# define CL_USE_DEPRECATED_OPENCL_1_0_APIS
29+
# endif
30+
#endif
31+
1432
#if defined(__APPLE__)
1533
#include <OpenCL/cl_ext.h>
1634
#else

include/boost/compute/detail/get_object_info.hpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,16 @@ struct bound_info_function
3636
{
3737
}
3838

39+
template<class Info>
40+
cl_int operator()(Info info, size_t input_size, const void *input,
41+
size_t size, void *value, size_t *size_ret) const
42+
{
43+
return m_function(
44+
m_object, m_aux_info, info,
45+
input_size, input, size, value, size_ret
46+
);
47+
}
48+
3949
template<class Info>
4050
cl_int operator()(Info info, size_t size, void *value, size_t *size_ret) const
4151
{
@@ -96,6 +106,20 @@ struct get_object_info_impl
96106

97107
return value;
98108
}
109+
110+
template<class Function, class Info>
111+
T operator()(Function function, Info info,
112+
const size_t input_size, const void* input) const
113+
{
114+
T value;
115+
116+
cl_int ret = function(info, input_size, input, sizeof(T), &value, 0);
117+
if(ret != CL_SUCCESS){
118+
BOOST_THROW_EXCEPTION(opencl_error(ret));
119+
}
120+
121+
return value;
122+
}
99123
};
100124

101125
// specialization for bool
@@ -167,6 +191,42 @@ struct get_object_info_impl<std::vector<T> >
167191

168192
return vector;
169193
}
194+
195+
template<class Function, class Info>
196+
std::vector<T> operator()(Function function, Info info,
197+
const size_t input_size, const void* input) const
198+
{
199+
#ifdef BOOST_COMPUTE_CL_VERSION_2_1
200+
// For CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT in clGetKernelSubGroupInfo
201+
// we can't get param_value_size using param_value_size_ret
202+
if(info == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT)
203+
{
204+
std::vector<T> vector(3);
205+
cl_int ret = function(
206+
info, input_size, input,
207+
sizeof(T) * vector.size(), &vector[0], 0
208+
);
209+
if(ret != CL_SUCCESS){
210+
BOOST_THROW_EXCEPTION(opencl_error(ret));
211+
}
212+
return vector;
213+
}
214+
#endif
215+
size_t size = 0;
216+
217+
cl_int ret = function(info, input_size, input, 0, 0, &size);
218+
if(ret != CL_SUCCESS){
219+
BOOST_THROW_EXCEPTION(opencl_error(ret));
220+
}
221+
222+
std::vector<T> vector(size / sizeof(T));
223+
ret = function(info, input_size, input, size, &vector[0], 0);
224+
if(ret != CL_SUCCESS){
225+
BOOST_THROW_EXCEPTION(opencl_error(ret));
226+
}
227+
228+
return vector;
229+
}
170230
};
171231

172232
// returns the value (of type T) from the given clGet*Info() function call.
@@ -182,6 +242,12 @@ inline T get_object_info(Function f, Object o, Info i, AuxInfo j)
182242
return get_object_info_impl<T>()(bind_info_function(f, o, j), i);
183243
}
184244

245+
template<class T, class Function, class Object, class Info, class AuxInfo>
246+
inline T get_object_info(Function f, Object o, Info i, AuxInfo j, const size_t k, const void * l)
247+
{
248+
return get_object_info_impl<T>()(bind_info_function(f, o, j), i, k, l);
249+
}
250+
185251
// returns the value type for the clGet*Info() call on Object with Enum.
186252
template<class Object, int Enum>
187253
struct get_object_info_type;

include/boost/compute/interop/opengl/cl_gl_ext.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,24 @@
1111
#ifndef BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP
1212
#define BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP
1313

14+
#if defined(BOOST_COMPUTE_MAX_CL_VERSION)
15+
# if !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 202
16+
# define CL_USE_DEPRECATED_OPENCL_2_1_APIS
17+
# endif
18+
# if !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 201
19+
# define CL_USE_DEPRECATED_OPENCL_2_0_APIS
20+
# endif
21+
# if !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 200
22+
# define CL_USE_DEPRECATED_OPENCL_1_2_APIS
23+
# endif
24+
# if !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 102
25+
# define CL_USE_DEPRECATED_OPENCL_1_1_APIS
26+
# endif
27+
# if !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 101
28+
# define CL_USE_DEPRECATED_OPENCL_1_0_APIS
29+
# endif
30+
#endif
31+
1432
#if defined(__APPLE__)
1533
#include <OpenCL/cl_gl_ext.h>
1634
#else

include/boost/compute/interop/opengl/context.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ inline context opengl_create_shared_context()
7676
const platform &platform = platforms[i];
7777

7878
// check whether this platform supports OpenCL/OpenGL sharing
79-
if (!platform.supports_extension(cl_gl_sharing_extension))
79+
if (!platform.supports_extension(cl_gl_sharing_extension))
8080
continue;
8181

8282
// load clGetGLContextInfoKHR() extension function
@@ -98,7 +98,7 @@ inline context opengl_create_shared_context()
9898
CL_GLX_DISPLAY_KHR, (cl_context_properties) glXGetCurrentDisplay(),
9999
#elif defined(_WIN32)
100100
CL_GL_CONTEXT_KHR, (cl_context_properties) wglGetCurrentContext(),
101-
CL_WGL_HDC_KHR, (cl_context_properties) wglGetCurrentDC(),
101+
CL_WGL_HDC_KHR, (cl_context_properties) wglGetCurrentDC(),
102102
#endif
103103
0
104104
};
@@ -118,7 +118,7 @@ inline context opengl_create_shared_context()
118118

119119
// create device object for the GPU and ensure it supports CL-GL sharing
120120
device gpu(gpu_id, false);
121-
if(!gpu.supports_extension(cl_gl_sharing_extension)){
121+
if(!gpu.supports_extension(cl_gl_sharing_extension)){
122122
continue;
123123
}
124124

@@ -129,7 +129,7 @@ inline context opengl_create_shared_context()
129129

130130
// no CL-GL sharing capable devices found
131131
BOOST_THROW_EXCEPTION(
132-
unsupported_extension_error(cl_gl_sharing_extension)
132+
unsupported_extension_error(cl_gl_sharing_extension)
133133
);
134134
}
135135

include/boost/compute/kernel.hpp

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,14 @@
1515

1616
#include <boost/assert.hpp>
1717
#include <boost/utility/enable_if.hpp>
18+
#include <boost/optional.hpp>
19+
20+
#include <boost/compute/cl_ext.hpp> // cl_khr_subgroups
1821

1922
#include <boost/compute/config.hpp>
20-
#include <boost/compute/program.hpp>
2123
#include <boost/compute/exception.hpp>
24+
#include <boost/compute/program.hpp>
25+
#include <boost/compute/platform.hpp>
2226
#include <boost/compute/type_traits/is_fundamental.hpp>
2327
#include <boost/compute/detail/get_object_info.hpp>
2428
#include <boost/compute/detail/assert_cl_success.hpp>
@@ -208,6 +212,101 @@ class kernel
208212
return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id());
209213
}
210214

215+
#if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
216+
/// Returns sub-group information for the kernel with \p device. Returns a null
217+
/// optional if \p device is not 2.1 device, or is not 2.0 device with support
218+
/// for cl_khr_subgroups extension.
219+
///
220+
/// \opencl_version_warning{2,1}
221+
/// \see_opencl_ref{clGetKernelSubGroupInfo}
222+
template<class T>
223+
boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
224+
const size_t input_size, const void * input) const
225+
{
226+
if(device.check_version(2, 1))
227+
{
228+
return detail::get_object_info<T>(
229+
clGetKernelSubGroupInfo, m_kernel, info, device.id(), input_size, input
230+
);
231+
}
232+
else if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
233+
{
234+
return boost::optional<T>();
235+
}
236+
// Only CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE and CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
237+
// are supported in cl_khr_subgroups extension for 2.0 devices.
238+
else if(info != CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE && info != CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)
239+
{
240+
return boost::optional<T>();
241+
}
242+
243+
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
244+
reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
245+
reinterpret_cast<size_t>(
246+
device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
247+
)
248+
);
249+
250+
return detail::get_object_info<T>(
251+
clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
252+
);
253+
}
254+
255+
/// \overload
256+
template<class T>
257+
boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info) const
258+
{
259+
return get_sub_group_info<T>(device, info, 0, 0);
260+
}
261+
262+
/// \overload
263+
template<class T>
264+
boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
265+
const size_t input) const
266+
{
267+
return get_sub_group_info<T>(device, info, sizeof(size_t), &input);
268+
}
269+
#endif // BOOST_COMPUTE_CL_VERSION_2_1
270+
271+
#if defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
272+
/// Returns sub-group information for the kernel with \p device. Returns a null
273+
/// optional if cl_khr_subgroups extension is not supported by \p device.
274+
///
275+
/// \opencl_version_warning{2,0}
276+
/// \see_opencl_ref{clGetKernelSubGroupInfoKHR}
277+
template<class T>
278+
boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
279+
const size_t input_size, const void * input) const
280+
{
281+
if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
282+
{
283+
return boost::optional<T>();
284+
}
285+
286+
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
287+
reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
288+
reinterpret_cast<size_t>(
289+
device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
290+
)
291+
);
292+
293+
return detail::get_object_info<T>(
294+
clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
295+
);
296+
}
297+
#endif // defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
298+
299+
#if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
300+
/// \overload
301+
template<class T>
302+
boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
303+
const std::vector<size_t> input) const
304+
{
305+
BOOST_ASSERT(input.size() > 0);
306+
return get_sub_group_info<T>(device, info, input.size() * sizeof(size_t), &input[0]);
307+
}
308+
#endif // BOOST_COMPUTE_CL_VERSION_2_0
309+
211310
/// Sets the argument at \p index to \p value with \p size.
212311
///
213312
/// \see_opencl_ref{clSetKernelArg}

test/test_for_each.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ BOOST_AUTO_TEST_CASE(for_each_nop)
2929
BOOST_COMPUTE_FUNCTION(void, nop, (int ignored), {});
3030

3131
bc::for_each(vector.begin(), vector.end(), nop, queue);
32+
queue.finish();
3233
}
3334

3435
BOOST_AUTO_TEST_CASE(for_each_n_nop)
@@ -39,6 +40,7 @@ BOOST_AUTO_TEST_CASE(for_each_n_nop)
3940
BOOST_COMPUTE_FUNCTION(void, nop, (int ignored), {});
4041

4142
bc::for_each_n(vector.begin(), vector.size(), nop, queue);
43+
queue.finish();
4244
}
4345

4446
BOOST_AUTO_TEST_SUITE_END()

0 commit comments

Comments
 (0)