|
15 | 15 |
|
16 | 16 | #include <boost/assert.hpp> |
17 | 17 | #include <boost/utility/enable_if.hpp> |
| 18 | +#include <boost/optional.hpp> |
| 19 | + |
| 20 | +#include <boost/compute/cl_ext.hpp> // cl_khr_subgroups |
18 | 21 |
|
19 | 22 | #include <boost/compute/config.hpp> |
20 | | -#include <boost/compute/program.hpp> |
21 | 23 | #include <boost/compute/exception.hpp> |
| 24 | +#include <boost/compute/program.hpp> |
| 25 | +#include <boost/compute/platform.hpp> |
22 | 26 | #include <boost/compute/type_traits/is_fundamental.hpp> |
23 | 27 | #include <boost/compute/detail/get_object_info.hpp> |
24 | 28 | #include <boost/compute/detail/assert_cl_success.hpp> |
@@ -208,6 +212,101 @@ class kernel |
208 | 212 | return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id()); |
209 | 213 | } |
210 | 214 |
|
| 215 | + #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) |
| 216 | + /// Returns sub-group information for the kernel with \p device. Returns a null |
| 217 | + /// optional if \p device is not 2.1 device, or is not 2.0 device with support |
| 218 | + /// for cl_khr_subgroups extension. |
| 219 | + /// |
| 220 | + /// \opencl_version_warning{2,1} |
| 221 | + /// \see_opencl_ref{clGetKernelSubGroupInfo} |
| 222 | + template<class T> |
| 223 | + boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info, |
| 224 | + const size_t input_size, const void * input) const |
| 225 | + { |
| 226 | + if(device.check_version(2, 1)) |
| 227 | + { |
| 228 | + return detail::get_object_info<T>( |
| 229 | + clGetKernelSubGroupInfo, m_kernel, info, device.id(), input_size, input |
| 230 | + ); |
| 231 | + } |
| 232 | + else if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups")) |
| 233 | + { |
| 234 | + return boost::optional<T>(); |
| 235 | + } |
| 236 | + // Only CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE and CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE |
| 237 | + // are supported in cl_khr_subgroups extension for 2.0 devices. |
| 238 | + else if(info != CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE && info != CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) |
| 239 | + { |
| 240 | + return boost::optional<T>(); |
| 241 | + } |
| 242 | + |
| 243 | + clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr = |
| 244 | + reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>( |
| 245 | + reinterpret_cast<size_t>( |
| 246 | + device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR") |
| 247 | + ) |
| 248 | + ); |
| 249 | + |
| 250 | + return detail::get_object_info<T>( |
| 251 | + clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input |
| 252 | + ); |
| 253 | + } |
| 254 | + |
| 255 | + /// \overload |
| 256 | + template<class T> |
| 257 | + boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info) const |
| 258 | + { |
| 259 | + return get_sub_group_info<T>(device, info, 0, 0); |
| 260 | + } |
| 261 | + |
| 262 | + /// \overload |
| 263 | + template<class T> |
| 264 | + boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info, |
| 265 | + const size_t input) const |
| 266 | + { |
| 267 | + return get_sub_group_info<T>(device, info, sizeof(size_t), &input); |
| 268 | + } |
| 269 | + #endif // BOOST_COMPUTE_CL_VERSION_2_1 |
| 270 | + |
| 271 | + #if defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1) |
| 272 | + /// Returns sub-group information for the kernel with \p device. Returns a null |
| 273 | + /// optional if cl_khr_subgroups extension is not supported by \p device. |
| 274 | + /// |
| 275 | + /// \opencl_version_warning{2,0} |
| 276 | + /// \see_opencl_ref{clGetKernelSubGroupInfoKHR} |
| 277 | + template<class T> |
| 278 | + boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info, |
| 279 | + const size_t input_size, const void * input) const |
| 280 | + { |
| 281 | + if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups")) |
| 282 | + { |
| 283 | + return boost::optional<T>(); |
| 284 | + } |
| 285 | + |
| 286 | + clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr = |
| 287 | + reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>( |
| 288 | + reinterpret_cast<size_t>( |
| 289 | + device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR") |
| 290 | + ) |
| 291 | + ); |
| 292 | + |
| 293 | + return detail::get_object_info<T>( |
| 294 | + clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input |
| 295 | + ); |
| 296 | + } |
| 297 | + #endif // defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1) |
| 298 | + |
| 299 | + #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) |
| 300 | + /// \overload |
| 301 | + template<class T> |
| 302 | + boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info, |
| 303 | + const std::vector<size_t> input) const |
| 304 | + { |
| 305 | + BOOST_ASSERT(input.size() > 0); |
| 306 | + return get_sub_group_info<T>(device, info, input.size() * sizeof(size_t), &input[0]); |
| 307 | + } |
| 308 | + #endif // BOOST_COMPUTE_CL_VERSION_2_0 |
| 309 | + |
211 | 310 | /// Sets the argument at \p index to \p value with \p size. |
212 | 311 | /// |
213 | 312 | /// \see_opencl_ref{clSetKernelArg} |
|
0 commit comments