|
| 1 | +// REQUIRES: aspect-usm_shared_allocations |
| 2 | +// UNSUPPORTED: target-amd |
| 3 | +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072 |
| 4 | + |
| 5 | +// RUN: %{build} -o %t.out |
| 6 | +// RUN: %{run} %t.out |
| 7 | + |
| 8 | +// XFAIL: target-native_cpu |
| 9 | +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/20142 |
| 10 | + |
| 11 | +// This test checks that free function kernels can be submitted using the |
| 12 | +// enqueued functions defined in the free function kernel extension, namely the |
| 13 | +// single_task and the nd_launch functions that take a queue/handler as an |
| 14 | +// argument. These were added in https://github.com/intel/llvm/pull/19995. |
| 15 | + |
| 16 | +#include <cassert> |
| 17 | +#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp> |
| 18 | +#include <sycl/ext/oneapi/experimental/free_function_traits.hpp> |
| 19 | +#include <sycl/ext/oneapi/free_function_queries.hpp> |
| 20 | +#include <sycl/ext/oneapi/work_group_static.hpp> |
| 21 | +#include <sycl/usm.hpp> |
| 22 | + |
| 23 | +namespace syclext = sycl::ext::oneapi; |
| 24 | +namespace syclexp = sycl::ext::oneapi::experimental; |
| 25 | + |
| 26 | +using accType = |
| 27 | + sycl::accessor<int, 1, sycl::access_mode::read_write, sycl::target::device, |
| 28 | + sycl::access::placeholder::true_t>; |
| 29 | + |
| 30 | +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel)) |
| 31 | +void empty() {} |
| 32 | + |
| 33 | +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) |
| 34 | +void initialize(int *ptr) { |
| 35 | + size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id(); |
| 36 | + ptr[Lid] = Lid; |
| 37 | +} |
| 38 | + |
| 39 | +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel)) |
| 40 | +void successor(int *src, int *dst) { *dst = *src + 1; } |
| 41 | + |
| 42 | +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) |
| 43 | +void square(int *src, int *dst) { |
| 44 | + size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id(); |
| 45 | + dst[Lid] = src[Lid] * src[Lid]; |
| 46 | +} |
| 47 | + |
| 48 | +template <typename T> |
| 49 | +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) |
| 50 | +void squareWithScratchMemoryTemplated(T *src, T *dst) { |
| 51 | + size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id(); |
| 52 | + T *LocalMem = reinterpret_cast<T *>(syclexp::get_work_group_scratch_memory()); |
| 53 | + LocalMem[Lid] = src[Lid] * src[Lid]; |
| 54 | + dst[Lid] = LocalMem[Lid]; |
| 55 | +} |
| 56 | + |
| 57 | +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) |
| 58 | +void squareWithAccessor(accType src, accType dst) { |
| 59 | + size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id(); |
| 60 | + dst[Lid] = src[Lid] * src[Lid]; |
| 61 | +} |
| 62 | + |
| 63 | +constexpr int SIZE = 16; |
| 64 | + |
| 65 | +int main() { |
| 66 | + sycl::queue Q; |
| 67 | + int *Src = sycl::malloc_shared<int>(SIZE, Q); |
| 68 | + int *Dst = sycl::malloc_shared<int>(SIZE, Q); |
| 69 | + |
| 70 | + syclexp::single_task(Q, syclexp::kernel_function_s<empty>{}); |
| 71 | + |
| 72 | + syclexp::nd_launch( |
| 73 | + Q, ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)), |
| 74 | + syclexp::kernel_function<initialize>, Src); |
| 75 | + Q.wait(); |
| 76 | + |
| 77 | + syclexp::launch_config Config{ |
| 78 | + ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)), |
| 79 | + syclexp::properties{ |
| 80 | + syclexp::work_group_scratch_size(SIZE * sizeof(int))}}; |
| 81 | + |
| 82 | + static_assert( |
| 83 | + std::is_same_v< |
| 84 | + decltype(syclexp::nd_launch( |
| 85 | + Q, Config, |
| 86 | + syclexp::kernel_function<squareWithScratchMemoryTemplated<int>>, |
| 87 | + Src, Dst)), |
| 88 | + void>); |
| 89 | + |
| 90 | + syclexp::nd_launch( |
| 91 | + Q, Config, |
| 92 | + syclexp::kernel_function<squareWithScratchMemoryTemplated<int>>, Src, |
| 93 | + Dst); |
| 94 | + Q.wait(); |
| 95 | + |
| 96 | + for (int I = 0; I < SIZE; I++) { |
| 97 | + assert(Dst[I] == Src[I] * Src[I]); |
| 98 | + } |
| 99 | + |
| 100 | + syclexp::nd_launch( |
| 101 | + Q, ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)), |
| 102 | + syclexp::kernel_function<square>, Src, Dst); |
| 103 | + Q.wait(); |
| 104 | + |
| 105 | + for (int I = 0; I < SIZE; I++) { |
| 106 | + assert(Dst[I] == Src[I] * Src[I]); |
| 107 | + } |
| 108 | + |
| 109 | + static_assert( |
| 110 | + std::is_same_v<decltype(syclexp::single_task( |
| 111 | + Q, syclexp::kernel_function<successor>, Src, Dst)), |
| 112 | + void>); |
| 113 | + syclexp::single_task(Q, syclexp::kernel_function<successor>, Src, Dst); |
| 114 | + Q.wait(); |
| 115 | + assert(Dst[0] == Src[0] + 1); |
| 116 | + |
| 117 | + int SrcData[SIZE]; |
| 118 | + int DstData[SIZE]; |
| 119 | + for (int I = 0; I < SIZE; ++I) { |
| 120 | + SrcData[I] = I; |
| 121 | + } |
| 122 | + |
| 123 | + { // Test with accessors |
| 124 | + sycl::buffer<int> SrcBuf{&SrcData[0], SIZE}; |
| 125 | + sycl::buffer<int> DstBuf{&DstData[0], SIZE}; |
| 126 | + accType SrcAcc{SrcBuf}; |
| 127 | + accType DstAcc{DstBuf}; |
| 128 | + |
| 129 | + Q.submit([&](sycl::handler &CGH) { |
| 130 | + CGH.require(SrcAcc); |
| 131 | + CGH.require(DstAcc); |
| 132 | + syclexp::nd_launch(CGH, Config, |
| 133 | + syclexp::kernel_function<squareWithAccessor>, SrcAcc, |
| 134 | + DstAcc); |
| 135 | + }); |
| 136 | + } |
| 137 | + for (int I = 0; I < SIZE; ++I) { |
| 138 | + assert(DstData[I] == SrcData[I] * SrcData[I]); |
| 139 | + } |
| 140 | + |
| 141 | + Q.submit([&](sycl::handler &CGH) { |
| 142 | + static_assert( |
| 143 | + std::is_same_v<decltype(syclexp::nd_launch( |
| 144 | + CGH, Config, |
| 145 | + syclexp::kernel_function< |
| 146 | + squareWithScratchMemoryTemplated<int>>, |
| 147 | + Src, Dst)), |
| 148 | + void>); |
| 149 | + syclexp::nd_launch( |
| 150 | + CGH, Config, |
| 151 | + syclexp::kernel_function<squareWithScratchMemoryTemplated<int>>, Src, |
| 152 | + Dst); |
| 153 | + }).wait(); |
| 154 | + |
| 155 | + for (int I = 0; I < SIZE; I++) { |
| 156 | + assert(Dst[I] == Src[I] * Src[I]); |
| 157 | + } |
| 158 | + |
| 159 | + Q.submit([&](sycl::handler &CGH) { |
| 160 | + static_assert( |
| 161 | + std::is_same_v<decltype(syclexp::nd_launch( |
| 162 | + CGH, |
| 163 | + ::sycl::nd_range<1>(::sycl::range<1>(SIZE), |
| 164 | + ::sycl::range<1>(SIZE)), |
| 165 | + syclexp::kernel_function<square>, Src, Dst)), |
| 166 | + void>); |
| 167 | + |
| 168 | + syclexp::nd_launch( |
| 169 | + CGH, |
| 170 | + ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)), |
| 171 | + syclexp::kernel_function<square>, Src, Dst); |
| 172 | + }).wait(); |
| 173 | + |
| 174 | + for (int I = 0; I < SIZE; I++) { |
| 175 | + assert(Dst[I] == Src[I] * Src[I]); |
| 176 | + } |
| 177 | + |
| 178 | + Q.submit([&](sycl::handler &CGH) { |
| 179 | + static_assert(std::is_same_v<decltype(syclexp::single_task( |
| 180 | + CGH, syclexp::kernel_function<successor>, |
| 181 | + Src, Dst)), |
| 182 | + void>); |
| 183 | + syclexp::single_task(CGH, syclexp::kernel_function<successor>, Src, Dst); |
| 184 | + }).wait(); |
| 185 | + |
| 186 | + assert(Dst[0] == Src[0] + 1); |
| 187 | + |
| 188 | + return 0; |
| 189 | +} |
0 commit comments