Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2263,6 +2263,7 @@ enum class external_semaphore_handle_type {
win32_nt_dx12_fence = 2,
timeline_fd = 3,
timeline_win32_nt_handle = 4,
win32_nt_dx11_fence = 5
};

// Descriptor templated on specific resource type
Expand Down Expand Up @@ -2313,10 +2314,10 @@ external_semaphore import_external_semaphore(
The resulting `external_semaphore` can then be used in a SYCL command
group, to either wait until the semaphore signalled, or signal the semaphore.

If the type of semaphore imported supports setting the state of discrete
semaphore value (the semaphore type is `win32_nt_dx12_fence`, `timeline_fd` or
`timeline_win32_nt_handle`), then the user can specify which value the semaphore
operation should wait on, or signal.
If the imported semaphore type supports setting the state of a discrete semaphore
value (the semaphore type is `win32_nt_dx12_fence`, `win32_nt_dx11_fence`,
`timeline_fd`, or `timeline_win32_nt_handle`), then the user can specify which
value the semaphore operation should wait on, or signal.

We propose to extend the SYCL queue and handler classes with semaphore waiting
and signalling operations.
Expand Down Expand Up @@ -2408,15 +2409,15 @@ The behaviour of waiting on a semaphore will depend on the type of the
semaphore which was imported.

If the semaphore does not support setting of a discrete state value (the
semaphore type is not `win32_nt_dx12_fence`, `timeline_fd` or
`timeline_win32_nt_handle`), then any operations submitted to the queue after a
`ext_oneapi_wait_external_semaphore` call will not begin until the imported
semaphore is in a signalled state. After this, the semaphore will be reset to a
non-signalled state.
semaphore type is not `win32_nt_dx12_fence`, `win32_nt_dx11_fence`,
`timeline_fd` or `timeline_win32_nt_handle`), then any operations submitted to
the queue after a `ext_oneapi_wait_external_semaphore` call will not begin until
the imported semaphore is in a signalled state. After this, the semaphore will
be reset to a non-signalled state.

If the semaphore does support setting of a discrete state value (the semaphore
type is `win32_nt_dx12_fence`, `timeline_fd` or `timeline_win32_nt_handle`),
then any operations submitted to the queue after a
type is `win32_nt_dx12_fence`, `win32_nt_dx11_fence`, `timeline_fd` or
`timeline_win32_nt_handle`), then any operations submitted to the queue after a
`ext_oneapi_wait_external_semaphore` call will not begin until the imported
semaphore is in a state greater than or equal to the `wait_value`. The state of
this type of semaphore will not be altered by the call to
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/ext/oneapi/bindless_images_interop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ enum class external_semaphore_handle_type {
win32_nt_dx12_fence = 2,
timeline_fd = 3,
timeline_win32_nt_handle = 4,
win32_nt_dx11_fence = 5,
};

/// Opaque external memory handle type
Expand Down
3 changes: 3 additions & 0 deletions sycl/source/detail/bindless_images.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,9 @@ __SYCL_EXPORT external_semaphore import_external_semaphore(
case external_semaphore_handle_type::win32_nt_dx12_fence:
urHandleType = UR_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT_DX12_FENCE;
break;
case external_semaphore_handle_type::win32_nt_dx11_fence:
urHandleType = UR_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT_DX11_FENCE;
break;
case external_semaphore_handle_type::timeline_win32_nt_handle:
urHandleType = UR_EXP_EXTERNAL_SEMAPHORE_TYPE_TIMELINE_WIN32_NT;
break;
Expand Down
4 changes: 4 additions & 0 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1357,6 +1357,8 @@ void handler::ext_oneapi_wait_external_semaphore(
switch (ExtSemaphore.handle_type) {
case sycl::ext::oneapi::experimental::external_semaphore_handle_type::
win32_nt_dx12_fence:
case sycl::ext::oneapi::experimental::external_semaphore_handle_type::
win32_nt_dx11_fence:
case sycl::ext::oneapi::experimental::external_semaphore_handle_type::
timeline_fd:
case sycl::ext::oneapi::experimental::external_semaphore_handle_type::
Expand Down Expand Up @@ -1412,6 +1414,8 @@ void handler::ext_oneapi_signal_external_semaphore(
switch (ExtSemaphore.handle_type) {
case sycl::ext::oneapi::experimental::external_semaphore_handle_type::
win32_nt_dx12_fence:
case sycl::ext::oneapi::experimental::external_semaphore_handle_type::
win32_nt_dx11_fence:
case sycl::ext::oneapi::experimental::external_semaphore_handle_type::
timeline_fd:
case sycl::ext::oneapi::experimental::external_semaphore_handle_type::
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@

#include <sycl/ext/oneapi/bindless_images.hpp>

#include <d3d11_3.h>
#ifdef TEST_SEMAPHORE_IMPORT
#include <d3d11_4.h> // Used for ID3D11Device5 / ID3D11DeviceContext4 / ID3D11Fence
#else
#include <d3d11_3.h> // Used for ID3D11Device3
#endif // TEST_SEMAPHORE_IMPORT

#include <limits>

Expand Down Expand Up @@ -76,6 +80,25 @@ syclImportTextureMem(HANDLE sharedHandle, size_t allocationSize,
return syclImageHandle;
}

#ifdef TEST_SEMAPHORE_IMPORT
syclexp::external_semaphore syclImportDX11FenceSemaphore(HANDLE sharedHandle,
sycl::queue queue) {
syclexp::external_semaphore_descriptor<syclexp::resource_win32_handle>
semDesc{sharedHandle,
syclexp::external_semaphore_handle_type::win32_nt_dx11_fence};
auto ret = syclexp::import_external_semaphore(semDesc, queue);
return ret;
}

void waitD3D11Fence(ID3D11Fence *fence, UINT64 value, HANDLE eventHandle,
DWORD msecTimeout = INFINITE) {
ThrowIfFailed(fence->SetEventOnCompletion(value, eventHandle));
if (WaitForSingleObject(eventHandle, msecTimeout) != WAIT_OBJECT_0) {
throw std::runtime_error("Timed out waiting for D3D11 fence.");
}
}
#endif // TEST_SEMAPHORE_IMPORT

template <int NDims, typename DType, int NChannels>
void callSyclKernel(sycl::queue syclQueue,
syclexp::unsampled_image_handle syclImageHandle,
Expand All @@ -86,8 +109,8 @@ void callSyclKernel(sycl::queue syclQueue,
using VecType = sycl::vec<DType, NChannels>;

// All we are doing is doubling the value of each pixel in the texture.
syclQueue
.submit([&](sycl::handler &cgh) {
auto e =
syclQueue.submit([&](sycl::handler &cgh) {
cgh.parallel_for(
sycl::nd_range<NDims>{globalSize, localSize},
[=](sycl::nd_item<NDims> it) {
Expand Down Expand Up @@ -128,8 +151,11 @@ void callSyclKernel(sycl::queue syclQueue,
syclexp::write_image(imgHandle, int(dim0), px);
}
});
})
.wait_and_throw();
});
#ifndef TEST_SEMAPHORE_IMPORT
e.wait_and_throw();
#endif

// Instead of wait_and_throw here, we may want to import and use the
// ID3D11Fence interface to synchronize the SYCL queue with the D3D11
// device by signaling the completion of the work and waiting for it on
Expand Down Expand Up @@ -288,6 +314,32 @@ int runTest(D3D11ProgramState &d3d11ProgramState, sycl::queue syclQueue,
ThrowIfFailed(texture.As(&keyedMutex));
d3d11ProgramState.key = 0;

#ifdef TEST_SEMAPHORE_IMPORT
ComPtr<ID3D11Device5> device5;
ThrowIfFailed(pDevice->QueryInterface(IID_PPV_ARGS(&device5)));

ComPtr<ID3D11DeviceContext4> context4;
ThrowIfFailed(
d3d11ProgramState.deviceContext->QueryInterface(IID_PPV_ARGS(&context4)));

ComPtr<ID3D11Fence> fence;
uint64_t fenceVal = 0;
ThrowIfFailed(device5->CreateFence(fenceVal, D3D11_FENCE_FLAG_SHARED,
IID_PPV_ARGS(&fence)));

HANDLE sharedFence = INVALID_HANDLE_VALUE;
ThrowIfFailed(
fence->CreateSharedHandle(nullptr, GENERIC_ALL, nullptr, &sharedFence));

syclexp::external_semaphore syclSemaphore =
syclImportDX11FenceSemaphore(sharedFence, syclQueue);

HANDLE fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
if (fenceEvent == nullptr) {
ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()));
}
#endif // TEST_SEMAPHORE_IMPORT

// Create an NT handle to a shared resource referring to our texture.
// Opening the shared resource gives access to it for use on the SYCL device.
ComPtr<IDXGIResource1> sharedResource;
Expand Down Expand Up @@ -329,6 +381,11 @@ int runTest(D3D11ProgramState &d3d11ProgramState, sycl::queue syclQueue,
syclexp::unsampled_image_handle syclImageHandle = syclImportTextureMem(
sharedHandle, allocationSize, syclImageDesc, syclQueue);

#ifdef TEST_SEMAPHORE_IMPORT
ThrowIfFailed(context4->Signal(fence.Get(), fenceVal));
syclQueue.ext_oneapi_wait_external_semaphore(syclSemaphore, fenceVal);
fenceVal++;
#endif
// Submit the SYCL kernel.
// When IDXGIKeyedMutex importing into SYCL is implemented, we'll be able to
// call it from the SYCL API. All it does is ensuring only one device has
Expand All @@ -339,6 +396,14 @@ int runTest(D3D11ProgramState &d3d11ProgramState, sycl::queue syclQueue,
// Back to the D3D11 process
ThrowIfFailed(keyedMutex->ReleaseSync(d3d11ProgramState.key));

#ifdef TEST_SEMAPHORE_IMPORT
syclQueue.submit([&](sycl::handler &cgh) {
cgh.ext_oneapi_signal_external_semaphore(syclSemaphore, fenceVal);
});
waitD3D11Fence(fence.Get(), fenceVal, fenceEvent);
fenceVal++;
#endif

// Read-back and verify
int errc = 1;
if (ComPtr<ID3D11Resource> resource; SUCCEEDED(texture.As(&resource))) {
Expand All @@ -352,6 +417,11 @@ int runTest(D3D11ProgramState &d3d11ProgramState, sycl::queue syclQueue,
// cleanup of the shared handle.
CloseNTHandle(sharedHandle);

#ifdef TEST_SEMAPHORE_IMPORT
CloseNTHandle(sharedFence);
CloseNTHandle(fenceEvent);
#endif

#ifdef VERBOSE_PRINT
if (errc == 1) {
std::cerr << "\tTest failed: NDims " << NDims << " NChannels " << NChannels
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// REQUIRES: aspect-ext_oneapi_external_memory_import
// REQUIRES: aspect-ext_oneapi_external_semaphore_import
// REQUIRES: windows

// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/15851

// RUN: %{build} %link-directx -o %t.out
// RUN: %{run-unfiltered-devices} %t.out

#define TEST_SEMAPHORE_IMPORT
// FIXME large image size fails in semaphore tests.
#define TEST_SMALL_IMAGE_SIZE
#include "read_write_unsampled.cpp"
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ ComPtr<IDXGIAdapter1> getDXGIHardwareAdapter(IDXGIFactory1 *pFactory,

adapterName = getD3DDeviceName(desc);
#ifdef VERBOSE_PRINT
std::cout << "Considering D3D device: " << name << std::endl;
std::cout << "Considering D3D device: " << adapterName << std::endl;
#endif
// Try matching SYCL device name with D3D device name
// TODO: This should be replaced by LUID matching
Expand Down
1 change: 1 addition & 0 deletions unified-runtime/scripts/core/EXP-BINDLESS-IMAGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ Enums
* ${X}_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT_DX12_FENCE
* ${X}_EXP_EXTERNAL_SEMAPHORE_TYPE_TIMELINE_FD
* ${X}_EXP_EXTERNAL_SEMAPHORE_TYPE_TIMELINE_WIN32_NT
* ${X}_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT_DX11_FENCE

* ${x}_function_t
* ${X}_FUNCTION_USM_PITCHED_ALLOC_EXP
Expand Down
2 changes: 2 additions & 0 deletions unified-runtime/scripts/core/exp-bindless-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,8 @@ etors:
desc: "Timeline semaphore opaque file descriptor"
- name: TIMELINE_WIN32_NT
desc: "Timeline semaphore Win32 NT handle"
- name: WIN32_NT_DX11_FENCE
desc: "Fence semaphore Win32 NT DirectX 11 handle"
--- #--------------------------------------------------------------------------
type: enum
desc: "Indicates the type of image backing memory handle."
Expand Down
3 changes: 3 additions & 0 deletions unified-runtime/source/adapters/cuda/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1734,6 +1734,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp(
case UR_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT_DX12_FENCE:
extSemDesc.type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE;
break;
case UR_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT_DX11_FENCE:
extSemDesc.type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE;
break;
case UR_EXP_EXTERNAL_SEMAPHORE_TYPE_TIMELINE_WIN32_NT:
extSemDesc.type =
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32;
Expand Down
3 changes: 3 additions & 0 deletions unified-runtime/source/adapters/hip/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1561,6 +1561,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp(
case UR_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT_DX12_FENCE:
extSemDesc.type = hipExternalSemaphoreHandleTypeD3D12Fence;
break;
case UR_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT_DX11_FENCE:
extSemDesc.type = hipExternalSemaphoreHandleTypeD3D11Fence;
break;
case UR_EXP_EXTERNAL_SEMAPHORE_TYPE_OPAQUE_FD:
[[fallthrough]];
default:
Expand Down
3 changes: 3 additions & 0 deletions unified-runtime/source/adapters/level_zero/image_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1428,6 +1428,9 @@ ur_result_t urBindlessImagesImportExternalSemaphoreExp(
case UR_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT_DX12_FENCE:
SemDesc.flags = ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_D3D12_FENCE;
break;
case UR_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT_DX11_FENCE:
SemDesc.flags = ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_D3D11_FENCE;
break;
case UR_EXP_EXTERNAL_SEMAPHORE_TYPE_TIMELINE_WIN32_NT:
SemDesc.flags =
ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_VK_TIMELINE_SEMAPHORE_WIN32;
Expand Down