diff --git a/test/Feature/HLSLLib/InterlockedAnd.32.test b/test/Feature/HLSLLib/InterlockedAnd.32.test
new file mode 100644
index 000000000..14961fa1b
--- /dev/null
+++ b/test/Feature/HLSLLib/InterlockedAnd.32.test
@@ -0,0 +1,131 @@
+#--- source.hlsl
+
+// This test exercises InterlockedAnd against non-resource (groupshared)
+// destinations. A single threadgroup of 32 threads concurrently updates
+// shared counters, so the test actually exercises atomic behavior.
+//
+// Both the 2-argument and 3-argument overloads are covered for int and uint.
+//
+// Atomicity is verified by starting a counter at 0xFFFFFFFF and having each
+// of 32 threads atomically clear its own unique bit (AND with ~(1 << tid)).
+// If any read-modify-write were non-atomic, some thread's bit clear would
+// be lost and the final value would be non-zero. With true atomicity, the
+// counter must end at exactly 0.
+//
+// For the 3-argument form we additionally verify, per-thread, that the
+// returned "original value" still had this thread's bit set when the AND
+// was performed -- this must always be true under atomic semantics, since
+// no other thread ever clears that bit.
+
+RWStructuredBuffer<uint> OutOrigBitSet : register(u0);
+RWStructuredBuffer<uint> OutFinal : register(u1);
+
+groupshared uint CounterU;       // 3-arg form, unsigned: bit-clear test
+groupshared uint CounterUNoOrig; // 2-arg form, unsigned: bit-clear test
+groupshared int CounterI;        // 3-arg form, signed: bit-clear test on -1
+groupshared uint MaskedU;        // deterministic mask test (all threads
+                                 // AND with the same constant)
+
+groupshared uint OrigBitSet[32]; // per-thread: was my bit set in the
+                                 // original value I observed?
+
+[numthreads(32, 1, 1)]
+void main(uint3 GTID : SV_GroupThreadID) {
+  if (GTID.x == 0) {
+    CounterU = 0xFFFFFFFFu;
+    CounterUNoOrig = 0xFFFFFFFFu;
+    CounterI = -1; // 0xFFFFFFFF as int
+    MaskedU = 0xAAAAAAAAu;
+  }
+  OrigBitSet[GTID.x] = 0;
+  GroupMemoryBarrierWithGroupSync();
+
+  uint ThreadBit = 1u << GTID.x;
+  uint ThreadMask = ~ThreadBit;
+
+  // 3-argument form: capture original, then check our bit was set in it.
+  uint OrigU;
+  InterlockedAnd(CounterU, ThreadMask, OrigU);
+  OrigBitSet[GTID.x] = ((OrigU & ThreadBit) != 0u) ? 1u : 0u;
+
+  // 3-argument form, signed.
+  int OrigI;
+  InterlockedAnd(CounterI, (int)ThreadMask, OrigI);
+
+  // 2-argument form: no original captured.
+  InterlockedAnd(CounterUNoOrig, ThreadMask);
+
+  // 2-argument form: every thread ANDs with the same constant. Result
+  // is deterministic regardless of ordering: 0xAAAAAAAA & 0x0F0F0F0F.
+  InterlockedAnd(MaskedU, 0x0F0F0F0Fu);
+
+  GroupMemoryBarrierWithGroupSync();
+
+  OutOrigBitSet[GTID.x] = OrigBitSet[GTID.x];
+
+  if (GTID.x == 0) {
+    OutFinal[0] = CounterU;       // 0
+    OutFinal[1] = (uint)CounterI; // 0
+    OutFinal[2] = CounterUNoOrig; // 0
+    OutFinal[3] = MaskedU;        // 0xAAAAAAAA & 0x0F0F0F0F = 0x0A0A0A0A
+  }
+}
+
+//--- pipeline.yaml
+
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+Buffers:
+  - Name: OutOrigBitSet
+    Format: UInt32
+    Stride: 4
+    FillSize: 128
+  - Name: ExpectedOrigBitSet
+    Format: UInt32
+    Stride: 4
+    Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ]
+  - Name: OutFinal
+    Format: UInt32
+    Stride: 4
+    FillSize: 16
+  - Name: ExpectedFinal
+    Format: UInt32
+    Stride: 4
+    Data: [ 0, 0, 0, 0x0A0A0A0A ]
+Results:
+  - Result: TestOrigBitSet
+    Rule: BufferExact
+    Actual: OutOrigBitSet
+    Expected: ExpectedOrigBitSet
+  - Result: TestFinal
+    Rule: BufferExact
+    Actual: OutFinal
+    Expected: ExpectedFinal
+DescriptorSets:
+  - Resources:
+    - Name: OutOrigBitSet
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: OutFinal
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# Unimplemented: https://github.com/llvm/llvm-project/issues/99125
+# XFAIL: Clang
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/Feature/HLSLLib/InterlockedAnd.int64.test b/test/Feature/HLSLLib/InterlockedAnd.int64.test
new file mode 100644
index 000000000..e53ae234d
--- /dev/null
+++ b/test/Feature/HLSLLib/InterlockedAnd.int64.test
@@ -0,0 +1,137 @@
+#--- source.hlsl
+
+// This test exercises InterlockedAnd against non-resource (groupshared)
+// destinations using 64-bit integer types. A single threadgroup of 64
+// threads concurrently updates shared counters, so the test actually
+// exercises atomic behavior.
+//
+// Both the 2-argument and 3-argument overloads are covered for int64_t
+// and uint64_t.
+//
+// Atomicity is verified by starting a counter at 0xFFFFFFFFFFFFFFFF and
+// having each of 64 threads atomically clear its own unique bit
+// (AND with ~(1ull << tid)). If any read-modify-write were non-atomic,
+// some thread's bit clear would be lost and the final value would be
+// non-zero. With true atomicity, the counter must end at exactly 0.
+//
+// For the 3-argument form we additionally verify, per-thread, that the
+// returned "original value" still had this thread's bit set when the AND
+// was performed -- this must always be true under atomic semantics, since
+// no other thread ever clears that bit.
+
+RWStructuredBuffer<uint> OutOrigBitSet : register(u0);
+RWStructuredBuffer<uint64_t> OutFinal : register(u1);
+
+groupshared uint64_t CounterU;       // 3-arg form, unsigned: bit-clear test
+groupshared uint64_t CounterUNoOrig; // 2-arg form, unsigned: bit-clear test
+groupshared int64_t CounterI;        // 3-arg form, signed: bit-clear test on -1
+groupshared uint64_t MaskedU;        // deterministic mask test (all threads
+                                     // AND with the same constant)
+
+groupshared uint OrigBitSet[64];     // per-thread: was my bit set in the
+                                     // original value I observed?
+
+[numthreads(64, 1, 1)]
+void main(uint3 GTID : SV_GroupThreadID) {
+  if (GTID.x == 0) {
+    CounterU = 0xFFFFFFFFFFFFFFFFull;
+    CounterUNoOrig = 0xFFFFFFFFFFFFFFFFull;
+    CounterI = (int64_t)-1; // all bits set
+    MaskedU = 0xAAAAAAAAAAAAAAAAull;
+  }
+  OrigBitSet[GTID.x] = 0;
+  GroupMemoryBarrierWithGroupSync();
+
+  uint64_t ThreadBit = 1ull << GTID.x;
+  uint64_t ThreadMask = ~ThreadBit;
+
+  // 3-argument form: capture original, then check our bit was set in it.
+  uint64_t OrigU;
+  InterlockedAnd(CounterU, ThreadMask, OrigU);
+  OrigBitSet[GTID.x] = ((OrigU & ThreadBit) != 0ull) ? 1u : 0u;
+
+  // 3-argument form, signed.
+  int64_t OrigI;
+  InterlockedAnd(CounterI, (int64_t)ThreadMask, OrigI);
+
+  // 2-argument form: no original captured.
+  InterlockedAnd(CounterUNoOrig, ThreadMask);
+
+  // 2-argument form: every thread ANDs with the same constant. Result
+  // is deterministic regardless of ordering. The mask exercises both the
+  // low and high 32-bit halves.
+  InterlockedAnd(MaskedU, 0x0F0F0F0F0F0F0F0Full);
+
+  GroupMemoryBarrierWithGroupSync();
+
+  OutOrigBitSet[GTID.x] = OrigBitSet[GTID.x];
+
+  if (GTID.x == 0) {
+    OutFinal[0] = CounterU;           // 0
+    OutFinal[1] = (uint64_t)CounterI; // 0
+    OutFinal[2] = CounterUNoOrig;     // 0
+    OutFinal[3] = MaskedU;            // 0x0A0A0A0A0A0A0A0A
+  }
+}
+
+//--- pipeline.yaml
+
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+Buffers:
+  - Name: OutOrigBitSet
+    Format: UInt32
+    Stride: 4
+    FillSize: 256
+  - Name: ExpectedOrigBitSet
+    Format: UInt32
+    Stride: 4
+    Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ]
+  - Name: OutFinal
+    Format: UInt64
+    Stride: 8
+    FillSize: 32
+  - Name: ExpectedFinal
+    Format: UInt64
+    Stride: 8
+    Data: [ 0, 0, 0, 0x0A0A0A0A0A0A0A0A ]
+Results:
+  - Result: TestOrigBitSet
+    Rule: BufferExact
+    Actual: OutOrigBitSet
+    Expected: ExpectedOrigBitSet
+  - Result: TestFinal
+    Rule: BufferExact
+    Actual: OutFinal
+    Expected: ExpectedFinal
+DescriptorSets:
+  - Resources:
+    - Name: OutOrigBitSet
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: OutFinal
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# Unimplemented: https://github.com/llvm/llvm-project/issues/99125
+# XFAIL: Clang
+
+# REQUIRES: Int64
+# RUN: split-file %s %t
+# RUN: %dxc_target -HV 202x -T cs_6_6 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/Feature/HLSLLib/InterlockedAnd.resources.32.test b/test/Feature/HLSLLib/InterlockedAnd.resources.32.test
new file mode 100644
index 000000000..2673d23fa
--- /dev/null
+++ b/test/Feature/HLSLLib/InterlockedAnd.resources.32.test
@@ -0,0 +1,228 @@
+#--- source.hlsl
+
+// This test exercises InterlockedAnd against the full set of HLSL-legal
+// non-groupshared (resource) destination types:
+//
+//   * RWStructuredBuffer<uint>[i]            (free function, 2-arg)
+//   * RWStructuredBuffer<int>[i]             (free function, 3-arg, signed)
+//   * RWBuffer<uint>[i]                      (free function on typed UAV)
+//   * RWTexture2D<uint>[coord]               (free function on typed UAV)
+//   * RWByteAddressBuffer::InterlockedAnd    (method, 2-arg and 3-arg)
+//
+// For each destination, 32 threads of a single threadgroup atomically AND
+// `~(1 << tid)` into a single accumulator slot whose initial value is
+// 0xFFFFFFFF. With correct atomic semantics every thread's bit-clear must
+// take effect, so each accumulator must end at exactly 0. If any
+// read-modify-write were lost, the corresponding bit would remain set.
+//
+// Per-thread sanity for the 3-argument forms: the returned "original"
+// value must have *this* thread's bit set, since no other thread ever
+// clears that bit. Failures here would indicate the wrong value was
+// returned for `original_value`.
+
+RWStructuredBuffer<uint> SBufU : register(u0);
+RWStructuredBuffer<int> SBufI : register(u1);
+RWBuffer<uint> TBufU : register(u2);
+RWTexture2D<uint> Tex2D : register(u3);
+RWByteAddressBuffer BABuf : register(u4);
+
+RWStructuredBuffer<uint> OutOrigBitSetSBufI : register(u5);
+RWStructuredBuffer<uint> OutOrigBitSetBABuf : register(u6);
+
+[numthreads(32, 1, 1)]
+void main(uint3 GTID : SV_GroupThreadID) {
+  if (GTID.x == 0) {
+    SBufU[0] = 0xFFFFFFFFu;
+    SBufI[0] = -1; // 0xFFFFFFFF as int
+    TBufU[0] = 0xFFFFFFFFu;
+    Tex2D[uint2(0, 0)] = 0xFFFFFFFFu;
+    BABuf.Store(0, 0xFFFFFFFFu); // accumulator for 2-arg method form
+    BABuf.Store(4, 0xFFFFFFFFu); // accumulator for 3-arg method form
+  }
+  OutOrigBitSetSBufI[GTID.x] = 0u;
+  OutOrigBitSetBABuf[GTID.x] = 0u;
+  DeviceMemoryBarrierWithGroupSync();
+
+  uint ThreadBit = 1u << GTID.x;
+  uint ThreadMask = ~ThreadBit;
+
+  // RWStructuredBuffer<uint>: 2-argument free function.
+  InterlockedAnd(SBufU[0], ThreadMask);
+
+  // RWStructuredBuffer<int>: 3-argument free function (signed).
+  int OrigI;
+  InterlockedAnd(SBufI[0], (int)ThreadMask, OrigI);
+  OutOrigBitSetSBufI[GTID.x] = (((uint)OrigI & ThreadBit) != 0u) ? 1u : 0u;
+
+  // RWBuffer<uint> (typed buffer UAV): 2-argument free function.
+  InterlockedAnd(TBufU[0], ThreadMask);
+
+  // RWTexture2D<uint>: 2-argument free function.
+  InterlockedAnd(Tex2D[uint2(0, 0)], ThreadMask);
+
+  // RWByteAddressBuffer method: 2-argument form.
+  BABuf.InterlockedAnd(0, ThreadMask);
+
+  // RWByteAddressBuffer method: 3-argument form.
+  uint OrigBA;
+  BABuf.InterlockedAnd(4, ThreadMask, OrigBA);
+  OutOrigBitSetBABuf[GTID.x] = ((OrigBA & ThreadBit) != 0u) ? 1u : 0u;
+}
+
+//--- pipeline.yaml
+
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+Buffers:
+  - Name: SBufU
+    Format: UInt32
+    Stride: 4
+    FillSize: 4
+  - Name: ExpectedSBufU
+    Format: UInt32
+    Stride: 4
+    Data: [ 0 ]
+  - Name: SBufI
+    Format: Int32
+    Stride: 4
+    FillSize: 4
+  - Name: ExpectedSBufI
+    Format: Int32
+    Stride: 4
+    Data: [ 0 ]
+  - Name: TBufU
+    Format: UInt32
+    Channels: 1
+    FillSize: 4
+  - Name: ExpectedTBufU
+    Format: UInt32
+    Channels: 1
+    Data: [ 0 ]
+  - Name: Tex2D
+    Format: UInt32
+    Channels: 1
+    FillSize: 4
+    OutputProps:
+      Height: 1
+      Width: 1
+      Depth: 1
+  - Name: ExpectedTex2D
+    Format: UInt32
+    Channels: 1
+    Data: [ 0 ]
+    OutputProps:
+      Height: 1
+      Width: 1
+      Depth: 1
+  - Name: BABuf
+    Format: Hex32
+    Stride: 4
+    FillSize: 8
+  - Name: ExpectedBABuf
+    Format: Hex32
+    Stride: 4
+    Data: [ 0x00000000, 0x00000000 ]
+  - Name: OutOrigBitSetSBufI
+    Format: UInt32
+    Stride: 4
+    FillSize: 128
+  - Name: ExpectedOrigBitSet
+    Format: UInt32
+    Stride: 4
+    Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ]
+  - Name: OutOrigBitSetBABuf
+    Format: UInt32
+    Stride: 4
+    FillSize: 128
+Results:
+  - Result: TestSBufU
+    Rule: BufferExact
+    Actual: SBufU
+    Expected: ExpectedSBufU
+  - Result: TestSBufI
+    Rule: BufferExact
+    Actual: SBufI
+    Expected: ExpectedSBufI
+  - Result: TestTBufU
+    Rule: BufferExact
+    Actual: TBufU
+    Expected: ExpectedTBufU
+  - Result: TestTex2D
+    Rule: BufferExact
+    Actual: Tex2D
+    Expected: ExpectedTex2D
+  - Result: TestBABuf
+    Rule: BufferExact
+    Actual: BABuf
+    Expected: ExpectedBABuf
+  - Result: TestOrigBitSetSBufI
+    Rule: BufferExact
+    Actual: OutOrigBitSetSBufI
+    Expected: ExpectedOrigBitSet
+  - Result: TestOrigBitSetBABuf
+    Rule: BufferExact
+    Actual: OutOrigBitSetBABuf
+    Expected: ExpectedOrigBitSet
+DescriptorSets:
+  - Resources:
+    - Name: SBufU
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: SBufI
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+    - Name: TBufU
+      Kind: RWBuffer
+      DirectXBinding:
+        Register: 2
+        Space: 0
+      VulkanBinding:
+        Binding: 2
+    - Name: Tex2D
+      Kind: RWTexture2D
+      DirectXBinding:
+        Register: 3
+        Space: 0
+      VulkanBinding:
+        Binding: 3
+    - Name: BABuf
+      Kind: RWByteAddressBuffer
+      DirectXBinding:
+        Register: 4
+        Space: 0
+      VulkanBinding:
+        Binding: 4
+    - Name: OutOrigBitSetSBufI
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 5
+        Space: 0
+      VulkanBinding:
+        Binding: 5
+    - Name: OutOrigBitSetBABuf
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 6
+        Space: 0
+      VulkanBinding:
+        Binding: 6
+...
+#--- end
+
+# Unimplemented: https://github.com/llvm/llvm-project/issues/99125
+# XFAIL: Clang
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/Feature/HLSLLib/InterlockedAnd.resources.int64.test b/test/Feature/HLSLLib/InterlockedAnd.resources.int64.test
new file mode 100644
index 000000000..148a41118
--- /dev/null
+++ b/test/Feature/HLSLLib/InterlockedAnd.resources.int64.test
@@ -0,0 +1,178 @@
+#--- source.hlsl
+
+// This test exercises InterlockedAnd against the HLSL-legal 64-bit
+// non-groupshared (resource) destination types. 64-bit atomics on UAVs
+// were introduced in Shader Model 6.6.
+//
+//   * RWStructuredBuffer<uint64_t>[i]          (free function, 2-arg)
+//   * RWStructuredBuffer<int64_t>[i]           (free function, 3-arg, signed)
+//   * RWByteAddressBuffer::InterlockedAnd64    (method, 2-arg and 3-arg)
+//
+// 64-bit atomics on typed RWBuffer / RWTexture require the optional
+// "AtomicInt64OnTypedResource" capability and are intentionally not
+// covered here, to keep this test portable across implementations.
+//
+// For each destination, 64 threads of a single threadgroup atomically
+// AND `~(1ull << tid)` into a single accumulator slot whose initial value
+// is 0xFFFFFFFFFFFFFFFF. With correct atomic semantics every thread's
+// bit-clear must take effect, so each accumulator must end at exactly 0.
+// If any read-modify-write were lost, the corresponding bit would remain
+// set.
+//
+// Per-thread sanity for the 3-argument forms: the returned "original"
+// value must have *this* thread's bit set, since no other thread ever
+// clears that bit.
+
+RWStructuredBuffer<uint64_t> SBufU : register(u0);
+RWStructuredBuffer<int64_t> SBufI : register(u1);
+RWByteAddressBuffer BABuf : register(u2);
+
+RWStructuredBuffer<uint> OutOrigBitSetSBufI : register(u3);
+RWStructuredBuffer<uint> OutOrigBitSetBABuf : register(u4);
+
+[numthreads(64, 1, 1)]
+void main(uint3 GTID : SV_GroupThreadID) {
+  if (GTID.x == 0) {
+    SBufU[0] = 0xFFFFFFFFFFFFFFFFull;
+    SBufI[0] = (int64_t)-1;
+    BABuf.Store<uint64_t>(0, 0xFFFFFFFFFFFFFFFFull); // accumulator for 2-arg method form
+    BABuf.Store<uint64_t>(8, 0xFFFFFFFFFFFFFFFFull); // accumulator for 3-arg method form
+  }
+  OutOrigBitSetSBufI[GTID.x] = 0u;
+  OutOrigBitSetBABuf[GTID.x] = 0u;
+  DeviceMemoryBarrierWithGroupSync();
+
+  uint64_t ThreadBit = 1ull << GTID.x;
+  uint64_t ThreadMask = ~ThreadBit;
+
+  // RWStructuredBuffer<uint64_t>: 2-argument free function.
+  InterlockedAnd(SBufU[0], ThreadMask);
+
+  // RWStructuredBuffer<int64_t>: 3-argument free function (signed).
+  int64_t OrigI;
+  InterlockedAnd(SBufI[0], (int64_t)ThreadMask, OrigI);
+  OutOrigBitSetSBufI[GTID.x] = (((uint64_t)OrigI & ThreadBit) != 0ull) ? 1u : 0u;
+
+  // RWByteAddressBuffer 64-bit method: 2-argument form.
+  BABuf.InterlockedAnd64(0, ThreadMask);
+
+  // RWByteAddressBuffer 64-bit method: 3-argument form.
+  uint64_t OrigBA;
+  BABuf.InterlockedAnd64(8, ThreadMask, OrigBA);
+  OutOrigBitSetBABuf[GTID.x] = ((OrigBA & ThreadBit) != 0ull) ? 1u : 0u;
+}
+
+//--- pipeline.yaml
+
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+Buffers:
+  - Name: SBufU
+    Format: UInt64
+    Stride: 8
+    FillSize: 8
+  - Name: ExpectedSBufU
+    Format: UInt64
+    Stride: 8
+    Data: [ 0 ]
+  - Name: SBufI
+    Format: Int64
+    Stride: 8
+    FillSize: 8
+  - Name: ExpectedSBufI
+    Format: Int64
+    Stride: 8
+    Data: [ 0 ]
+  - Name: BABuf
+    Format: Hex64
+    Stride: 8
+    FillSize: 16
+  - Name: ExpectedBABuf
+    Format: Hex64
+    Stride: 8
+    Data: [ 0x0000000000000000, 0x0000000000000000 ]
+  - Name: OutOrigBitSetSBufI
+    Format: UInt32
+    Stride: 4
+    FillSize: 256
+  - Name: ExpectedOrigBitSet
+    Format: UInt32
+    Stride: 4
+    Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ]
+  - Name: OutOrigBitSetBABuf
+    Format: UInt32
+    Stride: 4
+    FillSize: 256
+Results:
+  - Result: TestSBufU
+    Rule: BufferExact
+    Actual: SBufU
+    Expected: ExpectedSBufU
+  - Result: TestSBufI
+    Rule: BufferExact
+    Actual: SBufI
+    Expected: ExpectedSBufI
+  - Result: TestBABuf
+    Rule: BufferExact
+    Actual: BABuf
+    Expected: ExpectedBABuf
+  - Result: TestOrigBitSetSBufI
+    Rule: BufferExact
+    Actual: OutOrigBitSetSBufI
+    Expected: ExpectedOrigBitSet
+  - Result: TestOrigBitSetBABuf
+    Rule: BufferExact
+    Actual: OutOrigBitSetBABuf
+    Expected: ExpectedOrigBitSet
+DescriptorSets:
+  - Resources:
+    - Name: SBufU
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: SBufI
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+    - Name: BABuf
+      Kind: RWByteAddressBuffer
+      DirectXBinding:
+        Register: 2
+        Space: 0
+      VulkanBinding:
+        Binding: 2
+    - Name: OutOrigBitSetSBufI
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 3
+        Space: 0
+      VulkanBinding:
+        Binding: 3
+    - Name: OutOrigBitSetBABuf
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 4
+        Space: 0
+      VulkanBinding:
+        Binding: 4
+...
+#--- end
+
+# Unimplemented: https://github.com/llvm/llvm-project/issues/99125
+# XFAIL: Clang
+
+# REQUIRES: Int64
+# RUN: split-file %s %t
+# RUN: %dxc_target -HV 202x -T cs_6_6 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/Feature/HLSLLib/InterlockedAnd.resources.typed.int64.test b/test/Feature/HLSLLib/InterlockedAnd.resources.typed.int64.test
new file mode 100644
index 000000000..563f942d2
--- /dev/null
+++ b/test/Feature/HLSLLib/InterlockedAnd.resources.typed.int64.test
@@ -0,0 +1,130 @@
+#--- source.hlsl
+
+// This test exercises InterlockedAnd on 64-bit typed RWBuffer resources.
+// 64-bit atomics on typed UAVs are an optional Shader Model 6.6 capability
+// (D3D12 cap AtomicInt64OnTypedResourceSupported, Vulkan feature
+// shaderBufferInt64Atomics), surfaced to lit as `Int64TypedResourceAtomics`.
+//
+//   * RWBuffer<uint64_t>[i]    (free function, 2-arg)
+//   * RWBuffer<int64_t>[i]     (free function, 3-arg, signed)
+//
+// 64-bit typed-image atomics (RWTexture) live behind an additional
+// Vulkan extension (VK_EXT_shader_image_atomic_int64) and are not covered
+// here.
+//
+// 64 threads of a single threadgroup atomically AND `~(1ull << tid)` into
+// a single accumulator slot whose initial value is 0xFFFFFFFFFFFFFFFF.
+// With correct atomic semantics every thread's bit-clear must take
+// effect, so each accumulator must end at exactly 0. If any
+// read-modify-write were lost, the corresponding bit would remain set.
+//
+// Per-thread sanity for the 3-argument form: the returned "original"
+// value must have *this* thread's bit set, since no other thread ever
+// clears that bit.
+
+RWBuffer<uint64_t> TBufU : register(u0);
+RWBuffer<int64_t> TBufI : register(u1);
+
+RWStructuredBuffer<uint> OutOrigBitSet : register(u2);
+
+[numthreads(64, 1, 1)]
+void main(uint3 GTID : SV_GroupThreadID) {
+  if (GTID.x == 0) {
+    TBufU[0] = 0xFFFFFFFFFFFFFFFFull;
+    TBufI[0] = (int64_t)-1;
+  }
+  OutOrigBitSet[GTID.x] = 0u;
+  DeviceMemoryBarrierWithGroupSync();
+
+  uint64_t ThreadBit = 1ull << GTID.x;
+  uint64_t ThreadMask = ~ThreadBit;
+
+  // RWBuffer<uint64_t>: 2-argument free function.
+  InterlockedAnd(TBufU[0], ThreadMask);
+
+  // RWBuffer<int64_t>: 3-argument free function (signed).
+  int64_t OrigI;
+  InterlockedAnd(TBufI[0], (int64_t)ThreadMask, OrigI);
+  OutOrigBitSet[GTID.x] = (((uint64_t)OrigI & ThreadBit) != 0ull) ? 1u : 0u;
+}
+
+//--- pipeline.yaml
+
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+Buffers:
+  - Name: TBufU
+    Format: UInt64
+    Channels: 1
+    FillSize: 8
+  - Name: ExpectedTBufU
+    Format: UInt64
+    Channels: 1
+    Data: [ 0 ]
+  - Name: TBufI
+    Format: Int64
+    Channels: 1
+    FillSize: 8
+  - Name: ExpectedTBufI
+    Format: Int64
+    Channels: 1
+    Data: [ 0 ]
+  - Name: OutOrigBitSet
+    Format: UInt32
+    Stride: 4
+    FillSize: 256
+  - Name: ExpectedOrigBitSet
+    Format: UInt32
+    Stride: 4
+    Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ]
+Results:
+  - Result: TestTBufU
+    Rule: BufferExact
+    Actual: TBufU
+    Expected: ExpectedTBufU
+  - Result: TestTBufI
+    Rule: BufferExact
+    Actual: TBufI
+    Expected: ExpectedTBufI
+  - Result: TestOrigBitSet
+    Rule: BufferExact
+    Actual: OutOrigBitSet
+    Expected: ExpectedOrigBitSet
+DescriptorSets:
+  - Resources:
+    - Name: TBufU
+      Kind: RWBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: TBufI
+      Kind: RWBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+    - Name: OutOrigBitSet
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 2
+        Space: 0
+      VulkanBinding:
+        Binding: 2
+...
+#--- end
+
+# Unimplemented: https://github.com/llvm/llvm-project/issues/99125
+# XFAIL: Clang
+
+# REQUIRES: Int64TypedResourceAtomics
+# RUN: split-file %s %t
+# RUN: %dxc_target -HV 202x -T cs_6_6 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o