diff --git a/include/API/CommandBuffer.h b/include/API/CommandBuffer.h index 3a2bc3cff..aa8ba37ed 100644 --- a/include/API/CommandBuffer.h +++ b/include/API/CommandBuffer.h @@ -19,12 +19,22 @@ #include "API/API.h" #include "API/Encoder.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Error.h" #include namespace offloadtest { +class RenderPass; +class Texture; + +struct RenderPassBeginDesc { + RenderPass *Pass = nullptr; + llvm::SmallVector ColorAttachments; + Texture *DepthStencil = nullptr; +}; + class CommandBuffer { GPUAPI Kind; @@ -44,6 +54,15 @@ class CommandBuffer { std::errc::not_supported, "createComputeEncoder not implemented for this backend"); } + + /// Create a render command encoder for recording draw commands. + virtual llvm::Expected> + createRenderEncoder(const RenderPassBeginDesc &Desc) { + (void)Desc; + return llvm::createStringError( + std::errc::not_supported, + "createRenderEncoder not implemented for this backend"); + } }; } // namespace offloadtest diff --git a/include/API/Device.h b/include/API/Device.h index 050dab32d..0a9474766 100644 --- a/include/API/Device.h +++ b/include/API/Device.h @@ -18,6 +18,7 @@ #include "API/Buffer.h" #include "API/Capabilities.h" #include "API/CommandBuffer.h" +#include "API/RenderPass.h" #include "API/Texture.h" #include "Support/Pipeline.h" @@ -175,6 +176,9 @@ class Device { virtual llvm::Expected> createTexture(std::string Name, const TextureCreateDesc &Desc) = 0; + virtual llvm::Expected> + createRenderPass(const RenderPassDesc &Desc) = 0; + virtual void printExtra(llvm::raw_ostream &OS) {} virtual llvm::Expected> diff --git a/include/API/Encoder.h b/include/API/Encoder.h index 09f7c9192..3b57f0d05 100644 --- a/include/API/Encoder.h +++ b/include/API/Encoder.h @@ -20,6 +20,7 @@ namespace offloadtest { class Buffer; +class PipelineState; /// Base class for all command encoders. An encoder records commands into a /// command buffer. Call endEncoding() when done recording. Barriers are @@ -42,12 +43,6 @@ class CommandEncoder { GPUAPI getAPI() const { return API; } bool isEnded() const { return Ended; } - /// Copy \p Size bytes from \p Src at \p SrcOffset to \p Dst at - /// \p DstOffset. - virtual llvm::Error copyBufferToBuffer(Buffer &Src, size_t SrcOffset, - Buffer &Dst, size_t DstOffset, - size_t Size) = 0; - /// Begin a named debug group. Visible in GPU debuggers (PIX, RenderDoc, /// Xcode). Must be balanced by a corresponding popDebugGroup() call. virtual void pushDebugGroup(llvm::StringRef Label) {} @@ -80,6 +75,44 @@ class ComputeEncoder : public CommandEncoder { /// pipeline state (e.g. the shader's numthreads attribute). virtual llvm::Error dispatch(uint32_t GroupCountX, uint32_t GroupCountY, uint32_t GroupCountZ) = 0; + + /// Copy \p Size bytes from \p Src at \p SrcOffset to \p Dst at + /// \p DstOffset. + virtual llvm::Error copyBufferToBuffer(Buffer &Src, size_t SrcOffset, + Buffer &Dst, size_t DstOffset, + size_t Size) = 0; +}; + +struct Viewport { + float X = 0.0f, Y = 0.0f; + float Width = 0.0f, Height = 0.0f; + float MinDepth = 0.0f, MaxDepth = 1.0f; +}; + +struct ScissorRect { + int32_t X = 0, Y = 0; + uint32_t Width = 0, Height = 0; +}; + +class RenderEncoder : public CommandEncoder { +public: + using CommandEncoder::CommandEncoder; + + virtual void setViewport(const Viewport &VP) = 0; + virtual void setScissor(const ScissorRect &Rect) = 0; + + virtual void setVertexBuffer(uint32_t Slot, Buffer *VB, size_t Offset, + uint32_t Stride) = 0; + + virtual llvm::Error drawInstanced(const PipelineState &PSO, + uint32_t VertexCount, + uint32_t InstanceCount, + uint32_t FirstVertex = 0, + uint32_t FirstInstance = 0) = 0; + + virtual llvm::Error dispatchMesh(const PipelineState &PSO, + uint32_t GroupCountX, uint32_t GroupCountY, + uint32_t GroupCountZ) = 0; }; } // namespace offloadtest diff --git a/include/API/Enums.h b/include/API/Enums.h index e0d4360cf..7d113ddab 100644 --- a/include/API/Enums.h +++ b/include/API/Enums.h @@ -31,6 +31,19 @@ enum ShaderContainerType { Metal, }; +/// Action applied to an attachment when a render pass begins. +enum class LoadAction { + Load, ///< Preserve existing contents. + Clear, ///< Clear to the texture's OptimizedClearValue at encoder time. + DontCare, ///< Contents are undefined; the driver may discard. +}; + +/// Action applied to an attachment when a render pass ends. +enum class StoreAction { + Store, ///< Write the rendered contents back to memory. + DontCare, ///< Contents may be discarded after the pass. +}; + } // namespace offloadtest #endif // OFFLOADTEST_API_ENUMS_H diff --git a/include/API/RenderPass.h b/include/API/RenderPass.h new file mode 100644 index 000000000..fe621cbca --- /dev/null +++ b/include/API/RenderPass.h @@ -0,0 +1,65 @@ +//===- RenderPass.h - Offload API Render Pass -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the RenderPass abstract base class. A RenderPass describes the +// formats and load / store actions of one or more attachments. It carries no +// reference to specific textures: those are bound at encoder creation time. +// Backends that have a corresponding native object (Vulkan's VkRenderPass) +// build it once at creation time so it can be reused across encoders. +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOADTEST_API_RENDERPASS_H +#define OFFLOADTEST_API_RENDERPASS_H + +#include "API/API.h" +#include "API/Enums.h" +#include "API/Resources.h" + +#include "llvm/ADT/SmallVector.h" + +#include + +namespace offloadtest { + +struct ColorAttachmentFormatDesc { + Format Fmt; + LoadAction Load = LoadAction::Clear; + StoreAction Store = StoreAction::Store; +}; + +struct DepthStencilAttachmentFormatDesc { + Format Fmt; + LoadAction DepthLoad = LoadAction::Clear; + StoreAction DepthStore = StoreAction::Store; + LoadAction StencilLoad = LoadAction::DontCare; + StoreAction StencilStore = StoreAction::DontCare; +}; + +struct RenderPassDesc { + llvm::SmallVector ColorAttachments; + std::optional DepthStencil; +}; + +class RenderPass { + GPUAPI API; + +public: + virtual ~RenderPass(); + RenderPass(const RenderPass &) = delete; + RenderPass &operator=(const RenderPass &) = delete; + + GPUAPI getAPI() const { return API; } + +protected: + explicit RenderPass(GPUAPI API) : API(API) {} +}; + +} // namespace offloadtest + +#endif // OFFLOADTEST_API_RENDERPASS_H diff --git a/include/API/Texture.h b/include/API/Texture.h index 1be43ebae..26b9b030f 100644 --- a/include/API/Texture.h +++ b/include/API/Texture.h @@ -150,6 +150,7 @@ class Texture { Texture &operator=(const Texture &) = delete; GPUAPI getAPI() const { return API; } + virtual const TextureCreateDesc &getDesc() const = 0; protected: explicit Texture(GPUAPI API) : API(API) {} diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index c38ca24b1..7c5825bc3 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -26,15 +26,25 @@ namespace offloadtest { -enum class Stages { Compute, Vertex, Pixel }; +enum class Stages { + // Compute + Compute, + + // Traditional Raster + Vertex, + Pixel, + + // Mesh Shader + Amplification, + Mesh +}; inline constexpr std::array AllStages = { - Stages::Compute, - Stages::Vertex, - Stages::Pixel, + Stages::Compute, Stages::Vertex, Stages::Pixel, + Stages::Amplification, Stages::Mesh, }; inline constexpr size_t NumStages = AllStages.size(); -enum class ShaderPipelineKind { Compute, TraditionalRaster }; +enum class ShaderPipelineKind { Compute, TraditionalRaster, MeshShaderRaster }; enum class Rule { BufferExact, BufferFloatULP, BufferFloatEpsilon }; @@ -383,11 +393,11 @@ struct VertexAttribute { struct IOBindings { std::string VertexBuffer; - CPUBuffer *VertexBufferPtr; + CPUBuffer *VertexBufferPtr = nullptr; llvm::SmallVector VertexAttributes; std::string RenderTarget; - CPUBuffer *RTargetBufferPtr; + CPUBuffer *RTargetBufferPtr = nullptr; uint32_t getVertexStride() const { uint32_t Stride = 0; @@ -502,6 +512,12 @@ struct Pipeline { bool isTraditionalRaster() const { return Kind == ShaderPipelineKind::TraditionalRaster; } + bool isMeshShaderRaster() const { + return Kind == ShaderPipelineKind::MeshShaderRaster; + } + bool isRaster() const { + return isTraditionalRaster() || isMeshShaderRaster(); + } }; } // namespace offloadtest @@ -712,6 +728,8 @@ template <> struct ScalarEnumerationTraits { ENUM_CASE(Compute); ENUM_CASE(Vertex); ENUM_CASE(Pixel); + ENUM_CASE(Amplification); + ENUM_CASE(Mesh); #undef ENUM_CASE } }; diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index aabba47c6..32e01492d 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -361,6 +361,8 @@ class DXTexture : public offloadtest::Texture { : offloadtest::Texture(GPUAPI::DirectX), Resource(Resource), Name(Name), Desc(Desc) {} + const TextureCreateDesc &getDesc() const override { return Desc; } + static bool classof(const offloadtest::Texture *T) { return T->getAPI() == GPUAPI::DirectX; } @@ -505,7 +507,7 @@ class DXQueue : public offloadtest::Queue { class DXCommandBuffer : public offloadtest::CommandBuffer { public: ComPtr Allocator; - ComPtr CmdList; + ComPtr CmdList; /// Whether a UAV barrier is pending from a prior compute command. bool PendingUAVBarrier = false; @@ -546,6 +548,9 @@ class DXCommandBuffer : public offloadtest::CommandBuffer { llvm::Expected> createComputeEncoder() override; + llvm::Expected> + createRenderEncoder(const offloadtest::RenderPassBeginDesc &Desc) override; + private: DXCommandBuffer() : CommandBuffer(GPUAPI::DirectX) {} }; @@ -652,6 +657,213 @@ DXCommandBuffer::createComputeEncoder() { return Enc; } +class DXRenderPass final : public offloadtest::RenderPass { +public: + offloadtest::RenderPassDesc Desc; + + explicit DXRenderPass(offloadtest::RenderPassDesc Desc) + : RenderPass(GPUAPI::DirectX), Desc(std::move(Desc)) {} + + static bool classof(const offloadtest::RenderPass *RP) { + return RP->getAPI() == GPUAPI::DirectX; + } +}; + +class DXRenderEncoder : public offloadtest::RenderEncoder { + DXCommandBuffer &CB; + + // Encoder contract: viewport and scissor must both be set before draw(). + bool ViewportSet = false; + bool ScissorSet = false; + + llvm::Error bindCommonDrawState(const offloadtest::PipelineState &PSO) { + if (!ViewportSet) + return llvm::createStringError(std::errc::invalid_argument, + "Viewport must be set before drawing."); + if (!ScissorSet) + return llvm::createStringError(std::errc::invalid_argument, + "Scissor must be set before drawing."); + + const auto &DXPSO = llvm::cast(PSO); + CB.CmdList->SetGraphicsRootSignature(DXPSO.RootSig.Get()); + CB.CmdList->SetPipelineState(DXPSO.PSO.Get()); + CB.CmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + return llvm::Error::success(); + } + +public: + DXRenderEncoder(DXCommandBuffer &CB) + : RenderEncoder(GPUAPI::DirectX), CB(CB) {} + + ~DXRenderEncoder() override { endEncoding(); } + + static bool classof(const CommandEncoder *E) { + return E->getAPI() == GPUAPI::DirectX; + } + + // See DXComputeEncoder for why these are no-ops. + void pushDebugGroup(llvm::StringRef Label) override {} + void popDebugGroup() override {} + void insertDebugSignpost(llvm::StringRef Label) override {} + + void setViewport(const offloadtest::Viewport &VP) override { + D3D12_VIEWPORT DXVP = {}; + DXVP.TopLeftX = VP.X; + DXVP.TopLeftY = VP.Y; + DXVP.Width = VP.Width; + DXVP.Height = VP.Height; + DXVP.MinDepth = VP.MinDepth; + DXVP.MaxDepth = VP.MaxDepth; + CB.CmdList->RSSetViewports(1, &DXVP); + ViewportSet = true; + } + + void setScissor(const offloadtest::ScissorRect &Rect) override { + const D3D12_RECT DXRect = {Rect.X, Rect.Y, + static_cast(Rect.X + Rect.Width), + static_cast(Rect.Y + Rect.Height)}; + CB.CmdList->RSSetScissorRects(1, &DXRect); + ScissorSet = true; + } + + void setVertexBuffer(uint32_t Slot, offloadtest::Buffer *VB, size_t Offset, + uint32_t Stride) override { + if (!VB) { + CB.CmdList->IASetVertexBuffers(Slot, 1, nullptr); + return; + } + auto &DXVB = llvm::cast(*VB); + D3D12_VERTEX_BUFFER_VIEW VBView = {}; + VBView.BufferLocation = DXVB.Buffer->GetGPUVirtualAddress() + Offset; + VBView.SizeInBytes = static_cast(DXVB.getSizeInBytes() - Offset); + VBView.StrideInBytes = Stride; + CB.CmdList->IASetVertexBuffers(Slot, 1, &VBView); + } + + llvm::Error drawInstanced(const offloadtest::PipelineState &PSO, + uint32_t VertexCount, uint32_t InstanceCount, + uint32_t FirstVertex, + uint32_t FirstInstance) override { + if (auto Err = bindCommonDrawState(PSO)) + return Err; + CB.CmdList->DrawInstanced(VertexCount, InstanceCount, FirstVertex, + FirstInstance); + return llvm::Error::success(); + } + + llvm::Error dispatchMesh(const offloadtest::PipelineState &PSO, + uint32_t GroupCountX, uint32_t GroupCountY, + uint32_t GroupCountZ) override { + if (auto Err = bindCommonDrawState(PSO)) + return Err; + CB.CmdList->DispatchMesh(GroupCountX, GroupCountY, GroupCountZ); + return llvm::Error::success(); + } + + void endEncodingImpl() override { popDebugGroup(); } +}; + +llvm::Expected> +DXCommandBuffer::createRenderEncoder( + const offloadtest::RenderPassBeginDesc &Desc) { + // The pass carries format / load / store policy; the begin desc supplies + // the actual textures. Walk both in lockstep. + if (!Desc.Pass) + return llvm::createStringError( + std::errc::invalid_argument, + "RenderPassBeginDesc is missing its RenderPass."); + auto &DXPass = llvm::cast(*Desc.Pass); + const offloadtest::RenderPassDesc &PassDesc = DXPass.Desc; + + if (Desc.ColorAttachments.size() != PassDesc.ColorAttachments.size()) + return llvm::createStringError( + std::errc::invalid_argument, + "RenderPassBeginDesc color attachment count does not match its " + "RenderPass."); + if (PassDesc.DepthStencil.has_value() != (Desc.DepthStencil != nullptr)) + return llvm::createStringError(std::errc::invalid_argument, + "RenderPassBeginDesc depth-stencil " + "presence does not match its RenderPass."); + + // Validate attachments and gather the RTV / DSV CPU handles. RT and DSV + // descriptors are owned by the textures themselves; this just collects + // them for OMSetRenderTargets. + llvm::SmallVector RTTextures; + llvm::SmallVector RTVHandles; + RTTextures.reserve(Desc.ColorAttachments.size()); + RTVHandles.reserve(Desc.ColorAttachments.size()); + for (offloadtest::Texture *Tex : Desc.ColorAttachments) { + if (!Tex) + return llvm::createStringError( + std::errc::invalid_argument, + "RenderPassBeginDesc has a null color attachment texture."); + auto &DXTex = llvm::cast(*Tex); + if (DXTex.RTVHandle.ptr == 0) + return llvm::createStringError( + std::errc::invalid_argument, + "Color attachment texture was not created with RenderTarget usage."); + RTTextures.push_back(&DXTex); + RTVHandles.push_back(DXTex.RTVHandle); + } + + DXTexture *DSTexture = nullptr; + D3D12_CPU_DESCRIPTOR_HANDLE DSVHandle = {}; + const D3D12_CPU_DESCRIPTOR_HANDLE *DSVPtr = nullptr; + if (Desc.DepthStencil) { + auto &DXDS = llvm::cast(*Desc.DepthStencil); + if (DXDS.DSVHandle.ptr == 0) + return llvm::createStringError( + std::errc::invalid_argument, + "Depth-stencil texture was not created with DepthStencil usage."); + DSTexture = &DXDS; + DSVHandle = DXDS.DSVHandle; + DSVPtr = &DSVHandle; + } + + CmdList->OMSetRenderTargets( + static_cast(RTVHandles.size()), RTVHandles.data(), + /*RTsSingleHandleToDescriptorRange=*/false, DSVPtr); + + for (size_t I = 0; I < PassDesc.ColorAttachments.size(); ++I) { + if (PassDesc.ColorAttachments[I].Load != offloadtest::LoadAction::Clear) + continue; + if (!RTTextures[I]->Desc.OptimizedClearValue) + return llvm::createStringError( + std::errc::invalid_argument, + "LoadAction::Clear requires the render target to have been " + "created with an OptimizedClearValue."); + const auto *CV = + std::get_if(&*RTTextures[I]->Desc.OptimizedClearValue); + assert(CV && "RenderTarget OptimizedClearValue must be a ClearColor"); + const float ClearArr[4] = {CV->R, CV->G, CV->B, CV->A}; + CmdList->ClearRenderTargetView(RTVHandles[I], ClearArr, 0, nullptr); + } + if (PassDesc.DepthStencil) { + D3D12_CLEAR_FLAGS Flags = static_cast(0); + if (PassDesc.DepthStencil->DepthLoad == offloadtest::LoadAction::Clear) + Flags |= D3D12_CLEAR_FLAG_DEPTH; + if (PassDesc.DepthStencil->StencilLoad == offloadtest::LoadAction::Clear) + Flags |= D3D12_CLEAR_FLAG_STENCIL; + if (Flags != 0) { + if (!DSTexture->Desc.OptimizedClearValue) + return llvm::createStringError( + std::errc::invalid_argument, + "LoadAction::Clear requires the depth-stencil texture to have " + "been created with an OptimizedClearValue."); + const auto *CV = + std::get_if(&*DSTexture->Desc.OptimizedClearValue); + assert(CV && + "DepthStencil OptimizedClearValue must be a ClearDepthStencil"); + CmdList->ClearDepthStencilView(DSVHandle, Flags, CV->Depth, CV->Stencil, + 0, nullptr); + } + } + + auto Enc = std::make_unique(*this); + Enc->pushDebugGroup("RenderEncoder"); + return Enc; +} + class DXDevice : public offloadtest::Device { private: ComPtr Adapter; @@ -687,9 +899,10 @@ class DXDevice : public offloadtest::Device { std::unique_ptr Pipeline; // Resources for graphics pipelines. - std::unique_ptr RT; + std::unique_ptr RenderPass; + std::unique_ptr RenderTarget; std::unique_ptr RTReadback; - std::unique_ptr DS; + std::unique_ptr DepthStencil; std::unique_ptr VB; llvm::SmallVector DescTables; @@ -930,6 +1143,90 @@ class DXDevice : public offloadtest::Device { return std::make_unique(Name, RootSig, PSO); } + llvm::Expected> + createPipelineAsMsPs(llvm::StringRef Name, const BindingsDesc &BindingsDesc, + llvm::ArrayRef RTFormats, + std::optional DSFormat, + std::optional AS, ShaderContainer MS, + std::optional PS) /*override*/ { + assert(RTFormats.size() <= 8); + + ComPtr RootSig; + if (auto Err = createRootSignature(Name, BindingsDesc, MS, + /*IsGraphics=*/true, RootSig)) + return Err; + + D3D12_SHADER_BYTECODE MSBytecode = {MS.Shader->getBuffer().data(), + MS.Shader->getBuffer().size()}; + if (MSBytecode.BytecodeLength == 0) + return llvm::createStringError( + std::errc::invalid_argument, + "Mesh shader pipeline requires a mesh shader."); + + // The amplification (task) shader is optional. + D3D12_SHADER_BYTECODE ASBytecode = {}; + if (AS) { + assert((*AS).Shader->getBufferSize() > 0 && + "The passed task/amplification shader was empty."); + ASBytecode = {(*AS).Shader->getBuffer().data(), + (*AS).Shader->getBuffer().size()}; + } + + // The pixel shader is optional + D3D12_SHADER_BYTECODE PSBytecode = {}; + if (PS) { + assert((*PS).Shader->getBufferSize() > 0 && + "The passed pixel shader was empty."); + PSBytecode = {(*PS).Shader->getBuffer().data(), + (*PS).Shader->getBuffer().size()}; + } + + D3D12_RT_FORMAT_ARRAY RTArray = {}; + RTArray.NumRenderTargets = static_cast(RTFormats.size()); + for (size_t I = 0; I < RTFormats.size(); ++I) + RTArray.RTFormats[I] = getDXGIFormat(RTFormats[I]); + + CD3DX12_DEPTH_STENCIL_DESC1 DepthStencil(D3D12_DEFAULT); + DepthStencil.DepthEnable = true; + DepthStencil.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + DepthStencil.DepthFunc = D3D12_COMPARISON_FUNC_LESS; + DepthStencil.StencilEnable = false; + + DXGI_SAMPLE_DESC SampleDesc = {}; + SampleDesc.Count = 1; + + CD3DX12_PIPELINE_MESH_STATE_STREAM Stream; + Stream.pRootSignature = RootSig.Get(); + Stream.AS = ASBytecode; + Stream.MS = MSBytecode; + Stream.PS = PSBytecode; + Stream.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); + Stream.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); + Stream.DepthStencilState = DepthStencil; + Stream.SampleMask = UINT_MAX; + Stream.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + Stream.RTVFormats = RTArray; + if (DSFormat) + Stream.DSVFormat = getDXGIFormat(*DSFormat); + Stream.SampleDesc = SampleDesc; + + const D3D12_PIPELINE_STATE_STREAM_DESC StreamDesc = {sizeof(Stream), + &Stream}; + + ComPtr Device2; + if (auto Err = + HR::toError(Device.As(&Device2), "Failed to query ID3D12Device2.")) + return Err; + + ComPtr PSO; + if (auto Err = HR::toError( + Device2->CreatePipelineState(&StreamDesc, IID_PPV_ARGS(&PSO)), + "Failed to create mesh shader PSO.")) + return Err; + + return std::make_unique(Name, RootSig, PSO); + } + llvm::Expected> createFence(llvm::StringRef Name) override { return DXFence::create(Device.Get(), Name); @@ -1155,6 +1452,11 @@ class DXDevice : public offloadtest::Device { return DXCommandBuffer::create(Device); } + llvm::Expected> + createRenderPass(const offloadtest::RenderPassDesc &Desc) override { + return std::make_unique(Desc); + } + void addResourceUploadCommands(Resource &R, InvocationState &IS, ComPtr Destination, ComPtr Source) { @@ -1638,6 +1940,16 @@ class DXDevice : public offloadtest::Device { if (auto Err = CreateBuffer(Resource, IS.RootResources)) return Err; } + + if (P.isTraditionalRaster() && P.Bindings.VertexBufferPtr) { + auto VBOrErr = offloadtest::createVertexBufferFromCPUBuffer( + *this, *P.Bindings.VertexBufferPtr); + if (!VBOrErr) + return VBOrErr.takeError(); + IS.VB = std::move(*VBOrErr); + llvm::outs() << "Vertex buffer created.\n"; + } + return llvm::Error::success(); } @@ -1857,7 +2169,7 @@ class DXDevice : public offloadtest::Device { // Query the copy footprint to get the actual padded row pitch used by // the copy operation (D3D12 requires 256-byte aligned rows). - auto &RT = llvm::cast(*IS.RT); + auto &RT = llvm::cast(*IS.RenderTarget); const D3D12_RESOURCE_DESC RTDesc = RT.Resource->GetDesc(); D3D12_PLACED_SUBRESOURCE_FOOTPRINT Placed = {}; uint32_t NumRows = 0; @@ -1883,7 +2195,7 @@ class DXDevice : public offloadtest::Device { if (!TexOrErr) return TexOrErr.takeError(); - IS.RT = std::move(*TexOrErr); + IS.RenderTarget = std::move(*TexOrErr); // Create readback buffer sized for the pixel data with row pitch padded // up to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT, which is what D3D12 requires @@ -1907,43 +2219,15 @@ class DXDevice : public offloadtest::Device { P.Bindings.RTargetBufferPtr->OutputProps.Height); if (!TexOrErr) return TexOrErr.takeError(); - IS.DS = std::move(*TexOrErr); - return llvm::Error::success(); - } - - llvm::Error createVertexBuffer(Pipeline &P, InvocationState &IS) { - if (!P.Bindings.VertexBufferPtr) - return llvm::createStringError( - std::errc::invalid_argument, - "No vertex buffer bound for graphics pipeline."); - - auto VBOrErr = offloadtest::createVertexBufferFromCPUBuffer( - *this, *P.Bindings.VertexBufferPtr); - if (!VBOrErr) - return VBOrErr.takeError(); - IS.VB = std::move(*VBOrErr); - - auto &VBBuf = llvm::cast(*IS.VB); - D3D12_VERTEX_BUFFER_VIEW VBView = {}; - VBView.BufferLocation = VBBuf.Buffer->GetGPUVirtualAddress(); - VBView.SizeInBytes = static_cast(IS.VB->getSizeInBytes()); - VBView.StrideInBytes = P.Bindings.getVertexStride(); - - IS.CB->CmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - IS.CB->CmdList->IASetVertexBuffers(0, 1, &VBView); - + IS.DepthStencil = std::move(*TexOrErr); return llvm::Error::success(); } llvm::Error createGraphicsCommands(Pipeline &P, InvocationState &IS) { - auto &RT = llvm::cast(*IS.RT); - auto &DS = llvm::cast(*IS.DS); + auto &RT = llvm::cast(*IS.RenderTarget); + auto &DS = llvm::cast(*IS.DepthStencil); auto &RTReadback = llvm::cast(*IS.RTReadback); - if (!IS.VB) - return llvm::createStringError(std::errc::invalid_argument, - "Vertex buffer not initialized."); - const DXPipelineState &DXPipeline = llvm::cast(*IS.Pipeline.get()); IS.CB->CmdList->SetGraphicsRootSignature(DXPipeline.RootSig.Get()); @@ -1953,35 +2237,47 @@ class DXDevice : public offloadtest::Device { IS.CB->CmdList->SetGraphicsRootDescriptorTable( 0, IS.DescHeap->GetGPUDescriptorHandleForHeapStart()); } - IS.CB->CmdList->SetPipelineState(DXPipeline.PSO.Get()); - IS.CB->CmdList->OMSetRenderTargets(1, &RT.RTVHandle, false, &DS.DSVHandle); + RenderPassBeginDesc BeginDesc = {}; + BeginDesc.Pass = IS.RenderPass.get(); + BeginDesc.ColorAttachments.push_back(&RT); + BeginDesc.DepthStencil = &DS; - const auto *DepthCV = - std::get_if(&*DS.Desc.OptimizedClearValue); - if (!DepthCV) - return llvm::createStringError( - std::errc::invalid_argument, - "Depth/stencil clear value must be a ClearDepthStencil."); - IS.CB->CmdList->ClearDepthStencilView( - DS.DSVHandle, D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL, - DepthCV->Depth, DepthCV->Stencil, 0, nullptr); + auto EncOrErr = IS.CB->createRenderEncoder(BeginDesc); + if (!EncOrErr) + return EncOrErr.takeError(); + auto &Encoder = *EncOrErr.get(); - D3D12_VIEWPORT VP = {}; + Viewport VP; VP.Width = static_cast(P.Bindings.RTargetBufferPtr->OutputProps.Width); VP.Height = static_cast(P.Bindings.RTargetBufferPtr->OutputProps.Height); - VP.MinDepth = 0.0f; - VP.MaxDepth = 1.0f; - VP.TopLeftX = 0.0f; - VP.TopLeftY = 0.0f; - IS.CB->CmdList->RSSetViewports(1, &VP); - const D3D12_RECT Scissor = {0, 0, static_cast(VP.Width), - static_cast(VP.Height)}; - IS.CB->CmdList->RSSetScissorRects(1, &Scissor); + Encoder.setViewport(VP); + + ScissorRect Scissor; + Scissor.Width = static_cast(VP.Width); + Scissor.Height = static_cast(VP.Height); + Encoder.setScissor(Scissor); + + if (P.isTraditionalRaster()) { + if (IS.VB) + Encoder.setVertexBuffer(0, IS.VB.get(), 0, + P.Bindings.getVertexStride()); - IS.CB->CmdList->DrawInstanced(P.getVertexCount(), 1, 0, 0); + if (auto Err = + Encoder.drawInstanced(*IS.Pipeline.get(), P.getVertexCount(), + /*InstanceCount=*/1)) + return Err; + } else { + if (auto Err = Encoder.dispatchMesh( + *IS.Pipeline.get(), P.DispatchParameters.DispatchGroupCount[0], + P.DispatchParameters.DispatchGroupCount[1], + P.DispatchParameters.DispatchGroupCount[2])) + return Err; + } + + Encoder.endEncoding(); // Transition the render target to copy source and copy to the readback // buffer. @@ -2076,6 +2372,19 @@ class DXDevice : public offloadtest::Device { BindingsDesc.DescriptorSetDescs.push_back(Layout); } + if (P.isRaster()) { + // Create render target, depth/stencil, readback and vertex buffer and + // PSO. + if (auto Err = createRenderTarget(P, State)) + return Err; + llvm::outs() << "Render target created.\n"; + // TODO: Always created for graphics pipelines. Consider making this + // conditional on the pipeline definition. + if (auto Err = createDepthStencil(P, State)) + return Err; + llvm::outs() << "Depth stencil created.\n"; + } + if (P.isCompute()) { // This is an arbitrary distinction that we could alter in the future. if (P.Shaders.size() != 1 || P.Shaders[0].Stage != Stages::Compute) @@ -2097,68 +2406,118 @@ class DXDevice : public offloadtest::Device { return Err; llvm::outs() << "Compute command list created.\n"; - } else if (P.isTraditionalRaster()) { - // Create render target, depth/stencil, readback and vertex buffer and - // PSO. - if (auto Err = createRenderTarget(P, State)) - return Err; - llvm::outs() << "Render target created.\n"; - // TODO: Always created for graphics pipelines. Consider making this - // conditional on the pipeline definition. - if (auto Err = createDepthStencil(P, State)) - return Err; - llvm::outs() << "Depth stencil created.\n"; - if (auto Err = createVertexBuffer(P, State)) - return Err; - llvm::outs() << "Vertex buffer created.\n"; + } else if (P.isRaster()) { + // Begin a render pass: bind RT/DSV and clear depth-stencil. Color + // load action is Load — the existing inline code didn't clear color. + ColorAttachmentFormatDesc ColorAttachment = {}; + ColorAttachment.Fmt = State.RenderTarget->getDesc().Fmt; + ColorAttachment.Load = LoadAction::Load; + ColorAttachment.Store = StoreAction::Store; + + DepthStencilAttachmentFormatDesc DSAttachment = {}; + DSAttachment.Fmt = State.DepthStencil->getDesc().Fmt; + DSAttachment.DepthLoad = LoadAction::Clear; + DSAttachment.DepthStore = StoreAction::Store; + DSAttachment.StencilLoad = LoadAction::DontCare; + DSAttachment.StencilStore = StoreAction::DontCare; + + RenderPassDesc PassDesc; + PassDesc.ColorAttachments.push_back(ColorAttachment); + PassDesc.DepthStencil = DSAttachment; + + auto RenderPassOrErr = createRenderPass(PassDesc); + if (!RenderPassOrErr) + return RenderPassOrErr.takeError(); + State.RenderPass = std::move(*RenderPassOrErr); + llvm::outs() << "Render pass created.\n"; + + if (P.isTraditionalRaster()) { + ShaderContainer VS = {}; + ShaderContainer PS = {}; + for (auto &Shader : P.Shaders) { + if (Shader.Stage == Stages::Vertex) { + VS.EntryPoint = Shader.Entry; + VS.Shader = Shader.Shader.get(); + } else if (Shader.Stage == Stages::Pixel) { + PS.EntryPoint = Shader.Entry; + PS.Shader = Shader.Shader.get(); + } + } - ShaderContainer VS = {}; - ShaderContainer PS = {}; - for (auto &Shader : P.Shaders) { - if (Shader.Stage == Stages::Vertex) { - VS.EntryPoint = Shader.Entry; - VS.Shader = Shader.Shader.get(); - } else if (Shader.Stage == Stages::Pixel) { - PS.EntryPoint = Shader.Entry; - PS.Shader = Shader.Shader.get(); + // Create the input layout based on the vertex attributes. + llvm::SmallVector InputLayout; + for (auto &Attr : P.Bindings.VertexAttributes) { + auto FormatOrErr = toFormat(Attr.Format, Attr.Channels); + if (!FormatOrErr) + return FormatOrErr.takeError(); + + InputLayoutDesc Desc = {}; + Desc.Name = Attr.Name; + Desc.Fmt = *FormatOrErr; + Desc.OffsetInBytes = Attr.Offset; + InputLayout.push_back(Desc); } - } - // Create the input layout based on the vertex attributes. - llvm::SmallVector InputLayout; - for (auto &Attr : P.Bindings.VertexAttributes) { - auto FormatOrErr = toFormat(Attr.Format, Attr.Channels); + auto FormatOrErr = toFormat(P.Bindings.RTargetBufferPtr->Format, + P.Bindings.RTargetBufferPtr->Channels); if (!FormatOrErr) return FormatOrErr.takeError(); - InputLayoutDesc Desc = {}; - Desc.Name = Attr.Name; - Desc.Fmt = *FormatOrErr; - Desc.OffsetInBytes = Attr.Offset; - InputLayout.push_back(Desc); - } + llvm::SmallVector RTFormats; + RTFormats.push_back(*FormatOrErr); + + auto PipelineStateOrErr = createPipelineVsPs( + "Traditional Raster Pipeline State", BindingsDesc, InputLayout, + RTFormats, Format::D32FloatS8Uint, VS, PS); + if (!PipelineStateOrErr) + return PipelineStateOrErr.takeError(); + State.Pipeline = std::move(*PipelineStateOrErr); + llvm::outs() << "Traditional Raster Pipeline created.\n"; + + } else if (P.isMeshShaderRaster()) { + std::optional AS = {}; + ShaderContainer MS = {}; + std::optional PS = {}; + for (auto &Shader : P.Shaders) { + if (Shader.Stage == Stages::Amplification) { + ShaderContainer Container; + Container.EntryPoint = Shader.Entry; + Container.Shader = Shader.Shader.get(); + AS = Container; + } else if (Shader.Stage == Stages::Mesh) { + MS.EntryPoint = Shader.Entry; + MS.Shader = Shader.Shader.get(); + } else if (Shader.Stage == Stages::Pixel) { + ShaderContainer Container; + Container.EntryPoint = Shader.Entry; + Container.Shader = Shader.Shader.get(); + PS = Container; + } + } - auto FormatOrErr = toFormat(P.Bindings.RTargetBufferPtr->Format, - P.Bindings.RTargetBufferPtr->Channels); - if (!FormatOrErr) - return FormatOrErr.takeError(); + auto FormatOrErr = toFormat(P.Bindings.RTargetBufferPtr->Format, + P.Bindings.RTargetBufferPtr->Channels); + if (!FormatOrErr) + return FormatOrErr.takeError(); - llvm::SmallVector RTFormats; - RTFormats.push_back(*FormatOrErr); + llvm::SmallVector RTFormats; + RTFormats.push_back(*FormatOrErr); + + auto PipelineStateOrErr = + createPipelineAsMsPs("Mesh Shader Pipeline State", BindingsDesc, + RTFormats, Format::D32FloatS8Uint, AS, MS, PS); + + if (!PipelineStateOrErr) + return PipelineStateOrErr.takeError(); + State.Pipeline = std::move(*PipelineStateOrErr); + llvm::outs() << "Mesh Shader Pipeline created.\n"; + } - auto PipelineStateOrErr = createPipelineVsPs( - "Graphics Pipeline State", BindingsDesc, InputLayout, RTFormats, - Format::D32FloatS8Uint, VS, PS); - if (!PipelineStateOrErr) - return PipelineStateOrErr.takeError(); - State.Pipeline = std::move(*PipelineStateOrErr); - llvm::outs() << "Graphics Pipeline created.\n"; if (auto Err = createGraphicsCommands(P, State)) return Err; llvm::outs() << "Graphics command list created complete.\n"; } else { - return llvm::createStringError( - "Pipeline was neither Compute nor Traditional Raster"); + return llvm::createStringError("Pipeline was neither Compute nor Raster"); } auto SubmitResult = executeCommandList(State); diff --git a/lib/API/Device.cpp b/lib/API/Device.cpp index 72d1867ab..6eb43aec8 100644 --- a/lib/API/Device.cpp +++ b/lib/API/Device.cpp @@ -34,6 +34,8 @@ Queue::~Queue() {} Texture::~Texture() {} +RenderPass::~RenderPass() {} + Device::~Device() {} llvm::Expected>> diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index a5ce8d3ed..46f32052d 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -234,6 +234,13 @@ class MTLPipelineState : public offloadtest::PipelineState { MTL::ComputePipelineState *ComputePipeline = nullptr; MTL::RenderPipelineState *RenderPipeline = nullptr; + // Rasterization pipeline only state. + // These are part of the pipeline in DX and VK, but dynamic state in Metal. + // To have a shared API we store these here and set the state when the + // pipeline is used. + MTL::DepthStencilState *DepthStencilState = nullptr; + MTL::CullMode CullMode = MTL::CullModeNone; + MTLPipelineState(llvm::StringRef Name, IRRootSignaturePtr RootSig, std::unique_ptr ArgBuffer, IRShaderReflectionPtr Reflection, @@ -244,16 +251,21 @@ class MTLPipelineState : public offloadtest::PipelineState { MTLPipelineState(llvm::StringRef Name, IRRootSignaturePtr RootSig, std::unique_ptr ArgBuffer, - MTL::RenderPipelineState *RenderPipeline) + MTL::RenderPipelineState *RenderPipeline, + MTL::DepthStencilState *DepthStencilState, + MTL::CullMode CullMode) : offloadtest::PipelineState(GPUAPI::Metal), Name(Name), RootSig(std::move(RootSig)), ArgBuffer(std::move(ArgBuffer)), - RenderPipeline(RenderPipeline) {} + RenderPipeline(RenderPipeline), DepthStencilState(DepthStencilState), + CullMode(CullMode) {} ~MTLPipelineState() override { if (ComputePipeline) ComputePipeline->release(); if (RenderPipeline) RenderPipeline->release(); + if (DepthStencilState) + DepthStencilState->release(); } static bool classof(const offloadtest::PipelineState *B) { @@ -314,11 +326,29 @@ class MTLTexture : public offloadtest::Texture { Tex->release(); } + const TextureCreateDesc &getDesc() const override { return Desc; } + static bool classof(const offloadtest::Texture *T) { return T->getAPI() == GPUAPI::Metal; } }; +/// Metal has no standalone render-pass object: render pass info lives on +/// MTLRenderPassDescriptor and is consumed when a render command encoder +/// is created. We therefore just stash the descriptor for the encoder to +/// translate later. +class MTLRenderPass final : public offloadtest::RenderPass { +public: + offloadtest::RenderPassDesc Desc; + + explicit MTLRenderPass(offloadtest::RenderPassDesc Desc) + : RenderPass(GPUAPI::Metal), Desc(std::move(Desc)) {} + + static bool classof(const offloadtest::RenderPass *RP) { + return RP->getAPI() == GPUAPI::Metal; + } +}; + class MTLCommandBuffer : public offloadtest::CommandBuffer { public: MTL::CommandBuffer *CmdBuffer = nullptr; @@ -342,6 +372,9 @@ class MTLCommandBuffer : public offloadtest::CommandBuffer { llvm::Expected> createComputeEncoder() override; + llvm::Expected> + createRenderEncoder(const offloadtest::RenderPassBeginDesc &Desc) override; + private: MTLCommandBuffer() : CommandBuffer(GPUAPI::Metal) {} }; @@ -487,6 +520,13 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder { return llvm::Error::success(); } + llvm::Error dispatchMesh(const offloadtest::PipelineState &PSO, + uint32_t GroupCountX, uint32_t GroupCountY, + uint32_t GroupCountZ) override { + return llvm::createStringError( + "dispatchMesh is unimplemented in the Metal backend."); + } + llvm::Error copyBufferToBuffer(offloadtest::Buffer &Src, size_t SrcOffset, offloadtest::Buffer &Dst, size_t DstOffset, size_t Size) override { @@ -526,6 +566,233 @@ MTLCommandBuffer::createComputeEncoder() { NS::String::string("ComputeEncoder", NS::UTF8StringEncoding)); return std::make_unique(CmdBuffer, NativeEncoder); } + +static MTL::LoadAction getMTLLoadAction(offloadtest::LoadAction Action) { + switch (Action) { + case offloadtest::LoadAction::Load: + return MTL::LoadActionLoad; + case offloadtest::LoadAction::Clear: + return MTL::LoadActionClear; + case offloadtest::LoadAction::DontCare: + return MTL::LoadActionDontCare; + } + llvm_unreachable("All LoadAction cases handled"); +} + +static MTL::StoreAction getMTLStoreAction(offloadtest::StoreAction Action) { + switch (Action) { + case offloadtest::StoreAction::Store: + return MTL::StoreActionStore; + case offloadtest::StoreAction::DontCare: + return MTL::StoreActionDontCare; + } + llvm_unreachable("All StoreAction cases handled"); +} + +class MTLRenderEncoder : public offloadtest::RenderEncoder { + MTL::RenderCommandEncoder *RenderEnc = nullptr; + + // Encoder contract: viewport and scissor must both be set before + // drawInstanced(). + bool ViewportSet = false; + bool ScissorSet = false; + +public: + MTLRenderEncoder(MTL::RenderCommandEncoder *Enc) + : RenderEncoder(GPUAPI::Metal), RenderEnc(Enc) {} + + ~MTLRenderEncoder() override { endEncoding(); } + + static bool classof(const CommandEncoder *E) { + return E->getAPI() == GPUAPI::Metal; + } + + /// Access the underlying Metal encoder for state that the abstract + /// RenderEncoder API does not yet cover (depth-stencil state, cull + /// mode, etc.). Returns nullptr after endEncoding(). + MTL::RenderCommandEncoder *getNative() const { return RenderEnc; } + + void pushDebugGroup(llvm::StringRef Label) override { + if (RenderEnc) + RenderEnc->pushDebugGroup( + NS::String::string(Label.data(), NS::UTF8StringEncoding)); + } + + void popDebugGroup() override { + if (RenderEnc) + RenderEnc->popDebugGroup(); + } + + void insertDebugSignpost(llvm::StringRef Label) override { + if (RenderEnc) + RenderEnc->insertDebugSignpost( + NS::String::string(Label.data(), NS::UTF8StringEncoding)); + } + + void setViewport(const offloadtest::Viewport &VP) override { + RenderEnc->setViewport(MTL::Viewport{ + static_cast(VP.X), static_cast(VP.Y), + static_cast(VP.Width), static_cast(VP.Height), + static_cast(VP.MinDepth), static_cast(VP.MaxDepth)}); + ViewportSet = true; + } + + void setScissor(const offloadtest::ScissorRect &Rect) override { + MTL::ScissorRect MTLRect; + MTLRect.x = static_cast(Rect.X); + MTLRect.y = static_cast(Rect.Y); + MTLRect.width = Rect.Width; + MTLRect.height = Rect.Height; + RenderEnc->setScissorRect(MTLRect); + ScissorSet = true; + } + + void setVertexBuffer(uint32_t Slot, offloadtest::Buffer *VB, size_t Offset, + uint32_t /*Stride*/) override { + // Stride is needed in DX12 at binding time, ignore parameter here. + if (!VB) { + RenderEnc->setVertexBuffer(nullptr, 0, Slot); + return; + } + auto &MTLVB = llvm::cast(*VB); + RenderEnc->setVertexBuffer(MTLVB.Buf, Offset, Slot); + } + + llvm::Error drawInstanced(const offloadtest::PipelineState &PSO, + uint32_t VertexCount, uint32_t InstanceCount, + uint32_t FirstVertex, + uint32_t FirstInstance) override { + if (!ViewportSet) + return llvm::createStringError(std::errc::invalid_argument, + "Viewport must be set before drawing."); + if (!ScissorSet) + return llvm::createStringError(std::errc::invalid_argument, + "Scissor must be set before drawing."); + + const auto &MTLPSO = llvm::cast(PSO); + if (!MTLPSO.RenderPipeline) + return llvm::createStringError( + std::errc::invalid_argument, + "PipelineState bound to drawInstanced() is not a render pipeline."); + RenderEnc->setRenderPipelineState(MTLPSO.RenderPipeline); + if (MTLPSO.DepthStencilState) + RenderEnc->setDepthStencilState(MTLPSO.DepthStencilState); + RenderEnc->setCullMode(MTLPSO.CullMode); + RenderEnc->drawPrimitives(MTL::PrimitiveTypeTriangle, + static_cast(FirstVertex), + static_cast(VertexCount), + static_cast(InstanceCount), + static_cast(FirstInstance)); + return llvm::Error::success(); + } + + void endEncodingImpl() override { + if (RenderEnc) { + RenderEnc->popDebugGroup(); + RenderEnc->endEncoding(); + RenderEnc = nullptr; + } + } +}; + +llvm::Expected> +MTLCommandBuffer::createRenderEncoder( + const offloadtest::RenderPassBeginDesc &Desc) { + if (!Desc.Pass) + return llvm::createStringError( + std::errc::invalid_argument, + "RenderPassBeginDesc is missing its RenderPass."); + auto &Pass = llvm::cast(*Desc.Pass); + const offloadtest::RenderPassDesc &PassDesc = Pass.Desc; + + if (Desc.ColorAttachments.size() != PassDesc.ColorAttachments.size()) + return llvm::createStringError( + std::errc::invalid_argument, + "RenderPassBeginDesc color attachment count does not match its " + "RenderPass."); + if (PassDesc.DepthStencil.has_value() != (Desc.DepthStencil != nullptr)) + return llvm::createStringError(std::errc::invalid_argument, + "RenderPassBeginDesc depth-stencil " + "presence does not match its RenderPass."); + + MTL::RenderPassDescriptor *MTLDesc = + MTL::RenderPassDescriptor::alloc()->init(); + auto DescScope = llvm::scope_exit([&] { MTLDesc->release(); }); + + for (size_t I = 0; I < Desc.ColorAttachments.size(); ++I) { + if (!Desc.ColorAttachments[I]) + return llvm::createStringError( + std::errc::invalid_argument, + "RenderPassBeginDesc has a null color attachment texture."); + auto &Tex = llvm::cast(*Desc.ColorAttachments[I]); + const offloadtest::ColorAttachmentFormatDesc &Color = + PassDesc.ColorAttachments[I]; + + auto *CADesc = MTL::RenderPassColorAttachmentDescriptor::alloc()->init(); + CADesc->setTexture(Tex.Tex); + CADesc->setLoadAction(getMTLLoadAction(Color.Load)); + CADesc->setStoreAction(getMTLStoreAction(Color.Store)); + if (Color.Load == offloadtest::LoadAction::Clear) { + if (!Tex.getDesc().OptimizedClearValue) { + CADesc->release(); + return llvm::createStringError( + std::errc::invalid_argument, + "LoadAction::Clear requires the render target to have been " + "created with an OptimizedClearValue."); + } + const auto *CV = + std::get_if(&*Tex.getDesc().OptimizedClearValue); + assert(CV && "RenderTarget OptimizedClearValue must be a ClearColor"); + CADesc->setClearColor(MTL::ClearColor(CV->R, CV->G, CV->B, CV->A)); + } + MTLDesc->colorAttachments()->setObject(CADesc, I); + CADesc->release(); + } + + if (Desc.DepthStencil) { + auto &Tex = llvm::cast(*Desc.DepthStencil); + const offloadtest::DepthStencilAttachmentFormatDesc &DS = + *PassDesc.DepthStencil; + + auto *DADesc = MTLDesc->depthAttachment(); + DADesc->setTexture(Tex.Tex); + DADesc->setLoadAction(getMTLLoadAction(DS.DepthLoad)); + DADesc->setStoreAction(getMTLStoreAction(DS.DepthStore)); + + auto *SADesc = MTLDesc->stencilAttachment(); + SADesc->setTexture(Tex.Tex); + SADesc->setLoadAction(getMTLLoadAction(DS.StencilLoad)); + SADesc->setStoreAction(getMTLStoreAction(DS.StencilStore)); + + if (DS.DepthLoad == offloadtest::LoadAction::Clear || + DS.StencilLoad == offloadtest::LoadAction::Clear) { + if (!Tex.getDesc().OptimizedClearValue) + return llvm::createStringError( + std::errc::invalid_argument, + "LoadAction::Clear requires the depth-stencil texture to have " + "been created with an OptimizedClearValue."); + const auto *CV = + std::get_if(&*Tex.getDesc().OptimizedClearValue); + assert(CV && + "DepthStencil OptimizedClearValue must be a ClearDepthStencil"); + if (DS.DepthLoad == offloadtest::LoadAction::Clear) + DADesc->setClearDepth(CV->Depth); + if (DS.StencilLoad == offloadtest::LoadAction::Clear) + SADesc->setClearStencil(CV->Stencil); + } + } + + MTL::RenderCommandEncoder *NativeEncoder = + CmdBuffer->renderCommandEncoder(MTLDesc); + if (!NativeEncoder) + return llvm::createStringError( + std::errc::device_or_resource_busy, + "Failed to create Metal render command encoder."); + NativeEncoder->pushDebugGroup( + NS::String::string("RenderEncoder", NS::UTF8StringEncoding)); + return std::make_unique(NativeEncoder); +} + class MTLDevice : public offloadtest::Device { Capabilities Caps; MTL::Device *Device; @@ -552,11 +819,12 @@ class MTLDevice : public offloadtest::Device { NS::AutoreleasePool *Pool = nullptr; std::unique_ptr DescHeap; std::unique_ptr VB; - std::unique_ptr FrameBufferTexture; + std::unique_ptr RenderTarget; std::unique_ptr FrameBufferReadback; std::unique_ptr DepthStencil; std::unique_ptr CB; std::unique_ptr Pipeline; + std::unique_ptr RenderPass; llvm::SmallVector DescTables; // TODO: Support RootResources? @@ -955,17 +1223,13 @@ class MTLDevice : public offloadtest::Device { } } - if (P.isTraditionalRaster()) { - if (!P.Bindings.VertexBufferPtr) - return llvm::createStringError( - std::errc::invalid_argument, - "No vertex buffer specified for graphics pipeline."); - + if (P.isTraditionalRaster() && P.Bindings.VertexBufferPtr) { auto VBOrErr = offloadtest::createVertexBufferFromCPUBuffer( *this, *P.Bindings.VertexBufferPtr); if (!VBOrErr) return VBOrErr.takeError(); IS.VB = std::move(*VBOrErr); + llvm::outs() << "Vertex buffer created.\n"; } return llvm::Error::success(); } @@ -1031,7 +1295,7 @@ class MTLDevice : public offloadtest::Device { if (!TexOrErr) return TexOrErr.takeError(); - IS.FrameBufferTexture = std::move(*TexOrErr); + IS.RenderTarget = std::move(*TexOrErr); // Create a readback buffer for copying render target data to the CPU. BufferCreateDesc BufDesc = {}; @@ -1063,99 +1327,60 @@ class MTLDevice : public offloadtest::Device { if (auto Err = createDepthStencil(P, IS)) return Err; - auto &FBTex = llvm::cast(*IS.FrameBufferTexture); - auto &DS = llvm::cast(*IS.DepthStencil); - auto &FBReadback = llvm::cast(*IS.FrameBufferReadback); - - MTL::RenderPassDescriptor *Desc = - MTL::RenderPassDescriptor::alloc()->init(); - - const uint64_t Width = FBTex.Desc.Width; - const uint64_t Height = FBTex.Desc.Height; - - // Color attachment. - auto *CADesc = MTL::RenderPassColorAttachmentDescriptor::alloc()->init(); - CADesc->setTexture(FBTex.Tex); - CADesc->setLoadAction(MTL::LoadActionClear); - const auto *ColorCV = - std::get_if(&*FBTex.Desc.OptimizedClearValue); - if (!ColorCV) - return llvm::createStringError( - std::errc::invalid_argument, - "Render target clear value must be a ClearColor."); - - CADesc->setClearColor( - MTL::ClearColor(ColorCV->R, ColorCV->G, ColorCV->B, ColorCV->A)); - CADesc->setStoreAction(MTL::StoreActionStore); - Desc->colorAttachments()->setObject(CADesc, 0); - - // Depth/stencil attachment. - const auto *DepthCV = - std::get_if(&*DS.Desc.OptimizedClearValue); - if (!DepthCV) - return llvm::createStringError( - std::errc::invalid_argument, - "Depth/stencil clear value must be a ClearDepthStencil."); - - auto *DADesc = Desc->depthAttachment(); - DADesc->setTexture(DS.Tex); - DADesc->setLoadAction(MTL::LoadActionClear); - DADesc->setClearDepth(DepthCV->Depth); - DADesc->setStoreAction(MTL::StoreActionDontCare); - - auto *SADesc = Desc->stencilAttachment(); - SADesc->setTexture(DS.Tex); - SADesc->setLoadAction(MTL::LoadActionClear); - SADesc->setClearStencil(DepthCV->Stencil); - SADesc->setStoreAction(MTL::StoreActionDontCare); - - MTL::RenderCommandEncoder *CmdEncoder = - IS.CB->CmdBuffer->renderCommandEncoder(Desc); - - const auto &PS = llvm::cast(IS.Pipeline.get()); - CmdEncoder->setRenderPipelineState(PS->RenderPipeline); - - // Configure depth stencil state: depth test enabled, write all, less. - MTL::DepthStencilDescriptor *DSDesc = - MTL::DepthStencilDescriptor::alloc()->init(); - DSDesc->setDepthCompareFunction(MTL::CompareFunctionLess); - DSDesc->setDepthWriteEnabled(true); - MTL::DepthStencilState *DSState = Device->newDepthStencilState(DSDesc); - CmdEncoder->setDepthStencilState(DSState); - DSDesc->release(); - DSState->release(); - - if (IS.DescHeap) { - IS.DescHeap->bind(CmdEncoder); - // NOTE: This code assumes 1 descriptor set (D3D12 backend also assumes - // this) - PS->ArgBuffer->setRootDescriptorTable( - 0, IS.DescHeap->getGPUDescriptorHandleForHeapStart()); + const uint64_t Width = IS.RenderTarget->getDesc().Width; + const uint64_t Height = IS.RenderTarget->getDesc().Height; + + RenderPassBeginDesc BeginDesc = {}; + BeginDesc.Pass = IS.RenderPass.get(); + BeginDesc.ColorAttachments.push_back(IS.RenderTarget.get()); + BeginDesc.DepthStencil = IS.DepthStencil.get(); + + auto EncOrErr = IS.CB->createRenderEncoder(BeginDesc); + if (!EncOrErr) + return EncOrErr.takeError(); + auto &Encoder = *EncOrErr.get(); + + { + auto &MTLEncoder = llvm::cast(Encoder); + const auto &PS = llvm::cast(IS.Pipeline.get()); + auto *CmdEncoder = MTLEncoder.getNative(); + if (IS.DescHeap) { + IS.DescHeap->bind(CmdEncoder); + // NOTE: This code assumes 1 descriptor set (D3D12 backend also assumes + // this) + PS->ArgBuffer->setRootDescriptorTable( + 0, IS.DescHeap->getGPUDescriptorHandleForHeapStart()); + } + PS->ArgBuffer->bind(CmdEncoder); + for (const auto &Table : IS.DescTables) + for (const auto &ResPair : Table.Resources) + for (const auto &ResSet : ResPair.second) + CmdEncoder->useResource(ResSet.Resource.get(), + MTL::ResourceUsageRead | + MTL::ResourceUsageWrite); } - PS->ArgBuffer->bind(CmdEncoder); - for (const auto &Table : IS.DescTables) - for (const auto &ResPair : Table.Resources) - for (const auto &ResSet : ResPair.second) - CmdEncoder->useResource(ResSet.Resource.get(), - MTL::ResourceUsageRead | - MTL::ResourceUsageWrite); - // Explicitly set viewport to texture dimensions. - CmdEncoder->setViewport( - MTL::Viewport{0.0, 0.0, (double)Width, (double)Height, 0.0, 1.0}); - CmdEncoder->setCullMode(MTL::CullModeNone); - CmdEncoder->setFrontFacingWinding(MTL::WindingCounterClockwise); + Viewport VP; + VP.Width = static_cast(Width); + VP.Height = static_cast(Height); + Encoder.setViewport(VP); - // Bind vertex buffer at slot 0 to match the vertex descriptor which - // references buffer index 0. - CmdEncoder->setVertexBuffer(llvm::cast(*IS.VB).Buf, 0, 0); + ScissorRect Scissor; + Scissor.Width = static_cast(Width); + Scissor.Height = static_cast(Height); + Encoder.setScissor(Scissor); - CmdEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), - P.getVertexCount()); + if (IS.VB) + Encoder.setVertexBuffer(0, IS.VB.get(), 0, P.Bindings.getVertexStride()); - CmdEncoder->endEncoding(); + if (auto Err = Encoder.drawInstanced(*IS.Pipeline.get(), P.getVertexCount(), + /*InstanceCount=*/1)) + return Err; + Encoder.endEncoding(); // Blit the render target into the readback buffer for CPU access. + auto &FBTex = llvm::cast(*IS.RenderTarget); + auto &FBReadback = llvm::cast(*IS.FrameBufferReadback); MTL::BlitCommandEncoder *Blit = IS.CB->CmdBuffer->blitCommandEncoder(); const size_t ElemSize = getFormatSizeInBytes(FBTex.Desc.Fmt); const size_t RowBytes = Width * ElemSize; @@ -1204,7 +1429,7 @@ class MTLDevice : public offloadtest::Device { if (auto Err = MemCpyBack(R)) return Err; - if (P.isTraditionalRaster()) { + if (P.isRaster()) { auto &FBReadback = llvm::cast(*IS.FrameBufferReadback); auto *RT = P.Bindings.RTargetBufferPtr; RT->copyFromTexture(FBReadback.Buf->contents(), RT->getImageRowBytes()); @@ -1269,6 +1494,11 @@ class MTLDevice : public offloadtest::Device { return MTLCommandBuffer::create(GraphicsQueue.Queue); } + llvm::Expected> + createRenderPass(const offloadtest::RenderPassDesc &Desc) override { + return std::make_unique(Desc); + } + llvm::Expected> createPipelineCs(llvm::StringRef Name, const BindingsDesc &BindingsDesc, ShaderContainer CS) override { @@ -1388,8 +1618,8 @@ class MTLDevice : public offloadtest::Device { "Mismatch between vertex shader attribute count and pipeline " "vertex input count."); - // Collect the attribute indices the shader expects so that we can map the - // specified attributes onto the correct indices. + // Collect the attribute indices the shader expects so that we can map + // the specified attributes onto the correct indices. llvm::StringMap ShaderAttrIndices; for (uint32_t I = 0; I < FnAttrs->count(); ++I) { auto *A = static_cast(FnAttrs->object(I)); @@ -1459,8 +1689,16 @@ class MTLDevice : public offloadtest::Device { if (Error) return toError(Error); + MTL::DepthStencilDescriptor *DSDesc = + MTL::DepthStencilDescriptor::alloc()->init(); + DSDesc->setDepthCompareFunction(MTL::CompareFunctionLess); + DSDesc->setDepthWriteEnabled(true); + MTL::DepthStencilState *DSState = Device->newDepthStencilState(DSDesc); + DSDesc->release(); + return std::make_unique(Name, std::move(RootSig), - std::move(ArgBuffer), PSO); + std::move(ArgBuffer), PSO, + DSState, MTL::CullModeNone); } llvm::Error executeProgram(Pipeline &P) override { @@ -1552,6 +1790,27 @@ class MTLDevice : public offloadtest::Device { return PipelineStateOrErr.takeError(); IS.Pipeline = std::move(*PipelineStateOrErr); + ColorAttachmentFormatDesc ColorAttachment = {}; + ColorAttachment.Fmt = *FormatOrErr; + ColorAttachment.Load = LoadAction::Clear; + ColorAttachment.Store = StoreAction::Store; + + DepthStencilAttachmentFormatDesc DSAttachment = {}; + DSAttachment.Fmt = Format::D32FloatS8Uint; + DSAttachment.DepthLoad = LoadAction::Clear; + DSAttachment.DepthStore = StoreAction::DontCare; + DSAttachment.StencilLoad = LoadAction::Clear; + DSAttachment.StencilStore = StoreAction::DontCare; + + RenderPassDesc PassDesc; + PassDesc.ColorAttachments.push_back(ColorAttachment); + PassDesc.DepthStencil = DSAttachment; + + auto RenderPassOrErr = createRenderPass(PassDesc); + if (!RenderPassOrErr) + return RenderPassOrErr.takeError(); + IS.RenderPass = std::move(*RenderPassOrErr); + if (auto Err = createGraphicsCommands(P, IS)) return Err; } diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index 6ee8adaa8..dd9b30b30 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -206,6 +206,10 @@ static VkShaderStageFlagBits getShaderStageFlag(Stages Stage) { return VK_SHADER_STAGE_VERTEX_BIT; case Stages::Pixel: return VK_SHADER_STAGE_FRAGMENT_BIT; + case Stages::Mesh: + return VK_SHADER_STAGE_MESH_BIT_EXT; + case Stages::Amplification: + return VK_SHADER_STAGE_TASK_BIT_EXT; } llvm_unreachable("All cases handled"); } @@ -382,6 +386,18 @@ struct VulkanInstance { namespace { +struct MeshShaderFunctions { + PFN_vkCmdDrawMeshTasksEXT VkCmdDrawMeshTasksEXT = nullptr; + + static MeshShaderFunctions create(VkDevice Device) { + MeshShaderFunctions Result; + Result.VkCmdDrawMeshTasksEXT = + (PFN_vkCmdDrawMeshTasksEXT)vkGetDeviceProcAddr(Device, + "vkCmdDrawMeshTasksEXT"); + return Result; + } +}; + class VulkanBuffer : public offloadtest::Buffer { public: VkDevice Dev; // Needed for clean-up @@ -457,6 +473,8 @@ class VulkanTexture : public offloadtest::Texture { vkFreeMemory(Dev, Memory, nullptr); } + const TextureCreateDesc &getDesc() const override { return Desc; } + static bool classof(const offloadtest::Texture *T) { return T->getAPI() == GPUAPI::Vulkan; } @@ -549,6 +567,7 @@ class VulkanQueue : public offloadtest::Queue { class VulkanCommandBuffer : public offloadtest::CommandBuffer { public: VkDevice Device = VK_NULL_HANDLE; + MeshShaderFunctions MeshShaderFns; // Owned per command buffer so that recording, submission, and lifetime // management of each command buffer are independently safe without external // synchronization. @@ -563,7 +582,8 @@ class VulkanCommandBuffer : public offloadtest::CommandBuffer { create(VkDevice Device, uint32_t QueueFamilyIdx, PFN_vkCmdBeginDebugUtilsLabelEXT CmdBeginDebugUtilsLabel, PFN_vkCmdEndDebugUtilsLabelEXT CmdEndDebugUtilsLabel, - PFN_vkCmdInsertDebugUtilsLabelEXT CmdInsertDebugUtilsLabel) { + PFN_vkCmdInsertDebugUtilsLabelEXT CmdInsertDebugUtilsLabel, + MeshShaderFunctions MeshShaderFns) { auto CB = std::unique_ptr(new VulkanCommandBuffer()); CB->Device = Device; @@ -595,6 +615,7 @@ class VulkanCommandBuffer : public offloadtest::CommandBuffer { CB->CmdBeginDebugUtilsLabel = CmdBeginDebugUtilsLabel; CB->CmdEndDebugUtilsLabel = CmdEndDebugUtilsLabel; CB->CmdInsertDebugUtilsLabel = CmdInsertDebugUtilsLabel; + CB->MeshShaderFns = MeshShaderFns; return CB; } @@ -636,7 +657,12 @@ class VulkanCommandBuffer : public offloadtest::CommandBuffer { PendingDstAccess = 0; } + /// Keep-alive list for Framebuffers constructed by RencderEncoders. + llvm::SmallVector OwnedFramebuffers; + ~VulkanCommandBuffer() override { + for (VkFramebuffer FB : OwnedFramebuffers) + vkDestroyFramebuffer(Device, FB, nullptr); if (CmdPool != VK_NULL_HANDLE) vkDestroyCommandPool(Device, CmdPool, nullptr); } @@ -648,6 +674,9 @@ class VulkanCommandBuffer : public offloadtest::CommandBuffer { llvm::Expected> createComputeEncoder() override; + llvm::Expected> + createRenderEncoder(const offloadtest::RenderPassBeginDesc &Desc) override; + private: VulkanCommandBuffer() : CommandBuffer(GPUAPI::Vulkan) {} }; @@ -736,19 +765,15 @@ class VulkanPipelineState : public offloadtest::PipelineState { VkPipeline Pipeline; VkPipelineLayout Layout; llvm::SmallVector SetLayouts; - VkRenderPass RenderPass; VulkanPipelineState(llvm::StringRef Name, VkDevice Dev, VkPipeline Pipeline, VkPipelineLayout Layout, - llvm::SmallVector SetLayouts, - VkRenderPass RenderPass) + llvm::SmallVector SetLayouts) : offloadtest::PipelineState(GPUAPI::Vulkan), Name(Name.str()), Dev(Dev), - Pipeline(Pipeline), Layout(Layout), SetLayouts(std::move(SetLayouts)), - RenderPass(RenderPass) {} + Pipeline(Pipeline), Layout(Layout), SetLayouts(std::move(SetLayouts)) {} ~VulkanPipelineState() override { vkDestroyPipeline(Dev, Pipeline, nullptr); - vkDestroyRenderPass(Dev, RenderPass, nullptr); vkDestroyPipelineLayout(Dev, Layout, nullptr); for (VkDescriptorSetLayout L : SetLayouts) vkDestroyDescriptorSetLayout(Dev, L, nullptr); @@ -758,6 +783,299 @@ class VulkanPipelineState : public offloadtest::PipelineState { return B->getAPI() == GPUAPI::Vulkan; } }; + +static VkAttachmentLoadOp getVkLoadOp(offloadtest::LoadAction Action) { + switch (Action) { + case offloadtest::LoadAction::Load: + return VK_ATTACHMENT_LOAD_OP_LOAD; + case offloadtest::LoadAction::Clear: + return VK_ATTACHMENT_LOAD_OP_CLEAR; + case offloadtest::LoadAction::DontCare: + return VK_ATTACHMENT_LOAD_OP_DONT_CARE; + } + llvm_unreachable("All LoadAction cases handled"); +} + +static VkAttachmentStoreOp getVkStoreOp(offloadtest::StoreAction Action) { + switch (Action) { + case offloadtest::StoreAction::Store: + return VK_ATTACHMENT_STORE_OP_STORE; + case offloadtest::StoreAction::DontCare: + return VK_ATTACHMENT_STORE_OP_DONT_CARE; + } + llvm_unreachable("All StoreAction cases handled"); +} + +class VulkanRenderPass final : public offloadtest::RenderPass { +public: + VkDevice Dev; + VkRenderPass Handle; + offloadtest::RenderPassDesc Desc; + + VulkanRenderPass(VkDevice Dev, VkRenderPass Handle, + offloadtest::RenderPassDesc Desc) + : RenderPass(GPUAPI::Vulkan), Dev(Dev), Handle(Handle), + Desc(std::move(Desc)) {} + + ~VulkanRenderPass() override { + if (Handle != VK_NULL_HANDLE) + vkDestroyRenderPass(Dev, Handle, nullptr); + } + + static bool classof(const offloadtest::RenderPass *RP) { + return RP->getAPI() == GPUAPI::Vulkan; + } +}; + +class VKRenderEncoder : public offloadtest::RenderEncoder { + VulkanCommandBuffer &CB; + + // Encoder contract: viewport and scissor must both be set before draw(). + bool ViewportSet = false; + bool ScissorSet = false; + +public: + VKRenderEncoder(VulkanCommandBuffer &CB) + : RenderEncoder(GPUAPI::Vulkan), CB(CB) {} + + ~VKRenderEncoder() override { endEncoding(); } + + static bool classof(const CommandEncoder *E) { + return E->getAPI() == GPUAPI::Vulkan; + } + + void pushDebugGroup(llvm::StringRef Label) override { + if (!CB.CmdBeginDebugUtilsLabel) + return; + VkDebugUtilsLabelEXT LabelInfo = {}; + LabelInfo.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; + LabelInfo.pLabelName = Label.data(); + CB.CmdBeginDebugUtilsLabel(CB.CmdBuffer, &LabelInfo); + } + + void popDebugGroup() override { + if (CB.CmdEndDebugUtilsLabel) + CB.CmdEndDebugUtilsLabel(CB.CmdBuffer); + } + + void insertDebugSignpost(llvm::StringRef Label) override { + if (!CB.CmdInsertDebugUtilsLabel) + return; + VkDebugUtilsLabelEXT LabelInfo = {}; + LabelInfo.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; + LabelInfo.pLabelName = Label.data(); + CB.CmdInsertDebugUtilsLabel(CB.CmdBuffer, &LabelInfo); + } + + void setViewport(const offloadtest::Viewport &VP) override { + // Negative viewport height (with Y origin at the bottom) flips clip-> + // framebuffer Y the same way DX12 and Metal do, so a CCW-in-clip-space + // triangle is front-facing on every backend. + VkViewport VKVP = {}; + VKVP.x = VP.X; + VKVP.y = VP.Y + VP.Height; + VKVP.width = VP.Width; + VKVP.height = -VP.Height; + VKVP.minDepth = VP.MinDepth; + VKVP.maxDepth = VP.MaxDepth; + vkCmdSetViewport(CB.CmdBuffer, 0, 1, &VKVP); + ViewportSet = true; + } + + void setScissor(const offloadtest::ScissorRect &Rect) override { + VkRect2D VKRect = {}; + VKRect.offset.x = Rect.X; + VKRect.offset.y = Rect.Y; + VKRect.extent.width = Rect.Width; + VKRect.extent.height = Rect.Height; + vkCmdSetScissor(CB.CmdBuffer, 0, 1, &VKRect); + ScissorSet = true; + } + + void setVertexBuffer(uint32_t Slot, offloadtest::Buffer *VB, size_t Offset, + uint32_t /*Stride*/) override { + // Stride is needed in DX12 at binding time, ignore parameter here. + if (!VB) { + VkBuffer NullBuf = VK_NULL_HANDLE; + const VkDeviceSize Zero = 0; + vkCmdBindVertexBuffers(CB.CmdBuffer, Slot, 1, &NullBuf, &Zero); + return; + } + VkBuffer Handle = llvm::cast(*VB).Buffer; + const VkDeviceSize VKOffset = Offset; + vkCmdBindVertexBuffers(CB.CmdBuffer, Slot, 1, &Handle, &VKOffset); + } + + llvm::Error drawInstanced(const offloadtest::PipelineState &PSO, + uint32_t VertexCount, uint32_t InstanceCount, + uint32_t FirstVertex, + uint32_t FirstInstance) override { + if (!ViewportSet) + return llvm::createStringError(std::errc::invalid_argument, + "Viewport must be set before drawing."); + if (!ScissorSet) + return llvm::createStringError(std::errc::invalid_argument, + "Scissor must be set before drawing."); + + const auto &VKPSO = llvm::cast(PSO); + vkCmdBindPipeline(CB.CmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + VKPSO.Pipeline); + vkCmdDraw(CB.CmdBuffer, VertexCount, InstanceCount, FirstVertex, + FirstInstance); + return llvm::Error::success(); + } + + llvm::Error dispatchMesh(const offloadtest::PipelineState &PSO, + uint32_t GroupCountX, uint32_t GroupCountY, + uint32_t GroupCountZ) override { + if (!ViewportSet) + return llvm::createStringError(std::errc::invalid_argument, + "Viewport must be set before drawing."); + if (!ScissorSet) + return llvm::createStringError(std::errc::invalid_argument, + "Scissor must be set before drawing."); + + const auto &VKPSO = llvm::cast(PSO); + vkCmdBindPipeline(CB.CmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + VKPSO.Pipeline); + CB.MeshShaderFns.VkCmdDrawMeshTasksEXT(CB.CmdBuffer, GroupCountX, + GroupCountY, GroupCountZ); + return llvm::Error::success(); + } + + void endEncodingImpl() override { + vkCmdEndRenderPass(CB.CmdBuffer); + popDebugGroup(); + } +}; + +llvm::Expected> +VulkanCommandBuffer::createRenderEncoder( + const offloadtest::RenderPassBeginDesc &Desc) { + // The pass carries the VkRenderPass and the format / load / store policy. + // The begin desc supplies the textures that get bound for this encoder. + if (!Desc.Pass) + return llvm::createStringError( + std::errc::invalid_argument, + "RenderPassBeginDesc is missing its RenderPass."); + auto &VKPass = llvm::cast(*Desc.Pass); + const offloadtest::RenderPassDesc &PassDesc = VKPass.Desc; + + if (Desc.ColorAttachments.size() != PassDesc.ColorAttachments.size()) + return llvm::createStringError( + std::errc::invalid_argument, + "RenderPassBeginDesc color attachment count does not match its " + "RenderPass."); + if (PassDesc.DepthStencil.has_value() != (Desc.DepthStencil != nullptr)) + return llvm::createStringError(std::errc::invalid_argument, + "RenderPassBeginDesc depth-stencil " + "presence does not match its RenderPass."); + + llvm::SmallVector Views; + llvm::SmallVector ClearValues; + uint32_t Width = 0; + uint32_t Height = 0; + + for (size_t I = 0; I < Desc.ColorAttachments.size(); ++I) { + if (!Desc.ColorAttachments[I]) + return llvm::createStringError( + std::errc::invalid_argument, + "RenderPassBeginDesc has a null color attachment texture."); + auto &Tex = llvm::cast(*Desc.ColorAttachments[I]); + if (Tex.View == VK_NULL_HANDLE) + return llvm::createStringError( + std::errc::invalid_argument, + "Color attachment texture has no image view."); + Views.push_back(Tex.View); + + VkClearValue CV = {}; + if (PassDesc.ColorAttachments[I].Load == offloadtest::LoadAction::Clear) { + if (!Tex.Desc.OptimizedClearValue) + return llvm::createStringError( + std::errc::invalid_argument, + "LoadAction::Clear requires the render target to have been " + "created with an OptimizedClearValue."); + const auto *ColorCV = + std::get_if(&*Tex.Desc.OptimizedClearValue); + assert(ColorCV && + "RenderTarget OptimizedClearValue must be a ClearColor"); + CV.color = {{ColorCV->R, ColorCV->G, ColorCV->B, ColorCV->A}}; + } + ClearValues.push_back(CV); + + if (Tex.Desc.Width > Width) + Width = Tex.Desc.Width; + if (Tex.Desc.Height > Height) + Height = Tex.Desc.Height; + } + + if (Desc.DepthStencil) { + auto &Tex = llvm::cast(*Desc.DepthStencil); + if (Tex.View == VK_NULL_HANDLE) + return llvm::createStringError( + std::errc::invalid_argument, + "Depth-stencil attachment texture has no image view."); + Views.push_back(Tex.View); + + const auto &DS = *PassDesc.DepthStencil; + VkClearValue CV = {}; + if (DS.DepthLoad == offloadtest::LoadAction::Clear || + DS.StencilLoad == offloadtest::LoadAction::Clear) { + if (!Tex.Desc.OptimizedClearValue) + return llvm::createStringError( + std::errc::invalid_argument, + "LoadAction::Clear requires the depth-stencil texture to have " + "been created with an OptimizedClearValue."); + const auto *DepthCV = + std::get_if(&*Tex.Desc.OptimizedClearValue); + assert(DepthCV && + "DepthStencil OptimizedClearValue must be a ClearDepthStencil"); + CV.depthStencil = {DepthCV->Depth, DepthCV->Stencil}; + } + ClearValues.push_back(CV); + + if (Tex.Desc.Width > Width) + Width = Tex.Desc.Width; + if (Tex.Desc.Height > Height) + Height = Tex.Desc.Height; + } + + VkFramebufferCreateInfo FBCI = {}; + FBCI.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + FBCI.renderPass = VKPass.Handle; + FBCI.attachmentCount = static_cast(Views.size()); + FBCI.pAttachments = Views.data(); + FBCI.width = Width; + FBCI.height = Height; + FBCI.layers = 1; + + VkFramebuffer Framebuffer = VK_NULL_HANDLE; + if (auto Err = + VK::toError(vkCreateFramebuffer(Device, &FBCI, nullptr, &Framebuffer), + "Failed to create framebuffer for RenderEncoder.")) + return Err; + + // The framebuffer must outlive this encoder and remain valid through GPU + // execution; the command buffer destroys it on teardown. The render pass + // is owned by the user-supplied VulkanRenderPass. + OwnedFramebuffers.push_back(Framebuffer); + + VkRenderPassBeginInfo BeginInfo = {}; + BeginInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + BeginInfo.renderPass = VKPass.Handle; + BeginInfo.framebuffer = Framebuffer; + BeginInfo.renderArea.extent.width = Width; + BeginInfo.renderArea.extent.height = Height; + BeginInfo.clearValueCount = static_cast(ClearValues.size()); + BeginInfo.pClearValues = ClearValues.data(); + + vkCmdBeginRenderPass(CmdBuffer, &BeginInfo, VK_SUBPASS_CONTENTS_INLINE); + + auto Enc = std::make_unique(*this); + Enc->pushDebugGroup("RenderEncoder"); + return Enc; +} + class VulkanDevice : public offloadtest::Device { private: std::shared_ptr Instance; @@ -778,6 +1096,7 @@ class VulkanDevice : public offloadtest::Device { PFN_vkCmdBeginDebugUtilsLabelEXT CmdBeginDebugUtilsLabel = nullptr; PFN_vkCmdEndDebugUtilsLabelEXT CmdEndDebugUtilsLabel = nullptr; PFN_vkCmdInsertDebugUtilsLabelEXT CmdInsertDebugUtilsLabel = nullptr; + MeshShaderFunctions MeshShaderFns; struct BufferRef { VkBuffer Buffer; @@ -845,6 +1164,7 @@ class VulkanDevice : public offloadtest::Device { // FrameBuffer associated data for offscreen rendering. VkFramebuffer FrameBuffer = VK_NULL_HANDLE; + std::unique_ptr RenderPass; std::unique_ptr RenderTarget; std::unique_ptr RTReadback; std::unique_ptr DepthStencil; @@ -931,6 +1251,31 @@ class VulkanDevice : public offloadtest::Device { #endif vkGetPhysicalDeviceFeatures2(PhysicalDevice, &Features); + const VulkanDevice::ExtensionVector AvailableDeviceExtensions = + queryDeviceExtensions(PhysicalDevice); + + llvm::SmallVector EnabledDeviceExtensions; + const llvm::StringRef ExtensionName = "VK_EXT_mesh_shader"; + VkPhysicalDeviceMeshShaderFeaturesEXT MeshFeatures{}; + if (isExtensionSupported(AvailableDeviceExtensions, ExtensionName)) { + EnabledDeviceExtensions.push_back(ExtensionName.data()); + MeshFeatures.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT; + MeshFeatures.taskShader = 1; + MeshFeatures.meshShader = 1; + MeshFeatures.multiviewMeshShader = 0; + MeshFeatures.primitiveFragmentShadingRateMeshShader = 0; + MeshFeatures.meshShaderQueries = 0; +#ifdef VK_VERSION_1_4 + Features14.pNext = &MeshFeatures; +#else + Features13.pNext = &MeshFeatures; +#endif + } + + DeviceInfo.enabledExtensionCount = + static_cast(EnabledDeviceExtensions.size()); + DeviceInfo.ppEnabledExtensionNames = EnabledDeviceExtensions.data(); DeviceInfo.pEnabledFeatures = &Features.features; DeviceInfo.pNext = Features.pNext; @@ -948,16 +1293,18 @@ class VulkanDevice : public offloadtest::Device { VulkanQueue GraphicsQueue(DeviceQueue, QueueFamilyIdx, Device, std::move(*SubmitFenceOrErr)); - return std::make_unique(Instance, PhysicalDevice, Props, - Device, std::move(GraphicsQueue), - std::move(InstanceLayers)); + return std::make_unique( + Instance, PhysicalDevice, Props, Device, std::move(GraphicsQueue), + std::move(InstanceLayers), std::move(AvailableDeviceExtensions)); } VulkanDevice(std::shared_ptr I, VkPhysicalDevice P, VkPhysicalDeviceProperties Props, VkDevice D, VulkanQueue Q, - llvm::SmallVector InstanceLayers) + llvm::SmallVector InstanceLayers, + ExtensionVector DeviceExtensions) : Instance(I), PhysicalDevice(P), Props(Props), Device(D), - GraphicsQueue(std::move(Q)), InstanceLayers(std::move(InstanceLayers)) { + GraphicsQueue(std::move(Q)), InstanceLayers(std::move(InstanceLayers)), + DeviceExtensions(std::move(DeviceExtensions)) { const uint64_t DeviceNameSz = strnlen(Props.deviceName, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE); Description = std::string(Props.deviceName, DeviceNameSz); @@ -983,8 +1330,6 @@ class VulkanDevice : public offloadtest::Device { Description += " (" + DriverName + ")"; #endif - DeviceExtensions = queryDeviceExtensions(PhysicalDevice); - CmdBeginDebugUtilsLabel = (PFN_vkCmdBeginDebugUtilsLabelEXT)vkGetDeviceProcAddr( Device, "vkCmdBeginDebugUtilsLabelEXT"); @@ -993,6 +1338,8 @@ class VulkanDevice : public offloadtest::Device { CmdInsertDebugUtilsLabel = (PFN_vkCmdInsertDebugUtilsLabelEXT)vkGetDeviceProcAddr( Device, "vkCmdInsertDebugUtilsLabelEXT"); + + MeshShaderFns = MeshShaderFunctions::create(Device); } VulkanDevice(const VulkanDevice &) = delete; @@ -1101,6 +1448,21 @@ class VulkanDevice : public offloadtest::Device { llvm::Expected> createPipelineCs(llvm::StringRef Name, const BindingsDesc &BindingsDesc, ShaderContainer CS) override { + auto CSModOrErr = createShaderModule(CS.Shader, "compute"); + if (!CSModOrErr) + return CSModOrErr.takeError(); + + VkShaderModule CSModule = *CSModOrErr; + auto ShaderModuleCleanUp = llvm::scope_exit( + [&] { vkDestroyShaderModule(Device, CSModule, nullptr); }); + + llvm::SmallVector SpecEntries; + llvm::SmallVector SpecData; + VkSpecializationInfo SpecInfo = {}; + if (auto Err = parseSpecializationConstants( + CS.SpecializationConstants, SpecEntries, SpecData, SpecInfo)) + return Err; + llvm::SmallVector SetLayouts; VkPipelineLayout PipelineLayout = VK_NULL_HANDLE; if (auto Err = @@ -1113,49 +1475,13 @@ class VulkanDevice : public offloadtest::Device { vkDestroyDescriptorSetLayout(Device, Layout, nullptr); }); - // Create compute shader module. - auto CSModOrErr = createShaderModule(CS.Shader, "compute"); - if (!CSModOrErr) { - vkDestroyPipelineLayout(Device, PipelineLayout, nullptr); - return CSModOrErr.takeError(); - } - VkShaderModule CSModule = *CSModOrErr; - VkPipelineShaderStageCreateInfo StageCI = {}; StageCI.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; StageCI.stage = VK_SHADER_STAGE_COMPUTE_BIT; StageCI.module = CSModule; StageCI.pName = CS.EntryPoint.c_str(); - - llvm::SmallVector SpecEntries; - llvm::SmallVector SpecData; - VkSpecializationInfo SpecInfo = {}; - if (!CS.SpecializationConstants.empty()) { - llvm::DenseSet SeenConstantIDs; - - for (const auto &SpecConst : CS.SpecializationConstants) { - if (!SeenConstantIDs.insert(SpecConst.ConstantID).second) - return llvm::createStringError( - std::errc::invalid_argument, - "Test configuration contains multiple entries for " - "specialization constant ID %u.", - SpecConst.ConstantID); - - VkSpecializationMapEntry Entry; - if (auto Err = - parseSpecializationConstant(SpecConst, Entry, SpecData)) { - vkDestroyShaderModule(Device, CSModule, nullptr); - vkDestroyPipelineLayout(Device, PipelineLayout, nullptr); - return Err; - } - SpecEntries.push_back(Entry); - } - SpecInfo.mapEntryCount = SpecEntries.size(); - SpecInfo.pMapEntries = SpecEntries.data(); - SpecInfo.dataSize = SpecData.size(); - SpecInfo.pData = SpecData.data(); - StageCI.pSpecializationInfo = &SpecInfo; - } + StageCI.pSpecializationInfo = + CS.SpecializationConstants.empty() ? nullptr : &SpecInfo; VkComputePipelineCreateInfo PipelineCI = {}; PipelineCI.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; @@ -1166,17 +1492,12 @@ class VulkanDevice : public offloadtest::Device { 1, &PipelineCI, nullptr, &Pipeline), "Failed to create compute pipeline.")) { - vkDestroyShaderModule(Device, CSModule, nullptr); vkDestroyPipelineLayout(Device, PipelineLayout, nullptr); return Err; } - // No longer need shader modules after pipeline compilation. - vkDestroyShaderModule(Device, CSModule, nullptr); - return std::make_unique( - Name, Device, Pipeline, PipelineLayout, std::move(SetLayouts), - VK_NULL_HANDLE); + Name, Device, Pipeline, PipelineLayout, std::move(SetLayouts)); } llvm::Expected> @@ -1185,122 +1506,99 @@ class VulkanDevice : public offloadtest::Device { llvm::ArrayRef RTFormats, std::optional DSFormat, ShaderContainer VS, ShaderContainer PS) override { - const VkShaderStageFlags GraphicsFlags = - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; - - llvm::SmallVector SetLayouts; - VkPipelineLayout PipelineLayout = VK_NULL_HANDLE; - if (auto Err = createPipelineLayout(BindingsDesc, GraphicsFlags, SetLayouts, - PipelineLayout)) - return Err; - auto RenderPassOrErr = createRenderPass(RTFormats, DSFormat); - if (!RenderPassOrErr) { - vkDestroyPipelineLayout(Device, PipelineLayout, nullptr); - for (auto *L : SetLayouts) - vkDestroyDescriptorSetLayout(Device, L, nullptr); - return RenderPassOrErr.takeError(); - } - VkRenderPass RenderPass = *RenderPassOrErr; - llvm::outs() << "Render pass created.\n"; - - std::vector ShaderModules; - auto VSModOrErr = createShaderModule(VS.Shader, "vertex"); - if (!VSModOrErr) { - vkDestroyRenderPass(Device, RenderPass, nullptr); - vkDestroyPipelineLayout(Device, PipelineLayout, nullptr); - for (auto *L : SetLayouts) - vkDestroyDescriptorSetLayout(Device, L, nullptr); - return VSModOrErr.takeError(); - } - ShaderModules.push_back(*VSModOrErr); - - auto PSModOrErr = createShaderModule(PS.Shader, "pixel"); - if (!PSModOrErr) { - for (auto *M : ShaderModules) - vkDestroyShaderModule(Device, M, nullptr); - vkDestroyRenderPass(Device, RenderPass, nullptr); - vkDestroyPipelineLayout(Device, PipelineLayout, nullptr); - for (auto *L : SetLayouts) - vkDestroyDescriptorSetLayout(Device, L, nullptr); - return PSModOrErr.takeError(); - } - ShaderModules.push_back(*PSModOrErr); + VkShaderStageFlags GraphicsFlags = VK_SHADER_STAGE_VERTEX_BIT; + llvm::SmallVector ShaderStages; + auto ShaderModuleCleanUp = llvm::scope_exit([&] { + for (auto &Stage : ShaderStages) + vkDestroyShaderModule(Device, Stage.module, nullptr); + }); - // Build specialization info for vertex shader. llvm::SmallVector VSSpecEntries; llvm::SmallVector VSSpecData; VkSpecializationInfo VSSpecInfo = {}; - if (!VS.SpecializationConstants.empty()) { - llvm::DenseSet SeenConstantIDs; - for (const auto &SpecConst : VS.SpecializationConstants) { - if (!SeenConstantIDs.insert(SpecConst.ConstantID).second) - return llvm::createStringError( - std::errc::invalid_argument, - "Test configuration contains multiple entries for " - "specialization constant ID %u.", - SpecConst.ConstantID); + { + if (auto Err = parseSpecializationConstants(VS.SpecializationConstants, + VSSpecEntries, VSSpecData, + VSSpecInfo)) + return Err; - VkSpecializationMapEntry Entry; - if (auto Err = - parseSpecializationConstant(SpecConst, Entry, VSSpecData)) { - for (auto *M : ShaderModules) - vkDestroyShaderModule(Device, M, nullptr); - vkDestroyRenderPass(Device, RenderPass, nullptr); - vkDestroyPipelineLayout(Device, PipelineLayout, nullptr); - for (auto *L : SetLayouts) - vkDestroyDescriptorSetLayout(Device, L, nullptr); - return Err; - } - VSSpecEntries.push_back(Entry); - } - VSSpecInfo.mapEntryCount = VSSpecEntries.size(); - VSSpecInfo.pMapEntries = VSSpecEntries.data(); - VSSpecInfo.dataSize = VSSpecData.size(); - VSSpecInfo.pData = VSSpecData.data(); + auto VSModOrErr = createShaderModule(VS.Shader, "mesh"); + if (!VSModOrErr) + return VSModOrErr.takeError(); + + VkPipelineShaderStageCreateInfo ShaderStage = {}; + ShaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + ShaderStage.stage = VK_SHADER_STAGE_VERTEX_BIT; + ShaderStage.module = *VSModOrErr; + ShaderStage.pName = VS.EntryPoint.c_str(); + ShaderStage.pSpecializationInfo = + VS.SpecializationConstants.empty() ? nullptr : &VSSpecInfo; + ShaderStages.push_back(ShaderStage); } - // Build specialization info for pixel/fragment shader. llvm::SmallVector PSSpecEntries; llvm::SmallVector PSSpecData; VkSpecializationInfo PSSpecInfo = {}; - if (!PS.SpecializationConstants.empty()) { - llvm::DenseSet SeenConstantIDs; - for (const auto &SpecConst : PS.SpecializationConstants) { - if (!SeenConstantIDs.insert(SpecConst.ConstantID).second) - return llvm::createStringError( - std::errc::invalid_argument, - "Test configuration contains multiple entries for " - "specialization constant ID %u.", - SpecConst.ConstantID); + { + if (auto Err = parseSpecializationConstants(PS.SpecializationConstants, + PSSpecEntries, PSSpecData, + PSSpecInfo)) + return Err; - VkSpecializationMapEntry Entry; - if (auto Err = - parseSpecializationConstant(SpecConst, Entry, PSSpecData)) { - for (auto *M : ShaderModules) - vkDestroyShaderModule(Device, M, nullptr); - vkDestroyRenderPass(Device, RenderPass, nullptr); - vkDestroyPipelineLayout(Device, PipelineLayout, nullptr); - for (auto *L : SetLayouts) - vkDestroyDescriptorSetLayout(Device, L, nullptr); - return Err; - } - PSSpecEntries.push_back(Entry); - } - PSSpecInfo.mapEntryCount = PSSpecEntries.size(); - PSSpecInfo.pMapEntries = PSSpecEntries.data(); - PSSpecInfo.dataSize = PSSpecData.size(); - PSSpecInfo.pData = PSSpecData.data(); - } + auto PSModOrErr = createShaderModule(PS.Shader, "pixel"); + if (!PSModOrErr) + return PSModOrErr.takeError(); + + GraphicsFlags |= VK_SHADER_STAGE_FRAGMENT_BIT; + + VkPipelineShaderStageCreateInfo ShaderStage = {}; + ShaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + ShaderStage.stage = VK_SHADER_STAGE_FRAGMENT_BIT; + ShaderStage.module = *PSModOrErr; + ShaderStage.pName = PS.EntryPoint.c_str(); + ShaderStage.pSpecializationInfo = + PS.SpecializationConstants.empty() ? nullptr : &PSSpecInfo; + ShaderStages.push_back(ShaderStage); + } + + // Build a RenderPassDesc from the PSO's RT/DS formats. + RenderPassDesc PassDesc; + PassDesc.ColorAttachments.reserve(RTFormats.size()); + for (const Format F : RTFormats) { + ColorAttachmentFormatDesc CA = {}; + CA.Fmt = F; + CA.Load = LoadAction::DontCare; + CA.Store = StoreAction::DontCare; + PassDesc.ColorAttachments.push_back(CA); + } + if (DSFormat) { + DepthStencilAttachmentFormatDesc DS = {}; + DS.Fmt = *DSFormat; + DS.DepthLoad = LoadAction::DontCare; + DS.DepthStore = StoreAction::DontCare; + DS.StencilLoad = LoadAction::DontCare; + DS.StencilStore = StoreAction::DontCare; + PassDesc.DepthStencil = DS; + } + + // NOTE: After pipeline creation this render pass can be dropped. Later + // render passes just need to be compatible with this render pass, or in + // other words: the format, sample count and number of targets (rt and ds), + // need to match. + auto RenderPassOrErr = createRenderPass(PassDesc); + if (!RenderPassOrErr) + return RenderPassOrErr.takeError(); + const std::unique_ptr RenderPass = + std::move(*RenderPassOrErr); + VkRenderPass RenderPassHandle = + llvm::cast(*RenderPass).Handle; - const std::array Stages = {{ - {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, nullptr, 0, - VK_SHADER_STAGE_VERTEX_BIT, ShaderModules[0], VS.EntryPoint.c_str(), - VS.SpecializationConstants.empty() ? nullptr : &VSSpecInfo}, - {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, nullptr, 0, - VK_SHADER_STAGE_FRAGMENT_BIT, ShaderModules[1], PS.EntryPoint.c_str(), - PS.SpecializationConstants.empty() ? nullptr : &PSSpecInfo}, - }}; + llvm::SmallVector SetLayouts; + VkPipelineLayout PipelineLayout = VK_NULL_HANDLE; + if (auto Err = createPipelineLayout(BindingsDesc, GraphicsFlags, SetLayouts, + PipelineLayout)) + return Err; // Build vertex input attribute and binding descriptions from InputLayout. uint32_t Stride = 0; @@ -1388,8 +1686,8 @@ class VulkanDevice : public offloadtest::Device { VkGraphicsPipelineCreateInfo PipelineCI = {}; PipelineCI.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - PipelineCI.stageCount = static_cast(Stages.size()); - PipelineCI.pStages = Stages.data(); + PipelineCI.stageCount = static_cast(ShaderStages.size()); + PipelineCI.pStages = ShaderStages.data(); PipelineCI.pVertexInputState = &VertexInputCI; PipelineCI.pInputAssemblyState = &InputAssemblyCI; PipelineCI.pViewportState = &ViewportCI; @@ -1399,29 +1697,219 @@ class VulkanDevice : public offloadtest::Device { PipelineCI.pColorBlendState = &BlendCI; PipelineCI.pDynamicState = &DynamicCI; PipelineCI.layout = PipelineLayout; - PipelineCI.renderPass = RenderPass; + PipelineCI.renderPass = RenderPassHandle; VkPipeline Pipeline = VK_NULL_HANDLE; if (auto Err = VK::toError(vkCreateGraphicsPipelines(Device, VK_NULL_HANDLE, 1, &PipelineCI, nullptr, &Pipeline), "Failed to create graphics pipeline.")) { - for (auto *M : ShaderModules) - vkDestroyShaderModule(Device, M, nullptr); - vkDestroyRenderPass(Device, RenderPass, nullptr); vkDestroyPipelineLayout(Device, PipelineLayout, nullptr); for (auto *L : SetLayouts) vkDestroyDescriptorSetLayout(Device, L, nullptr); return Err; } - // No longer need shader modules after pipeline compilation. - for (auto *M : ShaderModules) - vkDestroyShaderModule(Device, M, nullptr); + return std::make_unique( + Name, Device, Pipeline, PipelineLayout, std::move(SetLayouts)); + } + + llvm::Expected> + createPipelineAsMsPs(llvm::StringRef Name, const BindingsDesc &BindingsDesc, + llvm::ArrayRef RTFormats, + std::optional DSFormat, + std::optional AS, ShaderContainer MS, + std::optional PS) /*override*/ { + assert(RTFormats.size() <= 8); + + VkShaderStageFlags GraphicsFlags = VK_SHADER_STAGE_MESH_BIT_EXT; + llvm::SmallVector ShaderStages; + auto ShaderModuleCleanUp = llvm::scope_exit([&] { + for (auto &Stage : ShaderStages) + vkDestroyShaderModule(Device, Stage.module, nullptr); + }); + + llvm::SmallVector MSSpecEntries; + llvm::SmallVector MSSpecData; + VkSpecializationInfo MSSpecInfo = {}; + { + if (auto Err = parseSpecializationConstants(MS.SpecializationConstants, + MSSpecEntries, MSSpecData, + MSSpecInfo)) + return Err; + + auto MSModOrErr = createShaderModule(MS.Shader, "mesh"); + if (!MSModOrErr) + return MSModOrErr.takeError(); + + VkPipelineShaderStageCreateInfo ShaderStage = {}; + ShaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + ShaderStage.stage = VK_SHADER_STAGE_MESH_BIT_EXT; + ShaderStage.module = *MSModOrErr; + ShaderStage.pName = MS.EntryPoint.c_str(); + ShaderStage.pSpecializationInfo = + MS.SpecializationConstants.empty() ? nullptr : &MSSpecInfo; + ShaderStages.push_back(ShaderStage); + } + + llvm::SmallVector ASSpecEntries; + llvm::SmallVector ASSpecData; + VkSpecializationInfo ASSpecInfo = {}; + if (AS) { + if (auto Err = parseSpecializationConstants((*AS).SpecializationConstants, + ASSpecEntries, ASSpecData, + ASSpecInfo)) + return Err; + + auto ASModOrErr = createShaderModule((*AS).Shader, "task"); + if (!ASModOrErr) + return ASModOrErr.takeError(); + + GraphicsFlags |= VK_SHADER_STAGE_TASK_BIT_EXT; + + VkPipelineShaderStageCreateInfo ShaderStage = {}; + ShaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + ShaderStage.stage = VK_SHADER_STAGE_TASK_BIT_EXT; + ShaderStage.module = *ASModOrErr; + ShaderStage.pName = (*AS).EntryPoint.c_str(); + ShaderStage.pSpecializationInfo = + (*AS).SpecializationConstants.empty() ? nullptr : &ASSpecInfo; + ShaderStages.push_back(ShaderStage); + } + + llvm::SmallVector PSSpecEntries; + llvm::SmallVector PSSpecData; + VkSpecializationInfo PSSpecInfo = {}; + if (PS) { + if (auto Err = parseSpecializationConstants((*PS).SpecializationConstants, + PSSpecEntries, PSSpecData, + PSSpecInfo)) + return Err; + + auto PSModOrErr = createShaderModule((*PS).Shader, "pixel"); + if (!PSModOrErr) + return PSModOrErr.takeError(); + + GraphicsFlags |= VK_SHADER_STAGE_FRAGMENT_BIT; + + VkPipelineShaderStageCreateInfo ShaderStage = {}; + ShaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + ShaderStage.stage = VK_SHADER_STAGE_FRAGMENT_BIT; + ShaderStage.module = *PSModOrErr; + ShaderStage.pName = (*PS).EntryPoint.c_str(); + ShaderStage.pSpecializationInfo = + (*PS).SpecializationConstants.empty() ? nullptr : &PSSpecInfo; + ShaderStages.push_back(ShaderStage); + } + + // Build a RenderPassDesc from the PSO's RT/DS formats. + RenderPassDesc PassDesc; + PassDesc.ColorAttachments.reserve(RTFormats.size()); + for (const Format F : RTFormats) { + ColorAttachmentFormatDesc CA = {}; + CA.Fmt = F; + CA.Load = LoadAction::DontCare; + CA.Store = StoreAction::DontCare; + PassDesc.ColorAttachments.push_back(CA); + } + if (DSFormat) { + DepthStencilAttachmentFormatDesc DS = {}; + DS.Fmt = *DSFormat; + DS.DepthLoad = LoadAction::DontCare; + DS.DepthStore = StoreAction::DontCare; + DS.StencilLoad = LoadAction::DontCare; + DS.StencilStore = StoreAction::DontCare; + PassDesc.DepthStencil = DS; + } + + // NOTE: After pipeline creation this render pass can be dropped. Later + // render passes just need to be compatible with this render pass, or in + // other words: the format, sample count and number of targets (rt and ds), + // need to match. + auto RenderPassOrErr = createRenderPass(PassDesc); + if (!RenderPassOrErr) + return RenderPassOrErr.takeError(); + const std::unique_ptr RenderPass = + std::move(*RenderPassOrErr); + VkRenderPass RenderPassHandle = + llvm::cast(*RenderPass).Handle; + + llvm::SmallVector SetLayouts; + VkPipelineLayout PipelineLayout = VK_NULL_HANDLE; + if (auto Err = createPipelineLayout(BindingsDesc, GraphicsFlags, SetLayouts, + PipelineLayout)) + return Err; + + VkPipelineViewportStateCreateInfo ViewportCI = {}; + ViewportCI.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + ViewportCI.viewportCount = 1; + ViewportCI.scissorCount = 1; + + const VkDynamicState DynStates[] = {VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo DynamicCI = {}; + DynamicCI.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + DynamicCI.dynamicStateCount = 2; + DynamicCI.pDynamicStates = DynStates; + + VkPipelineRasterizationStateCreateInfo RastCI = {}; + RastCI.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + RastCI.polygonMode = VK_POLYGON_MODE_FILL; + RastCI.cullMode = VK_CULL_MODE_NONE; + RastCI.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + RastCI.lineWidth = 1.0f; + + VkPipelineMultisampleStateCreateInfo MultisampleCI = {}; + MultisampleCI.sType = + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + MultisampleCI.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + + VkPipelineDepthStencilStateCreateInfo DepthStencilCI = {}; + DepthStencilCI.sType = + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + DepthStencilCI.depthTestEnable = VK_TRUE; + DepthStencilCI.depthWriteEnable = VK_TRUE; + DepthStencilCI.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + DepthStencilCI.back.failOp = VK_STENCIL_OP_KEEP; + DepthStencilCI.back.passOp = VK_STENCIL_OP_KEEP; + DepthStencilCI.back.compareOp = VK_COMPARE_OP_ALWAYS; + DepthStencilCI.front = DepthStencilCI.back; + + llvm::SmallVector BlendAttachments( + RTFormats.size()); + for (auto &BA : BlendAttachments) + BA.colorWriteMask = 0xf; + VkPipelineColorBlendStateCreateInfo BlendCI = {}; + BlendCI.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + BlendCI.attachmentCount = static_cast(BlendAttachments.size()); + BlendCI.pAttachments = BlendAttachments.data(); + + VkGraphicsPipelineCreateInfo PipelineCI = {}; + PipelineCI.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + PipelineCI.stageCount = static_cast(ShaderStages.size()); + PipelineCI.pStages = ShaderStages.data(); + PipelineCI.pViewportState = &ViewportCI; + PipelineCI.pRasterizationState = &RastCI; + PipelineCI.pMultisampleState = &MultisampleCI; + PipelineCI.pDepthStencilState = &DepthStencilCI; + PipelineCI.pColorBlendState = &BlendCI; + PipelineCI.pDynamicState = &DynamicCI; + PipelineCI.layout = PipelineLayout; + PipelineCI.renderPass = RenderPassHandle; + + VkPipeline Pipeline = VK_NULL_HANDLE; + if (auto Err = VK::toError(vkCreateGraphicsPipelines(Device, VK_NULL_HANDLE, + 1, &PipelineCI, + nullptr, &Pipeline), + "Failed to create mesh shader pipeline.")) { + vkDestroyPipelineLayout(Device, PipelineLayout, nullptr); + for (auto *L : SetLayouts) + vkDestroyDescriptorSetLayout(Device, L, nullptr); + return Err; + } return std::make_unique( - Name, Device, Pipeline, PipelineLayout, std::move(SetLayouts), - RenderPass); + Name, Device, Pipeline, PipelineLayout, std::move(SetLayouts)); } llvm::Expected> @@ -1657,7 +2145,78 @@ class VulkanDevice : public offloadtest::Device { createCommandBuffer() override { return VulkanCommandBuffer::create( Device, GraphicsQueue.QueueFamilyIdx, CmdBeginDebugUtilsLabel, - CmdEndDebugUtilsLabel, CmdInsertDebugUtilsLabel); + CmdEndDebugUtilsLabel, CmdInsertDebugUtilsLabel, MeshShaderFns); + } + + llvm::Expected> + createRenderPass(const offloadtest::RenderPassDesc &Desc) override { + llvm::SmallVector Attachments; + llvm::SmallVector ColorRefs; + + for (const ColorAttachmentFormatDesc &Color : Desc.ColorAttachments) { + VkAttachmentDescription AD = {}; + AD.format = getVulkanFormat(Color.Fmt); + AD.samples = VK_SAMPLE_COUNT_1_BIT; + AD.loadOp = getVkLoadOp(Color.Load); + AD.storeOp = getVkStoreOp(Color.Store); + AD.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + AD.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + AD.initialLayout = Color.Load == LoadAction::Load + ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + : VK_IMAGE_LAYOUT_UNDEFINED; + AD.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkAttachmentReference Ref = {}; + Ref.attachment = static_cast(Attachments.size()); + Ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + Attachments.push_back(AD); + ColorRefs.push_back(Ref); + } + + VkAttachmentReference DepthReference = {}; + bool HasDS = false; + if (Desc.DepthStencil) { + const auto &DS = *Desc.DepthStencil; + VkAttachmentDescription AD = {}; + AD.format = getVulkanFormat(DS.Fmt); + AD.samples = VK_SAMPLE_COUNT_1_BIT; + AD.loadOp = getVkLoadOp(DS.DepthLoad); + AD.storeOp = getVkStoreOp(DS.DepthStore); + AD.stencilLoadOp = getVkLoadOp(DS.StencilLoad); + AD.stencilStoreOp = getVkStoreOp(DS.StencilStore); + AD.initialLayout = (DS.DepthLoad == LoadAction::Load || + DS.StencilLoad == LoadAction::Load) + ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL + : VK_IMAGE_LAYOUT_UNDEFINED; + AD.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + DepthReference.attachment = static_cast(Attachments.size()); + DepthReference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + Attachments.push_back(AD); + HasDS = true; + } + + VkSubpassDescription Subpass = {}; + Subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + Subpass.colorAttachmentCount = static_cast(ColorRefs.size()); + Subpass.pColorAttachments = ColorRefs.data(); + Subpass.pDepthStencilAttachment = HasDS ? &DepthReference : nullptr; + + VkRenderPassCreateInfo RPCI = {}; + RPCI.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + RPCI.attachmentCount = static_cast(Attachments.size()); + RPCI.pAttachments = Attachments.data(); + RPCI.subpassCount = 1; + RPCI.pSubpasses = &Subpass; + + VkRenderPass Handle = VK_NULL_HANDLE; + if (auto Err = + VK::toError(vkCreateRenderPass(Device, &RPCI, nullptr, &Handle), + "Failed to create render pass.")) + return Err; + return std::make_unique(Device, Handle, Desc); } llvm::Expected createBuffer(VkBufferUsageFlags Usage, @@ -1924,24 +2483,22 @@ class VulkanDevice : public offloadtest::Device { } } - if (P.isTraditionalRaster()) { + if (P.isRaster()) { if (auto Err = createRenderTarget(P, IS)) return Err; // TODO: Always created for graphics pipelines. Consider making this // conditional on the pipeline definition. if (auto Err = createDepthStencil(P, IS)) return Err; + } - if (P.Bindings.VertexBufferPtr == nullptr) - return llvm::createStringError( - std::errc::invalid_argument, - "No Vertex buffer specified for graphics pipeline."); - + if (P.isTraditionalRaster() && P.Bindings.VertexBufferPtr) { auto VBOrErr = offloadtest::createVertexBufferFromCPUBuffer( *this, *P.Bindings.VertexBufferPtr); if (!VBOrErr) return VBOrErr.takeError(); IS.VB = std::move(*VBOrErr); + llvm::outs() << "Vertex buffer created.\n"; } return llvm::Error::success(); @@ -2181,87 +2738,16 @@ class VulkanDevice : public offloadtest::Device { return llvm::Error::success(); } - llvm::Expected - createRenderPass(llvm::ArrayRef RTFormats, - std::optional DSFormat) { - // Only 8 render targets can be bound + 1 depth stencil target. - llvm::SmallVector Attachments; - llvm::SmallVector ColorReferences; - for (size_t I = 0, N = RTFormats.size(); I < N; ++I) { - VkAttachmentDescription AttachmentDesc = {}; - AttachmentDesc.format = getVulkanFormat(RTFormats[I]); - AttachmentDesc.samples = VK_SAMPLE_COUNT_1_BIT; - AttachmentDesc.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - AttachmentDesc.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - AttachmentDesc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - AttachmentDesc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - AttachmentDesc.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - AttachmentDesc.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - Attachments.push_back(AttachmentDesc); - - VkAttachmentReference ColorReference = {}; - ColorReference.attachment = I; - ColorReference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - ColorReferences.push_back(ColorReference); - } - - VkAttachmentReference DepthReference = {}; - if (DSFormat.has_value()) { - VkAttachmentDescription AttachmentDesc = {}; - AttachmentDesc.format = getVulkanFormat(*DSFormat); - AttachmentDesc.samples = VK_SAMPLE_COUNT_1_BIT; - AttachmentDesc.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - AttachmentDesc.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - AttachmentDesc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - AttachmentDesc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - AttachmentDesc.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - AttachmentDesc.finalLayout = - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - Attachments.push_back(AttachmentDesc); - - DepthReference.attachment = Attachments.size() - 1; - DepthReference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - } - - VkSubpassDescription SubpassDescription = {}; - SubpassDescription.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - SubpassDescription.colorAttachmentCount = ColorReferences.size(); - SubpassDescription.pColorAttachments = ColorReferences.data(); - SubpassDescription.pDepthStencilAttachment = - DSFormat.has_value() ? &DepthReference : nullptr; - SubpassDescription.inputAttachmentCount = 0; - SubpassDescription.pInputAttachments = nullptr; - SubpassDescription.preserveAttachmentCount = 0; - SubpassDescription.pPreserveAttachments = nullptr; - SubpassDescription.pResolveAttachments = nullptr; - - VkRenderPassCreateInfo RPCI = {}; - RPCI.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - RPCI.attachmentCount = static_cast(Attachments.size()); - RPCI.pAttachments = Attachments.data(); - RPCI.subpassCount = 1; - RPCI.pSubpasses = &SubpassDescription; - // RPCI.dependencyCount = static_cast(Dependencies.size()); - // RPCI.pDependencies = Dependencies.data(); - - VkRenderPass RenderPass = VK_NULL_HANDLE; - if (auto Err = - VK::toError(vkCreateRenderPass(Device, &RPCI, nullptr, &RenderPass), - "Failed to create render pass.")) - return Err; - return RenderPass; - } - llvm::Error createFrameBuffer(InvocationState &IS) { auto &RT = llvm::cast(*IS.RenderTarget); auto &DS = llvm::cast(*IS.DepthStencil); - auto &PipelineState = llvm::cast(*IS.Pipeline); std::array Views = {RT.View, DS.View}; VkFramebufferCreateInfo FbufCreateInfo = {}; FbufCreateInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - FbufCreateInfo.renderPass = PipelineState.RenderPass; + FbufCreateInfo.renderPass = + llvm::cast(*IS.RenderPass).Handle; FbufCreateInfo.attachmentCount = Views.size(); FbufCreateInfo.pAttachments = Views.data(); FbufCreateInfo.width = RT.Desc.Width; @@ -2275,10 +2761,10 @@ class VulkanDevice : public offloadtest::Device { return llvm::Error::success(); } - static llvm::Error + static llvm::Expected parseSpecializationConstant(const SpecializationConstant &SpecConst, - VkSpecializationMapEntry &Entry, - llvm::SmallVector &SpecData) { + llvm::SmallVectorImpl &SpecData) { + VkSpecializationMapEntry Entry = {}; Entry.constantID = SpecConst.ConstantID; Entry.offset = SpecData.size(); switch (SpecConst.Type) { @@ -2371,6 +2857,40 @@ class VulkanDevice : public offloadtest::Device { default: llvm_unreachable("Unsupported specialization constant type"); } + + return Entry; + } + + static llvm::Error parseSpecializationConstants( + llvm::ArrayRef SpecializationConstants, + llvm::SmallVectorImpl &SpecEntries, + llvm::SmallVectorImpl &SpecData, VkSpecializationInfo &SpecInfo) { + + if (SpecializationConstants.empty()) { + SpecInfo = {}; + return llvm::Error::success(); + } + + llvm::DenseSet SeenConstantIDs; + for (const auto &SpecConst : SpecializationConstants) { + if (!SeenConstantIDs.insert(SpecConst.ConstantID).second) + return llvm::createStringError( + std::errc::invalid_argument, + "Test configuration contains multiple entries for " + "specialization constant ID %u.", + SpecConst.ConstantID); + + auto EntryOrErr = parseSpecializationConstant(SpecConst, SpecData); + if (!EntryOrErr) + return EntryOrErr.takeError(); + SpecEntries.push_back(*EntryOrErr); + } + + SpecInfo.mapEntryCount = SpecEntries.size(); + SpecInfo.pMapEntries = SpecEntries.data(); + SpecInfo.dataSize = SpecData.size(); + SpecInfo.pData = SpecData.data(); + return llvm::Error::success(); } @@ -2644,63 +3164,6 @@ class VulkanDevice : public offloadtest::Device { for (auto &R : IS.Resources) copyResourceDataToDevice(IS, R); - if (P.isTraditionalRaster()) { - auto &RT = llvm::cast(*IS.RenderTarget); - auto &DS = llvm::cast(*IS.DepthStencil); - auto &PipelineState = llvm::cast(*IS.Pipeline); - - const auto *ColorCV = - std::get_if(&*RT.Desc.OptimizedClearValue); - if (!ColorCV) - return llvm::createStringError( - std::errc::invalid_argument, - "Render target clear value must be a ClearColor."); - const auto *DepthCV = - std::get_if(&*DS.Desc.OptimizedClearValue); - if (!DepthCV) - return llvm::createStringError( - std::errc::invalid_argument, - "Depth/stencil clear value must be a ClearDepthStencil."); - VkClearValue ClearValues[2] = {}; - ClearValues[0].color = {{ColorCV->R, ColorCV->G, ColorCV->B, ColorCV->A}}; - ClearValues[1].depthStencil = {DepthCV->Depth, DepthCV->Stencil}; - - VkRenderPassBeginInfo RenderPassBeginInfo = {}; - RenderPassBeginInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - RenderPassBeginInfo.renderPass = PipelineState.RenderPass; - RenderPassBeginInfo.framebuffer = IS.FrameBuffer; - RenderPassBeginInfo.renderArea.extent.width = - P.Bindings.RTargetBufferPtr->OutputProps.Width; - RenderPassBeginInfo.renderArea.extent.height = - P.Bindings.RTargetBufferPtr->OutputProps.Height; - RenderPassBeginInfo.clearValueCount = 2; - RenderPassBeginInfo.pClearValues = ClearValues; - - vkCmdBeginRenderPass(IS.CB->CmdBuffer, &RenderPassBeginInfo, - VK_SUBPASS_CONTENTS_INLINE); - - // Negative viewport height (with Y origin at the bottom) flips clip-> - // framebuffer Y the same way DX12 and Metal do, so a CCW-in-clip-space - // triangle is front-facing on every backend. - VkViewport Viewport = {}; - Viewport.x = 0.0f; - Viewport.y = - static_cast(P.Bindings.RTargetBufferPtr->OutputProps.Height); - Viewport.width = - static_cast(P.Bindings.RTargetBufferPtr->OutputProps.Width); - Viewport.height = - -static_cast(P.Bindings.RTargetBufferPtr->OutputProps.Height); - Viewport.minDepth = 0.0f; - Viewport.maxDepth = 1.0f; - vkCmdSetViewport(IS.CB->CmdBuffer, 0, 1, &Viewport); - - VkRect2D Scissor = {}; - Scissor.offset = {0, 0}; - Scissor.extent.width = P.Bindings.RTargetBufferPtr->OutputProps.Width; - Scissor.extent.height = P.Bindings.RTargetBufferPtr->OutputProps.Height; - vkCmdSetScissor(IS.CB->CmdBuffer, 0, 1, &Scissor); - } - const VkPipelineBindPoint BindPoint = P.isTraditionalRaster() ? VK_PIPELINE_BIND_POINT_GRAPHICS : VK_PIPELINE_BIND_POINT_COMPUTE; @@ -2735,21 +3198,55 @@ class VulkanDevice : public offloadtest::Device { << P.DispatchParameters.DispatchGroupCount[0] << ", " << P.DispatchParameters.DispatchGroupCount[1] << ", " << P.DispatchParameters.DispatchGroupCount[2] << " }\n"; - } else { - VkDeviceSize Offsets[1]{0}; - assert(IS.VB); - VkBuffer VBHandle = llvm::cast(*IS.VB).Buffer; - vkCmdBindVertexBuffers(IS.CB->CmdBuffer, 0, 1, &VBHandle, Offsets); - // instanceCount must be >=1 to draw; previously was 0 which draws nothing - vkCmdDraw(IS.CB->CmdBuffer, P.getVertexCount(), 1, 0, 0); - llvm::outs() << "Drew " << P.getVertexCount() << " vertices.\n"; - vkCmdEndRenderPass(IS.CB->CmdBuffer); + } else if (P.isRaster()) { + RenderPassBeginDesc BeginDesc = {}; + BeginDesc.Pass = IS.RenderPass.get(); + BeginDesc.ColorAttachments.push_back(IS.RenderTarget.get()); + BeginDesc.DepthStencil = IS.DepthStencil.get(); + + auto EncOrErr = IS.CB->createRenderEncoder(BeginDesc); + if (!EncOrErr) + return EncOrErr.takeError(); + auto &Encoder = *EncOrErr.get(); + + Viewport VP; + VP.Width = + static_cast(P.Bindings.RTargetBufferPtr->OutputProps.Width); + VP.Height = + static_cast(P.Bindings.RTargetBufferPtr->OutputProps.Height); + Encoder.setViewport(VP); + + ScissorRect Scissor; + Scissor.Width = static_cast(VP.Width); + Scissor.Height = static_cast(VP.Height); + Encoder.setScissor(Scissor); + + if (P.isTraditionalRaster()) { + if (IS.VB) + Encoder.setVertexBuffer(0, IS.VB.get(), 0, + P.Bindings.getVertexStride()); + + if (auto Err = + Encoder.drawInstanced(*IS.Pipeline.get(), P.getVertexCount(), + /*InstanceCount=*/1)) + return Err; + } else if (P.isMeshShaderRaster()) { + if (auto Err = Encoder.dispatchMesh( + *IS.Pipeline.get(), P.DispatchParameters.DispatchGroupCount[0], + P.DispatchParameters.DispatchGroupCount[1], + P.DispatchParameters.DispatchGroupCount[2])) + return Err; + } + Encoder.endEncoding(); + copyTextureToReadback(IS.CB->CmdBuffer, llvm::cast(*IS.RenderTarget), llvm::cast(*IS.RTReadback), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); + } else { + assert(false && "Unhandled test pipeline variant."); } for (auto &R : IS.Resources) @@ -2799,7 +3296,7 @@ class VulkanDevice : public offloadtest::Device { } // Copy back the frame buffer data if this was a graphics pipeline. - if (P.isTraditionalRaster()) { + if (P.isRaster()) { auto &Readback = llvm::cast(*IS.RTReadback); VkMappedMemoryRange Range = {}; @@ -2875,7 +3372,7 @@ class VulkanDevice : public offloadtest::Device { auto CBOrErr = VulkanCommandBuffer::create( Device, GraphicsQueue.QueueFamilyIdx, CmdBeginDebugUtilsLabel, - CmdEndDebugUtilsLabel, CmdInsertDebugUtilsLabel); + CmdEndDebugUtilsLabel, CmdInsertDebugUtilsLabel, MeshShaderFns); if (!CBOrErr) return CBOrErr.takeError(); State.CB = std::move(*CBOrErr); @@ -2939,50 +3436,110 @@ class VulkanDevice : public offloadtest::Device { return PipelineStateOrErr.takeError(); State.Pipeline = std::move(*PipelineStateOrErr); llvm::outs() << "Compute Pipeline created.\n"; - } else if (P.isTraditionalRaster()) { - ShaderContainer VS = {}; - ShaderContainer PS = {}; - for (auto &Shader : P.Shaders) { - if (Shader.Stage == Stages::Vertex) { - VS.EntryPoint = Shader.Entry; - VS.Shader = Shader.Shader.get(); - VS.SpecializationConstants = Shader.SpecializationConstants; - } else if (Shader.Stage == Stages::Pixel) { - PS.EntryPoint = Shader.Entry; - PS.Shader = Shader.Shader.get(); - PS.SpecializationConstants = Shader.SpecializationConstants; + } else if (P.isRaster()) { + ColorAttachmentFormatDesc ColorAttachment = {}; + ColorAttachment.Fmt = State.RenderTarget->getDesc().Fmt; + ColorAttachment.Load = LoadAction::Clear; + ColorAttachment.Store = StoreAction::Store; + + DepthStencilAttachmentFormatDesc DSAttachment = {}; + DSAttachment.Fmt = State.DepthStencil->getDesc().Fmt; + DSAttachment.DepthLoad = LoadAction::Clear; + DSAttachment.DepthStore = StoreAction::Store; + DSAttachment.StencilLoad = LoadAction::DontCare; + DSAttachment.StencilStore = StoreAction::DontCare; + + RenderPassDesc PassDesc; + PassDesc.ColorAttachments.push_back(ColorAttachment); + PassDesc.DepthStencil = DSAttachment; + + auto RenderPassOrErr = createRenderPass(PassDesc); + if (!RenderPassOrErr) + return RenderPassOrErr.takeError(); + State.RenderPass = std::move(*RenderPassOrErr); + llvm::outs() << "Render pass created.\n"; + + if (P.isTraditionalRaster()) { + ShaderContainer VS = {}; + ShaderContainer PS = {}; + for (auto &Shader : P.Shaders) { + if (Shader.Stage == Stages::Vertex) { + VS.EntryPoint = Shader.Entry; + VS.Shader = Shader.Shader.get(); + VS.SpecializationConstants = Shader.SpecializationConstants; + } else if (Shader.Stage == Stages::Pixel) { + PS.EntryPoint = Shader.Entry; + PS.Shader = Shader.Shader.get(); + PS.SpecializationConstants = Shader.SpecializationConstants; + } } - } - // Create the input layout based on the vertex attributes. - llvm::SmallVector InputLayout; - for (auto &Attr : P.Bindings.VertexAttributes) { - auto FormatOrErr = toFormat(Attr.Format, Attr.Channels); + // Create the input layout based on the vertex attributes. + llvm::SmallVector InputLayout; + for (auto &Attr : P.Bindings.VertexAttributes) { + auto FormatOrErr = toFormat(Attr.Format, Attr.Channels); + if (!FormatOrErr) + return FormatOrErr.takeError(); + + InputLayoutDesc Desc = {}; + Desc.Name = Attr.Name; + Desc.Fmt = *FormatOrErr; + Desc.OffsetInBytes = Attr.Offset; + InputLayout.push_back(Desc); + } + + auto FormatOrErr = toFormat(P.Bindings.RTargetBufferPtr->Format, + P.Bindings.RTargetBufferPtr->Channels); if (!FormatOrErr) return FormatOrErr.takeError(); - InputLayoutDesc Desc = {}; - Desc.Name = Attr.Name; - Desc.Fmt = *FormatOrErr; - Desc.OffsetInBytes = Attr.Offset; - InputLayout.push_back(Desc); - } + llvm::SmallVector RTFormats; + RTFormats.push_back(*FormatOrErr); + + auto PipelineStateOrErr = createPipelineVsPs( + "Graphics Pipeline State", BindingsDesc, InputLayout, RTFormats, + Format::D32FloatS8Uint, VS, PS); + if (!PipelineStateOrErr) + return PipelineStateOrErr.takeError(); + State.Pipeline = std::move(*PipelineStateOrErr); + llvm::outs() << "Graphics Pipeline created.\n"; + } else if (P.isMeshShaderRaster()) { + std::optional AS = {}; + ShaderContainer MS = {}; + std::optional PS = {}; + for (auto &Shader : P.Shaders) { + if (Shader.Stage == Stages::Amplification) { + ShaderContainer Container; + Container.EntryPoint = Shader.Entry; + Container.Shader = Shader.Shader.get(); + AS = Container; + } else if (Shader.Stage == Stages::Mesh) { + MS.EntryPoint = Shader.Entry; + MS.Shader = Shader.Shader.get(); + } else if (Shader.Stage == Stages::Pixel) { + ShaderContainer Container; + Container.EntryPoint = Shader.Entry; + Container.Shader = Shader.Shader.get(); + PS = Container; + } + } - auto FormatOrErr = toFormat(P.Bindings.RTargetBufferPtr->Format, - P.Bindings.RTargetBufferPtr->Channels); - if (!FormatOrErr) - return FormatOrErr.takeError(); + auto FormatOrErr = toFormat(P.Bindings.RTargetBufferPtr->Format, + P.Bindings.RTargetBufferPtr->Channels); + if (!FormatOrErr) + return FormatOrErr.takeError(); - llvm::SmallVector RTFormats; - RTFormats.push_back(*FormatOrErr); + llvm::SmallVector RTFormats; + RTFormats.push_back(*FormatOrErr); - auto PipelineStateOrErr = createPipelineVsPs( - "Graphics Pipeline State", BindingsDesc, InputLayout, RTFormats, - Format::D32FloatS8Uint, VS, PS); - if (!PipelineStateOrErr) - return PipelineStateOrErr.takeError(); - State.Pipeline = std::move(*PipelineStateOrErr); - llvm::outs() << "Graphics Pipeline created.\n"; + auto PipelineStateOrErr = + createPipelineAsMsPs("Mesh Shader Pipeline State", BindingsDesc, + RTFormats, Format::D32FloatS8Uint, AS, MS, PS); + if (!PipelineStateOrErr) + return PipelineStateOrErr.takeError(); + State.Pipeline = std::move(*PipelineStateOrErr); + llvm::outs() << "Mesh Shader Pipeline created.\n"; + } if (auto Err = createFrameBuffer(State)) return Err; @@ -3001,7 +3558,7 @@ class VulkanDevice : public offloadtest::Device { llvm::outs() << "Executed copy command buffer.\n"; auto DispatchCBOrErr = VulkanCommandBuffer::create( Device, GraphicsQueue.QueueFamilyIdx, CmdBeginDebugUtilsLabel, - CmdEndDebugUtilsLabel, CmdInsertDebugUtilsLabel); + CmdEndDebugUtilsLabel, CmdInsertDebugUtilsLabel, MeshShaderFns); if (!DispatchCBOrErr) return DispatchCBOrErr.takeError(); State.CB = std::move(*DispatchCBOrErr); diff --git a/lib/Support/Pipeline.cpp b/lib/Support/Pipeline.cpp index 83a31c7cb..04abbc945 100644 --- a/lib/Support/Pipeline.cpp +++ b/lib/Support/Pipeline.cpp @@ -598,23 +598,34 @@ llvm::Error offloadtest::Pipeline::validatePipelineKind() { } if (HasShaderType[llvm::to_underlying(Stages::Vertex)]) { + if (HasShaderType[llvm::to_underlying(Stages::Amplification)] || + HasShaderType[llvm::to_underlying(Stages::Mesh)]) + return llvm::createStringError("Vertex and Mesh/Amplification Shaders " + "cannot be used in the same pipeline."); + Kind = ShaderPipelineKind::TraditionalRaster; return llvm::Error::success(); } + if (HasShaderType[llvm::to_underlying(Stages::Mesh)]) { + Kind = ShaderPipelineKind::MeshShaderRaster; + return llvm::Error::success(); + } + // As we add more pipeline types this error message should be updated with // more required shader types. return llvm::createStringError( - "The pipeline misses a Compute or Vertex Shader."); + "The pipeline misses a Compute, Vertex or Mesh Shader."); } llvm::Error offloadtest::Pipeline::validateDispatchParameters() { switch (Kind) { case ShaderPipelineKind::Compute: + case ShaderPipelineKind::MeshShaderRaster: if (DispatchParameters.VertexCount) return llvm::createStringError( - "DispatchParameters.VertexCount set on a Compute pipeline. Only " - "allowed on a TraditionalRaster pipeline."); + "DispatchParameters.VertexCount set on a Compute or Mesh Shader " + "pipeline. Only allowed on a TraditionalRaster pipeline."); break; case ShaderPipelineKind::TraditionalRaster: if (DispatchParameters.DispatchGroupCount != diff --git a/test/Graphics/MeshShaders/SimpleTriangle.test b/test/Graphics/MeshShaders/SimpleTriangle.test new file mode 100644 index 000000000..ed8bd0db3 --- /dev/null +++ b/test/Graphics/MeshShaders/SimpleTriangle.test @@ -0,0 +1,81 @@ +#--- mesh.hlsl +struct PSInput +{ + float4 position : SV_POSITION; + float4 color : COLOR; +}; + +[outputtopology("triangle")] +[numthreads(3, 1, 1)] +void main(uint groupThreadID: SV_GroupThreadID, out vertices PSInput verts[3], out indices uint3 tris[1]) { + SetMeshOutputCounts(3, 1); + + float4 position; + float4 color; + if (groupThreadID == 0) { + // position = float4(-1.0, -1.0, 0.0, 1.0); + position = float4(0.0, 0.25, 0.0, 1.0); + color = float4(1.0, 0.0, 0.0, 1.0); + } else if (groupThreadID == 1) { + // position = float4(0.0, 1.0, 0.0, 1.0); + position = float4(0.25, -0.25, 0.0, 1.0); + color = float4(0.0, 1.0, 0.0, 1.0); + } else /*if (groupThreadID == 2)*/ { + // position = float4(1.0, -1.0, 0.0, 1.0); + position = float4(-0.25, -0.25, 0.0, 1.0); + color = float4(0.0, 0.0, 1.0, 1.0); + } + + verts[groupThreadID].position = position; + verts[groupThreadID].color = color; + + if (groupThreadID == 0) { + tris[0] = uint3(0, 1, 2); + } +} + +#--- pixel.hlsl +struct PSInput +{ + float4 position : SV_POSITION; + float4 color : COLOR; +}; + +float4 main(PSInput input) : SV_TARGET +{ + return input.color; +} +#--- pipeline.yaml +--- +Shaders: + - Stage: Mesh + Entry: main + - Stage: Pixel + Entry: main +Buffers: + - Name: Output + Format: Float32 + Channels: 4 + FillSize: 1048576 # 256x256 @ 16 bytes per pixel + OutputProps: + Height: 256 + Width: 256 + Depth: 1 +Bindings: + RenderTarget: Output +DescriptorSets: [] +... +#--- rules.yaml +--- +- Type: PixelPercent + Val: 0.2 # No more than 0.2% of pixels may be visibly different. +... +#--- end + +# UNSUPPORTED: Metal + +# RUN: split-file %s %t +# RUN: %dxc_target -T ms_6_5 -Fo %t-mesh.o %t/mesh.hlsl +# RUN: %dxc_target -T ps_6_5 -Fo %t-pixel.o %t/pixel.hlsl +# RUN: %offloader %t/pipeline.yaml %t-mesh.o %t-pixel.o -r Output -o %t/Output.png +# RUN: imgdiff %t/Output.png %goldenimage_dir/hlsl/Graphics/MeshShaders/SimpleTriangle.png -rules %t/rules.yaml diff --git a/test/Graphics/VerticesFromVertexID.test b/test/Graphics/VerticesFromVertexID.test new file mode 100644 index 000000000..4eb933f77 --- /dev/null +++ b/test/Graphics/VerticesFromVertexID.test @@ -0,0 +1,74 @@ +#--- vertex.hlsl +struct PSInput +{ + float4 position : SV_POSITION; + float4 color : COLOR; +}; + +PSInput main(uint vertexID : SV_VertexID) +{ + PSInput result; + + if (vertexID == 0) { + result.position = float4(0.0, 0.25, 0.0, 1.0); + result.color = float4(1.0, 0.0, 0.0, 1.0); + } else if (vertexID == 1) { + result.position = float4(0.25, -0.25, 0.0, 1.0); + result.color = float4(0.0, 1.0, 0.0, 1.0); + } else { + result.position = float4(-0.25, -0.25, 0.0, 1.0); + result.color = float4(0.0, 0.0, 1.0, 1.0); + } + + return result; +} + + +#--- pixel.hlsl +struct PSInput +{ + float4 position : SV_POSITION; + float4 color : COLOR; +}; + +float4 main(PSInput input) : SV_TARGET +{ + return input.color; +} +#--- pipeline.yaml +--- +Shaders: + - Stage: Vertex + Entry: main + - Stage: Pixel + Entry: main +Buffers: + - Name: Output + Format: Float32 + Channels: 4 + FillSize: 1048576 # 256x256 @ 16 bytes per pixel + OutputProps: + Height: 256 + Width: 256 + Depth: 1 +Bindings: + RenderTarget: Output +DescriptorSets: [] +DispatchParameters: + VertexCount: 3 +... +#--- rules.yaml +--- +- Type: PixelPercent + Val: 0.2 # No more than 0.2% of pixels may be visibly different. +... +#--- end + +# XFAIL: Clang && !Vulkan +# REQUIRES: goldenimage + +# RUN: split-file %s %t +# RUN: %dxc_target -T vs_6_0 -Fo %t-vertex.o %t/vertex.hlsl +# RUN: %dxc_target -T ps_6_0 -Fo %t-pixel.o %t/pixel.hlsl +# RUN: %offloader %t/pipeline.yaml %t-vertex.o %t-pixel.o -r Output -o %t/Output.png +# RUN: imgdiff %t/Output.png %goldenimage_dir/hlsl/Graphics/VerticesFromVertexID.png -rules %t/rules.yaml