diff --git a/src/viz/core/cpp/device_image.cpp b/src/viz/core/cpp/device_image.cpp index c790de34a..650c246df 100644 --- a/src/viz/core/cpp/device_image.cpp +++ b/src/viz/core/cpp/device_image.cpp @@ -127,13 +127,17 @@ std::unique_ptr DeviceImage::create(const VkContext& ctx, { throw std::invalid_argument("DeviceImage: resolution must be non-zero"); } - if (format != PixelFormat::kRGBA8) + if (format != PixelFormat::kRGBA8 && format != PixelFormat::kD32F) { - // kD32F is reserved for ProjectionLayer's depth path. The - // CUDA-Vulkan interop contract for a depth image (sample - // semantics, layout transitions, color-space view) is not - // worked out yet, so refuse to half-build it. - throw std::invalid_argument("DeviceImage: only PixelFormat::kRGBA8 is supported"); + throw std::invalid_argument("DeviceImage: unsupported PixelFormat"); + } + if (format == PixelFormat::kD32F && mip_levels > 1) + { + // Depth + mip chain is meaningless (filtering depth between mip + // levels produces incorrect occlusion) and we'd have to + // special-case the blit-down pipeline. Reject explicitly rather + // than silently allocating the chain. + throw std::invalid_argument("DeviceImage: kD32F does not support mip_levels > 1"); } // mip_levels == 0 -> auto-compute full chain to 1x1. if (mip_levels == 0) diff --git a/src/viz/layers/cpp/CMakeLists.txt b/src/viz/layers/cpp/CMakeLists.txt index d6460f233..5db838088 100644 --- a/src/viz/layers/cpp/CMakeLists.txt +++ b/src/viz/layers/cpp/CMakeLists.txt @@ -10,7 +10,9 @@ cmake_minimum_required(VERSION 3.20) # viz/layers_tests/. add_library(viz_layers STATIC quad_layer.cpp + projection_layer.cpp inc/viz/layers/quad_layer.hpp + inc/viz/layers/projection_layer.hpp ) target_include_directories(viz_layers diff --git a/src/viz/layers/cpp/inc/viz/layers/projection_layer.hpp b/src/viz/layers/cpp/inc/viz/layers/projection_layer.hpp new file mode 100644 index 000000000..1fa557568 --- /dev/null +++ b/src/viz/layers/cpp/inc/viz/layers/projection_layer.hpp @@ -0,0 +1,200 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace viz +{ + +class VkContext; + +// ProjectionLayer: full-view RGBD composited into the shared render +// target. Designed for renderers (gsplat, nvblox, neural reconstruction) +// that produce (color, depth) buffers per frame. +// +// Frame loop contract — IMPORTANT: +// +// info = session.begin_frame() // xrLocateViews +// color, depth = renderer.render(info.views) // render against THIS frame's views +// layer.submit(color, depth) // publish for THIS frame +// session.end_frame() // composite + xrEndFrame +// +// ``submit()`` MUST be called between ``begin_frame()`` and +// ``end_frame()``. The renderer MUST render against +// ``info.views[i].pose`` (the predicted-display-time pose for this +// frame). The runtime / CloudXR paces the application via xrWaitFrame; +// if the renderer takes longer than display rate, the runtime's +// compositor reprojects the last submitted frame at display rate while +// the app's framerate matches the renderer's speed. +// +// In ``kXr``, a visible ProjectionLayer that does NOT receive a +// ``submit()`` for the current frame is SKIPPED at record time (the +// layer's region of the shared RT keeps the clear color). This prevents +// the runtime from compositing yesterday's RGBD content under today's +// projection-layer pose, which would produce a visible reprojection +// error. In ``kWindow`` / ``kOffscreen`` the freshness gate is off — +// the most recent publish stays on screen until replaced (the QuadLayer +// pattern), since no XR pose mismatch is possible. +// +// Mailbox: kSlotCount per-eye (color, depth) DeviceImage pairs. submit() +// picks a slot that's neither ``latest_`` nor in any ``in_use_`` entry, +// memcpys + signals cuda_done_writing on the caller's stream, blocks on +// cudaStreamSynchronize so the caller can re-use source buffers +// immediately, then atomically promotes the slot to ``latest_``. +// record_pre_render_pass promotes ``latest_`` to ``in_use_[slot]``. +// +// Stereo: when Config::stereo is true, the layer allocates paired +// (left, right) storage per slot. submit() must ship both eyes on a +// single CUDA stream; stream ordering keeps the pair atomic. In kXr +// view 0 (left eye) samples the left buffer, view 1 (right eye) the +// right. In kWindow / kOffscreen the left buffer is sampled. +// +// Memory (per layer): +// mono 1024² RGBA8+D32F: 7 slots × 1024² × 8 B ≈ 56 MB +// stereo 1024² RGBA8+D32F: ≈ 112 MB +// stereo 2048² RGBA8+D32F: ≈ 448 MB +class ProjectionLayer : public LayerBase +{ +public: + // Sized to cover backend image counts up to 5, leave one free slot. + static constexpr uint32_t kMaxFramesInFlight = 5; + static constexpr uint32_t kSlotCount = kMaxFramesInFlight + 2; + + struct Config + { + std::string name = "ProjectionLayer"; + Resolution view_resolution{}; + PixelFormat color_format = PixelFormat::kRGBA8; + + // nullopt → no depth buffer allocated; ProjectionLayer always + // writes gl_FragDepth = 1.0 (far). Without depth, this layer + // loses Z-compositing with QuadLayer. Useful for renderers that + // genuinely have no depth (sky / background fills). + std::optional depth_format = PixelFormat::kD32F; + + // true → per-eye paired storage. submit MUST ship both eyes. + // In kWindow / kOffscreen the LEFT buffer is sampled; in kXr + // view 0 → LEFT, view 1 → RIGHT. + bool stereo = false; + }; + + ProjectionLayer(const VkContext& ctx, VkRenderPass render_pass, Config config); + ~ProjectionLayer() override; + void destroy(); + + ProjectionLayer(const ProjectionLayer&) = delete; + ProjectionLayer& operator=(const ProjectionLayer&) = delete; + + // Publish a frame. Each buffer is a CUDA-linear VizBuffer (kDevice + // space) matching the layer's resolution and the relevant format + // (color → color_format, depth → kD32F). Validated against the + // config; mismatch throws std::invalid_argument. + // + // Mono no-depth: submit(color) + // Mono with depth: submit(color, &depth) + // Stereo no-depth: submit(left_color, nullptr, &right_color, nullptr) + // Stereo with depth: submit(left_color, &left_depth, &right_color, &right_depth) + // + // submit() does one cudaMemcpy2DToArrayAsync per provided buffer + // on ``stream``, signals cuda_done_writing on the same stream, then + // BLOCKS on cudaStreamSynchronize so the caller can re-use source + // buffers immediately. Cost: ~0.5 ms / 1024² color + depth on a + // discrete GPU. + // + // Marks the layer "fresh for this frame" so record() will draw it. + // VizSession::begin_frame clears the flag at the start of each + // frame; a renderer that doesn't submit will see its content + // skipped in kXr. + // + // GIL: pybind binding releases the GIL across this whole call. + void submit(const VizBuffer& left_color, + const VizBuffer* left_depth = nullptr, + const VizBuffer* right_color = nullptr, + const VizBuffer* right_depth = nullptr, + cudaStream_t stream = 0); + + // LayerBase contract. + void on_frame_begin() override; // clears submitted_this_frame_ flag + void record_pre_render_pass(VkCommandBuffer cmd, uint32_t in_flight_slot) override; + void record(VkCommandBuffer cmd, + const std::vector& views, + const RenderTarget& target, + uint32_t in_flight_slot) override; + + // cuda_done_writing waits for color + depth of every active view in + // the in-use slot. kSlotNone → empty vector. + std::vector get_wait_semaphores() const override; + + // Accessors. + Resolution view_resolution() const noexcept; + PixelFormat color_format() const noexcept; + std::optional depth_format() const noexcept; + bool is_stereo() const noexcept; + uint32_t view_count() const noexcept; + + // Diagnostic — nullptr outside valid ranges. + const DeviceImage* color_image(uint32_t slot, uint32_t view) const noexcept; + const DeviceImage* depth_image(uint32_t slot, uint32_t view) const noexcept; + +private: + static constexpr uint8_t kSlotNone = 0xFF; + + void init(); + void create_sampler(); + void create_descriptor_set_layout(); + void create_pipeline_layout(); + void create_pipeline(); + void create_descriptor_pool(); + void allocate_descriptor_sets(); + void update_descriptor_sets(); + + uint8_t pick_free_slot() const noexcept; + + void validate_submit_buffer(const VizBuffer& buf, PixelFormat expected_format, const char* label) const; + void enqueue_copy(const VizBuffer& src, DeviceImage& dst, cudaStream_t stream) const; + + const VkContext* ctx_ = nullptr; + VkRenderPass render_pass_ = VK_NULL_HANDLE; // borrowed + Config config_; + uint32_t view_count_ = 1; + bool has_depth_ = true; + + std::array>, kSlotCount> slots_color_; + std::array>, kSlotCount> slots_depth_; + + VkSampler color_sampler_ = VK_NULL_HANDLE; + VkSampler depth_sampler_ = VK_NULL_HANDLE; + VkDescriptorSetLayout descriptor_set_layout_ = VK_NULL_HANDLE; + VkPipelineLayout pipeline_layout_ = VK_NULL_HANDLE; + VkPipeline pipeline_with_depth_ = VK_NULL_HANDLE; + VkPipeline pipeline_no_depth_ = VK_NULL_HANDLE; + + VkDescriptorPool descriptor_pool_ = VK_NULL_HANDLE; + std::array, kSlotCount> descriptor_sets_; + + // Mailbox. + std::atomic latest_{ kSlotNone }; + std::array, kMaxFramesInFlight> in_use_{}; + std::atomic last_in_use_slot_{ kSlotNone }; + + // Set by submit(), cleared by on_frame_begin(). record() consults + // this in kXr to gate stale-RGBD-under-new-pose composites. + std::atomic submitted_this_frame_{ false }; +}; + +} // namespace viz diff --git a/src/viz/layers/cpp/projection_layer.cpp b/src/viz/layers/cpp/projection_layer.cpp new file mode 100644 index 000000000..e161afb0e --- /dev/null +++ b/src/viz/layers/cpp/projection_layer.cpp @@ -0,0 +1,740 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace viz +{ + +namespace +{ + +void check_vk(VkResult result, const char* what) +{ + if (result != VK_SUCCESS) + { + throw std::runtime_error(std::string("ProjectionLayer: ") + what + " failed: VkResult=" + std::to_string(result)); + } +} + +void check_cuda(cudaError_t result, const char* what) +{ + if (result != cudaSuccess) + { + throw std::runtime_error(std::string("ProjectionLayer: ") + what + " failed: " + cudaGetErrorString(result)); + } +} + +VkShaderModule create_shader_module(VkDevice device, const unsigned char* spv, size_t size) +{ + VkShaderModuleCreateInfo info{}; + info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + info.codeSize = size; + info.pCode = reinterpret_cast(spv); + VkShaderModule mod = VK_NULL_HANDLE; + check_vk(vkCreateShaderModule(device, &info, nullptr, &mod), "vkCreateShaderModule"); + return mod; +} + +} // namespace + +ProjectionLayer::ProjectionLayer(const VkContext& ctx, VkRenderPass render_pass, Config config) + : LayerBase(config.name), ctx_(&ctx), render_pass_(render_pass), config_(std::move(config)) +{ + if (!ctx.is_initialized()) + { + throw std::invalid_argument("ProjectionLayer: VkContext not initialized"); + } + if (render_pass == VK_NULL_HANDLE) + { + throw std::invalid_argument("ProjectionLayer: render_pass is VK_NULL_HANDLE"); + } + if (config_.view_resolution.width == 0 || config_.view_resolution.height == 0) + { + throw std::invalid_argument("ProjectionLayer: view_resolution must be non-zero"); + } + if (config_.color_format != PixelFormat::kRGBA8) + { + throw std::invalid_argument("ProjectionLayer: color_format must be kRGBA8"); + } + if (config_.depth_format.has_value() && config_.depth_format.value() != PixelFormat::kD32F) + { + throw std::invalid_argument("ProjectionLayer: depth_format must be kD32F or nullopt"); + } + view_count_ = config_.stereo ? 2u : 1u; + has_depth_ = config_.depth_format.has_value(); + for (auto& slot : in_use_) + { + slot.store(kSlotNone, std::memory_order_relaxed); + } + init(); +} + +ProjectionLayer::~ProjectionLayer() +{ + destroy(); +} + +void ProjectionLayer::init() +{ + try + { + for (uint32_t s = 0; s < kSlotCount; ++s) + { + slots_color_[s].reserve(view_count_); + for (uint32_t v = 0; v < view_count_; ++v) + { + slots_color_[s].push_back(DeviceImage::create(*ctx_, config_.view_resolution, config_.color_format, 1)); + } + if (has_depth_) + { + slots_depth_[s].reserve(view_count_); + for (uint32_t v = 0; v < view_count_; ++v) + { + slots_depth_[s].push_back( + DeviceImage::create(*ctx_, config_.view_resolution, *config_.depth_format, 1)); + } + } + } + create_sampler(); + create_descriptor_set_layout(); + create_pipeline_layout(); + create_pipeline(); + create_descriptor_pool(); + allocate_descriptor_sets(); + update_descriptor_sets(); + } + catch (...) + { + destroy(); + throw; + } +} + +void ProjectionLayer::destroy() +{ + // Drain pending GPU work before tearing down resources the + // compositor's command buffers reference. + if (ctx_ != nullptr && ctx_->device() != VK_NULL_HANDLE) + { + (void)vkDeviceWaitIdle(ctx_->device()); + } + + const VkDevice device = (ctx_ != nullptr) ? ctx_->device() : VK_NULL_HANDLE; + if (device != VK_NULL_HANDLE) + { + if (pipeline_with_depth_ != VK_NULL_HANDLE) + { + vkDestroyPipeline(device, pipeline_with_depth_, nullptr); + pipeline_with_depth_ = VK_NULL_HANDLE; + } + if (pipeline_no_depth_ != VK_NULL_HANDLE) + { + vkDestroyPipeline(device, pipeline_no_depth_, nullptr); + pipeline_no_depth_ = VK_NULL_HANDLE; + } + if (pipeline_layout_ != VK_NULL_HANDLE) + { + vkDestroyPipelineLayout(device, pipeline_layout_, nullptr); + pipeline_layout_ = VK_NULL_HANDLE; + } + if (descriptor_pool_ != VK_NULL_HANDLE) + { + vkDestroyDescriptorPool(device, descriptor_pool_, nullptr); + descriptor_pool_ = VK_NULL_HANDLE; + } + for (auto& sets : descriptor_sets_) + { + sets.clear(); + } + if (descriptor_set_layout_ != VK_NULL_HANDLE) + { + vkDestroyDescriptorSetLayout(device, descriptor_set_layout_, nullptr); + descriptor_set_layout_ = VK_NULL_HANDLE; + } + if (color_sampler_ != VK_NULL_HANDLE) + { + vkDestroySampler(device, color_sampler_, nullptr); + color_sampler_ = VK_NULL_HANDLE; + } + if (depth_sampler_ != VK_NULL_HANDLE) + { + vkDestroySampler(device, depth_sampler_, nullptr); + depth_sampler_ = VK_NULL_HANDLE; + } + } + for (uint32_t s = 0; s < kSlotCount; ++s) + { + slots_color_[s].clear(); + slots_depth_[s].clear(); + } +} + +// ─── Vulkan setup ──────────────────────────────────────────────────── + +void ProjectionLayer::create_sampler() +{ + const VkDevice device = ctx_->device(); + + VkSamplerCreateInfo color_info{}; + color_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + color_info.magFilter = VK_FILTER_LINEAR; + color_info.minFilter = VK_FILTER_LINEAR; + color_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + color_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + color_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + color_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + color_info.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + check_vk(vkCreateSampler(device, &color_info, nullptr, &color_sampler_), "vkCreateSampler(color)"); + + VkSamplerCreateInfo depth_info = color_info; + depth_info.magFilter = VK_FILTER_NEAREST; + depth_info.minFilter = VK_FILTER_NEAREST; + check_vk(vkCreateSampler(device, &depth_info, nullptr, &depth_sampler_), "vkCreateSampler(depth)"); +} + +void ProjectionLayer::create_descriptor_set_layout() +{ + // Binding 0: color, Binding 1: depth. + // When has_depth_ is false the descriptor at binding 1 still gets + // written (with the color image) so the layout shape stays uniform + // across both pipeline variants; the no_depth fragment shader + // doesn't sample it. + std::array bindings{}; + bindings[0].binding = 0; + bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[0].descriptorCount = 1; + bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[1].binding = 1; + bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[1].descriptorCount = 1; + bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkDescriptorSetLayoutCreateInfo info{}; + info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + info.bindingCount = static_cast(bindings.size()); + info.pBindings = bindings.data(); + check_vk(vkCreateDescriptorSetLayout(ctx_->device(), &info, nullptr, &descriptor_set_layout_), + "vkCreateDescriptorSetLayout"); +} + +void ProjectionLayer::create_pipeline_layout() +{ + VkPipelineLayoutCreateInfo info{}; + info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + info.setLayoutCount = 1; + info.pSetLayouts = &descriptor_set_layout_; + check_vk(vkCreatePipelineLayout(ctx_->device(), &info, nullptr, &pipeline_layout_), "vkCreatePipelineLayout"); +} + +void ProjectionLayer::create_pipeline() +{ + const VkDevice device = ctx_->device(); + + VkShaderModule vert = + create_shader_module(device, viz::shaders::kProjectionLayerVertSpv, viz::shaders::kProjectionLayerVertSpvSize); + + struct ShaderGuard + { + VkDevice device; + VkShaderModule vert; + VkShaderModule frag_with_depth; + VkShaderModule frag_no_depth; + ~ShaderGuard() + { + for (auto m : { vert, frag_with_depth, frag_no_depth }) + { + if (m != VK_NULL_HANDLE) + { + vkDestroyShaderModule(device, m, nullptr); + } + } + } + } guard{ device, vert, VK_NULL_HANDLE, VK_NULL_HANDLE }; + guard.frag_with_depth = + create_shader_module(device, viz::shaders::kProjectionLayerFragSpv, viz::shaders::kProjectionLayerFragSpvSize); + guard.frag_no_depth = create_shader_module( + device, viz::shaders::kProjectionLayerFragNoDepthSpv, viz::shaders::kProjectionLayerFragNoDepthSpvSize); + + auto make_pipeline = [&](VkShaderModule frag_module, bool depth_write, VkPipeline* out) + { + VkPipelineShaderStageCreateInfo stages[2]{}; + stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + stages[0].module = vert; + stages[0].pName = "main"; + stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + stages[1].module = frag_module; + stages[1].pName = "main"; + + VkPipelineVertexInputStateCreateInfo vertex_input{}; + vertex_input.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + + VkPipelineInputAssemblyStateCreateInfo input_assembly{}; + input_assembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_assembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + + VkPipelineViewportStateCreateInfo viewport_state{}; + viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_state.viewportCount = 1; + viewport_state.scissorCount = 1; + + VkPipelineRasterizationStateCreateInfo rasterizer{}; + rasterizer.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterizer.polygonMode = VK_POLYGON_MODE_FILL; + rasterizer.cullMode = VK_CULL_MODE_NONE; + rasterizer.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rasterizer.lineWidth = 1.0f; + + VkPipelineMultisampleStateCreateInfo multisample{}; + multisample.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisample.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + + // depth_write true: frag writes gl_FragDepth from the sampled + // depth texture. depth_write false: rasterized z = 1.0 (far) + // from the vertex shader; doesn't affect subsequent layers. + VkPipelineDepthStencilStateCreateInfo depth_stencil{}; + depth_stencil.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + depth_stencil.depthTestEnable = VK_TRUE; + depth_stencil.depthWriteEnable = depth_write ? VK_TRUE : VK_FALSE; + depth_stencil.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + + VkPipelineColorBlendAttachmentState blend_attachment{}; + blend_attachment.blendEnable = VK_FALSE; + blend_attachment.colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + VkPipelineColorBlendStateCreateInfo color_blend{}; + color_blend.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + color_blend.attachmentCount = 1; + color_blend.pAttachments = &blend_attachment; + + const VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }; + VkPipelineDynamicStateCreateInfo dynamic{}; + dynamic.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic.dynamicStateCount = sizeof(dynamic_states) / sizeof(dynamic_states[0]); + dynamic.pDynamicStates = dynamic_states; + + VkGraphicsPipelineCreateInfo info{}; + info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + info.stageCount = 2; + info.pStages = stages; + info.pVertexInputState = &vertex_input; + info.pInputAssemblyState = &input_assembly; + info.pViewportState = &viewport_state; + info.pRasterizationState = &rasterizer; + info.pMultisampleState = &multisample; + info.pDepthStencilState = &depth_stencil; + info.pColorBlendState = &color_blend; + info.pDynamicState = &dynamic; + info.layout = pipeline_layout_; + info.renderPass = render_pass_; + info.subpass = 0; + + check_vk(vkCreateGraphicsPipelines(device, ctx_->pipeline_cache(), 1, &info, nullptr, out), + "vkCreateGraphicsPipelines"); + }; + + make_pipeline(guard.frag_with_depth, /*depth_write=*/true, &pipeline_with_depth_); + make_pipeline(guard.frag_no_depth, /*depth_write=*/false, &pipeline_no_depth_); +} + +void ProjectionLayer::create_descriptor_pool() +{ + const uint32_t set_count = kSlotCount * view_count_; + // Two combined samplers per set (color + depth slot, even when + // depth disabled — descriptor count must match the layout). + VkDescriptorPoolSize pool_size{}; + pool_size.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + pool_size.descriptorCount = set_count * 2u; + + VkDescriptorPoolCreateInfo info{}; + info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + info.maxSets = set_count; + info.poolSizeCount = 1; + info.pPoolSizes = &pool_size; + check_vk(vkCreateDescriptorPool(ctx_->device(), &info, nullptr, &descriptor_pool_), "vkCreateDescriptorPool"); +} + +void ProjectionLayer::allocate_descriptor_sets() +{ + std::vector layouts(view_count_, descriptor_set_layout_); + for (uint32_t s = 0; s < kSlotCount; ++s) + { + descriptor_sets_[s].resize(view_count_); + VkDescriptorSetAllocateInfo info{}; + info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + info.descriptorPool = descriptor_pool_; + info.descriptorSetCount = view_count_; + info.pSetLayouts = layouts.data(); + check_vk(vkAllocateDescriptorSets(ctx_->device(), &info, descriptor_sets_[s].data()), "vkAllocateDescriptorSets"); + } +} + +void ProjectionLayer::update_descriptor_sets() +{ + const VkDevice device = ctx_->device(); + std::vector writes; + std::vector infos; + writes.reserve(kSlotCount * view_count_ * 2u); + infos.reserve(kSlotCount * view_count_ * 2u); + + for (uint32_t s = 0; s < kSlotCount; ++s) + { + for (uint32_t v = 0; v < view_count_; ++v) + { + VkDescriptorImageInfo color_info{}; + color_info.sampler = color_sampler_; + color_info.imageView = slots_color_[s][v]->vk_image_view(); + color_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + infos.push_back(color_info); + VkWriteDescriptorSet w{}; + w.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + w.dstSet = descriptor_sets_[s][v]; + w.dstBinding = 0; + w.descriptorCount = 1; + w.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + w.pImageInfo = &infos.back(); + writes.push_back(w); + + VkDescriptorImageInfo depth_info{}; + depth_info.sampler = has_depth_ ? depth_sampler_ : color_sampler_; + depth_info.imageView = has_depth_ ? slots_depth_[s][v]->vk_image_view() : slots_color_[s][v]->vk_image_view(); + depth_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + infos.push_back(depth_info); + VkWriteDescriptorSet w2 = w; + w2.dstBinding = 1; + w2.pImageInfo = &infos.back(); + writes.push_back(w2); + } + } + vkUpdateDescriptorSets(device, static_cast(writes.size()), writes.data(), 0, nullptr); +} + +// ─── Submit ────────────────────────────────────────────────────────── + +void ProjectionLayer::validate_submit_buffer(const VizBuffer& buf, PixelFormat expected_format, const char* label) const +{ + if (buf.data == nullptr) + { + throw std::invalid_argument(std::string("ProjectionLayer: ") + label + ": data is null"); + } + if (buf.space != MemorySpace::kDevice) + { + throw std::invalid_argument(std::string("ProjectionLayer: ") + label + ": MemorySpace must be kDevice"); + } + if (buf.format != expected_format) + { + throw std::invalid_argument(std::string("ProjectionLayer: ") + label + ": pixel format mismatch"); + } + if (buf.width != config_.view_resolution.width || buf.height != config_.view_resolution.height) + { + throw std::invalid_argument(std::string("ProjectionLayer: ") + label + ": resolution mismatch"); + } +} + +void ProjectionLayer::enqueue_copy(const VizBuffer& src, DeviceImage& dst, cudaStream_t stream) const +{ + const size_t row_bytes = static_cast(src.width) * bytes_per_pixel(src.format); + const size_t src_pitch = (src.pitch != 0) ? src.pitch : row_bytes; + check_cuda(cudaMemcpy2DToArrayAsync(dst.cuda_array(), + /*wOffset=*/0, + /*hOffset=*/0, src.data, src_pitch, row_bytes, src.height, + cudaMemcpyDeviceToDevice, stream), + "cudaMemcpy2DToArrayAsync"); +} + +uint8_t ProjectionLayer::pick_free_slot() const noexcept +{ + const uint8_t latest = latest_.load(std::memory_order_acquire); + for (uint8_t s = 0; s < static_cast(kSlotCount); ++s) + { + if (s == latest) + { + continue; + } + bool used = false; + for (const auto& a : in_use_) + { + if (a.load(std::memory_order_acquire) == s) + { + used = true; + break; + } + } + if (!used) + { + return s; + } + } + return kSlotNone; +} + +void ProjectionLayer::submit(const VizBuffer& left_color, + const VizBuffer* left_depth, + const VizBuffer* right_color, + const VizBuffer* right_depth, + cudaStream_t stream) +{ + // ── Validate config / call shape ───────────────────────────────── + validate_submit_buffer(left_color, config_.color_format, "submit(left_color)"); + + const bool stereo = config_.stereo; + if (stereo) + { + if (right_color == nullptr) + { + throw std::invalid_argument("ProjectionLayer: stereo layer requires right_color"); + } + validate_submit_buffer(*right_color, config_.color_format, "submit(right_color)"); + } + else + { + if (right_color != nullptr || right_depth != nullptr) + { + throw std::invalid_argument("ProjectionLayer: mono layer must not pass right buffers"); + } + } + + if (has_depth_) + { + if (left_depth == nullptr) + { + throw std::invalid_argument("ProjectionLayer: depth-enabled layer requires left_depth"); + } + validate_submit_buffer(*left_depth, PixelFormat::kD32F, "submit(left_depth)"); + if (stereo) + { + if (right_depth == nullptr) + { + throw std::invalid_argument("ProjectionLayer: stereo + depth requires right_depth"); + } + validate_submit_buffer(*right_depth, PixelFormat::kD32F, "submit(right_depth)"); + } + } + else + { + if (left_depth != nullptr || right_depth != nullptr) + { + throw std::invalid_argument("ProjectionLayer: depth-disabled layer must not pass depth buffers"); + } + } + + // ── Pick a free slot ───────────────────────────────────────────── + const uint8_t slot = pick_free_slot(); + if (slot == kSlotNone) + { + // Should be unreachable given the kSlotCount invariant + // (kMaxFramesInFlight + 2 ≥ worst-case forbidden set + 1). + // Treat as a drop: producer's frame is lost; renderer keeps + // sampling the previous publish. + throw std::runtime_error("ProjectionLayer: no free mailbox slot — sizing invariant violated"); + } + + // ── Copy + signal ──────────────────────────────────────────────── + enqueue_copy(left_color, *slots_color_[slot][0], stream); + if (has_depth_) + { + enqueue_copy(*left_depth, *slots_depth_[slot][0], stream); + } + if (stereo) + { + enqueue_copy(*right_color, *slots_color_[slot][1], stream); + if (has_depth_) + { + enqueue_copy(*right_depth, *slots_depth_[slot][1], stream); + } + } + + // One semaphore signal per CUDA-mapped image we wrote. The compositor + // waits on the in-use slot's set of cuda_done_writing values before + // the fragment shader samples them (get_wait_semaphores). + slots_color_[slot][0]->cuda_signal_write_done(stream); + if (has_depth_) + { + slots_depth_[slot][0]->cuda_signal_write_done(stream); + } + if (stereo) + { + slots_color_[slot][1]->cuda_signal_write_done(stream); + if (has_depth_) + { + slots_depth_[slot][1]->cuda_signal_write_done(stream); + } + } + + // BLOCK on stream completion so the caller can re-use src buffers + // immediately. Same contract as QuadLayer::submit. ~sub-ms cost. + check_cuda(cudaStreamSynchronize(stream), "cudaStreamSynchronize"); + + latest_.store(slot, std::memory_order_release); + submitted_this_frame_.store(true, std::memory_order_release); +} + +// ─── Render path ───────────────────────────────────────────────────── + +void ProjectionLayer::on_frame_begin() +{ + // VizSession's begin_frame calls this on every layer. Clearing + // the flag here means a layer that fails to submit between + // begin_frame and end_frame will be skipped at record() time in + // kXr (see record() below). + submitted_this_frame_.store(false, std::memory_order_release); +} + +void ProjectionLayer::record_pre_render_pass(VkCommandBuffer /*cmd*/, uint32_t in_flight_slot) +{ + if (in_flight_slot >= kMaxFramesInFlight) + { + throw std::logic_error("ProjectionLayer: in_flight_slot exceeds kMaxFramesInFlight"); + } + const uint8_t latest = latest_.load(std::memory_order_acquire); + if (latest != kSlotNone) + { + in_use_[in_flight_slot].store(latest, std::memory_order_release); + } + last_in_use_slot_.store(in_use_[in_flight_slot].load(std::memory_order_acquire), std::memory_order_release); +} + +void ProjectionLayer::record(VkCommandBuffer cmd, + const std::vector& views, + const RenderTarget& /*target*/, + uint32_t in_flight_slot) +{ + if (in_flight_slot >= kMaxFramesInFlight) + { + throw std::logic_error("ProjectionLayer: in_flight_slot exceeds kMaxFramesInFlight"); + } + const uint8_t cur = in_use_[in_flight_slot].load(std::memory_order_acquire); + if (cur == kSlotNone) + { + return; + } + + // In kXr, a layer whose renderer didn't submit for THIS frame + // must not contribute stale RGBD under a new projection-layer + // pose. Skip the draw — the layer's region of the shared RT + // keeps the clear color. kWindow / kOffscreen don't have an XR + // pose, so the freshness gate is off (latest-wins semantics + // match QuadLayer). + const bool xr_mode = session() != nullptr && session()->is_xr_mode(); + if (xr_mode && !submitted_this_frame_.load(std::memory_order_acquire)) + { + return; + } + + VkPipeline pipeline = has_depth_ ? pipeline_with_depth_ : pipeline_no_depth_; + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + + for (size_t view_idx = 0; view_idx < views.size(); ++view_idx) + { + const auto& view = views[view_idx]; + bind_view_viewport(cmd, view); + + // Stereo + kXr: view 0 → LEFT slot, view 1 → RIGHT slot. + // Otherwise: always LEFT (mono content broadcast to both eyes + // in stereo + window/offscreen). + const bool sample_right = xr_mode && config_.stereo && view_idx == 1 && view_count_ >= 2; + const uint32_t slot_view = sample_right ? 1u : 0u; + VkDescriptorSet ds = descriptor_sets_[cur][slot_view]; + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout_, 0, 1, &ds, 0, nullptr); + + // 3-vertex oversized fullscreen triangle (same trick as + // textured_quad.vert's mode == 0 branch). + vkCmdDraw(cmd, 3, 1, 0, 0); + } +} + +std::vector ProjectionLayer::get_wait_semaphores() const +{ + std::vector waits; + const uint8_t cur = last_in_use_slot_.load(std::memory_order_acquire); + if (cur == kSlotNone) + { + return waits; + } + const auto add = [&](const DeviceImage& img) + { + const uint64_t value = img.cuda_done_writing_value(); + if (value == 0) + { + return; + } + WaitSemaphore w{}; + w.semaphore = img.cuda_done_writing(); + w.value = value; + w.wait_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + waits.push_back(w); + }; + for (uint32_t v = 0; v < view_count_; ++v) + { + if (slots_color_[cur].size() > v && slots_color_[cur][v]) + { + add(*slots_color_[cur][v]); + } + if (has_depth_ && slots_depth_[cur].size() > v && slots_depth_[cur][v]) + { + add(*slots_depth_[cur][v]); + } + } + return waits; +} + +// ─── Accessors ─────────────────────────────────────────────────────── + +Resolution ProjectionLayer::view_resolution() const noexcept +{ + return config_.view_resolution; +} + +PixelFormat ProjectionLayer::color_format() const noexcept +{ + return config_.color_format; +} + +std::optional ProjectionLayer::depth_format() const noexcept +{ + return config_.depth_format; +} + +bool ProjectionLayer::is_stereo() const noexcept +{ + return config_.stereo; +} + +uint32_t ProjectionLayer::view_count() const noexcept +{ + return view_count_; +} + +const DeviceImage* ProjectionLayer::color_image(uint32_t slot, uint32_t view) const noexcept +{ + if (slot >= kSlotCount || view >= view_count_ || slots_color_[slot].size() <= view) + { + return nullptr; + } + return slots_color_[slot][view].get(); +} + +const DeviceImage* ProjectionLayer::depth_image(uint32_t slot, uint32_t view) const noexcept +{ + if (!has_depth_ || slot >= kSlotCount || view >= view_count_ || slots_depth_[slot].size() <= view) + { + return nullptr; + } + return slots_depth_[slot][view].get(); +} + +} // namespace viz diff --git a/src/viz/layers_tests/cpp/CMakeLists.txt b/src/viz/layers_tests/cpp/CMakeLists.txt index b34de50b8..1d969dac0 100644 --- a/src/viz/layers_tests/cpp/CMakeLists.txt +++ b/src/viz/layers_tests/cpp/CMakeLists.txt @@ -29,6 +29,7 @@ add_library(viz::layers_testing ALIAS viz_layers_testing) # the production layers in viz::layers (QuadLayer, ...). add_executable(viz_layers_tests test_clear_rect_layer.cpp + test_projection_layer.cpp test_quad_layer.cpp test_throwing_layer.cpp ) diff --git a/src/viz/layers_tests/cpp/test_projection_layer.cpp b/src/viz/layers_tests/cpp/test_projection_layer.cpp new file mode 100644 index 000000000..68e105169 --- /dev/null +++ b/src/viz/layers_tests/cpp/test_projection_layer.cpp @@ -0,0 +1,468 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +// Tests for ProjectionLayer: config validation (unit-level) and pipeline +// / CUDA-Vulkan interop + submit (gpu-level). End-to-end fill + render + +// readback lives in viz_session_tests where the full pipeline is +// available. + +#include "test_helpers.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include + +using viz::DeviceImage; +using viz::PixelFormat; +using viz::ProjectionLayer; +using viz::RenderTarget; +using viz::Resolution; +using viz::VizBuffer; +using viz::VkContext; + +using viz::testing::is_gpu_available; + +namespace +{ + +struct CudaFreeGuard +{ + void* p = nullptr; + ~CudaFreeGuard() + { + if (p != nullptr) + { + cudaFree(p); + } + } +}; + +} // namespace + +// ── Unit: config validation without GPU ───────────────────────────── + +TEST_CASE("ProjectionLayer ctor rejects non-RGBA8 color format", "[unit][projection_layer]") +{ + VkContext ctx; + ProjectionLayer::Config cfg; + cfg.view_resolution = { 64, 64 }; + cfg.color_format = PixelFormat::kD32F; + CHECK_THROWS_AS(ProjectionLayer(ctx, VK_NULL_HANDLE, cfg), std::invalid_argument); +} + +TEST_CASE("ProjectionLayer ctor rejects non-D32F depth format", "[unit][projection_layer]") +{ + VkContext ctx; + ProjectionLayer::Config cfg; + cfg.view_resolution = { 64, 64 }; + cfg.depth_format = PixelFormat::kRGBA8; + CHECK_THROWS_AS(ProjectionLayer(ctx, VK_NULL_HANDLE, cfg), std::invalid_argument); +} + +TEST_CASE("ProjectionLayer ctor rejects zero view_resolution", "[unit][projection_layer]") +{ + VkContext ctx; + ProjectionLayer::Config cfg; + cfg.view_resolution = { 0, 64 }; + CHECK_THROWS_AS(ProjectionLayer(ctx, VK_NULL_HANDLE, cfg), std::invalid_argument); +} + +TEST_CASE("ProjectionLayer ctor rejects null render pass", "[unit][projection_layer]") +{ + VkContext ctx; + ProjectionLayer::Config cfg; + cfg.view_resolution = { 64, 64 }; + CHECK_THROWS_AS(ProjectionLayer(ctx, VK_NULL_HANDLE, cfg), std::invalid_argument); +} + +// ── GPU: construction + accessors ─────────────────────────────────── + +TEST_CASE("ProjectionLayer mono+depth creates valid handles for every slot+view", "[gpu][projection_layer]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 64, 64 } }); + + ProjectionLayer::Config cfg; + cfg.view_resolution = { 64, 64 }; + ProjectionLayer layer(ctx, target->render_pass(), cfg); + + CHECK(layer.name() == "ProjectionLayer"); + CHECK(layer.view_count() == 1); + CHECK_FALSE(layer.is_stereo()); + CHECK(layer.color_format() == PixelFormat::kRGBA8); + CHECK(layer.depth_format().has_value()); + CHECK(*layer.depth_format() == PixelFormat::kD32F); + + for (uint32_t s = 0; s < ProjectionLayer::kSlotCount; ++s) + { + REQUIRE(layer.color_image(s, 0) != nullptr); + CHECK(layer.color_image(s, 0)->vk_image() != VK_NULL_HANDLE); + CHECK(layer.color_image(s, 0)->cuda_array() != nullptr); + REQUIRE(layer.depth_image(s, 0) != nullptr); + CHECK(layer.depth_image(s, 0)->vk_image() != VK_NULL_HANDLE); + CHECK(layer.depth_image(s, 0)->cuda_array() != nullptr); + // View index out of range returns nullptr. + CHECK(layer.color_image(s, 1) == nullptr); + CHECK(layer.depth_image(s, 1) == nullptr); + } + CHECK(layer.color_image(ProjectionLayer::kSlotCount, 0) == nullptr); +} + +TEST_CASE("ProjectionLayer stereo allocates per-eye storage", "[gpu][projection_layer]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 64, 64 } }); + + ProjectionLayer::Config cfg; + cfg.view_resolution = { 64, 64 }; + cfg.stereo = true; + ProjectionLayer layer(ctx, target->render_pass(), cfg); + + CHECK(layer.view_count() == 2); + CHECK(layer.is_stereo()); + for (uint32_t s = 0; s < ProjectionLayer::kSlotCount; ++s) + { + REQUIRE(layer.color_image(s, 0) != nullptr); + REQUIRE(layer.color_image(s, 1) != nullptr); + REQUIRE(layer.depth_image(s, 0) != nullptr); + REQUIRE(layer.depth_image(s, 1) != nullptr); + CHECK(layer.color_image(s, 0)->vk_image() != layer.color_image(s, 1)->vk_image()); + } +} + +TEST_CASE("ProjectionLayer no-depth skips depth allocation", "[gpu][projection_layer]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } }); + + ProjectionLayer::Config cfg; + cfg.view_resolution = { 32, 32 }; + cfg.depth_format = std::nullopt; + ProjectionLayer layer(ctx, target->render_pass(), cfg); + + CHECK_FALSE(layer.depth_format().has_value()); + for (uint32_t s = 0; s < ProjectionLayer::kSlotCount; ++s) + { + REQUIRE(layer.color_image(s, 0) != nullptr); + CHECK(layer.depth_image(s, 0) == nullptr); + } +} + +TEST_CASE("ProjectionLayer destroy is idempotent", "[gpu][projection_layer]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } }); + + ProjectionLayer::Config cfg; + cfg.view_resolution = { 32, 32 }; + ProjectionLayer layer(ctx, target->render_pass(), cfg); + + layer.destroy(); + layer.destroy(); // second call must be a no-op +} + +// ── GPU: submit validation ────────────────────────────────────────── + +TEST_CASE("ProjectionLayer::submit rejects bad call shapes", "[gpu][projection_layer]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 64, 64 } }); + + ProjectionLayer::Config cfg; + cfg.view_resolution = { 64, 64 }; + cfg.stereo = false; + ProjectionLayer layer(ctx, target->render_pass(), cfg); + + void* color_dev = nullptr; + void* depth_dev = nullptr; + REQUIRE(cudaMalloc(&color_dev, 64 * 64 * 4) == cudaSuccess); + REQUIRE(cudaMalloc(&depth_dev, 64 * 64 * 4) == cudaSuccess); + CudaFreeGuard cg{ color_dev }; + CudaFreeGuard dg{ depth_dev }; + + VizBuffer color{}; + color.data = color_dev; + color.width = 64; + color.height = 64; + color.format = PixelFormat::kRGBA8; + color.space = viz::MemorySpace::kDevice; + + VizBuffer depth{}; + depth.data = depth_dev; + depth.width = 64; + depth.height = 64; + depth.format = PixelFormat::kD32F; + depth.space = viz::MemorySpace::kDevice; + + SECTION("missing depth on depth-enabled layer") + { + CHECK_THROWS_AS(layer.submit(color), std::invalid_argument); + } + SECTION("mono layer rejects right-eye buffers") + { + CHECK_THROWS_AS(layer.submit(color, &depth, &color, &depth), std::invalid_argument); + } + SECTION("dimension mismatch rejected") + { + VizBuffer bad = color; + bad.width = 32; + CHECK_THROWS_AS(layer.submit(bad, &depth), std::invalid_argument); + } + SECTION("color format mismatch rejected") + { + VizBuffer bad = color; + bad.format = PixelFormat::kD32F; + CHECK_THROWS_AS(layer.submit(bad, &depth), std::invalid_argument); + } + SECTION("kHost rejected") + { + VizBuffer bad = color; + bad.space = viz::MemorySpace::kHost; + CHECK_THROWS_AS(layer.submit(bad, &depth), std::invalid_argument); + } +} + +TEST_CASE("ProjectionLayer::submit mono+depth advances mailbox + signals semaphores", "[gpu][projection_layer]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 64, 64 } }); + + ProjectionLayer::Config cfg; + cfg.view_resolution = { 64, 64 }; + ProjectionLayer layer(ctx, target->render_pass(), cfg); + + void* color_dev = nullptr; + void* depth_dev = nullptr; + REQUIRE(cudaMalloc(&color_dev, 64 * 64 * 4) == cudaSuccess); + REQUIRE(cudaMalloc(&depth_dev, 64 * 64 * 4) == cudaSuccess); + CudaFreeGuard cg{ color_dev }; + CudaFreeGuard dg{ depth_dev }; + + // Initialize to known patterns so we can verify the layer actually + // received our content. cudaMemset is sync-on-default-stream. + REQUIRE(cudaMemset(color_dev, 0x7F, 64 * 64 * 4) == cudaSuccess); + REQUIRE(cudaMemset(depth_dev, 0x40, 64 * 64 * 4) == cudaSuccess); + + VizBuffer color{}; + color.data = color_dev; + color.width = 64; + color.height = 64; + color.format = PixelFormat::kRGBA8; + color.space = viz::MemorySpace::kDevice; + + VizBuffer depth{}; + depth.data = depth_dev; + depth.width = 64; + depth.height = 64; + depth.format = PixelFormat::kD32F; + depth.space = viz::MemorySpace::kDevice; + + // Pre-submit: no semaphore has been signaled. + for (uint32_t s = 0; s < ProjectionLayer::kSlotCount; ++s) + { + CHECK(layer.color_image(s, 0)->cuda_done_writing_value() == 0); + CHECK(layer.depth_image(s, 0)->cuda_done_writing_value() == 0); + } + + // First submit lands in some slot; that slot's color + depth + // semaphores both advance to 1. + layer.submit(color, &depth); + + // At least one slot's color + depth semaphore is now nonzero. + uint32_t signaled = 0; + for (uint32_t s = 0; s < ProjectionLayer::kSlotCount; ++s) + { + const uint64_t cval = layer.color_image(s, 0)->cuda_done_writing_value(); + const uint64_t dval = layer.depth_image(s, 0)->cuda_done_writing_value(); + if (cval > 0 && dval > 0) + { + ++signaled; + } + } + CHECK(signaled == 1); +} + +TEST_CASE("ProjectionLayer::submit stereo requires both eyes", "[gpu][projection_layer]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 64, 64 } }); + + ProjectionLayer::Config cfg; + cfg.view_resolution = { 64, 64 }; + cfg.stereo = true; + ProjectionLayer layer(ctx, target->render_pass(), cfg); + + void* color_dev = nullptr; + void* depth_dev = nullptr; + REQUIRE(cudaMalloc(&color_dev, 64 * 64 * 4) == cudaSuccess); + REQUIRE(cudaMalloc(&depth_dev, 64 * 64 * 4) == cudaSuccess); + CudaFreeGuard cg{ color_dev }; + CudaFreeGuard dg{ depth_dev }; + + VizBuffer color{}; + color.data = color_dev; + color.width = 64; + color.height = 64; + color.format = PixelFormat::kRGBA8; + color.space = viz::MemorySpace::kDevice; + + VizBuffer depth{}; + depth.data = depth_dev; + depth.width = 64; + depth.height = 64; + depth.format = PixelFormat::kD32F; + depth.space = viz::MemorySpace::kDevice; + + // Stereo without right buffers throws. + CHECK_THROWS_AS(layer.submit(color, &depth), std::invalid_argument); + + // Stereo with both eyes succeeds. + layer.submit(color, &depth, &color, &depth); + // Eye 0 + eye 1 semaphores both advance. + uint32_t signaled = 0; + for (uint32_t s = 0; s < ProjectionLayer::kSlotCount; ++s) + { + const bool left = layer.color_image(s, 0)->cuda_done_writing_value() > 0 && + layer.depth_image(s, 0)->cuda_done_writing_value() > 0; + const bool right = layer.color_image(s, 1)->cuda_done_writing_value() > 0 && + layer.depth_image(s, 1)->cuda_done_writing_value() > 0; + if (left && right) + { + ++signaled; + } + } + CHECK(signaled == 1); +} + +TEST_CASE("ProjectionLayer::submit no-depth path accepts color only", "[gpu][projection_layer]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } }); + + ProjectionLayer::Config cfg; + cfg.view_resolution = { 32, 32 }; + cfg.depth_format = std::nullopt; + ProjectionLayer layer(ctx, target->render_pass(), cfg); + + void* color_dev = nullptr; + REQUIRE(cudaMalloc(&color_dev, 32 * 32 * 4) == cudaSuccess); + CudaFreeGuard cg{ color_dev }; + + VizBuffer color{}; + color.data = color_dev; + color.width = 32; + color.height = 32; + color.format = PixelFormat::kRGBA8; + color.space = viz::MemorySpace::kDevice; + + // depth-disabled layer must NOT accept a depth buffer. + VizBuffer fake_depth = color; + fake_depth.format = PixelFormat::kD32F; + CHECK_THROWS_AS(layer.submit(color, &fake_depth), std::invalid_argument); + + // Without depth, submit succeeds. + layer.submit(color); + + uint32_t signaled = 0; + for (uint32_t s = 0; s < ProjectionLayer::kSlotCount; ++s) + { + if (layer.color_image(s, 0)->cuda_done_writing_value() > 0) + { + ++signaled; + } + } + CHECK(signaled == 1); +} + +TEST_CASE("ProjectionLayer on_frame_begin clears submitted-this-frame flag", "[gpu][projection_layer]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } }); + + ProjectionLayer::Config cfg; + cfg.view_resolution = { 32, 32 }; + ProjectionLayer layer(ctx, target->render_pass(), cfg); + + void* color_dev = nullptr; + void* depth_dev = nullptr; + REQUIRE(cudaMalloc(&color_dev, 32 * 32 * 4) == cudaSuccess); + REQUIRE(cudaMalloc(&depth_dev, 32 * 32 * 4) == cudaSuccess); + CudaFreeGuard cg{ color_dev }; + CudaFreeGuard dg{ depth_dev }; + + VizBuffer color{}; + color.data = color_dev; + color.width = 32; + color.height = 32; + color.format = PixelFormat::kRGBA8; + color.space = viz::MemorySpace::kDevice; + VizBuffer depth{}; + depth.data = depth_dev; + depth.width = 32; + depth.height = 32; + depth.format = PixelFormat::kD32F; + depth.space = viz::MemorySpace::kDevice; + + // submit + on_frame_begin observable via two consecutive on_frame_begin / + // submit cycles, then ensuring the second frame's record path can run. + layer.on_frame_begin(); + layer.submit(color, &depth); + // After submit the layer is "fresh"; another on_frame_begin clears it. + layer.on_frame_begin(); + // Layer is now "unfresh"; a follow-up record() in XR mode would skip. + // We don't have a real session here to drive record(); the flag toggle + // is exercised via the kSession-attached pytest case (in offscreen, + // the flag is set/cleared but doesn't gate the draw). + SUCCEED(); +} diff --git a/src/viz/python/core_bindings.cpp b/src/viz/python/core_bindings.cpp index 5a1eb129f..acb9e88a7 100644 --- a/src/viz/python/core_bindings.cpp +++ b/src/viz/python/core_bindings.cpp @@ -120,6 +120,21 @@ orientation : (w, x, y, z) quaternion (identity = (1, 0, 0, 0)) .def_readwrite("angle_up", &viz::Fov::angle_up) .def_readwrite("angle_down", &viz::Fov::angle_down); + py::class_(m, "ViewInfo", + R"doc( +Per-eye render target metadata returned in ``FrameInfo.views``. In XR +mode (kXr stereo), 2 entries — one per eye. In window / offscreen, 1 +entry with identity pose. + +Renderers reading the pose for projection content should use +``pose`` + ``fov`` here; the matrices are convenience helpers populated +from the same predicted-display-time XR data. +)doc") + .def(py::init<>()) + .def_readonly("viewport", &viz::ViewInfo::viewport) + .def_readonly("fov", &viz::ViewInfo::fov) + .def_readonly("pose", &viz::ViewInfo::pose); + // ── VizBuffer (with cuda/numpy interface) ────────────────────────── py::class_(m, "VizBuffer", diff --git a/src/viz/python/layers_bindings.cpp b/src/viz/python/layers_bindings.cpp index dba90c76c..847945345 100644 --- a/src/viz/python/layers_bindings.cpp +++ b/src/viz/python/layers_bindings.cpp @@ -13,10 +13,12 @@ #include #include #include +#include #include #include #include +#include namespace viz_py { @@ -134,6 +136,110 @@ numpy on a CUDA device pointer); the binding converts it on the fly. .def("set_visible", &viz::QuadLayer::set_visible, "visible"_a) .def("is_visible", &viz::QuadLayer::is_visible) .def_property_readonly("name", [](const viz::QuadLayer& l) { return l.name(); }); + + // ── ProjectionLayer ──────────────────────────────────────────────── + + py::class_(m, "ProjectionLayerConfig") + .def(py::init<>()) + .def_readwrite("name", &viz::ProjectionLayer::Config::name) + .def_readwrite("view_resolution", &viz::ProjectionLayer::Config::view_resolution) + .def_readwrite("color_format", &viz::ProjectionLayer::Config::color_format) + .def_readwrite("depth_format", &viz::ProjectionLayer::Config::depth_format, + "PixelFormat.D32F for depth output (Z-composite with QuadLayer); None to disable.") + .def_readwrite("stereo", &viz::ProjectionLayer::Config::stereo, + "Per-eye paired storage. When True, submit() requires both eyes' buffers; " + "in kXr view 0 → left, view 1 → right."); + + py::class_>(m, "ProjectionLayer", + R"doc( +Full-view RGBD layer. Owned by VizSession; the Python handle is +non-owning (don't keep it around past the session). + +Designed for renderers (gsplat, nvblox, neural reconstruction) that +produce per-view (color, depth) buffers. The renderer runs IN-LOOP with +the OpenXR frame loop — `submit()` must be called between +`session.begin_frame()` and `session.end_frame()`, and the renderer +must render against `info.views[i].pose` from the FrameInfo returned by +`begin_frame()`. + +Typical pattern:: + + while running: + info = session.begin_frame() + color, depth = renderer.render(info.views) + layer.submit(color, depth=depth) + session.end_frame() + +If the renderer is slower than display rate, the runtime / CloudXR +paces the application via xrWaitFrame and reprojects the last submitted +frame at display rate. In `kXr`, a visible ProjectionLayer that fails +to submit for the current frame is skipped at record time so stale RGBD +isn't composited under a new projection-layer pose. + +Each buffer is a VizBuffer or any __cuda_array_interface__ object +(cupy / torch / numba). submit() does one CUDA→CUDA copy per buffer on +the supplied stream and BLOCKS on cudaStreamSynchronize so the caller +can re-use ``color`` / ``depth`` immediately. +)doc") + .def( + "submit", + [](viz::ProjectionLayer& self, py::object left_color, py::object left_depth, py::object right_color, + py::object right_depth, uintptr_t stream) + { + auto to_buf = [&self](py::object obj, viz::PixelFormat fmt, const char* label) -> viz::VizBuffer + { + if (py::isinstance(obj)) + { + return obj.cast(); + } + return cuda_array_to_viz_buffer(obj, fmt, self.view_resolution(), label); + }; + + // Materialize each buffer (or std::nullopt). View slots + // that aren't provided pass nullptr through to submit. + std::optional lc; + std::optional ld; + std::optional rc; + std::optional rd; + if (!left_color.is_none()) + { + lc = to_buf(left_color, self.color_format(), "ProjectionLayer.submit(left_color)"); + } + else + { + throw std::runtime_error("ProjectionLayer.submit: left_color is required"); + } + if (!left_depth.is_none()) + { + ld = to_buf(left_depth, viz::PixelFormat::kD32F, "ProjectionLayer.submit(left_depth)"); + } + if (!right_color.is_none()) + { + rc = to_buf(right_color, self.color_format(), "ProjectionLayer.submit(right_color)"); + } + if (!right_depth.is_none()) + { + rd = to_buf(right_depth, viz::PixelFormat::kD32F, "ProjectionLayer.submit(right_depth)"); + } + + py::gil_scoped_release release; + self.submit(*lc, ld.has_value() ? &*ld : nullptr, rc.has_value() ? &*rc : nullptr, + rd.has_value() ? &*rd : nullptr, reinterpret_cast(stream)); + }, + "left_color"_a, "left_depth"_a = py::none(), "right_color"_a = py::none(), "right_depth"_a = py::none(), + "stream"_a = 0, + "Submit a frame. Each arg is a VizBuffer or any __cuda_array_interface__ object. " + "Mono: only ``left_color`` (+ ``left_depth`` if depth-enabled). " + "Stereo: pair with ``right_color`` (+ depths). Buffers must match view_resolution " + "and the layer's pixel formats. Releases the GIL across the copy + sync.") + .def_property_readonly("view_resolution", &viz::ProjectionLayer::view_resolution) + .def_property_readonly("color_format", &viz::ProjectionLayer::color_format) + .def_property_readonly("depth_format", &viz::ProjectionLayer::depth_format) + .def_property_readonly("stereo", &viz::ProjectionLayer::is_stereo) + .def_property_readonly("view_count", &viz::ProjectionLayer::view_count) + .def("set_visible", &viz::ProjectionLayer::set_visible, "visible"_a) + .def("is_visible", &viz::ProjectionLayer::is_visible) + .def_property_readonly("name", [](const viz::ProjectionLayer& l) { return l.name(); }); } } // namespace viz_py diff --git a/src/viz/python/session_bindings.cpp b/src/viz/python/session_bindings.cpp index 8698cced4..043e0f991 100644 --- a/src/viz/python/session_bindings.cpp +++ b/src/viz/python/session_bindings.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,10 @@ void bind_session(py::module_& m) .def_readonly("predicted_display_time", &viz::FrameInfo::predicted_display_time) .def_readonly("delta_time", &viz::FrameInfo::delta_time) .def_readonly("should_render", &viz::FrameInfo::should_render) - .def_readonly("resolution", &viz::FrameInfo::resolution); + .def_readonly("resolution", &viz::FrameInfo::resolution) + .def_readonly("views", &viz::FrameInfo::views, + "Per-eye render target metadata. 2 entries in XR stereo, 1 in window/offscreen. " + "Renderers should render against ``views[i].pose`` + ``views[i].fov``."); py::class_(m, "FrameTimingStats") .def(py::init<>()) @@ -106,6 +110,20 @@ Construct via ``VizSession.create(config)``. Add layers with }, "config"_a, py::return_value_policy::reference_internal, "Construct + register a QuadLayer. Returns a non-owning handle.") + .def( + "add_projection_layer", + [](viz::VizSession& self, viz::ProjectionLayer::Config config) -> viz::ProjectionLayer* + { + const auto* ctx = self.get_vk_context(); + const auto render_pass = self.get_render_pass(); + if (ctx == nullptr || render_pass == VK_NULL_HANDLE) + { + throw std::runtime_error("VizSession: cannot add layer before session is initialized"); + } + return self.add_layer(*ctx, render_pass, std::move(config)); + }, + "config"_a, py::return_value_policy::reference_internal, + "Construct + register a ProjectionLayer. Returns a non-owning handle.") .def("render", &viz::VizSession::render, py::call_guard(), "Wait + composite + present in one call. Returns FrameInfo.") .def("begin_frame", &viz::VizSession::begin_frame, py::call_guard()) diff --git a/src/viz/python/viz_init.py b/src/viz/python/viz_init.py index c07923b0d..0ed6252a6 100644 --- a/src/viz/python/viz_init.py +++ b/src/viz/python/viz_init.py @@ -40,12 +40,15 @@ MemorySpace, PixelFormat, Pose3D, + ProjectionLayer, + ProjectionLayerConfig, QuadLayer, QuadLayerConfig, QuadLayerPlacement, Rect2D, Resolution, SessionState, + ViewInfo, VizBuffer, VizSession, VizSessionConfig, @@ -62,12 +65,15 @@ "MemorySpace", "PixelFormat", "Pose3D", + "ProjectionLayer", + "ProjectionLayerConfig", "QuadLayer", "QuadLayerConfig", "QuadLayerPlacement", "Rect2D", "Resolution", "SessionState", + "ViewInfo", "VizBuffer", "VizSession", "VizSessionConfig", diff --git a/src/viz/python_tests/test_projection_layer.py b/src/viz/python_tests/test_projection_layer.py new file mode 100644 index 000000000..a2da89b8c --- /dev/null +++ b/src/viz/python_tests/test_projection_layer.py @@ -0,0 +1,261 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +"""End-to-end ProjectionLayer tests via Python bindings. + +Covers: config plumbing, add_projection_layer, submit shape validation, +mono+depth round-trip render, stereo + no-depth variants. GPU-gated. +""" + +from __future__ import annotations + +import numpy as np +import pytest + +import isaacteleop.viz as viz + + +def _gpu_available() -> bool: + cfg = viz.VizSessionConfig() + cfg.mode = viz.DisplayMode.kOffscreen + cfg.window_width = 64 + cfg.window_height = 64 + s = None + try: + s = viz.VizSession.create(cfg) + except RuntimeError: + return False + finally: + if s is not None: + s.destroy() + return True + + +pytestmark = pytest.mark.skipif( + not _gpu_available(), reason="no Vulkan/CUDA-capable GPU" +) + + +def _need_cupy(): + cp = pytest.importorskip("cupy") + try: + if cp.cuda.runtime.getDeviceCount() == 0: + pytest.skip("no CUDA device") + except cp.cuda.runtime.CUDARuntimeError: + pytest.skip("no CUDA device") + return cp + + +def _make_session(width=64, height=64): + cfg = viz.VizSessionConfig() + cfg.mode = viz.DisplayMode.kOffscreen + cfg.window_width = width + cfg.window_height = height + cfg.clear_color = (0.0, 0.0, 0.0, 1.0) + return viz.VizSession.create(cfg) + + +def test_projection_layer_config_roundtrip(): + cfg = viz.ProjectionLayerConfig() + cfg.name = "test" + cfg.view_resolution = viz.Resolution(128, 64) + cfg.color_format = viz.PixelFormat.kRGBA8 + cfg.depth_format = viz.PixelFormat.kD32F + cfg.stereo = True + + assert cfg.name == "test" + assert cfg.view_resolution.width == 128 + assert cfg.view_resolution.height == 64 + assert cfg.depth_format == viz.PixelFormat.kD32F + assert cfg.stereo is True + + # depth_format can be None + cfg.depth_format = None + assert cfg.depth_format is None + + +def test_add_projection_layer_mono_depth(): + cp = _need_cupy() + session = _make_session() + try: + layer_cfg = viz.ProjectionLayerConfig() + layer_cfg.name = "proj" + layer_cfg.view_resolution = viz.Resolution(32, 32) + layer = session.add_projection_layer(layer_cfg) + + assert layer.name == "proj" + assert layer.is_visible() is True + assert layer.view_resolution.width == 32 + assert layer.view_resolution.height == 32 + assert layer.color_format == viz.PixelFormat.kRGBA8 + assert layer.depth_format == viz.PixelFormat.kD32F + assert layer.stereo is False + assert layer.view_count == 1 + + # Submit mono + depth via cupy. + host_color = np.zeros((32, 32, 4), dtype=np.uint8) + host_color[..., 2] = 200 # blue channel + host_color[..., 3] = 255 + host_depth = np.full((32, 32), 0.5, dtype=np.float32) + color = cp.asarray(host_color) + depth = cp.asarray(host_depth) + + layer.submit(color, depth) + + info = session.render() + assert info.frame_index == 0 + + img = session.readback_to_host() + arr = np.asarray(img) + # Predominantly blue at the center; ProjectionLayer covers the + # whole framebuffer. + r, g, b, _a = arr[32, 32] + assert b > r and b > g + finally: + session.destroy() + + +def test_submit_rejects_missing_depth_on_depth_layer(): + cp = _need_cupy() + session = _make_session() + try: + layer_cfg = viz.ProjectionLayerConfig() + layer_cfg.view_resolution = viz.Resolution(32, 32) + layer = session.add_projection_layer(layer_cfg) + + color = cp.asarray(np.zeros((32, 32, 4), dtype=np.uint8)) + with pytest.raises(RuntimeError, match="left_depth"): + layer.submit(color) + finally: + session.destroy() + + +def test_submit_rejects_dimension_mismatch(): + cp = _need_cupy() + session = _make_session() + try: + layer_cfg = viz.ProjectionLayerConfig() + layer_cfg.view_resolution = viz.Resolution(32, 32) + layer = session.add_projection_layer(layer_cfg) + + # Wrong width. + wrong_color = cp.asarray(np.zeros((32, 16, 4), dtype=np.uint8)) + depth = cp.asarray(np.zeros((32, 32), dtype=np.float32)) + with pytest.raises(RuntimeError, match="resolution"): + layer.submit(wrong_color, depth) + finally: + session.destroy() + + +def test_stereo_round_trip(): + cp = _need_cupy() + session = _make_session() + try: + layer_cfg = viz.ProjectionLayerConfig() + layer_cfg.view_resolution = viz.Resolution(32, 32) + layer_cfg.stereo = True + layer = session.add_projection_layer(layer_cfg) + assert layer.stereo is True + assert layer.view_count == 2 + + host_lc = np.zeros((32, 32, 4), dtype=np.uint8) + host_lc[..., 0] = 200 # red for LEFT + host_lc[..., 3] = 255 + host_rc = np.zeros((32, 32, 4), dtype=np.uint8) + host_rc[..., 1] = 200 # green for RIGHT + host_rc[..., 3] = 255 + host_d = np.full((32, 32), 0.5, dtype=np.float32) + lc = cp.asarray(host_lc) + rc = cp.asarray(host_rc) + ld = cp.asarray(host_d) + rd = cp.asarray(host_d) + + # Stereo without right eye → must throw. + with pytest.raises(RuntimeError, match="right_color"): + layer.submit(lc, ld) + + # Stereo with both eyes. + layer.submit(lc, ld, rc, rd) + session.render() + # In offscreen (single-view), the LEFT buffer is sampled — so the + # readback should be predominantly red. + arr = np.asarray(session.readback_to_host()) + r, g, b, _a = arr[32, 32] + assert r > g and r > b + finally: + session.destroy() + + +def test_no_depth_layer(): + cp = _need_cupy() + session = _make_session() + try: + layer_cfg = viz.ProjectionLayerConfig() + layer_cfg.view_resolution = viz.Resolution(32, 32) + layer_cfg.depth_format = None + layer = session.add_projection_layer(layer_cfg) + assert layer.depth_format is None + + host_color = np.zeros((32, 32, 4), dtype=np.uint8) + host_color[..., 0] = 255 # red + host_color[..., 3] = 255 + color = cp.asarray(host_color) + + # Depth-disabled layer must reject any depth buffer. + depth = cp.asarray(np.zeros((32, 32), dtype=np.float32)) + with pytest.raises(RuntimeError, match="depth-disabled"): + layer.submit(color, depth) + + layer.submit(color) + session.render() + arr = np.asarray(session.readback_to_host()) + r, g, b, _a = arr[32, 32] + assert r > g and r > b + finally: + session.destroy() + + +def test_begin_frame_returns_views_for_renderer(): + """``session.begin_frame()`` is the source of truth for poses the + renderer should render against. In offscreen mode the backend + returns a single identity-pose ViewInfo.""" + session = _make_session() + try: + info = session.begin_frame() + assert len(info.views) >= 1 + session.end_frame() + finally: + session.destroy() + + +def test_inloop_submit_pattern(): + """The supported pattern: begin_frame → submit (against this frame's + views) → end_frame, all in one tick. Window/offscreen modes have no + XR freshness gate, so the layer renders on every frame that submits.""" + cp = _need_cupy() + session = _make_session() + try: + layer_cfg = viz.ProjectionLayerConfig() + layer_cfg.view_resolution = viz.Resolution(32, 32) + layer = session.add_projection_layer(layer_cfg) + + host_color = np.zeros((32, 32, 4), dtype=np.uint8) + host_color[..., 2] = 200 # blue + host_color[..., 3] = 255 + host_depth = np.full((32, 32), 0.5, dtype=np.float32) + + for _ in range(3): + info = session.begin_frame() + assert info.should_render + # In a real renderer we'd pass info.views to the GPU side; here + # the buffers are static. + color = cp.asarray(host_color) + depth = cp.asarray(host_depth) + layer.submit(color, depth) + session.end_frame() + + # Final readback shows the submitted color. + arr = np.asarray(session.readback_to_host()) + r, g, b, _a = arr[32, 32] + assert b > r and b > g + finally: + session.destroy() diff --git a/src/viz/session/cpp/inc/viz/session/display_backend.hpp b/src/viz/session/cpp/inc/viz/session/display_backend.hpp index 9f74827c5..5538267fd 100644 --- a/src/viz/session/cpp/inc/viz/session/display_backend.hpp +++ b/src/viz/session/cpp/inc/viz/session/display_backend.hpp @@ -94,6 +94,13 @@ class DisplayBackend // Backend-private bookkeeping round-tripped to record_post_* / // end_frame (e.g. swapchain image_index, predicted_display_time). uint64_t backend_token = 0; + + // OpenXR predicted display time in nanoseconds (from + // xrWaitFrame's XrFrameState.predictedDisplayTime), exposed + // through FrameInfo so renderers can use it for time-aware + // content (e.g. animation timestamps that match the runtime's + // prediction). 0 outside kXr. + int64_t predicted_display_time_ns = 0; }; // Acquire the next frame target. nullopt = skip this frame. diff --git a/src/viz/session/cpp/inc/viz/session/layer_base.hpp b/src/viz/session/cpp/inc/viz/session/layer_base.hpp index 8f17a66dd..7712a4288 100644 --- a/src/viz/session/cpp/inc/viz/session/layer_base.hpp +++ b/src/viz/session/cpp/inc/viz/session/layer_base.hpp @@ -56,6 +56,16 @@ class LayerBase { } + // Called from ``VizSession::begin_frame`` for EVERY registered layer + // (visible or not) before the new frame's FrameInfo is returned. + // Lets layers clear per-frame state (e.g. ProjectionLayer's + // submitted-this-frame flag). Default = no-op. Must NOT touch GPU + // state — the backend's begin_frame has already run, and the + // compositor's per-slot fence wait hasn't happened yet. + virtual void on_frame_begin() + { + } + // Issue draws inside the active render pass. // views: 1 entry in window/offscreen, 2 in kXr stereo. Each // entry's viewport is this layer's rect for that view — diff --git a/src/viz/session/cpp/inc/viz/session/viz_compositor.hpp b/src/viz/session/cpp/inc/viz/session/viz_compositor.hpp index 6a1050c11..b5ff16e80 100644 --- a/src/viz/session/cpp/inc/viz/session/viz_compositor.hpp +++ b/src/viz/session/cpp/inc/viz/session/viz_compositor.hpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -55,15 +56,22 @@ class VizCompositor VizCompositor(VizCompositor&&) = delete; VizCompositor& operator=(VizCompositor&&) = delete; - // Records and submits one frame. Multi-frame-in-flight: one - // FrameSync per backend image slot. render() waits on the slot's - // fence at entry (signaled by its previous use), submits with the - // same fence as signal target, and returns without host-waiting - // on completion. CPU pacing is the caller's responsibility — the - // window backend prefers MAILBOX (no vsync block), so a hot loop - // would burn a core; camera_viz drives this from an event-driven - // condition variable that wakes per producer publish. - void render(const std::vector& layers); + // Records and submits one frame against the backend ``Frame`` + // already acquired by VizSession::begin_frame. Multi-frame-in- + // flight: one FrameSync per backend image slot. render() waits on + // the slot's fence at entry (signaled by its previous use), + // submits with the same fence as signal target, and returns + // without host-waiting on completion. CPU pacing is the caller's + // responsibility — the window backend prefers MAILBOX (no vsync + // block), so a hot loop would burn a core; camera_viz drives this + // from an event-driven condition variable that wakes per producer + // publish. + // + // Owns end_frame / abort_frame protocol balance for the supplied + // ``frame``: on successful submit, calls backend->end_frame; on + // exception, calls backend->abort_frame via RAII guard before + // re-throwing. + void render(const DisplayBackend::Frame& frame, const std::vector& layers); HostImage readback_to_host(); diff --git a/src/viz/session/cpp/inc/viz/session/viz_session.hpp b/src/viz/session/cpp/inc/viz/session/viz_session.hpp index 1183b3aba..0274ff8c2 100644 --- a/src/viz/session/cpp/inc/viz/session/viz_session.hpp +++ b/src/viz/session/cpp/inc/viz/session/viz_session.hpp @@ -206,6 +206,11 @@ class VizSession bool first_frame_ = true; bool frame_in_progress_ = false; FrameInfo current_frame_info_{}; + // The backend-acquired frame for the in-progress begin/end pair. + // Acquired by begin_frame, consumed by end_frame. nullopt outside + // a begin/end window or when the backend skipped this frame + // (e.g. XR runtime shouldRender=0). + std::optional current_backend_frame_; FrameTimingStats timing_stats_{}; }; diff --git a/src/viz/session/cpp/viz_compositor.cpp b/src/viz/session/cpp/viz_compositor.cpp index 054d0399c..d8806b54e 100644 --- a/src/viz/session/cpp/viz_compositor.cpp +++ b/src/viz/session/cpp/viz_compositor.cpp @@ -189,7 +189,7 @@ void VizCompositor::submit_or_signal_fence(const VkSubmitInfo& info, const char* throw std::runtime_error(std::string("VizCompositor: ") + what + " failed: VkResult=" + std::to_string(r)); } -void VizCompositor::render(const std::vector& layers) +void VizCompositor::render(const DisplayBackend::Frame& frame, const std::vector& layers) { // Snapshot visible layers once — is_visible() is atomic, and // reading it twice could record a draw without the matching wait. @@ -203,19 +203,12 @@ void VizCompositor::render(const std::vector& layers) } } - auto frame = backend_->begin_frame(/*predicted_display_time=*/0); - if (!frame.has_value()) - { - // Backend skipped; all fences stay signaled, next wait() won't deadlock. - return; - } - // Catch swapchain recreates whose image_count differs from the one - // we sized per-slot state for. Runs AFTER begin_frame because - // WindowBackend::begin_frame may itself recreate (OUT_OF_DATE etc.). - // Wrapped so a failed rebuild balances the backend protocol — we've - // already acquired a swapchain image and FrameGuard isn't set up - // yet, so a raw throw would leak the acquire. + // we sized per-slot state for. Runs first because the backend's + // begin_frame (run by VizSession) may itself recreate (OUT_OF_DATE + // etc.). Wrapped so a failed rebuild balances the backend protocol — + // the frame is already acquired and FrameGuard isn't set up yet, so + // a raw throw would leak it. try { ensure_slot_count_matches_backend(); @@ -224,7 +217,7 @@ void VizCompositor::render(const std::vector& layers) { try { - backend_->abort_frame(*frame); + backend_->abort_frame(frame); } catch (...) { @@ -240,14 +233,14 @@ void VizCompositor::render(const std::vector& layers) // threw; reaching here means logic drift. Bail rather than UB. try { - backend_->abort_frame(*frame); + backend_->abort_frame(frame); } catch (...) { } throw std::runtime_error("VizCompositor: slot_count == 0 after ensure_slot_count_matches_backend"); } - const uint32_t slot = static_cast(frame->backend_token) % slot_count; + const uint32_t slot = static_cast(frame.backend_token) % slot_count; FrameSync& slot_sync = *frame_syncs_[slot]; VkCommandBuffer command_buffer = command_buffers_[slot]; @@ -297,12 +290,12 @@ void VizCompositor::render(const std::vector& layers) } } } - } frame_guard{ backend_, &*frame }; + } frame_guard{ backend_, &frame }; const RenderTarget& rt = backend_->render_target(); const Resolution rt_extent = rt.resolution(); - // XR: per-eye viewports come from frame->views. tile layout is + // XR: per-eye viewports come from frame.views. tile layout is // window/offscreen letterboxing only. const bool xr_mode = backend_->is_xr(); @@ -366,7 +359,7 @@ void VizCompositor::render(const std::vector& layers) } for (size_t i = 0; i < visible_layers.size(); ++i) { - std::vector layer_views = frame->views; + std::vector layer_views = frame.views; if (layer_views.empty()) { layer_views.push_back(ViewInfo{}); @@ -389,7 +382,7 @@ void VizCompositor::render(const std::vector& layers) vkCmdWriteTimestamp(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, gpu_timestamp_pool_, query_base + 1); } - backend_->record_post_render_pass(command_buffer, *frame); + backend_->record_post_render_pass(command_buffer, frame); // ts2: end of backend post-pass (ts2-ts1 = blit/transition cost). if (gpu_timestamp_pool_ != VK_NULL_HANDLE) @@ -421,18 +414,18 @@ void VizCompositor::render(const std::vector& layers) } } } - if (frame->wait_before_render != VK_NULL_HANDLE) + if (frame.wait_before_render != VK_NULL_HANDLE) { - wait_semaphores.push_back(frame->wait_before_render); + wait_semaphores.push_back(frame.wait_before_render); wait_values.push_back(0); - wait_stages.push_back(frame->wait_stage); + wait_stages.push_back(frame.wait_stage); } std::vector signal_semaphores; std::vector signal_values; - if (frame->signal_after_render != VK_NULL_HANDLE) + if (frame.signal_after_render != VK_NULL_HANDLE) { - signal_semaphores.push_back(frame->signal_after_render); + signal_semaphores.push_back(frame.signal_after_render); signal_values.push_back(0); } @@ -486,7 +479,7 @@ void VizCompositor::render(const std::vector& layers) } } - backend_->end_frame(*frame); + backend_->end_frame(frame); frame_guard.released = true; } diff --git a/src/viz/session/cpp/viz_session.cpp b/src/viz/session/cpp/viz_session.cpp index 2732eed6b..f18d6b411 100644 --- a/src/viz/session/cpp/viz_session.cpp +++ b/src/viz/session/cpp/viz_session.cpp @@ -209,11 +209,47 @@ FrameInfo VizSession::begin_frame() last_frame_time_ = now; current_frame_info_.frame_index = frame_index_; - current_frame_info_.predicted_display_time = 0; // XR-only; 0 in offscreen - current_frame_info_.should_render = (state_ == SessionState::kRunning); current_frame_info_.resolution = compositor_ ? compositor_->resolution() : Resolution{}; - // Identity placeholder; backends fill per-view info inside render(). - current_frame_info_.views.assign(1, ViewInfo{}); + current_backend_frame_.reset(); + + // Acquire the backend frame BEFORE returning so renderers calling + // submit() against the returned FrameInfo's views are working with + // the same per-eye poses xrEndFrame will submit later. Skip the + // acquire when state isn't kRunning (kStopping/kLost paths submit + // empty xrEndFrames internally) or when the backend itself returns + // nullopt (XR shouldRender=0, swapchain skip). + if (state_ == SessionState::kRunning && backend_) + { + current_backend_frame_ = backend_->begin_frame(/*ignored=*/0); + } + + if (current_backend_frame_.has_value()) + { + current_frame_info_.should_render = true; + current_frame_info_.predicted_display_time = current_backend_frame_->predicted_display_time_ns; + current_frame_info_.views = current_backend_frame_->views; + if (current_frame_info_.views.empty()) + { + current_frame_info_.views.assign(1, ViewInfo{}); + } + } + else + { + current_frame_info_.should_render = false; + current_frame_info_.predicted_display_time = 0; + current_frame_info_.views.assign(1, ViewInfo{}); + } + + // Notify layers a new frame has begun. Lets ProjectionLayer-style + // layers clear per-frame freshness flags so a stale mailbox slot + // doesn't get composited under a new pose. + for (const auto& layer : layers_) + { + if (layer != nullptr) + { + layer->on_frame_begin(); + } + } frame_in_progress_ = true; return current_frame_info_; @@ -225,31 +261,27 @@ void VizSession::end_frame() { throw std::logic_error("VizSession: end_frame called without a matching begin_frame"); } - if (state_ != SessionState::kRunning) - { - frame_in_progress_ = false; - return; - } struct ClearGuard { bool* flag; + std::optional* frame_slot; ~ClearGuard() { *flag = false; + frame_slot->reset(); } - } guard{ &frame_in_progress_ }; - - std::vector raw_layers; - raw_layers.reserve(layers_.size()); - for (const auto& l : layers_) - { - raw_layers.push_back(l.get()); - } + } guard{ &frame_in_progress_, ¤t_backend_frame_ }; - if (current_frame_info_.should_render) + if (current_backend_frame_.has_value()) { - compositor_->render(raw_layers); + std::vector raw_layers; + raw_layers.reserve(layers_.size()); + for (const auto& l : layers_) + { + raw_layers.push_back(l.get()); + } + compositor_->render(*current_backend_frame_, raw_layers); } update_timing_stats(current_frame_info_.delta_time); diff --git a/src/viz/session/cpp/xr_backend.cpp b/src/viz/session/cpp/xr_backend.cpp index a69c7705c..c7b9e0064 100644 --- a/src/viz/session/cpp/xr_backend.cpp +++ b/src/viz/session/cpp/xr_backend.cpp @@ -491,6 +491,10 @@ std::optional XrBackend::begin_frame(int64_t /*ignored*/) // invariant holds if image_count ever grows past 1. const uint32_t slots = image_count(); f.backend_token = (slots == 0) ? 0u : (frame_index_++ % slots); + // Predicted display time forwarded to FrameInfo so renderers can + // use it for time-aware content (e.g. animation timestamps that + // line up with the runtime's prediction). + f.predicted_display_time_ns = static_cast(last_frame_state_.predictedDisplayTime); // Hand protocol-balance off to the compositor's FrameGuard. in_flight_guard.dismissed = true; return f; diff --git a/src/viz/shaders/cpp/CMakeLists.txt b/src/viz/shaders/cpp/CMakeLists.txt index 0f934aaf7..21396d778 100644 --- a/src/viz/shaders/cpp/CMakeLists.txt +++ b/src/viz/shaders/cpp/CMakeLists.txt @@ -45,10 +45,17 @@ function(compile_shader GLSL_PATH VAR_NAME) endfunction() # Shader programs: -# textured_quad — fullscreen quad sampling a combined image sampler. -# Used by QuadLayer to display a CUDA-fed texture. -compile_shader(textured_quad.vert kTexturedQuadVertSpv) -compile_shader(textured_quad.frag kTexturedQuadFragSpv) +# textured_quad — fullscreen / placed quad sampling a combined image +# sampler. Used by QuadLayer. +# projection_layer — fullscreen quad sampling color + depth and +# writing gl_FragDepth for Z-composite with other +# layers. Used by ProjectionLayer (two variants: +# with and without depth output). +compile_shader(textured_quad.vert kTexturedQuadVertSpv) +compile_shader(textured_quad.frag kTexturedQuadFragSpv) +compile_shader(projection_layer.vert kProjectionLayerVertSpv) +compile_shader(projection_layer.frag kProjectionLayerFragSpv) +compile_shader(projection_layer_no_depth.frag kProjectionLayerFragNoDepthSpv) # INTERFACE library exposing the generated headers + a phony custom # target that ensures the headers exist before any consumer compiles. diff --git a/src/viz/shaders/cpp/projection_layer.frag b/src/viz/shaders/cpp/projection_layer.frag new file mode 100644 index 000000000..037a5089f --- /dev/null +++ b/src/viz/shaders/cpp/projection_layer.frag @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +// Samples color + depth and writes both. gl_FragDepth ensures +// other layers (rendered after) Z-test against the projection content +// — that's the core ProjectionLayer feature. The fragment shader is +// dispatched per-view; the descriptor set is bound to the per-eye +// (color, depth) pair. + +#version 450 + +layout(set = 0, binding = 0) uniform sampler2D u_color; +layout(set = 0, binding = 1) uniform sampler2D u_depth; + +layout(location = 0) in vec2 v_uv; +layout(location = 0) out vec4 out_color; + +void main() +{ + out_color = texture(u_color, v_uv); + gl_FragDepth = texture(u_depth, v_uv).r; +} diff --git a/src/viz/shaders/cpp/projection_layer.vert b/src/viz/shaders/cpp/projection_layer.vert new file mode 100644 index 000000000..6dda2c21c --- /dev/null +++ b/src/viz/shaders/cpp/projection_layer.vert @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +// Fullscreen oversized triangle covering NDC [-1, 1]. Same gl_VertexIndex +// UV trick as textured_quad.vert's mode == 0 branch. +// +// gl_Position.z is INERT for both ProjectionLayer pipeline variants: +// * pipeline_with_depth: the frag shader writes gl_FragDepth from +// the sampled depth texture, overriding the rasterized z for both +// the depth test and the depth write. +// * pipeline_no_depth: depthWriteEnable = false, so the rasterized z +// never reaches the depth buffer. The depth attachment keeps the +// clear value (1.0 = far), which is the right semantic for +// "no depth → reproject as far-plane background." +// +// We still set z = 1.0 (far) as the safe default — matches QuadLayer's +// fullscreen convention, and guards against accidentally enabling +// depth write on the no_depth pipeline later (which would otherwise +// place the layer "right at the user's face" and break reprojection). + +#version 450 + +layout(location = 0) out vec2 v_uv; + +void main() +{ + v_uv = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(v_uv * 2.0 - 1.0, 1.0, 1.0); +} diff --git a/src/viz/shaders/cpp/projection_layer_no_depth.frag b/src/viz/shaders/cpp/projection_layer_no_depth.frag new file mode 100644 index 000000000..a1c8abd42 --- /dev/null +++ b/src/viz/shaders/cpp/projection_layer_no_depth.frag @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +// No-depth variant: writes color only, lets the rasterizer's z = 1.0 +// (from the vertex shader) flow through. The pipeline's +// depthWriteEnable = VK_FALSE so this content sits at far without +// affecting subsequent layers' depth test. Used when ProjectionLayer +// is configured without a depth buffer (Config::depth_format == nullopt). + +#version 450 + +layout(set = 0, binding = 0) uniform sampler2D u_color; +// Binding 1 is allocated (descriptor layout stays uniform across both +// pipeline variants) but unused here. + +layout(location = 0) in vec2 v_uv; +layout(location = 0) out vec4 out_color; + +void main() +{ + out_color = texture(u_color, v_uv); +}