Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions src/viz/core/cpp/device_image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,17 @@ std::unique_ptr<DeviceImage> DeviceImage::create(const VkContext& ctx,
{
throw std::invalid_argument("DeviceImage: resolution must be non-zero");
}
if (format != PixelFormat::kRGBA8)
if (format != PixelFormat::kRGBA8 && format != PixelFormat::kD32F)
{
// kD32F is reserved for ProjectionLayer's depth path. The
// CUDA-Vulkan interop contract for a depth image (sample
// semantics, layout transitions, color-space view) is not
// worked out yet, so refuse to half-build it.
throw std::invalid_argument("DeviceImage: only PixelFormat::kRGBA8 is supported");
throw std::invalid_argument("DeviceImage: unsupported PixelFormat");
}
if (format == PixelFormat::kD32F && mip_levels > 1)
{
// Depth + mip chain is meaningless (filtering depth between mip
// levels produces incorrect occlusion) and we'd have to
// special-case the blit-down pipeline. Reject explicitly rather
// than silently allocating the chain.
throw std::invalid_argument("DeviceImage: kD32F does not support mip_levels > 1");
}
// mip_levels == 0 -> auto-compute full chain to 1x1.
if (mip_levels == 0)
Expand Down
2 changes: 2 additions & 0 deletions src/viz/layers/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ cmake_minimum_required(VERSION 3.20)
# viz/layers_tests/.
add_library(viz_layers STATIC
quad_layer.cpp
projection_layer.cpp
inc/viz/layers/quad_layer.hpp
inc/viz/layers/projection_layer.hpp
)

target_include_directories(viz_layers
Expand Down
200 changes: 200 additions & 0 deletions src/viz/layers/cpp/inc/viz/layers/projection_layer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include <viz/core/device_image.hpp>
#include <viz/core/viz_buffer.hpp>
#include <viz/core/viz_types.hpp>
#include <viz/session/layer_base.hpp>
#include <vulkan/vulkan.h>

#include <array>
#include <atomic>
#include <cstdint>
#include <cuda_runtime.h>
#include <memory>
#include <optional>
#include <string>
#include <vector>

namespace viz
{

class VkContext;

// ProjectionLayer: full-view RGBD composited into the shared render
// target. Designed for renderers (gsplat, nvblox, neural reconstruction)
// that produce (color, depth) buffers per frame.
//
// Frame loop contract — IMPORTANT:
//
// info = session.begin_frame() // xrLocateViews
// color, depth = renderer.render(info.views) // render against THIS frame's views
// layer.submit(color, depth) // publish for THIS frame
// session.end_frame() // composite + xrEndFrame
//
// ``submit()`` MUST be called between ``begin_frame()`` and
// ``end_frame()``. The renderer MUST render against
// ``info.views[i].pose`` (the predicted-display-time pose for this
// frame). The runtime / CloudXR paces the application via xrWaitFrame;
// if the renderer takes longer than display rate, the runtime's
// compositor reprojects the last submitted frame at display rate while
// the app's framerate matches the renderer's speed.
//
// In ``kXr``, a visible ProjectionLayer that does NOT receive a
// ``submit()`` for the current frame is SKIPPED at record time (the
// layer's region of the shared RT keeps the clear color). This prevents
// the runtime from compositing the previous frame's RGBD content under
// the current frame's projection-layer pose, which would produce a
// visible reprojection error. In ``kWindow`` / ``kOffscreen`` the
// freshness gate is off — the most recently published frame stays on
// screen until replaced (the QuadLayer pattern), since no XR pose
// mismatch is possible.
//
// Mailbox: kSlotCount per-eye (color, depth) DeviceImage pairs. submit()
// picks a slot that's neither ``latest_`` nor in any ``in_use_`` entry,
// memcpys + signals cuda_done_writing on the caller's stream, blocks on
// cudaStreamSynchronize so the caller can re-use source buffers
// immediately, then atomically promotes the slot to ``latest_``.
// record_pre_render_pass promotes ``latest_`` to ``in_use_[slot]``.
//
// Stereo: when Config::stereo is true, the layer allocates paired
// (left, right) storage per slot. submit() must ship both eyes on a
// single CUDA stream; stream ordering keeps the pair atomic. In kXr
// view 0 (left eye) samples the left buffer, view 1 (right eye) the
// right. In kWindow / kOffscreen the left buffer is sampled.
//
// Memory (per layer):
// mono 1024² RGBA8+D32F: 7 slots × 1024² × 8 B ≈ 56 MB
// stereo 1024² RGBA8+D32F: ≈ 112 MB
// stereo 2048² RGBA8+D32F: ≈ 448 MB
class ProjectionLayer : public LayerBase
{
public:
// Sized to cover backend image counts up to 5, leave one free slot.
// kMaxFramesInFlight also fixes the length of the in_use_ array below.
static constexpr uint32_t kMaxFramesInFlight = 5;
// Number of mailbox slots: kMaxFramesInFlight + 2 == 7. Presumably the
// two extra slots cover ``latest_`` plus one slot being written by
// submit() — TODO confirm against pick_free_slot() in the .cpp.
static constexpr uint32_t kSlotCount = kMaxFramesInFlight + 2;

// Construction-time parameters. Taken by value by the constructor and
// kept in config_.
struct Config
{
// Layer name; defaults to "ProjectionLayer".
std::string name = "ProjectionLayer";
// Value-initialized by default. Presumably the per-view (per-eye)
// resolution that submitted buffers must match — TODO confirm.
Resolution view_resolution{};
// Format of the color buffers; defaults to kRGBA8.
PixelFormat color_format = PixelFormat::kRGBA8;

// nullopt → no depth buffer allocated; ProjectionLayer always
// writes gl_FragDepth = 1.0 (far). Without depth, this layer
// loses Z-compositing with QuadLayer. Useful for renderers that
// genuinely have no depth (sky / background fills).
std::optional<PixelFormat> depth_format = PixelFormat::kD32F;

// true → per-eye paired storage. submit MUST ship both eyes.
// In kWindow / kOffscreen the LEFT buffer is sampled; in kXr
// view 0 → LEFT, view 1 → RIGHT.
bool stereo = false;
};

// Builds the layer from a Vulkan context, a render pass and a Config.
// NOTE(review): the class stores a raw ``const VkContext*`` (ctx_) and a
// borrowed VkRenderPass (render_pass_); presumably both must outlive
// the layer — confirm against the constructor in the .cpp.
ProjectionLayer(const VkContext& ctx, VkRenderPass render_pass, Config config);
~ProjectionLayer() override;
// Public teardown entry point. NOTE(review): presumably releases the
// Vulkan objects held below and is also invoked by the destructor;
// whether it is safe to call more than once is not visible here.
void destroy();

// Non-copyable. Because the copy operations are user-declared, no
// implicit move constructor / move assignment is generated either, so
// the type is also non-movable.
ProjectionLayer(const ProjectionLayer&) = delete;
ProjectionLayer& operator=(const ProjectionLayer&) = delete;

// Publish a frame. Each buffer is a CUDA-linear VizBuffer (kDevice
// space) matching the layer's resolution and the relevant format
// (color → color_format, depth → kD32F). Validated against the
// config; mismatch throws std::invalid_argument.
//
// Mono no-depth: submit(color)
// Mono with depth: submit(color, &depth)
// Stereo no-depth: submit(left_color, nullptr, &right_color, nullptr)
// Stereo with depth: submit(left_color, &left_depth, &right_color, &right_depth)
//
// submit() does one cudaMemcpy2DToArrayAsync per provided buffer
// on ``stream``, signals cuda_done_writing on the same stream, then
// BLOCKS on cudaStreamSynchronize so the caller can re-use source
// buffers immediately. Cost: ~0.5 ms / 1024² color + depth on a
// discrete GPU.
//
// Marks the layer "fresh for this frame" so record() will draw it.
// VizSession::begin_frame clears the flag at the start of each
// frame; a renderer that doesn't submit will see its content
// skipped in kXr.
//
// GIL: pybind binding releases the GIL across this whole call.
//
// Parameters:
//   left_color  - required color buffer (the only color buffer in mono).
//   left_depth  - optional depth buffer; nullptr when omitted.
//   right_color - optional right-eye color buffer; nullptr when omitted.
//   right_depth - optional right-eye depth buffer; nullptr when omitted.
//   stream      - CUDA stream for the copies; the default value 0 is
//                 CUDA's default stream.
void submit(const VizBuffer& left_color,
const VizBuffer* left_depth = nullptr,
const VizBuffer* right_color = nullptr,
const VizBuffer* right_depth = nullptr,
cudaStream_t stream = 0);

// LayerBase contract.
void on_frame_begin() override; // clears submitted_this_frame_ flag
void record_pre_render_pass(VkCommandBuffer cmd, uint32_t in_flight_slot) override;
void record(VkCommandBuffer cmd,
const std::vector<ViewInfo>& views,
const RenderTarget& target,
uint32_t in_flight_slot) override;

// cuda_done_writing waits for color + depth of every active view in
// the in-use slot. kSlotNone → empty vector.
std::vector<LayerBase::WaitSemaphore> get_wait_semaphores() const override;

// Accessors.
// NOTE(review): the first four presumably echo the matching Config
// fields, and view_count() presumably returns view_count_ (default 1;
// likely 2 when stereo) — confirm in the .cpp.
Resolution view_resolution() const noexcept;
PixelFormat color_format() const noexcept;
std::optional<PixelFormat> depth_format() const noexcept;
bool is_stereo() const noexcept;
uint32_t view_count() const noexcept;

// Diagnostic — nullptr outside valid ranges.
const DeviceImage* color_image(uint32_t slot, uint32_t view) const noexcept;
const DeviceImage* depth_image(uint32_t slot, uint32_t view) const noexcept;

private:
// Sentinel meaning "no slot". 0xFF lies outside the valid slot index
// range [0, kSlotCount), so it cannot collide with a real slot.
static constexpr uint8_t kSlotNone = 0xFF;

// One-time setup helpers. NOTE(review): presumably init() drives the
// create_* / allocate_* / update_* steps below — the call order is not
// visible in this header.
void init();
void create_sampler();
void create_descriptor_set_layout();
void create_pipeline_layout();
void create_pipeline();
void create_descriptor_pool();
void allocate_descriptor_sets();
void update_descriptor_sets();

// Returns a mailbox slot index as uint8_t. NOTE(review): presumably
// yields a slot that is neither latest_ nor listed in in_use_; what it
// returns when no slot is free is not visible here.
uint8_t pick_free_slot() const noexcept;

// Submit-path helpers. ``label`` is presumably used to identify the
// offending buffer in the exception message — TODO confirm.
void validate_submit_buffer(const VizBuffer& buf, PixelFormat expected_format, const char* label) const;
void enqueue_copy(const VizBuffer& src, DeviceImage& dst, cudaStream_t stream) const;

// Non-owning pointer to the Vulkan context.
const VkContext* ctx_ = nullptr;
VkRenderPass render_pass_ = VK_NULL_HANDLE; // borrowed
Config config_;
// Defaults describe a mono layer with depth. NOTE(review): presumably
// overwritten from config_.stereo / config_.depth_format during
// construction — confirm.
uint32_t view_count_ = 1;
bool has_depth_ = true;

// Per-slot image storage: the outer array has one entry per mailbox
// slot; the inner vector presumably has one DeviceImage per view.
std::array<std::vector<std::unique_ptr<DeviceImage>>, kSlotCount> slots_color_;
std::array<std::vector<std::unique_ptr<DeviceImage>>, kSlotCount> slots_depth_;

// Vulkan objects; all start as VK_NULL_HANDLE. Separate samplers are
// kept for color and depth, and separate pipelines for the with-depth
// and no-depth variants.
VkSampler color_sampler_ = VK_NULL_HANDLE;
VkSampler depth_sampler_ = VK_NULL_HANDLE;
VkDescriptorSetLayout descriptor_set_layout_ = VK_NULL_HANDLE;
VkPipelineLayout pipeline_layout_ = VK_NULL_HANDLE;
VkPipeline pipeline_with_depth_ = VK_NULL_HANDLE;
VkPipeline pipeline_no_depth_ = VK_NULL_HANDLE;

VkDescriptorPool descriptor_pool_ = VK_NULL_HANDLE;
// Descriptor sets grouped per mailbox slot (outer index = slot).
std::array<std::vector<VkDescriptorSet>, kSlotCount> descriptor_sets_;

// Mailbox.
// latest_ starts as kSlotNone (nothing published yet).
std::atomic<uint8_t> latest_{ kSlotNone };
// NOTE(review): ``{}`` value-initializes every entry to 0, which is a
// valid slot index, NOT kSlotNone (0xFF). Presumably the constructor /
// init() stores kSlotNone into each entry — verify in the .cpp, since
// otherwise slot 0 appears "in use" before any frame is recorded.
std::array<std::atomic<uint8_t>, kMaxFramesInFlight> in_use_{};
std::atomic<uint8_t> last_in_use_slot_{ kSlotNone };

// Set by submit(), cleared by on_frame_begin(). record() consults
// this in kXr to gate stale-RGBD-under-new-pose composites.
std::atomic<bool> submitted_this_frame_{ false };
};

} // namespace viz
Loading
Loading