diff --git a/android/tools/replay/build.gradle b/android/tools/replay/build.gradle index 88a7c31533..ea2266ba04 100644 --- a/android/tools/replay/build.gradle +++ b/android/tools/replay/build.gradle @@ -31,6 +31,13 @@ android { } } buildTypes { + debug { + externalNativeBuild { + cmake { + cppFlags "-O2", "-DNDEBUG" + } + } + } release { minifyEnabled false proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' diff --git a/framework/application/application.cpp b/framework/application/application.cpp index 3e5da8bf12..3ca0ebb850 100644 --- a/framework/application/application.cpp +++ b/framework/application/application.cpp @@ -65,8 +65,7 @@ Application::Application(const std::string& name, decode::FileProcessor* file_processor, const std::string& cli_wsi_extension, void* platform_specific_wsi_data) : - name_(name), - file_processor_(file_processor), running_(false), paused_(false), pause_frame_(0), + name_(name), file_processor_(file_processor), running_(false), paused_(false), pause_frame_(0), cli_wsi_extension_(cli_wsi_extension), fps_info_(nullptr) { if (!cli_wsi_extension_.empty()) @@ -198,6 +197,14 @@ void Application::SetPaused(bool paused) paused_ = paused; } +void Application::SetRepeatFrameNTimes(uint32_t repeat_frame_n_times) +{ + if (file_processor_) + { + file_processor_->SetRepeatFrameNTimes(repeat_frame_n_times); + } +} + bool Application::PlaySingleFrame() { bool success = false; @@ -238,7 +245,7 @@ void Application::ProcessEvents(bool wait_for_input) bool activeWsiContext = wsi_context && !wsi_context->GetWindows().empty(); auto pWindowFactory = wsi_context ? wsi_context->GetWindowFactory() : nullptr; bool androidWsiContext = pWindowFactory && (strcmp(pWindowFactory->GetSurfaceExtensionName(), - VK_KHR_ANDROID_SURFACE_EXTENSION_NAME) == 0); + VK_KHR_ANDROID_SURFACE_EXTENSION_NAME) == 0); if (activeWsiContext || androidWsiContext) { wsi_context->ProcessEvents(wait_for_input); diff --git a/framework/application/application.h b/framework/application/application.h index 58776480c6..00b428f806 100644 --- a/framework/application/application.h +++ b/framework/application/application.h @@ -73,6 +73,8 @@ class Application final void SetPauseFrame(uint32_t pause_frame) { pause_frame_ = pause_frame; } + void SetRepeatFrameNTimes(uint32_t repeat_frame_n_times); + bool PlaySingleFrame(); void ProcessEvents(bool wait_for_input); @@ -87,10 +89,7 @@ class Application final void StopRunning() { running_ = false; } - uint32_t GetCurrentFrameNumber() const - { - return file_processor_->GetCurrentFrameNumber(); - } + uint32_t GetCurrentFrameNumber() const { return file_processor_->GetCurrentFrameNumber(); } private: // clang-format off diff --git a/framework/decode/file_processor.cpp b/framework/decode/file_processor.cpp index d309d3c5d5..af28154ad7 100644 --- a/framework/decode/file_processor.cpp +++ b/framework/decode/file_processor.cpp @@ -28,6 +28,13 @@ #include "util/logging.h" #include +#include +#if defined(__ANDROID__) +#include +#else +#define ATrace_beginSection(name) +#define ATrace_endSection() +#endif GFXRECON_BEGIN_NAMESPACE(gfxrecon) GFXRECON_BEGIN_NAMESPACE(decode) @@ -111,12 +118,60 @@ bool FileProcessor::ProcessNextFrame() bool FileProcessor::ProcessBlocksOneFrame() { - for (ApiDecoder* decoder : decoders_) + ATrace_beginSection("ProcessBlocksOneFrame"); + block_parser_->SetDecompressionPolicy(BlockParser::DecompressionPolicy::kAlways); + if (current_frame_number_ == kFirstFrame) { - decoder->SetCurrentFrameNumber(current_frame_number_); + // Process initial resources state + ATrace_beginSection("InitState"); + if (!ProcessBlocks()) + { + return false; + } + ATrace_endSection(); } - block_parser_->SetDecompressionPolicy(BlockParser::DecompressionPolicy::kAlways); - return ProcessBlocks(); + + int64_t start_offset = 0; + bool do_repeat = (repeat_frame_n_times_ > 0) && (!file_stack_.empty()); + + if (do_repeat) + { + start_offset = GetCurrentFile().active_file->Tell(); + } + + uint32_t start_frame = current_frame_number_; + + // Handle limited command counts (e.g. from trim ranges or secondary files) + uint32_t remaining_commands_before = 0; + + remaining_commands_before = GetCurrentFile().remaining_commands; + + for (uint32_t i = 0; i <= repeat_frame_n_times_; ++i) + { + // Ensure we replay with the same frame number + current_frame_number_ = start_frame; + + for (ApiDecoder* decoder : decoders_) + { + decoder->SetCurrentFrameNumber(current_frame_number_); + } + + GetCurrentFile().remaining_commands = + (i < repeat_frame_n_times_) ? remaining_commands_before + 1 : remaining_commands_before; + + if (!ProcessBlocks()) + { + return false; + } + + if (i < repeat_frame_n_times_) + { + SeekActiveFile(start_offset, util::platform::FileSeekSet); + } + } + + ATrace_endSection(); + return true; } bool FileProcessor::DoProcessNextFrame(const std::function& block_processor) @@ -274,6 +329,7 @@ void FileProcessor::DecrementRemainingCommands() bool FileProcessor::ProcessBlocks() { + ATrace_beginSection("ProcessBlocks"); BlockBuffer block_buffer; bool success = true; @@ -330,12 +386,19 @@ bool FileProcessor::ProcessBlocks() } // NOTE: Warnings for unknown/invalid blocks are handled in the BlockParser - + if (process_visitor.IsStateDelimiter()) + { + ++block_index_; + DecrementRemainingCommands(); + break; + } if (process_visitor.IsFrameDelimiter()) { // The ProcessVisitor (pre-dispatch) is not the right place to update the frame state, so do it // here UpdateEndFrameState(); + ++block_index_; + DecrementRemainingCommands(); break; } } @@ -348,8 +411,8 @@ bool FileProcessor::ProcessBlocks() ++block_index_; DecrementRemainingCommands(); } + ATrace_endSection(); - DecrementRemainingCommands(); return success; } @@ -525,7 +588,6 @@ void FileProcessor::UpdateEndFrameState() // Make sure to increment the frame number on the way out. ++current_frame_number_; - ++block_index_; } bool FileProcessor::ProcessFrameDelimiter(gfxrecon::format::ApiCallId call_id) diff --git a/framework/decode/file_processor.h b/framework/decode/file_processor.h index 4329ff10e7..2e04fd1144 100644 --- a/framework/decode/file_processor.h +++ b/framework/decode/file_processor.h @@ -161,6 +161,8 @@ class FileProcessor block_index_to_ = block_index_to; } + void SetRepeatFrameNTimes(uint32_t repeat_frame_n_times) { repeat_frame_n_times_ = repeat_frame_n_times; } + bool IsFrameDelimiter(format::BlockType block_type, format::MarkerType marker_type) const; bool IsFrameDelimiter(format::ApiCallId call_id) const; @@ -320,23 +322,21 @@ class FileProcessor void operator()(const ExecuteBlocksFromFileArgs& execute_blocks) { // The block and marker type are implied by the Args type - is_frame_delimiter = false; - success = file_processor_.ProcessExecuteBlocksFromFile(execute_blocks); + success = file_processor_.ProcessExecuteBlocksFromFile(execute_blocks); } // State Marker control void operator()(const StateBeginMarkerArgs& state_begin) { // The block and marker type are implied by the Args type - is_frame_delimiter = false; - success = true; + success = true; file_processor_.ProcessStateBeginMarker(state_begin); } void operator()(const StateEndMarkerArgs& state_end) { // The block and marker type are implied by the Args type - is_frame_delimiter = false; + is_state_delimiter = true; success = true; file_processor_.ProcessStateEndMarker(state_end); } @@ -344,8 +344,7 @@ class FileProcessor void operator()(const AnnotationArgs& annotation) { // The block and marker type are implied by the Command type - is_frame_delimiter = false; - success = true; + success = true; file_processor_.ProcessAnnotation(annotation); } @@ -353,8 +352,7 @@ class FileProcessor void operator()(const Args&) { // The default behavior for a Visit is a successful, non-frame-delimiter - is_frame_delimiter = false; - success = true; + success = true; } // Avoid unpacking the Arg from it's store in the Arg specific overloads @@ -366,10 +364,12 @@ class FileProcessor bool IsSuccess() const { return success; } bool IsFrameDelimiter() const { return is_frame_delimiter; } + bool IsStateDelimiter() const { return is_state_delimiter; } ProcessVisitor(FileProcessor& file_processor) : file_processor_(file_processor) {} private: bool is_frame_delimiter = false; + bool is_state_delimiter = false; bool success = true; FileProcessor& file_processor_; }; @@ -420,6 +420,7 @@ class FileProcessor int64_t block_index_from_{ 0 }; int64_t block_index_to_{ 0 }; bool loading_trimmed_capture_state_; + uint32_t repeat_frame_n_times_{ 0 }; std::string absolute_path_; format::FileHeader file_header_; @@ -432,7 +433,7 @@ class FileProcessor struct ActiveFileContext { ActiveFileContext(FileInputStreamPtr&& active_file_, bool execute_til_eof_ = false) : - active_file(std::move(active_file_)), execute_till_eof(execute_til_eof_){}; + active_file(std::move(active_file_)), execute_till_eof(execute_til_eof_) {}; FileInputStreamPtr active_file; uint32_t remaining_commands{ 0 }; diff --git a/framework/decode/vulkan_replay_consumer_base.cpp b/framework/decode/vulkan_replay_consumer_base.cpp index 59b637e75b..570525da38 100644 --- a/framework/decode/vulkan_replay_consumer_base.cpp +++ b/framework/decode/vulkan_replay_consumer_base.cpp @@ -53,6 +53,7 @@ #include "util/platform.h" #include "util/logging.h" #include "util/callbacks.h" +#include "graphics/vulkan_resources_util.h" #include "spirv_reflect.h" @@ -62,15 +63,43 @@ #include "Vulkan-Utility-Libraries/vk_format_utils.h" #include +#include +#include +#include #include #include #include #include #include +#include +#include +#include + +#if defined(__ANDROID__) +#include +#else +#define ATrace_beginSection(name) +#define ATrace_endSection() +#endif GFXRECON_BEGIN_NAMESPACE(gfxrecon) GFXRECON_BEGIN_NAMESPACE(decode) +bool frame_first_queue_submit = false; + +static uint32_t FindMemoryType(const VkPhysicalDeviceMemoryProperties& memory_properties, + uint32_t type_bits, + VkMemoryPropertyFlags properties) +{ + uint32_t index = std::numeric_limits::max(); + VkMemoryPropertyFlags dummy_flags; + if (graphics::FindMemoryTypeIndex(memory_properties, type_bits, properties, &index, &dummy_flags)) + { + return index; + } + return std::numeric_limits::max(); +} + const size_t kMaxEventStatusRetries = 16; const char kUnknownDeviceLabel[] = ""; @@ -205,8 +234,7 @@ static uint32_t GetHardwareBufferFormatBpp(uint32_t format) VulkanReplayConsumerBase::VulkanReplayConsumerBase(std::shared_ptr application, const VulkanReplayOptions& options) : - options_(options), - loader_handle_(nullptr), get_instance_proc_addr_(nullptr), create_instance_proc_(nullptr), + options_(options), loader_handle_(nullptr), get_instance_proc_addr_(nullptr), create_instance_proc_(nullptr), application_(application), loading_trim_state_(false), replaying_trimmed_capture_(false), fps_info_(nullptr), have_imported_semaphores_(false), omitted_pipeline_cache_data_(false) { @@ -375,6 +403,7 @@ void VulkanReplayConsumerBase::ProcessStateEndMarker(uint64_t frame_number) { fps_info_->ProcessStateEndMarker(frame_number); } + frame_first_queue_submit = true; if (options_.dumping_resources) { @@ -4102,6 +4131,35 @@ VkResult VulkanReplayConsumerBase::OverrideQueueSubmit(PFN_vkQueueSubmit assert((queue_info != nullptr) && (pSubmits != nullptr)); + if (frame_first_queue_submit) + { + frame_first_queue_submit = false; + static bool first_time = true; + if (first_time && wait_before_first_frame_min_ms_ > 0) + { + ATrace_beginSection("FirstTimeWait"); + std::this_thread::sleep_for(std::chrono::milliseconds(wait_before_first_frame_min_ms_)); + ATrace_endSection(); + first_time = false; + } + if (sleep_around_gpu_frame_ms_ > 0.0) + { + // WaitDevicesIdle(); // Need to implement or check availability + std::this_thread::sleep_for(std::chrono::microseconds(static_cast(sleep_around_gpu_frame_ms_ * 1000.0))); + } + + if (frame_warm_up_gpu_load_ > 0) + { + // Add device warm up using dispatch + WarmUpDevice(queue_info, frame_warm_up_gpu_load_); + } + ATrace_beginSection("GFXRFrame"); + if (sleep_around_gpu_frame_ms_ > 0.0) + { + std::this_thread::sleep_for(std::chrono::microseconds(static_cast(sleep_around_gpu_frame_ms_ * 1000.0))); + } + } + VkResult result = VK_SUCCESS; const VkSubmitInfo* submit_infos = pSubmits->GetPointer(); assert(submitCount == 0 || submit_infos != nullptr); @@ -4123,6 +4181,68 @@ VkResult VulkanReplayConsumerBase::OverrideQueueSubmit(PFN_vkQueueSubmit GFXRECON_ASSERT(allocator != nullptr); allocator->ClearStagingResources(); + auto queue_submit = [&](const VkSubmitInfo* submit_infos_arg, uint32_t submitCount_arg) { + VkDevice device = device_info->handle; + WarmUpResources& resources = warmup_resources_[device]; + if (resources.next_semaphore_index < resources.semaphores.size()) + { + std::vector modified_submit_infos(submit_infos_arg, + std::next(submit_infos_arg, submitCount_arg)); + std::vector> new_wait_semaphore_lists(submitCount_arg); + std::vector> new_wait_stage_mask_lists(submitCount_arg); + + for (uint32_t i = 0; i < submitCount_arg; ++i) + { + const VkSubmitInfo& original_submit = submit_infos_arg[i]; + std::vector& new_list = new_wait_semaphore_lists[i]; + new_list.reserve(original_submit.waitSemaphoreCount + 1); + if (original_submit.waitSemaphoreCount > 0 && original_submit.pWaitSemaphores) + { + new_list.assign(original_submit.pWaitSemaphores, + original_submit.pWaitSemaphores + original_submit.waitSemaphoreCount); + } + + std::vector& new_stage_list = new_wait_stage_mask_lists[i]; + new_stage_list.reserve(original_submit.waitSemaphoreCount + 1); + if (original_submit.waitSemaphoreCount > 0 && original_submit.pWaitDstStageMask) + { + new_stage_list.assign(original_submit.pWaitDstStageMask, + original_submit.pWaitDstStageMask + original_submit.waitSemaphoreCount); + } + else + { + // Safe default if mask is missing but we're adding a semaphore + VkPipelineStageFlags default_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + new_stage_list.assign(new_list.size() + 1, default_flags); + // Actually we need to match the size. + // If original was 0, we have 1 now. + // If we assigned from original, we have N. + // But we push_back later. + // Let's just rely on the push_back below for the NEW semaphore. + // But if original_submit.pWaitDstStageMask was NULL, we need to fill the previous ones? + // Spec says pWaitDstStageMask must be valid if waitSemaphoreCount > 0. + // So if waitSemaphoreCount was 0, pWaitDstStageMask might be NULL. + // In that case new_stage_list is empty. + } + + if (resources.next_semaphore_index < resources.semaphores.size()) + { + VkSemaphore warm_up_semaphore = resources.semaphores[resources.next_semaphore_index++]; + new_list.push_back(warm_up_semaphore); + new_stage_list.push_back(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + } + + VkSubmitInfo& modified_submit = modified_submit_infos[i]; + modified_submit.waitSemaphoreCount = static_cast(new_list.size()); + modified_submit.pWaitSemaphores = new_list.data(); + modified_submit.pWaitDstStageMask = new_stage_list.data(); + } + + return func(queue_info->handle, submitCount_arg, modified_submit_infos.data(), fence); + } + return func(queue_info->handle, submitCount_arg, submit_infos_arg, fence); + }; + if (UseAddressReplacement(device_info) && submit_info_data != nullptr) { const auto& address_tracker = GetDeviceAddressTracker(device_info); @@ -4162,7 +4282,7 @@ VkResult VulkanReplayConsumerBase::OverrideQueueSubmit(PFN_vkQueueSubmit { VkSubmitInfo& submit_info_mut = pSubmits->GetPointer()[i]; auto wait_semaphores = graphics::StripWaitSemaphores(&submit_info_mut); - semaphores[i] = address_replacer.UpdateBufferAddresses(cmd_buf_info, + semaphores[i] = address_replacer.UpdateBufferAddresses(cmd_buf_info, addresses_to_replace.data(), addresses_to_replace.size(), GetDeviceAddressTracker(device_info), @@ -4194,7 +4314,7 @@ VkResult VulkanReplayConsumerBase::OverrideQueueSubmit(PFN_vkQueueSubmit // tracked. if ((!have_imported_semaphores_) && (options_.surface_index == -1) && (!options_.dumping_resources)) { - result = func(queue_info->handle, submitCount, submit_infos, fence); + result = queue_submit(submit_infos, submitCount); } else { @@ -4227,7 +4347,7 @@ VkResult VulkanReplayConsumerBase::OverrideQueueSubmit(PFN_vkQueueSubmit if (altered_submits.empty() && !options_.dumping_resources) { - result = func(queue_info->handle, submitCount, submit_infos, fence); + result = queue_submit(submit_infos, submitCount); } else { @@ -4289,10 +4409,8 @@ VkResult VulkanReplayConsumerBase::OverrideQueueSubmit(PFN_vkQueueSubmit } else { - result = func(queue_info->handle, - static_cast(modified_submit_infos.size()), - modified_submit_infos.data(), - fence); + result = + queue_submit(modified_submit_infos.data(), static_cast(modified_submit_infos.size())); } } } @@ -8292,6 +8410,22 @@ VulkanReplayConsumerBase::OverrideQueuePresentKHR(PFN_vkQueuePresentKHR const VulkanQueueInfo* queue_info, const StructPointerDecoder* pPresentInfo) { + if (!frame_first_queue_submit) + { + frame_first_queue_submit = true; + if (sleep_around_gpu_frame_ms_ > 0.0) + { + auto device_info = object_info_table_->GetVkDeviceInfo(queue_info->parent_id); + if (device_info) + { + auto device_table = GetDeviceTable(device_info->handle); + device_table->DeviceWaitIdle(device_info->handle); + } + std::this_thread::sleep_for(std::chrono::microseconds(static_cast(sleep_around_gpu_frame_ms_ * 1000.0))); + } + ATrace_endSection(); + } + assert((queue_info != nullptr) && (pPresentInfo != nullptr) && !pPresentInfo->IsNull()); VkResult result = VK_SUCCESS; @@ -8379,14 +8513,14 @@ VulkanReplayConsumerBase::OverrideQueuePresentKHR(PFN_vkQueuePresentKHR uint32_t replay_index = 0; result = swapchain_->AcquireNextImageKHR(original_result, - device_table->AcquireNextImageKHR, - swapchain_info->device_info, - swapchain_info, - std::numeric_limits::max(), - VK_NULL_HANDLE, - acquire_fence.fence, - capture_image_index, - &replay_index); + device_table->AcquireNextImageKHR, + swapchain_info->device_info, + swapchain_info, + std::numeric_limits::max(), + VK_NULL_HANDLE, + acquire_fence.fence, + capture_image_index, + &replay_index); GFXRECON_ASSERT((result == VK_SUCCESS) || (result == VK_SUBOPTIMAL_KHR)); result = device_table->WaitForFences( @@ -8527,14 +8661,14 @@ VulkanReplayConsumerBase::OverrideQueuePresentKHR(PFN_vkQueuePresentKHR uint32_t replay_index = 0; result = swapchain_->AcquireNextImageKHR(original_result, - device_table->AcquireNextImageKHR, - swapchain_info->device_info, - swapchain_info, - std::numeric_limits::max(), - VK_NULL_HANDLE, - acquire_fence.fence, - capture_image_index, - &replay_index); + device_table->AcquireNextImageKHR, + swapchain_info->device_info, + swapchain_info, + std::numeric_limits::max(), + VK_NULL_HANDLE, + acquire_fence.fence, + capture_image_index, + &replay_index); GFXRECON_ASSERT((result == VK_SUCCESS) || (result == VK_SUBOPTIMAL_KHR)); result = device_table->WaitForFences( @@ -9356,7 +9490,7 @@ VkResult VulkanReplayConsumerBase::OverrideCreateRayTracingPipelinesKHR( uint32_t group_info_count = in_pCreateInfos[create_info_i].groupCount; bool has_data = (device_info->shader_group_handles.find(pipeline_capture_id) != - device_info->shader_group_handles.end()); + device_info->shader_group_handles.end()); if (has_data) { @@ -10105,6 +10239,8 @@ void VulkanReplayConsumerBase::OverrideCmdBeginRenderPass( VkCommandBuffer command_buffer = command_buffer_info->handle; + InsertRenderPassBarrier(command_buffer, command_buffer_info); + func(command_buffer, render_pass_begin_info_decoder->GetPointer(), contents); } @@ -10144,9 +10280,40 @@ void VulkanReplayConsumerBase::OverrideCmdBeginRenderPass2( VkCommandBuffer command_buffer = command_buffer_info->handle; + InsertRenderPassBarrier(command_buffer, command_buffer_info); + func(command_buffer, render_pass_begin_info_decoder->GetPointer(), subpass_begin_info_decode->GetPointer()); } +void VulkanReplayConsumerBase::InsertRenderPassBarrier(VkCommandBuffer command_buffer, + const VulkanCommandBufferInfo* command_buffer_info) +{ + if (options_.render_pass_barrier) + { + auto* device_info = GetObjectInfoTable().GetVkDeviceInfo(command_buffer_info->parent_id); + GFXRECON_ASSERT(device_info != nullptr); + + VkMemoryBarrier memory_barrier = { + VK_STRUCTURE_TYPE_MEMORY_BARRIER, + nullptr, + VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; + + GetDeviceTable(command_buffer) + ->CmdPipelineBarrier(command_buffer, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, + 1, + &memory_barrier, + 0, + nullptr, + 0, + nullptr); + } +} + void VulkanReplayConsumerBase::OverrideCmdTraceRaysKHR( PFN_vkCmdTraceRaysKHR func, VulkanCommandBufferInfo* command_buffer_info, @@ -12429,8 +12596,8 @@ bool VulkanReplayConsumerBase::CheckPipelineCacheUUID(const VulkanDeviceInfo* // compare pipelineCacheUUID for device and blob auto* cache_header = reinterpret_cast(create_info->pInitialData); uuid_match = memcmp(cache_header->pipelineCacheUUID, - physical_device_info->replay_device_info->properties->pipelineCacheUUID, - VK_UUID_SIZE) == 0; + physical_device_info->replay_device_info->properties->pipelineCacheUUID, + VK_UUID_SIZE) == 0; } return uuid_match; } @@ -12925,5 +13092,343 @@ void VulkanReplayConsumerBase::OverrideGetDeviceMemoryOpaqueCaptureAddress( allocator->GetDeviceMemoryOpaqueCaptureAddress(info, allocator_data); } +void VulkanReplayConsumerBase::WarmUpDevice(const VulkanQueueInfo* queue_info, uint32_t warm_up_load) +{ + VulkanDeviceInfo* device_info = object_info_table_->GetVkDeviceInfo(queue_info->parent_id); + if (device_info == nullptr) + { + return; + } + + ATrace_beginSection("WarmUpDevice"); + VkDevice device = device_info->handle; + auto device_table = GetDeviceTable(device); + WarmUpResources& resources = warmup_resources_[device]; + + VkResult result = VK_SUCCESS; + + if (resources.command_pool == VK_NULL_HANDLE) + { + VkCommandPoolCreateInfo cmd_pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO }; + cmd_pool_info.queueFamilyIndex = queue_info->family_index; + cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; + result = device_table->CreateCommandPool(device, &cmd_pool_info, nullptr, &resources.command_pool); + GFXRECON_LOG_WARNING("CreateCommandPool %s.", util::ToString(result).c_str()); + } + + if (result == VK_SUCCESS && resources.shader_module == VK_NULL_HANDLE) + { + // Minimal SPIR-V compute shader: + // #version 450 + + // // Define the size of the workgroup. + // // This means 64 invocations of this shader will run in parallel as a single group. + // layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + + // // Define a storage buffer that the application will provide. + // // It's an array of floating-point numbers that we can both read from and write to. + // // 'binding = 0' means it will be bound to the first descriptor slot (index 0). + // layout(std430, binding = 0) buffer DataBuffer { + // float data[]; + // }; + + // void main() { + // // Get the unique global ID for this specific shader invocation. + // // This gives us a unique index into our data buffer. + // uint index = gl_GlobalInvocationID.x; + + // // Read the initial value from the buffer. + // float value = data[index]; + + // // --- DUMMY WORK --- + // // Perform a series of arbitrary calculations in a loop to keep the GPU busy. + // // This is designed to be work that the compiler can't easily optimize away. + // for (int i = 0; i < 1000; i++) { + // value = sin(value) * 0.999 + cos(float(i)) * 0.001; + // } + + // // Write the final, modified value back into the same position in the buffer. + // data[index] = value; + // } + unsigned char spirv[] = { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x0b, 0x00, 0x0d, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, + 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x04, 0x00, + 0x0a, 0x00, 0x47, 0x4c, 0x5f, 0x47, 0x4f, 0x4f, 0x47, 0x4c, 0x45, 0x5f, 0x63, 0x70, 0x70, 0x5f, 0x73, 0x74, + 0x79, 0x6c, 0x65, 0x5f, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x76, 0x65, + 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x47, 0x4c, 0x5f, 0x47, 0x4f, 0x4f, 0x47, 0x4c, 0x45, 0x5f, 0x69, 0x6e, + 0x63, 0x6c, 0x75, 0x64, 0x65, 0x5f, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x76, 0x65, 0x00, 0x05, 0x00, + 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x00, 0x00, 0x00, 0x05, 0x00, 0x08, 0x00, 0x0b, 0x00, + 0x00, 0x00, 0x67, 0x6c, 0x5f, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x49, 0x6e, 0x76, 0x6f, 0x63, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x49, 0x44, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x44, 0x61, 0x74, 0x61, + 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0x00, 0x00, 0x06, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x47, 0x00, + 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, + 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, + 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, + 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, + 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, + 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, + 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, + 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, + 0x04, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x1d, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, + 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0xe8, 0x03, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x77, 0xbe, 0x7f, 0x3f, 0x2b, 0x00, + 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x6f, 0x12, 0x83, 0x3a, 0x2b, 0x00, 0x04, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, + 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x39, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3a, 0x00, + 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, + 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x07, 0x00, + 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, + 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x3e, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, + 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x1b, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3d, 0x00, + 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x18, 0x00, + 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x1f, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1f, 0x00, 0x00, 0x00, + 0xf6, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, + 0x02, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xb1, 0x00, 0x05, 0x00, 0x26, 0x00, + 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x20, 0x00, + 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0d, 0x00, + 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x85, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x2c, 0x00, + 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, + 0x2c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x85, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x22, 0x00, 0x00, 0x00, 0xf8, 0x00, + 0x02, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x17, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x32, 0x00, + 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0xf9, 0x00, 0x02, 0x00, 0x1f, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0x3d, 0x00, + 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1a, 0x00, + 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, + 0x3e, 0x00, 0x03, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, + 0x01, 0x00 + }; + + VkShaderModuleCreateInfo sm_create_info = { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO }; + sm_create_info.codeSize = sizeof(spirv); + sm_create_info.pCode = (uint32_t*)spirv; + result = device_table->CreateShaderModule(device, &sm_create_info, nullptr, &resources.shader_module); + GFXRECON_LOG_WARNING("CreateShaderModule %s.", util::ToString(result).c_str()); + } + const uint32_t num_invocations = warm_up_load * 1000 * 64 * 1 * 1; // Dispatch(X,Y,Z) * local_size(X,Y,Z) + const VkDeviceSize buffer_size = sizeof(float) * num_invocations; + + if (result == VK_SUCCESS && resources.buffer == VK_NULL_HANDLE) + { + VkBufferCreateInfo buffer_info = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; + buffer_info.size = buffer_size; + buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + result = device_table->CreateBuffer(device, &buffer_info, nullptr, &resources.buffer); + GFXRECON_LOG_WARNING("CreateBuffer for warm-up %s.", util::ToString(result).c_str()); + + if (result == VK_SUCCESS) + { + VkMemoryRequirements mem_requirements; + device_table->GetBufferMemoryRequirements(device, resources.buffer, &mem_requirements); + + VulkanPhysicalDeviceInfo* physical_device_info = + object_info_table_->GetVkPhysicalDeviceInfo(device_info->parent_id); + VkPhysicalDeviceMemoryProperties* memory_properties = &physical_device_info->capture_memory_properties; + + GFXRECON_ASSERT(physical_device_info != nullptr && physical_device_info->replay_device_info != nullptr && + physical_device_info->replay_device_info->memory_properties.has_value()); + + VkMemoryAllocateInfo alloc_info = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; + alloc_info.allocationSize = mem_requirements.size; + alloc_info.memoryTypeIndex = FindMemoryType( + *memory_properties, mem_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + if (alloc_info.memoryTypeIndex != std::numeric_limits::max()) + { + result = device_table->AllocateMemory(device, &alloc_info, nullptr, &resources.buffer_memory); + GFXRECON_LOG_WARNING("AllocateMemory for warm-up buffer %s.", util::ToString(result).c_str()); + if (result == VK_SUCCESS) + { + result = device_table->BindBufferMemory(device, resources.buffer, resources.buffer_memory, 0); + } + } + else + { + result = VK_ERROR_INITIALIZATION_FAILED; + } + } + } + + if (result == VK_SUCCESS && resources.descriptor_set_layout == VK_NULL_HANDLE) + { + VkDescriptorSetLayoutBinding layout_binding = {}; + layout_binding.binding = 0; + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + layout_binding.descriptorCount = 1; + layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + VkDescriptorSetLayoutCreateInfo layout_info = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO }; + layout_info.bindingCount = 1; + layout_info.pBindings = &layout_binding; + result = + device_table->CreateDescriptorSetLayout(device, &layout_info, nullptr, &resources.descriptor_set_layout); + GFXRECON_LOG_WARNING("CreateDescriptorSetLayout %s.", util::ToString(result).c_str()); + } + + if (result == VK_SUCCESS && resources.pipeline_layout == VK_NULL_HANDLE) + { + VkPipelineLayoutCreateInfo pl_create_info = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO }; + pl_create_info.setLayoutCount = 1; + pl_create_info.pSetLayouts = &resources.descriptor_set_layout; // Link the layout + result = device_table->CreatePipelineLayout(device, &pl_create_info, nullptr, &resources.pipeline_layout); + GFXRECON_LOG_WARNING("CreatePipelineLayout %s.", util::ToString(result).c_str()); + } + + if (result == VK_SUCCESS && resources.descriptor_pool == VK_NULL_HANDLE) + { + VkDescriptorPoolSize pool_size = {}; + pool_size.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + pool_size.descriptorCount = 1; + + VkDescriptorPoolCreateInfo pool_info = { VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO }; + pool_info.poolSizeCount = 1; + pool_info.pPoolSizes = &pool_size; + pool_info.maxSets = 1; + result = device_table->CreateDescriptorPool(device, &pool_info, nullptr, &resources.descriptor_pool); + GFXRECON_LOG_WARNING("CreateDescriptorPool %s.", util::ToString(result).c_str()); + } + + if (result == VK_SUCCESS && resources.descriptor_set == VK_NULL_HANDLE) + { + VkDescriptorSetAllocateInfo alloc_info = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO }; + alloc_info.descriptorPool = resources.descriptor_pool; + alloc_info.descriptorSetCount = 1; + alloc_info.pSetLayouts = &resources.descriptor_set_layout; + result = device_table->AllocateDescriptorSets(device, &alloc_info, &resources.descriptor_set); + GFXRECON_LOG_WARNING("AllocateDescriptorSets %s.", util::ToString(result).c_str()); + + // NEW: Update the descriptor set to point to our buffer + if (result == VK_SUCCESS) + { + VkDescriptorBufferInfo buffer_info_for_descriptor = {}; + buffer_info_for_descriptor.buffer = resources.buffer; + buffer_info_for_descriptor.offset = 0; + buffer_info_for_descriptor.range = buffer_size; + + VkWriteDescriptorSet descriptor_write = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET }; + descriptor_write.dstSet = resources.descriptor_set; + descriptor_write.dstBinding = 0; + descriptor_write.dstArrayElement = 0; + descriptor_write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descriptor_write.descriptorCount = 1; + descriptor_write.pBufferInfo = &buffer_info_for_descriptor; + device_table->UpdateDescriptorSets(device, 1, &descriptor_write, 0, nullptr); + } + } + + if (result == VK_SUCCESS && resources.pipeline == VK_NULL_HANDLE) + { + VkComputePipelineCreateInfo cp_create_info = { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO }; + cp_create_info.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + cp_create_info.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; + cp_create_info.stage.module = resources.shader_module; + cp_create_info.stage.pName = "main"; + cp_create_info.layout = resources.pipeline_layout; + result = device_table->CreateComputePipelines( + device, VK_NULL_HANDLE, 1, &cp_create_info, nullptr, &resources.pipeline); + GFXRECON_LOG_WARNING("CreateComputePipelines %s.", util::ToString(result).c_str()); + } + + if (result == VK_SUCCESS && resources.semaphores.empty()) + { + resources.semaphores.resize(10); + VkSemaphoreCreateInfo semaphore_info = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO }; + for (size_t i = 0; i < resources.semaphores.size(); ++i) + { + result = device_table->CreateSemaphore(device, &semaphore_info, nullptr, &resources.semaphores[i]); + if (result != VK_SUCCESS) + { + GFXRECON_LOG_WARNING("Failed to create semaphore for warm-up. Error %s.", + util::ToString(result).c_str()); + resources.semaphores.clear(); + break; + } + } + } + + if (result == VK_SUCCESS && resources.command_buffer == VK_NULL_HANDLE) + { + VkCommandBufferAllocateInfo cmd_buf_alloc_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO }; + cmd_buf_alloc_info.commandPool = resources.command_pool; + cmd_buf_alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cmd_buf_alloc_info.commandBufferCount = 1; + result = device_table->AllocateCommandBuffers(device, &cmd_buf_alloc_info, &resources.command_buffer); + GFXRECON_LOG_WARNING("AllocateCommandBuffers %s.", util::ToString(result).c_str()); + + VkCommandBufferBeginInfo begin_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; + device_table->BeginCommandBuffer(resources.command_buffer, &begin_info); + device_table->CmdBindPipeline(resources.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, resources.pipeline); + device_table->CmdBindDescriptorSets(resources.command_buffer, + VK_PIPELINE_BIND_POINT_COMPUTE, + resources.pipeline_layout, + 0, + 1, + &resources.descriptor_set, + 0, + nullptr); + device_table->CmdDispatch(resources.command_buffer, warm_up_load * 64, 1, 1); + device_table->EndCommandBuffer(resources.command_buffer); + } + + if (result == VK_SUCCESS) + { + VkSubmitInfo submit_info = { VK_STRUCTURE_TYPE_SUBMIT_INFO }; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &resources.command_buffer; + // next_semaphore_index > 0 means we know how many of them we need from previous frame render + submit_info.signalSemaphoreCount = resources.next_semaphore_index > 0 + ? resources.next_semaphore_index + : static_cast(resources.semaphores.size()); + submit_info.pSignalSemaphores = resources.semaphores.data(); + device_table->QueueSubmit(queue_info->handle, 1, &submit_info, VK_NULL_HANDLE); + resources.next_semaphore_index = 0; + } + + if (result != VK_SUCCESS) + { + GFXRECON_LOG_WARNING("Device warm-up dispatch failed with error %s.", util::ToString(result).c_str()); + } + ATrace_endSection(); +} GFXRECON_END_NAMESPACE(decode) GFXRECON_END_NAMESPACE(gfxrecon) diff --git a/framework/decode/vulkan_replay_consumer_base.h b/framework/decode/vulkan_replay_consumer_base.h index 270f78e65b..07a21231e8 100644 --- a/framework/decode/vulkan_replay_consumer_base.h +++ b/framework/decode/vulkan_replay_consumer_base.h @@ -1659,6 +1659,11 @@ class VulkanReplayConsumerBase : public VulkanConsumer virtual void ClearRecaptureHandleIds() override; virtual bool IsRecapture() override { return options_.capture; } + void SetWaitBeforeFirstFrameMinMs(uint32_t ms) { wait_before_first_frame_min_ms_ = ms; } + void SetSleepAroundGpuFrameMs(double ms) { sleep_around_gpu_frame_ms_ = ms; } + void SetFrameWarmUpGpuLoad(uint32_t load) { frame_warm_up_gpu_load_ = load; } + void WarmUpDevice(const VulkanQueueInfo* queue_info, uint32_t warm_up_load); + //// End recapture members private: @@ -1964,6 +1969,29 @@ class VulkanReplayConsumerBase : public VulkanConsumer std::unordered_map> tracked_pipeline_caches_; std::unordered_map pipeline_cache_correspondances_; + + uint32_t wait_before_first_frame_min_ms_{ 0 }; + double sleep_around_gpu_frame_ms_{ 0.0 }; + uint32_t frame_warm_up_gpu_load_{ 0 }; + struct WarmUpResources + { + VkDescriptorPool descriptor_pool{ VK_NULL_HANDLE }; + VkDescriptorSetLayout descriptor_set_layout{ VK_NULL_HANDLE }; + VkDescriptorSet descriptor_set{ VK_NULL_HANDLE }; + VkPipelineLayout pipeline_layout{ VK_NULL_HANDLE }; + VkPipeline pipeline{ VK_NULL_HANDLE }; + VkBuffer buffer{ VK_NULL_HANDLE }; + VkDeviceMemory buffer_memory{ VK_NULL_HANDLE }; + VkShaderModule shader_module{ VK_NULL_HANDLE }; + VkCommandPool command_pool{ VK_NULL_HANDLE }; + VkCommandBuffer command_buffer{ VK_NULL_HANDLE }; + VkFence fence{ VK_NULL_HANDLE }; + std::vector semaphores; + uint32_t next_semaphore_index{ 0 }; + }; + std::unordered_map warmup_resources_; + + void InsertRenderPassBarrier(VkCommandBuffer command_buffer, const VulkanCommandBufferInfo* command_buffer_info); }; GFXRECON_END_NAMESPACE(decode) diff --git a/framework/decode/vulkan_replay_options.h b/framework/decode/vulkan_replay_options.h index 9b9e6402a1..308753a842 100644 --- a/framework/decode/vulkan_replay_options.h +++ b/framework/decode/vulkan_replay_options.h @@ -119,7 +119,7 @@ struct VulkanReplayOptions : public ReplayOptions std::vector skip_get_fence_ranges; bool wait_before_present{ false }; VkFlags debug_message_severity{ VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT }; + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT }; // Dumping resources related configurable replay options std::vector BeginCommandBufferQueueSubmit_Indices; @@ -164,7 +164,6 @@ struct VulkanReplayOptions : public ReplayOptions std::string load_pipeline_cache_filename; std::string save_pipeline_cache_filename; bool add_new_pipeline_caches; - // Time of instantiation of this struct. std::chrono::high_resolution_clock::time_point start_time{ std::chrono::high_resolution_clock::now() }; @@ -172,6 +171,8 @@ struct VulkanReplayOptions : public ReplayOptions uint32_t wait_before_first_submit{ 0 }; void MaybeWaitBeforeFirstSubmit() const; + + bool render_pass_barrier{ false }; }; GFXRECON_END_NAMESPACE(decode) diff --git a/framework/util/file_input_stream.cpp b/framework/util/file_input_stream.cpp index e964a4d38d..1a4c9139da 100644 --- a/framework/util/file_input_stream.cpp +++ b/framework/util/file_input_stream.cpp @@ -197,6 +197,19 @@ bool FStreamFileInputStream::HasReadAhead() const noexcept return read_ahead_bytes_ > 0; } +int64_t FStreamFileInputStream::Tell() const +{ + if (IsOpen()) + { + int64_t pos = util::platform::FileTell(fd_); + if (pos >= 0) + { + return pos - static_cast(read_ahead_bytes_); + } + } + return -1; +} + size_t FStreamFileInputStream::ReadFromReadAheadBuffer(void* buffer, size_t bytes) { char* dest = static_cast(buffer); diff --git a/framework/util/file_input_stream.h b/framework/util/file_input_stream.h index 04b2be5ce0..53a32d4efe 100644 --- a/framework/util/file_input_stream.h +++ b/framework/util/file_input_stream.h @@ -64,6 +64,7 @@ class FStreamFileInputStream bool Open(const std::string& filename); void Close(); bool FileSeek(int64_t offset, util::platform::FileSeekOrigin origin); + int64_t Tell() const; bool ReadBytes(void* buffer, size_t bytes); size_t PeekBytes(void* buffer, size_t bytes); bool ReadOverwriteSpan(const size_t bytes, DataSpan& span); @@ -78,8 +79,8 @@ class FStreamFileInputStream #if FILE_INPUT_STREAM_USE_FREAD constexpr static size_t kReadAheadBufferSize = 1024U; #else - constexpr static size_t kReadAheadAlignment = 4096U; - constexpr static size_t kReadAheadBufferSize = 2U * kReadAheadAlignment; + constexpr static size_t kReadAheadAlignment = 4096U; + constexpr static size_t kReadAheadBufferSize = 2U * kReadAheadAlignment; #endif // Design assumes kMaxPeekBytes << kReadAheadBufferSize, as we move data when peeking would @@ -96,7 +97,7 @@ class FStreamFileInputStream size_t ReadFromReadAheadBuffer(void* buffer, size_t bytes); size_t ReadBytesImpl(void* buffer, size_t bytes); - std::string filename_; + std::string filename_; #if FILE_INPUT_STREAM_USE_FREAD FILE* fd_{ nullptr }; diff --git a/framework/util/platform.h b/framework/util/platform.h index 94b996fabd..29a8523138 100644 --- a/framework/util/platform.h +++ b/framework/util/platform.h @@ -246,6 +246,11 @@ inline int64_t FileTell(FILE* stream) return _ftelli64(stream); } +inline int64_t FileTell(int fd) +{ + return _lseeki64(fd, 0, FileSeekCurrent); +} + inline bool FileSeek(FILE* stream, int64_t offset, FileSeekOrigin origin) { int32_t result = _fseeki64(stream, offset, origin); @@ -547,6 +552,11 @@ inline int64_t FileTell(FILE* stream) return ftello(stream); } +inline int64_t FileTell(int fd) +{ + return lseek64(fd, 0, SEEK_CUR); +} + inline bool FileSeek(FILE* stream, int64_t offset, FileSeekOrigin origin) { int32_t result = fseeko(stream, offset, origin); diff --git a/tools/replay/android_main.cpp b/tools/replay/android_main.cpp index cf7552430a..cf66ba3757 100644 --- a/tools/replay/android_main.cpp +++ b/tools/replay/android_main.cpp @@ -68,8 +68,8 @@ const char kLayerProperty[] = "debug.vulkan.layers"; const int32_t kSwipeDistance = 200; -void ProcessAppCmd(struct android_app* app, int32_t cmd); -int32_t ProcessInputEvent(struct android_app* app, AInputEvent* event); +void ProcessAppCmd(struct android_app* app, int32_t cmd); +int32_t ProcessInputEvent(struct android_app* app, AInputEvent* event); static std::unique_ptr file_processor; @@ -160,6 +160,7 @@ void android_main(struct android_app* app) gfxrecon::decode::VulkanTrackedObjectInfoTable tracked_object_info_table; gfxrecon::decode::VulkanReplayOptions replay_options = GetVulkanReplayOptions(arg_parser, filename, &tracked_object_info_table); + replay_options.render_pass_barrier = GetRenderPassBarrier(arg_parser); gfxrecon::decode::VulkanReplayConsumer vulkan_replay_consumer(application, replay_options); gfxrecon::decode::VulkanDecoder vulkan_decoder; @@ -226,6 +227,10 @@ void android_main(struct android_app* app) replay_options.block_index_to); application->SetPauseFrame(GetPauseFrame(arg_parser)); + application->SetRepeatFrameNTimes(GetRepeatFrameNTimes(arg_parser)); + vulkan_replay_consumer.SetWaitBeforeFirstFrameMinMs(GetWaitBeforeFirstFrameMs(arg_parser)); + vulkan_replay_consumer.SetSleepAroundGpuFrameMs(GetSleepAroundGpuFrameMs(arg_parser)); + vulkan_replay_consumer.SetFrameWarmUpGpuLoad(GetFrameWarmUpGpuLoad(arg_parser)); #if ENABLE_OPENXR_SUPPORT gfxrecon::decode::OpenXrReplayOptions openxr_replay_options = {}; diff --git a/tools/replay/replay_settings.h b/tools/replay/replay_settings.h index a1fabb39eb..596777e305 100644 --- a/tools/replay/replay_settings.h +++ b/tools/replay/replay_settings.h @@ -34,7 +34,7 @@ const char kOptions[] = "indices,--dcp,--discard-cached-psos,--use-colorspace-fallback,--use-cached-psos,--dx12-override-object-names,--" "dx12-ags-inject-markers,--offscreen-swapchain-frame-boundary,--wait-before-present,--dump-resources-before-draw," "--dump-resources-modifiable-state-only,--pbi-all,--preload-measurement-range,--add-new-pipeline-caches,--" - "screenshot-ignore-FrameBoundaryANDROID,--deduplicate-device,--log-timestamps,--capture"; + "screenshot-ignore-FrameBoundaryANDROID,--deduplicate-device,--log-timestamps,--capture,--render-pass-barrier"; const char kArguments[] = "--log-level,--log-file,--cpu-mask,--gpu,--gpu-group,--pause-frame,--wsi,--surface-index,-m|--memory-translation," "--replace-shaders,--screenshots,--screenshot-interval,--denied-messages,--allowed-messages,--screenshot-format,--" @@ -42,7 +42,8 @@ const char kArguments[] = "force-windowed,--fwo|--force-windowed-origin,--batching-memory-usage,--measurement-file,--swapchain,--sgfs|--skip-" "get-fence-status,--sgfr|--skip-get-fence-ranges,--dump-resources,--dump-resources-dir,--dump-resources-image-" "format,pbis,--pcj|--pipeline-creation-jobs,--save-pipeline-cache,--load-pipeline-cache,--quit-after-frame,--" - "present-mode,--wait-before-first-submit"; + "present-mode,--wait-before-first-submit,--wait-before-first-frame-ms,--sleep-around-gpu-frame-ms,--frame-warm-up-" + "gpu-load,--frame-repeats"; static void PrintUsage(const char* exe_name) { @@ -387,4 +388,53 @@ static void PrintUsage(const char* exe_name) #endif } +static uint32_t GetRepeatFrameNTimes(const gfxrecon::util::ArgumentParser& arg_parser) +{ + uint32_t repeat_frame_n_times = 0; + const auto& value = arg_parser.GetArgumentValue(kRepeatFrameNTimesArgument); + if (!value.empty()) + { + repeat_frame_n_times = static_cast(std::stoi(value)); + } + return repeat_frame_n_times; +} + +static uint32_t GetWaitBeforeFirstFrameMs(const gfxrecon::util::ArgumentParser& arg_parser) +{ + uint32_t wait_before_first_frame_ms = 0; + const auto& value = arg_parser.GetArgumentValue(kWaitBeforeFirstFrameMsArgument); + if (!value.empty()) + { + wait_before_first_frame_ms = static_cast(std::stoi(value)); + } + return wait_before_first_frame_ms; +} + +static double GetSleepAroundGpuFrameMs(const gfxrecon::util::ArgumentParser& arg_parser) +{ + double sleep_around_gpu_frame_ms = 0.0; + const auto& value = arg_parser.GetArgumentValue(kSleepAroundGpuFrameMsArgument); + if (!value.empty()) + { + sleep_around_gpu_frame_ms = std::stod(value); + } + return sleep_around_gpu_frame_ms; +} + +static uint32_t GetFrameWarmUpGpuLoad(const gfxrecon::util::ArgumentParser& arg_parser) +{ + uint32_t frame_warm_up_gpu_load = 0; + const auto& value = arg_parser.GetArgumentValue(kFrameWarmUpGpuLoadArgument); + if (!value.empty()) + { + frame_warm_up_gpu_load = static_cast(std::stoi(value)); + } + return frame_warm_up_gpu_load; +} + +static bool GetRenderPassBarrier(const gfxrecon::util::ArgumentParser& arg_parser) +{ + return arg_parser.IsOptionSet(kRenderPassBarrierArgument); +} + #endif // GFXRECON_REPLAY_SETTINGS_H diff --git a/tools/tool_settings.h b/tools/tool_settings.h index 69ed7dfd04..e65d10e16f 100644 --- a/tools/tool_settings.h +++ b/tools/tool_settings.h @@ -148,6 +148,11 @@ const char kLoadPipelineCacheArgument[] = "--load-pipeline-cache"; const char kCreateNewPipelineCacheOption[] = "--add-new-pipeline-caches"; const char kDeduplicateDevice[] = "--deduplicate-device"; const char kWaitBeforeFirstSubmit[] = "--wait-before-first-submit"; +const char kRepeatFrameNTimesArgument[] = "--frame-repeats"; +const char kWaitBeforeFirstFrameMsArgument[] = "--wait-before-first-frame-ms"; +const char kSleepAroundGpuFrameMsArgument[] = "--sleep-around-gpu-frame-ms"; +const char kFrameWarmUpGpuLoadArgument[] = "--frame-warm-up-gpu-load"; +const char kRenderPassBarrierArgument[] = "--render-pass-barrier"; const char kScreenshotIgnoreFrameBoundaryArgument[] = "--screenshot-ignore-FrameBoundaryANDROID";