88namespace nbl ::video
99{
1010
11- class NBL_API2 CComputeBlit : public core::IReferenceCounted
11+ class CComputeBlit : public core ::IReferenceCounted
1212{
1313 public:
14+ constexpr static inline asset::SPushConstantRange DefaultPushConstantRange = {
15+ .stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE,
16+ .offset = 0ull ,
17+ .size = sizeof (hlsl::blit::parameters2_t )
18+ };
19+ constexpr static inline std::span<const asset::SPushConstantRange> DefaultPushConstantRanges = {&DefaultPushConstantRange,1 };
20+
1421 // Coverage adjustment needs alpha to be stored in HDR with high precision
1522 static inline asset::E_FORMAT getCoverageAdjustmentIntermediateFormat (const asset::E_FORMAT format)
1623 {
@@ -41,7 +48,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
4148 }
4249
4350 // ctor
44- CComputeBlit (
51+ NBL_API2 CComputeBlit (
4552 core::smart_refctd_ptr<ILogicalDevice>&& logicalDevice,
4653 core::smart_refctd_ptr<asset::IShaderCompiler::CCache>&& cache=nullptr ,
4754 core::smart_refctd_ptr<system::ILogger>&& logger=nullptr
@@ -52,6 +59,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
5259 {
5360 core::smart_refctd_ptr<IGPUComputePipeline> blit;
5461 core::smart_refctd_ptr<IGPUComputePipeline> coverage;
62+ uint16_t workgroupSize;
5563 };
5664 struct SPipelinesCreateInfo
5765 {
@@ -67,13 +75,13 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
6775 hlsl::SBindingInfo samplers;
6876 // must be Storage Image descriptor type
6977 hlsl::SBindingInfo outputs;
70- // ! If you set the balues too small, we'll correct them ourselves anyway
78+ // ! If you set the values too small, we'll correct them ourselves anyway; a default value of 0 means we guess and provide our own defaults
7179 // needs to be at least as big as the maximum subgroup size
72- uint32_t workgroupSizeLog2 : 4 = 0 ;
73- //
74- uint32_t sharedMemoryPerInvocation : 6 = 0 ;
80+ uint16_t workgroupSizeLog2 : 4 = 0 ;
81+ // in bytes, needs to be at least enough to store two full input pixels per invocation
82+ uint16_t sharedMemoryPerInvocation : 6 = 0 ;
7583 };
76- SPipelines createAndCachePipelines (const SPipelinesCreateInfo& info);
84+ NBL_API2 SPipelines createAndCachePipelines (const SPipelinesCreateInfo& info);
7785
7886 // ! Returns the original format if supports STORAGE_IMAGE otherwise returns a format in its compat class which supports STORAGE_IMAGE.
7987 inline asset::E_FORMAT getOutputViewFormat (const asset::E_FORMAT format)
@@ -99,101 +107,38 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
99107 }
100108 }
101109
102- #if 0
103- // @param `alphaBinCount` is only required to size the histogram present in the default nbl_glsl_blit_AlphaStatistics_t in default_compute_common.comp
104- core::smart_refctd_ptr<video::IGPUShader> createAlphaTestSpecializedShader(const asset::IImage::E_TYPE inImageType, const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount);
105-
106- core::smart_refctd_ptr<video::IGPUComputePipeline> getAlphaTestPipeline(const uint32_t alphaBinCount, const asset::IImage::E_TYPE imageType)
107- {
108- const auto workgroupDims = getDefaultWorkgroupDims(imageType);
109- const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
110-
111- assert(paddedAlphaBinCount >= asset::IBlitUtilities::MinAlphaBinCount);
112- const auto pipelineIndex = (paddedAlphaBinCount / asset::IBlitUtilities::MinAlphaBinCount) - 1;
113-
114- if (m_alphaTestPipelines[pipelineIndex][imageType])
115- return m_alphaTestPipelines[pipelineIndex][imageType];
116-
117- auto specShader = createAlphaTestSpecializedShader(imageType, paddedAlphaBinCount);
118- IGPUComputePipeline::SCreationParams creationParams;
119- creationParams.shader.shader = specShader.get();
120- creationParams.shader.entryPoint = "main";
121- creationParams.layout = m_blitPipelineLayout[EBT_COVERAGE_ADJUSTMENT].get();
122- assert(m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_alphaTestPipelines[pipelineIndex][imageType]));
123-
124- return m_alphaTestPipelines[pipelineIndex][imageType];
125- }
126-
127- // @param `outFormat` dictates encoding.
128- core::smart_refctd_ptr<video::IGPUShader> createNormalizationSpecializedShader(const asset::IImage::E_TYPE inImageType, const asset::E_FORMAT outFormat,
129- const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount);
130-
131- core::smart_refctd_ptr<video::IGPUComputePipeline> getNormalizationPipeline(const asset::IImage::E_TYPE imageType, const asset::E_FORMAT outFormat,
132- const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount)
110+ // Use the return values of `getOutputViewFormat` and `getCoverageAdjustmentIntermediateFormat` for this
111+ static inline uint32_t getAlphaBinCount (const uint16_t workgroupSize, const asset::E_FORMAT intermediateAlpha, const uint32_t layersToBlit)
133112 {
134- const auto workgroupDims = getDefaultWorkgroupDims(imageType);
135- const uint32_t paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
136- const SNormalizationCacheKey key = { imageType, paddedAlphaBinCount, outFormat };
137-
138- if (m_normalizationPipelines.find(key) == m_normalizationPipelines.end())
113+ uint16_t baseBucketCount;
114+ using format_t = nbl::asset::E_FORMAT;
115+ switch (intermediateAlpha)
139116 {
140- auto specShader = createNormalizationSpecializedShader(imageType, outFormat, paddedAlphaBinCount);
141- IGPUComputePipeline::SCreationParams creationParams;
142- creationParams.shader.shader = specShader.get();
143- creationParams.shader.entryPoint = "main";
144- creationParams.layout = m_blitPipelineLayout[EBT_COVERAGE_ADJUSTMENT].get();
145- assert(m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_normalizationPipelines[key]));
117+ case format_t ::EF_R8_UNORM: [[fallthrough]];
118+ case format_t ::EF_R8_SNORM:
119+ baseBucketCount = 256 ;
120+ break ;
121+ case format_t ::EF_R16_SFLOAT:
122+ baseBucketCount = 512 ;
123+ break ;
124+ case format_t ::EF_R16_UNORM: [[fallthrough]];
125+ case format_t ::EF_R16_SNORM: [[fallthrough]];
126+ baseBucketCount = 1024 ;
127+ break ;
128+ case format_t ::EF_R32_SFLOAT:
129+ baseBucketCount = 2048 ;
130+ break ;
131+ default :
132+ return 0 ;
146133 }
147-
148- return m_normalizationPipelines[key];
134+ // the absolute minimum needed to store a single pixel of a worst case format (precise, all 4 channels)
135+ constexpr auto singlePixelStorage = 4 *sizeof (hlsl::float32_t );
136+ constexpr auto ratio = singlePixelStorage/sizeof (uint16_t );
137+ const auto paddedAlphaBinCount = core::min (core::roundUp (baseBucketCount,workgroupSize),workgroupSize*ratio);
138+ return paddedAlphaBinCount*layersToBlit;
149139 }
150140
151- template <typename BlitUtilities>
152- core::smart_refctd_ptr<video::IGPUComputePipeline> getBlitPipeline(
153- const asset::E_FORMAT outFormat,
154- const asset::IImage::E_TYPE imageType,
155- const core::vectorSIMDu32& inExtent,
156- const core::vectorSIMDu32& outExtent,
157- const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic,
158- const typename BlitUtilities::convolution_kernels_t& kernels,
159- const uint32_t workgroupSize = 256,
160- const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount)
161- {
162- const auto paddedAlphaBinCount = getPaddedAlphaBinCount(core::vectorSIMDu32(workgroupSize, 1, 1, 1), alphaBinCount);
163-
164- const SBlitCacheKey key =
165- {
166- .wgSize = workgroupSize,
167- .imageType = imageType,
168- .alphaBinCount = paddedAlphaBinCount,
169- .outFormat = outFormat,
170- .smemSize = m_availableSharedMemory,
171- .coverageAdjustment = (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE)
172- };
173-
174- if (m_blitPipelines.find(key) == m_blitPipelines.end())
175- {
176- const auto blitType = (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE) ? EBT_COVERAGE_ADJUSTMENT : EBT_REGULAR;
177-
178- auto specShader = createBlitSpecializedShader<BlitUtilities>(
179- outFormat,
180- imageType,
181- inExtent,
182- outExtent,
183- alphaSemantic,
184- kernels,
185- workgroupSize,
186- paddedAlphaBinCount);
187-
188- IGPUComputePipeline::SCreationParams creationParams;
189- creationParams.shader.shader = specShader.get();
190- creationParams.shader.entryPoint = "main";
191- creationParams.layout = m_blitPipelineLayout[blitType].get();
192- m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_blitPipelines[key]);
193- }
194-
195- return m_blitPipelines[key];
196- }
141+ #if 0
197142
198143 //! Returns the number of output texels produced by one workgroup, deciding factor is `m_availableSharedMemory`.
199144 //! @param outImageFormat is the format of output (of the blit step) image.
@@ -368,152 +313,10 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
368313 outDispatchInfo.wgCount[2] = workgroupCount[2];
369314 }
370315
371- static inline core::vectorSIMDu32 getDefaultWorkgroupDims(const asset::IImage::E_TYPE imageType)
372- {
373- switch (imageType)
374- {
375- case asset::IImage::ET_1D:
376- return core::vectorSIMDu32(256, 1, 1, 1);
377- case asset::IImage::ET_2D:
378- return core::vectorSIMDu32(16, 16, 1, 1);
379- case asset::IImage::ET_3D:
380- return core::vectorSIMDu32(8, 8, 4, 1);
381- default:
382- return core::vectorSIMDu32(1, 1, 1, 1);
383- }
384- }
385-
386- static inline size_t getCoverageAdjustmentScratchSize(const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic, const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount, const uint32_t layersToBlit)
387- {
388- if (alphaSemantic != asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE)
389- return 0;
390-
391- const auto workgroupDims = getDefaultWorkgroupDims(imageType);
392- const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
393- const auto requiredSize = (sizeof(uint32_t) + paddedAlphaBinCount * sizeof(uint32_t)) * layersToBlit;
394- return requiredSize;
395- }
396-
397- bool updateDescriptorSet(
398- video::IGPUDescriptorSet* blitDS,
399- video::IGPUDescriptorSet* kernelWeightsDS,
400- core::smart_refctd_ptr<video::IGPUImageView> inImageView,
401- core::smart_refctd_ptr<video::IGPUImageView> outImageView,
402- core::smart_refctd_ptr<video::IGPUBuffer> coverageAdjustmentScratchBuffer,
403- core::smart_refctd_ptr<video::IGPUBufferView> kernelWeightsUTB,
404- const asset::ISampler::E_TEXTURE_CLAMP wrapU = asset::ISampler::ETC_CLAMP_TO_EDGE,
405- const asset::ISampler::E_TEXTURE_CLAMP wrapV = asset::ISampler::ETC_CLAMP_TO_EDGE,
406- const asset::ISampler::E_TEXTURE_CLAMP wrapW = asset::ISampler::ETC_CLAMP_TO_EDGE,
407- const asset::ISampler::E_TEXTURE_BORDER_COLOR borderColor = asset::ISampler::ETBC_FLOAT_OPAQUE_BLACK)
408- {
409- constexpr auto MAX_DESCRIPTOR_COUNT = 3;
410-
411- auto updateDS = [this, coverageAdjustmentScratchBuffer](video::IGPUDescriptorSet* ds, video::IGPUDescriptorSet::SDescriptorInfo* infos) -> bool
412- {
413- const auto bindingCount = ds->getLayout()->getTotalBindingCount();
414- if ((bindingCount == 3) && !coverageAdjustmentScratchBuffer)
415- return false;
416-
417- video::IGPUDescriptorSet::SWriteDescriptorSet writes[MAX_DESCRIPTOR_COUNT] = {};
418-
419- uint32_t infoIdx = 0;
420- uint32_t writeCount = 0;
421- for (uint32_t t = 0; t < static_cast<uint32_t>(asset::IDescriptor::E_TYPE::ET_COUNT); ++t)
422- {
423- const auto type = static_cast<asset::IDescriptor::E_TYPE>(t);
424- const auto& redirect = ds->getLayout()->getDescriptorRedirect(type);
425- const auto declaredBindingCount = redirect.getBindingCount();
426-
427- for (uint32_t i = 0; i < declaredBindingCount; ++i)
428- {
429- auto& write = writes[writeCount++];
430- write.dstSet = ds;
431- write.binding = redirect.getBinding(IGPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ i }).data;
432- write.arrayElement = 0u;
433- write.count = redirect.getCount(IGPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ i });
434- write.info = &infos[infoIdx];
435-
436- infoIdx += write.count;
437- }
438- }
439- assert(writeCount == bindingCount);
440- m_device->updateDescriptorSets(writeCount, writes, 0u, nullptr);
441-
442- return true;
443- };
444-
445- if (blitDS)
446- {
447- if (!inImageView || !outImageView)
448- return false;
449-
450- video::IGPUDescriptorSet::SDescriptorInfo infos[MAX_DESCRIPTOR_COUNT] = {};
451-
452- if (!samplers[wrapU][wrapV][wrapW][borderColor])
453- {
454- video::IGPUSampler::SParams params = {};
455- params.TextureWrapU = wrapU;
456- params.TextureWrapV = wrapV;
457- params.TextureWrapW = wrapW;
458- params.BorderColor = borderColor;
459- params.MinFilter = asset::ISampler::ETF_NEAREST;
460- params.MaxFilter = asset::ISampler::ETF_NEAREST;
461- params.MipmapMode = asset::ISampler::ESMM_NEAREST;
462- params.AnisotropicFilter = 0u;
463- params.CompareEnable = 0u;
464- params.CompareFunc = asset::ISampler::ECO_ALWAYS;
465-
466- samplers[wrapU][wrapV][wrapW][borderColor] = m_device->createSampler(params);
467- if (!samplers[wrapU][wrapV][wrapW][borderColor])
468- return false;
469- }
470-
471- infos[0].desc = inImageView;
472- infos[0].info.image.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL;
473- infos[0].info.combinedImageSampler.sampler = samplers[wrapU][wrapV][wrapW][borderColor];
474-
475- infos[1].desc = outImageView;
476- infos[1].info.image.imageLayout = asset::IImage::LAYOUT::GENERAL;
477- infos[1].info.combinedImageSampler.sampler = nullptr;
478-
479- if (coverageAdjustmentScratchBuffer)
480- {
481- infos[2].desc = coverageAdjustmentScratchBuffer;
482- infos[2].info.buffer.offset = 0;
483- infos[2].info.buffer.size = coverageAdjustmentScratchBuffer->getSize();
484- }
485-
486- if (!updateDS(blitDS, infos))
487- return false;
488- }
489-
490- if (kernelWeightsDS)
491- {
492- video::IGPUDescriptorSet::SDescriptorInfo info = {};
493- info.desc = kernelWeightsUTB;
494- info.info.buffer.offset = 0ull;
495- info.info.buffer.size = kernelWeightsUTB->getUnderlyingBuffer()->getSize();
496-
497- if (!updateDS(kernelWeightsDS, &info))
498- return false;
499- }
500-
501- return true;
502- }
503-
504316 //! User is responsible for the memory barriers between previous writes and the first
505317 //! dispatch on the input image, and future reads of output image and the last dispatch.
506318 template <typename BlitUtilities>
507319 inline void blit(
508- video::IGPUCommandBuffer* cmdbuf,
509- const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic,
510- video::IGPUDescriptorSet* alphaTestDS,
511- video::IGPUComputePipeline* alphaTestPipeline,
512- video::IGPUDescriptorSet* blitDS,
513- video::IGPUDescriptorSet* blitWeightsDS,
514- video::IGPUComputePipeline* blitPipeline,
515- video::IGPUDescriptorSet* normalizationDS,
516- video::IGPUComputePipeline* normalizationPipeline,
517320 const core::vectorSIMDu32& inImageExtent,
518321 const asset::IImage::E_TYPE inImageType,
519322 const asset::E_FORMAT inImageFormat,
@@ -627,7 +430,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
627430 }
628431
629432 // ! Query shared memory size for a given `outputTexelsPerWG`.
630- size_t getRequiredSharedMemorySize (
433+ inline size_t getRequiredSharedMemorySize (
631434 const core::vectorSIMDu32& outputTexelsPerWG,
632435 const core::vectorSIMDu32& outExtent,
633436 const asset::IImage::E_TYPE imageType,
@@ -641,16 +444,6 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
641444 const size_t requiredSmem = (core::max (preloadRegion.x * preloadRegion.y * preloadRegion.z , outputTexelsPerWG.x * outputTexelsPerWG.y * preloadRegion.z ) + outputTexelsPerWG.x * preloadRegion.y * preloadRegion.z ) * channelCount * sizeof (float );
642445 return requiredSmem;
643446 };
644-
645- static inline uint32_t getPaddedAlphaBinCount (const core::vectorSIMDu32& workgroupDims, const uint32_t oldAlphaBinCount)
646- {
647- // For the normalization shader, it should be that:
648- // alphaBinCount = k*workGroupSize, k is integer, k >= 1,
649- assert (workgroupDims.x != 0 && workgroupDims.y != 0 && workgroupDims.z != 0 );
650- const auto wgSize = workgroupDims.x * workgroupDims.y * workgroupDims.z ;
651- const auto paddedAlphaBinCount = core::roundUp (oldAlphaBinCount, wgSize);
652- return paddedAlphaBinCount;
653- }
654447};
655448
656449}
0 commit comments