diff --git a/QuickView/ComputeEngine.cpp b/QuickView/ComputeEngine.cpp
index 7452753..d001507 100644
--- a/QuickView/ComputeEngine.cpp
+++ b/QuickView/ComputeEngine.cpp
@@ -99,6 +99,77 @@ void CSToneMap(uint3 id : SV_DispatchThreadID)
 }
 )";
 
+static const char* HLSL_ToneMapHdrToHdr = R"(
+Texture2D<float4> SrcTex : register(t0);
+RWTexture2D<float4> DstTex : register(u0);
+
+cbuffer ToneMapParams : register(b0)
+{
+    float ContentPeakScRgb;
+    float DisplayPeakScRgb;
+    float PaperWhiteScRgb;
+    float Exposure;
+};
+
+// Highlight roll-off that maps [0, contentPeak] into [0, displayPeak].
+float3 ToneMapHDR(float3 color, float contentPeak, float displayPeak)
+{
+    // Values at or below the knee (70% of displayPeak) pass through unchanged,
+    // so SDR-range content is left intact.
+    // Above the knee the largest channel is remapped with a quadratic ease-out
+    // that reaches displayPeak exactly at contentPeak and is clamped beyond it.
+
+    // All three channels are scaled by the same factor, preserving their ratios.
+
+    // The remapped value is never allowed to exceed the input: when contentPeak
+    // is only slightly above displayPeak the raw curve would brighten the image.
+
+    // If the content already fits the display, the colour is returned as-is.
+    float L = max(color.r, max(color.g, color.b));
+    if (L <= 0.0) return color;
+
+    // Knee: compression only starts above 0.7 * displayPeak
+    float threshold = displayPeak * 0.7;
+
+    if (L <= threshold || contentPeak <= displayPeak) {
+        return color;
+    }
+
+    // Roll-off region
+    float t = (L - threshold) / (contentPeak - threshold);
+    t = saturate(t);
+    // Quadratic ease-out t*(2-t), capped at L so the curve only ever compresses
+    float compressed = min(L, threshold + (displayPeak - threshold) * (t * (2.0 - t)));
+
+    return color * (compressed / L);
+}
+
+[numthreads(8, 8, 1)]
+void CSToneMapHDR(uint3 id : SV_DispatchThreadID)
+{
+    uint width, height;
+    SrcTex.GetDimensions(width, height);
+    if (id.x >= width || id.y >= height) {
+        return;
+    }
+
+    float4 color = SrcTex[id.xy];
+    color.rgb = max(color.rgb, 0.0.xxx);
+    color.a = saturate(color.a);
+
+    float contentPeak = max(ContentPeakScRgb, 1.0);
+    float displayPeak = max(DisplayPeakScRgb, 1.0);
+
+    // Apply exposure
+    color.rgb *= Exposure;
+
+    // Tone Map high dynamic range into display's actual peak
+    color.rgb = ToneMapHDR(color.rgb, contentPeak * Exposure, displayPeak);
+
+    DstTex[id.xy] = color;
+}
+)";
+
 HRESULT ComputeEngine::Initialize(ID3D11Device* pDevice) {
     if (!pDevice) return E_INVALIDARG;
     m_d3dDevice = pDevice;
@@ -141,6 +212,16 @@ HRESULT ComputeEngine::CompileShaders() {
     hr = m_d3dDevice->CreateComputeShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, &m_csToneMapHdrToSdr);
     if (FAILED(hr)) return hr;
 
+    // 4. HDR to HDR roll-off mapping
+    blob.Reset(); errorBlob.Reset();
+    hr = D3DCompile(HLSL_ToneMapHdrToHdr, strlen(HLSL_ToneMapHdrToHdr), nullptr, nullptr, nullptr, "CSToneMapHDR", "cs_5_0", D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &blob, &errorBlob);
+    if (FAILED(hr)) {
+        if (errorBlob) OutputDebugStringA(static_cast<const char*>(errorBlob->GetBufferPointer()));
+        return hr;
+    }
+    hr = m_d3dDevice->CreateComputeShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, &m_csToneMapHdrToHdr);
+    if (FAILED(hr)) return hr;
+
     D3D11_BUFFER_DESC cbDesc = {};
     cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
     cbDesc.ByteWidth = 16;
@@ -329,3 +410,90 @@ HRESULT ComputeEngine::ToneMapHdrToSdr(const uint8_t* srcPixels, int width, int
 }
+
+// Tone-maps an RGBA float image on the GPU, rolling off highlights above the
+// display peak (HLSL_ToneMapHdrToHdr). On success *outTexture receives a new
+// R32G32B32A32_FLOAT texture whose reference the caller owns.
+// Returns E_INVALIDARG for bad arguments, otherwise the first failing D3D HRESULT.
+HRESULT ComputeEngine::ToneMapHdrToHdr(const uint8_t* srcPixels, int width, int height, int stride, const ToneMapSettings& settings, ID3D11Texture2D** outTexture) {
+    if (!outTexture) return E_INVALIDARG;
+    *outTexture = nullptr; // keep the out-param well-defined on every failure path
+    if (!m_valid || !srcPixels || width <= 0 || height <= 0) return E_INVALIDARG;
+    // Each row holds `width` float4 pixels (16 bytes each); a smaller pitch would
+    // make the texture upload read beyond the rows the caller provided.
+    if (static_cast<long long>(stride) < static_cast<long long>(width) * 16) return E_INVALIDARG;
+
+    D3D11_TEXTURE2D_DESC srcDesc = {};
+    srcDesc.Width = static_cast<UINT>(width);
+    srcDesc.Height = static_cast<UINT>(height);
+    srcDesc.MipLevels = 1;
+    srcDesc.ArraySize = 1;
+    srcDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
+    srcDesc.SampleDesc.Count = 1;
+    srcDesc.Usage = D3D11_USAGE_IMMUTABLE;
+    srcDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
+
+    D3D11_SUBRESOURCE_DATA initData = {};
+    initData.pSysMem = srcPixels;
+    initData.SysMemPitch = static_cast<UINT>(stride);
+
+    ComPtr<ID3D11Texture2D> pSrc;
+    HRESULT hr = m_d3dDevice->CreateTexture2D(&srcDesc, &initData, &pSrc);
+    if (FAILED(hr)) return hr;
+
+    D3D11_TEXTURE2D_DESC dstDesc = {};
+    dstDesc.Width = srcDesc.Width;
+    dstDesc.Height = srcDesc.Height;
+    dstDesc.MipLevels = 1;
+    dstDesc.ArraySize = 1;
+    dstDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
+    dstDesc.SampleDesc.Count = 1;
+    dstDesc.Usage = D3D11_USAGE_DEFAULT;
+    dstDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
+
+    ComPtr<ID3D11Texture2D> pDst;
+    hr = m_d3dDevice->CreateTexture2D(&dstDesc, nullptr, &pDst);
+    if (FAILED(hr)) return hr;
+
+    ComPtr<ID3D11ShaderResourceView> pSRV;
+    ComPtr<ID3D11UnorderedAccessView> pUAV;
+    hr = m_d3dDevice->CreateShaderResourceView(pSrc.Get(), nullptr, &pSRV);
+    if (FAILED(hr)) return hr;
+    hr = m_d3dDevice->CreateUnorderedAccessView(pDst.Get(), nullptr, &pUAV);
+    if (FAILED(hr)) return hr;
+
+    D3D11_MAPPED_SUBRESOURCE mapped = {};
+    hr = m_d3dContext->Map(m_toneMapConstantBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
+    if (FAILED(hr)) return hr;
+
+    // Field order must match cbuffer ToneMapParams in the shader (4 floats = 16 bytes)
+    const float params[4] = {
+        settings.contentPeakScRgb,
+        settings.displayPeakScRgb,
+        settings.paperWhiteScRgb,
+        settings.exposure
+    };
+    memcpy(mapped.pData, params, sizeof(params));
+    m_d3dContext->Unmap(m_toneMapConstantBuffer.Get(), 0);
+
+    m_d3dContext->CSSetShader(m_csToneMapHdrToHdr.Get(), nullptr, 0);
+    ID3D11ShaderResourceView* srvs[] = { pSRV.Get() };
+    m_d3dContext->CSSetShaderResources(0, 1, srvs);
+    ID3D11UnorderedAccessView* uavs[] = { pUAV.Get() };
+    m_d3dContext->CSSetUnorderedAccessViews(0, 1, uavs, nullptr);
+    ID3D11Buffer* constantBuffers[] = { m_toneMapConstantBuffer.Get() };
+    m_d3dContext->CSSetConstantBuffers(0, 1, constantBuffers);
+    m_d3dContext->Dispatch((srcDesc.Width + 7) / 8, (srcDesc.Height + 7) / 8, 1); // 8x8 thread groups, rounded up
+
+    // Unbind the views, constant buffer and shader that were set for this dispatch
+    ID3D11UnorderedAccessView* nullUAV[] = { nullptr };
+    m_d3dContext->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr);
+    ID3D11ShaderResourceView* nullSRV[] = { nullptr };
+    m_d3dContext->CSSetShaderResources(0, 1, nullSRV);
+    ID3D11Buffer* nullCB[] = { nullptr };
+    m_d3dContext->CSSetConstantBuffers(0, 1, nullCB);
+    m_d3dContext->CSSetShader(nullptr, nullptr, 0);
+
+    *outTexture = pDst.Detach();
+    return S_OK;
+}
 
 } // namespace QuickView
diff --git a/QuickView/ComputeEngine.h b/QuickView/ComputeEngine.h
index e268eb7..6dac7af 100644
--- a/QuickView/ComputeEngine.h
+++ b/QuickView/ComputeEngine.h
@@ -64,6 +64,16 @@ class ComputeEngine {
     /// Input is expected to be RGBA float with scene-linear values where 1.0
     /// represents SDR reference white.
     /// </summary>
     HRESULT ToneMapHdrToSdr(const uint8_t* srcPixels, int width, int height,
+                            int stride, const ToneMapSettings& settings,
+                            ID3D11Texture2D** outTexture);
+
+    /// <summary>
+    /// Tone map a linear HDR float buffer into HDR float on the GPU, applying
+    /// roll-off for extreme highlights. Input is RGBA float (16 bytes per pixel,
+    /// rows `stride` bytes apart); on success outTexture receives a new
+    /// R32G32B32A32_FLOAT texture whose reference the caller owns.
+    /// </summary>
+    HRESULT ToneMapHdrToHdr(const uint8_t* srcPixels, int width, int height,
                             int stride, const ToneMapSettings& settings,
                             ID3D11Texture2D** outTexture);
@@ -82,6 +92,8 @@ class ComputeEngine {
     ComPtr<ID3D11ComputeShader> m_csFormatConvert;
     ComPtr<ID3D11ComputeShader> m_csGenMips;
     ComPtr<ID3D11ComputeShader> m_csToneMapHdrToSdr;
+    ComPtr<ID3D11ComputeShader> m_csToneMapHdrToHdr;
+
     ComPtr<ID3D11Buffer> m_toneMapConstantBuffer;
 
     // Helper: Compile Embedded Shaders
diff --git a/QuickView/ImageLoader.cpp b/QuickView/ImageLoader.cpp
index 236d787..9bd7cd2 100644
--- a/QuickView/ImageLoader.cpp
+++ b/QuickView/ImageLoader.cpp
@@ -4201,20 +4201,56 @@ HRESULT CImageLoader::LoadToMemory(LPCWSTR filePath, IWICBitmap** ppBitmap, std:
         }
     }
 
-    // 2. Convert to D2D Compatible Format (PBGRA32)
+    // 2. Convert to D2D Compatible Format (PBGRA32 or 128bppRGBAFloat for HDR)
     ComPtr<IWICFormatConverter> converter;
     hr = m_wicFactory->CreateFormatConverter(&converter);
     if (FAILED(hr)) return hr;
 
+    WICPixelFormatGUID srcFormat = {};
+    hr = frame->GetPixelFormat(&srcFormat);
+
+    // Check if the source format is a high-precision/HDR format (float, half, 16-bit integer, RGBE)
+    bool isHighPrecision = false;
+    if (SUCCEEDED(hr)) {
+        if (IsEqualGUID(srcFormat, GUID_WICPixelFormat128bppRGBAFloat) ||
+            IsEqualGUID(srcFormat, GUID_WICPixelFormat128bppPRGBAFloat) ||
+            IsEqualGUID(srcFormat, GUID_WICPixelFormat128bppRGBFloat) ||
+            IsEqualGUID(srcFormat, GUID_WICPixelFormat64bppRGBAHalf) ||
+            IsEqualGUID(srcFormat, GUID_WICPixelFormat64bppPRGBAHalf) ||
+            IsEqualGUID(srcFormat, GUID_WICPixelFormat64bppRGBHalf) ||
+            IsEqualGUID(srcFormat, GUID_WICPixelFormat64bppRGBA) ||
+            IsEqualGUID(srcFormat, GUID_WICPixelFormat64bppPRGBA) ||
+            IsEqualGUID(srcFormat, GUID_WICPixelFormat64bppRGB) ||
+            IsEqualGUID(srcFormat, GUID_WICPixelFormat48bppRGB) ||
+            IsEqualGUID(srcFormat, GUID_WICPixelFormat48bppRGBHalf) ||
+            IsEqualGUID(srcFormat, GUID_WICPixelFormat32bppRGBE)) {
+            isHighPrecision = true;
+        }
+    }
+
+    WICPixelFormatGUID targetFormat = isHighPrecision ? GUID_WICPixelFormat128bppRGBAFloat : GUID_WICPixelFormat32bppPBGRA;
+
     hr = converter->Initialize(
         finalSource.Get(), // Use frame source
-        GUID_WICPixelFormat32bppPBGRA,
+        targetFormat,
         WICBitmapDitherTypeNone,
         nullptr,
         0.f,
         WICBitmapPaletteTypeMedianCut
     );
+    if (FAILED(hr) && isHighPrecision) {
+        // Fallback to PBGRA if float conversion is not supported (a PBGRA attempt is not retried)
+        targetFormat = GUID_WICPixelFormat32bppPBGRA;
+        hr = converter->Initialize(
+            finalSource.Get(),
+            targetFormat,
+            WICBitmapDitherTypeNone,
+            nullptr,
+            0.f,
+            WICBitmapPaletteTypeMedianCut
+        );
+    }
     if (FAILED(hr)) return hr;
 
     // 3. Force Decode to Memory
     HRESULT hrBitmap = m_wicFactory->CreateBitmapFromSource(
@@ -6335,7 +6371,12 @@ HRESULT CImageLoader::LoadToMemoryPMR(LPCWSTR filePath, DecodedImage* pOutput, s
     UINT w = 0, h = 0;
     wicBitmap->GetSize(&w, &h);
 
-    UINT stride = w * 4;
+    WICPixelFormatGUID srcWicFmt = {};
+    const bool isFloat = SUCCEEDED(wicBitmap->GetPixelFormat(&srcWicFmt)) &&
+                         IsEqualGUID(srcWicFmt, GUID_WICPixelFormat128bppRGBAFloat);
+    const int bpp = isFloat ? 16 : 4; // bytes per pixel
+
+    UINT stride = w * bpp;
     size_t bufSize = (size_t)stride * h;
 
     try {
@@ -6361,7 +6402,7 @@ HRESULT CImageLoader::LoadToMemoryPMR(LPCWSTR filePath, DecodedImage* pOutput, s
         } else {
             for (UINT y = 0; y < h; ++y) {
                 memcpy(pOutput->pixels.data() + (size_t)y * stride,
-                       pData + (size_t)y * cbStride, stride);
+                       pData + (size_t)y * cbStride, w * bpp);
             }
         }
         pOutput->isValid = true;
@@ -10340,20 +10381,46 @@ HRESULT CImageLoader::LoadToFrame(LPCWSTR filePath, QuickView::RawImageFrame* ou
         if (finalH < 1) finalH = 1;
     }
 
+    WICPixelFormatGUID outWicFormat = {};
+    const bool isFloat = SUCCEEDED(wicBitmap->GetPixelFormat(&outWicFormat)) &&
+                         IsEqualGUID(outWicFormat, GUID_WICPixelFormat128bppRGBAFloat);
+    const int bpp = isFloat ? 16 : 4; // bytes per pixel
+    QuickView::PixelFormat outPixelFormat = isFloat ? QuickView::PixelFormat::R32G32B32A32_FLOAT : QuickView::PixelFormat::BGRA8888;
+
     // Allocate output buffer with aligned stride
-    int outStride = CalculateSIMDAlignedStride(finalW, 4);
+    int outStride = CalculateSIMDAlignedStride(finalW, bpp);
     size_t outSize = static_cast<size_t>(outStride) * finalH;
     uint8_t* pixels = AllocateBuffer(outSize);
     if (!pixels) return E_OUTOFMEMORY;
 
     if (needWicResize) {
         // Resize directly from WIC memory lock
-        SIMDUtils::ResizeBilinear(wicData, wicWidth, wicHeight, wicStride,
-                                  pixels, finalW, finalH, outStride);
+        if (isFloat) {
+            // SIMDUtils::ResizeBilinear is only used for the 4-byte path below, so
+            // float frames are resampled with plain nearest-neighbour sampling.
+            // Rows are addressed through their byte strides and every offset is
+            // computed in size_t, so large images cannot overflow 32-bit int math.
+            const size_t pixelBytes = 4 * sizeof(float);
+            for (int y = 0; y < finalH; ++y) {
+                const size_t srcY = static_cast<size_t>(y) * wicHeight / finalH;
+                const auto* srcRow = wicData + srcY * wicStride;
+                auto* dstRow = pixels + static_cast<size_t>(y) * outStride;
+                for (int x = 0; x < finalW; ++x) {
+                    const size_t srcX = static_cast<size_t>(x) * wicWidth / finalW;
+                    // Copy one RGBA float pixel (memcpy makes no alignment assumptions)
+                    memcpy(dstRow + static_cast<size_t>(x) * pixelBytes,
+                           srcRow + srcX * pixelBytes,
+                           pixelBytes);
+                }
+            }
+        } else {
+            SIMDUtils::ResizeBilinear(wicData, wicWidth, wicHeight, wicStride,
+                                      pixels, finalW, finalH, outStride);
+        }
     } else {
         // Copy row by row (handles stride mismatch)
         for (UINT y = 0; y < wicHeight; ++y) {
-            memcpy(pixels + y * outStride, wicData + y * wicStride, wicWidth * 4);
+            memcpy(pixels + static_cast<size_t>(y) * outStride, wicData + static_cast<size_t>(y) * wicStride, static_cast<size_t>(wicWidth) * bpp);
         }
     }
 
@@ -10362,7 +10429,7 @@ HRESULT CImageLoader::LoadToFrame(LPCWSTR filePath, QuickView::RawImageFrame* ou
     outFrame->width = finalW;
     outFrame->height = finalH;
     outFrame->stride = outStride;
-    outFrame->format = PixelFormat::BGRA8888; // WIC always converts to BGRA
+    outFrame->format = outPixelFormat; // BGRA8888, or R32G32B32A32_FLOAT for float bitmaps
     SetupDeleter(pixels);
 
     // [v5.3] WIC Fallback Metadata Population
diff --git a/QuickView/RenderEngine.cpp b/QuickView/RenderEngine.cpp
index 635c8c0..6fab514 100644
--- a/QuickView/RenderEngine.cpp
+++ b/QuickView/RenderEngine.cpp
@@ -386,9 +386,28 @@ CRenderEngine::UploadRawFrameToGPU(const QuickView::RawImageFrame &frame,
   D2D1_BITMAP_PROPERTIES1 props =
       GetDefaultBitmapProps(dxgiFormat, alphaMode);
 
-  if (frame.format == QuickView::PixelFormat::R32G32B32A32_FLOAT &&
-      !m_isAdvancedColor) {
-    if (m_computeEngine && m_computeEngine->IsAvailable()) {
+  if (frame.format == QuickView::PixelFormat::R32G32B32A32_FLOAT) {
+    if (m_isAdvancedColor) {
+      // HDR display: roll off highlights only when the content peak exceeds the display peak
+      const QuickView::ToneMapSettings toneMapSettings = BuildToneMapSettings(frame, m_displayColorState);
+      if (m_computeEngine && m_computeEngine->IsAvailable() && toneMapSettings.contentPeakScRgb > toneMapSettings.displayPeakScRgb) {
+        ComPtr<ID3D11Texture2D> pTex;
+        if (SUCCEEDED(m_computeEngine->ToneMapHdrToHdr(
+                frame.pixels, static_cast<int>(frame.width),
+                static_cast<int>(frame.height), static_cast<int>(frame.stride),
+                toneMapSettings, &pTex))) {
+          ComPtr<IDXGISurface> dxgiSurface;
+          if (SUCCEEDED(pTex.As(&dxgiSurface))) {
+            return m_d2dContext->CreateBitmapFromDxgiSurface(
+                dxgiSurface.Get(), &props,
+                reinterpret_cast<ID2D1Bitmap1**>(outBitmap));
+          }
+        }
+      }
+      // Otherwise, just fall through to standard upload (no tone mapping needed or fallback).
+    } else {
+      // SDR Environment (Fallback Tone Mapping)
+      if (m_computeEngine && m_computeEngine->IsAvailable()) {
       ComPtr<ID3D11Texture2D> pTex;
       const QuickView::ToneMapSettings toneMapSettings =
           BuildToneMapSettings(frame, m_displayColorState);
@@ -445,6 +464,7 @@ CRenderEngine::UploadRawFrameToGPU(const QuickView::RawImageFrame &frame,
                       static_cast<UINT32>(frame.height)),
           sdrPixels.data(), static_cast<UINT32>(frame.width * 4), &sdrProps,
           reinterpret_cast<ID2D1Bitmap1**>(outBitmap));
+    }
   }
 
   // [Optimization] Use GPU Compute for non-native format conversion
diff --git a/QuickView/main.cpp b/QuickView/main.cpp
index 4d2b77b..155be00 100644
--- a/QuickView/main.cpp
+++ b/QuickView/main.cpp
@@ -1479,9 +1479,17 @@ static bool RenderCompareComposite(HWND hwnd) {
         ComPtr<ID2D1SolidColorBrush> bgBrush;
         ComPtr<ID2D1SolidColorBrush> borderBrush;
         ComPtr<ID2D1SolidColorBrush> arrowBrush;
-        if (FAILED(ctx->CreateSolidColorBrush(D2D1::ColorF(0.0f, 0.0f, 0.0f, 0.50f * opacity), &bgBrush))) return;
-        if (FAILED(ctx->CreateSolidColorBrush(D2D1::ColorF(1.0f, 1.0f, 1.0f, 0.85f * opacity), &borderBrush))) return;
-        if (FAILED(ctx->CreateSolidColorBrush(D2D1::ColorF(1.0f, 1.0f, 1.0f, 0.95f * opacity), &arrowBrush))) return;
+        float hdrWhiteScale = g_compEngine ? (std::max)(1.0f, g_compEngine->GetDisplayColorState().GetSdrWhiteScale()) : 1.0f;
+        auto scaleUiColor = [hdrWhiteScale](const D2D1_COLOR_F& color) {
+            return D2D1::ColorF(
+                (std::max)(0.0f, color.r * hdrWhiteScale),
+                (std::max)(0.0f, color.g * hdrWhiteScale),
+                (std::max)(0.0f, color.b * hdrWhiteScale),
+                color.a);
+        };
+        if (FAILED(ctx->CreateSolidColorBrush(scaleUiColor(D2D1::ColorF(0.0f, 0.0f, 0.0f, 0.50f * opacity)), &bgBrush))) return;
+        if (FAILED(ctx->CreateSolidColorBrush(scaleUiColor(D2D1::ColorF(1.0f, 1.0f, 1.0f, 0.85f * opacity)), &borderBrush))) return;
+        if (FAILED(ctx->CreateSolidColorBrush(scaleUiColor(D2D1::ColorF(1.0f, 1.0f, 1.0f, 0.95f * opacity)), &arrowBrush))) return;
 
         D2D1_ELLIPSE ellipse = D2D1::Ellipse(D2D1::Point2F(splitX, centerY), radius, radius);
         ctx->FillEllipse(ellipse, bgBrush.Get());
@@ -1533,7 +1541,15 @@ static bool RenderCompareComposite(HWND hwnd) {
         ctx->PopAxisAlignedClip();
 
         ComPtr<ID2D1SolidColorBrush> dividerBrush;
-        ctx->CreateSolidColorBrush(D2D1::ColorF(1.0f, 1.0f, 1.0f, 0.85f), &dividerBrush);
+        float hdrWhiteScale = g_compEngine ? (std::max)(1.0f, g_compEngine->GetDisplayColorState().GetSdrWhiteScale()) : 1.0f;
+        auto scaleUiColor = [hdrWhiteScale](const D2D1_COLOR_F& color) {
+            return D2D1::ColorF(
+                (std::max)(0.0f, color.r * hdrWhiteScale),
+                (std::max)(0.0f, color.g * hdrWhiteScale),
+                (std::max)(0.0f, color.b * hdrWhiteScale),
+                color.a);
+        };
+        ctx->CreateSolidColorBrush(scaleUiColor(D2D1::ColorF(1.0f, 1.0f, 1.0f, 0.85f)), &dividerBrush);
         if (dividerBrush) {
             ctx->DrawLine(D2D1::Point2F(splitX, 0.0f), D2D1::Point2F(splitX, (float)winH), dividerBrush.Get(), 2.0f);
         }
@@ -1549,7 +1565,15 @@ static bool RenderCompareComposite(HWND hwnd) {
         DrawResourceIntoViewport(ctx, g_imageResource, rightExif, rightView, rightVp);
 
         ComPtr<ID2D1SolidColorBrush> dividerBrush;
-        ctx->CreateSolidColorBrush(D2D1::ColorF(1.0f, 1.0f, 1.0f, 0.35f), &dividerBrush);
+        float hdrWhiteScale = g_compEngine ? (std::max)(1.0f, g_compEngine->GetDisplayColorState().GetSdrWhiteScale()) : 1.0f;
+        auto scaleUiColor = [hdrWhiteScale](const D2D1_COLOR_F& color) {
+            return D2D1::ColorF(
+                (std::max)(0.0f, color.r * hdrWhiteScale),
+                (std::max)(0.0f, color.g * hdrWhiteScale),
+                (std::max)(0.0f, color.b * hdrWhiteScale),
+                color.a);
+        };
+        ctx->CreateSolidColorBrush(scaleUiColor(D2D1::ColorF(1.0f, 1.0f, 1.0f, 0.35f)), &dividerBrush);
         if (dividerBrush) {
             ctx->DrawLine(D2D1::Point2F(splitX, 0.0f), D2D1::Point2F(splitX, (float)winH), dividerBrush.Get(), 1.0f);
         }
@@ -5081,6 +5105,13 @@ static void DrawLocalBackground(ID2D1DeviceContext* context, float widthPixels,
     float bgLuma = (bgColor.r * 0.299f + bgColor.g * 0.587f + bgColor.b * 0.114f);
     D2D1_COLOR_F overlayColor = (bgLuma < 0.5f) ? D2D1::ColorF(1.0f, 1.0f, 1.0f, 0.1f) : D2D1::ColorF(0.0f, 0.0f, 0.0f, 0.15f);
 
+    float hdrWhiteScale = g_compEngine ? (std::max)(1.0f, g_compEngine->GetDisplayColorState().GetSdrWhiteScale()) : 1.0f;
+    overlayColor = D2D1::ColorF(
+        (std::max)(0.0f, overlayColor.r * hdrWhiteScale),
+        (std::max)(0.0f, overlayColor.g * hdrWhiteScale),
+        (std::max)(0.0f, overlayColor.b * hdrWhiteScale),
+        overlayColor.a);
+
     ComPtr<ID2D1SolidColorBrush> brushOverlay;
     context->CreateSolidColorBrush(overlayColor, &brushOverlay);
 