Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 157 additions & 0 deletions QuickView/ComputeEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,77 @@ void CSToneMap(uint3 id : SV_DispatchThreadID)
}
)";

// HLSL compute shader source (entry point "CSToneMapHDR", 8x8 thread groups).
// It applies the exposure scale and then rolls off highlights so that content
// whose peak exceeds the display peak is compressed into the display's range.
// Bindings: t0 = source texture, u0 = destination texture, b0 = ToneMapParams.
static const char* HLSL_ToneMapHdrToHdr = R"(
Texture2D<float4> SrcTex : register(t0);
RWTexture2D<float4> DstTex : register(u0);

cbuffer ToneMapParams : register(b0)
{
    float ContentPeakScRgb;
    float DisplayPeakScRgb;
    float PaperWhiteScRgb;   // Not read by this shader; kept so the 4-float layout is unchanged.
    float Exposure;
};

// Highlight roll-off from contentPeak down to displayPeak.
//
// The brightest channel is used as the luminance proxy L. Pixels at or below
// the knee (70% of displayPeak) pass through untouched, as does everything when
// the content already fits the display (contentPeak <= displayPeak). Between the
// knee and contentPeak, L is remapped onto [knee, displayPeak] with a quadratic
// ease-out, t * (2 - t). All three channels are scaled by the same factor, so
// the ratios between R, G and B are preserved.
float3 ToneMapHDR(float3 color, float contentPeak, float displayPeak)
{
    float L = max(color.r, max(color.g, color.b));
    if (L <= 0.0) return color;

    // Knee: start compressing at 70% of the display peak.
    float threshold = displayPeak * 0.7;

    if (L <= threshold || contentPeak <= displayPeak) {
        return color;
    }

    // Normalised position inside the roll-off region, clamped to [0, 1] so that
    // values above contentPeak land exactly on displayPeak.
    float t = saturate((L - threshold) / (contentPeak - threshold));

    // Quadratic ease-out: reaches displayPeak at t = 1 with zero slope.
    float compressed = threshold + (displayPeak - threshold) * (t * (2.0 - t));

    // The ease-out leaves the knee with slope
    // 2 * (displayPeak - threshold) / (contentPeak - threshold), which is greater
    // than 1 whenever contentPeak is only slightly above displayPeak. Without this
    // clamp such content would be brightened instead of compressed, and the
    // output would jump as contentPeak crosses displayPeak.
    compressed = min(compressed, L);

    return color * (compressed / L);
}

[numthreads(8, 8, 1)]
void CSToneMapHDR(uint3 id : SV_DispatchThreadID)
{
    // The dispatch is rounded up to whole 8x8 groups; drop out-of-range threads.
    uint width, height;
    SrcTex.GetDimensions(width, height);
    if (id.x >= width || id.y >= height) {
        return;
    }

    float4 color = SrcTex[id.xy];
    color.rgb = max(color.rgb, 0.0.xxx);
    color.a = saturate(color.a);

    // Peaks below 1.0 are raised to 1.0.
    float contentPeak = max(ContentPeakScRgb, 1.0);
    float displayPeak = max(DisplayPeakScRgb, 1.0);

    // Apply exposure
    color.rgb *= Exposure;

    // Exposure scales the content, so its peak scales by the same factor.
    color.rgb = ToneMapHDR(color.rgb, contentPeak * Exposure, displayPeak);

    DstTex[id.xy] = color;
}
)";

HRESULT ComputeEngine::Initialize(ID3D11Device* pDevice) {
if (!pDevice) return E_INVALIDARG;
m_d3dDevice = pDevice;
Expand Down Expand Up @@ -141,6 +212,16 @@ HRESULT ComputeEngine::CompileShaders() {
hr = m_d3dDevice->CreateComputeShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, &m_csToneMapHdrToSdr);
if (FAILED(hr)) return hr;

// 4. HDR to HDR roll-off mapping
blob.Reset(); errorBlob.Reset();
hr = D3DCompile(HLSL_ToneMapHdrToHdr, strlen(HLSL_ToneMapHdrToHdr), nullptr, nullptr, nullptr, "CSToneMapHDR", "cs_5_0", D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &blob, &errorBlob);
if (FAILED(hr)) {
if (errorBlob) OutputDebugStringA((char*)errorBlob->GetBufferPointer());
return hr;
}
hr = m_d3dDevice->CreateComputeShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, &m_csToneMapHdrToHdr);
if (FAILED(hr)) return hr;

D3D11_BUFFER_DESC cbDesc = {};
cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
cbDesc.ByteWidth = 16;
Expand Down Expand Up @@ -329,3 +410,79 @@ HRESULT ComputeEngine::ToneMapHdrToSdr(const uint8_t* srcPixels, int width, int
}

} // namespace QuickView

/// Uploads a linear RGBA float image to the GPU and runs the HDR-to-HDR roll-off
/// compute shader over it.
///
/// This definition sits after the closing brace of namespace QuickView, so the
/// member is named with an explicit QuickView:: qualifier. Names used after the
/// declarator (such as ToneMapSettings) are then looked up through the class
/// scope and its enclosing namespace.
///
/// @param srcPixels  Source pixels, uploaded as DXGI_FORMAT_R32G32B32A32_FLOAT
///                   (16 bytes per pixel).
/// @param width      Image width in pixels; must be > 0.
/// @param height     Image height in pixels; must be > 0.
/// @param stride     Distance in bytes between source rows; must be at least
///                   width * 16.
/// @param settings   Peak, paper-white and exposure values copied into the
///                   shader's constant buffer.
/// @param outTexture Receives the tone-mapped R32G32B32A32_FLOAT texture. The
///                   caller owns the returned reference. Set to nullptr on failure.
/// @return S_OK on success, E_INVALIDARG for bad arguments or an uninitialised
///         engine, otherwise the failing Direct3D HRESULT.
HRESULT QuickView::ComputeEngine::ToneMapHdrToHdr(const uint8_t* srcPixels, int width, int height, int stride, const ToneMapSettings& settings, ID3D11Texture2D** outTexture) {
    if (!outTexture) return E_INVALIDARG;
    *outTexture = nullptr;  // never leave the out-parameter indeterminate on failure
    if (!m_valid || !srcPixels || width <= 0 || height <= 0) return E_INVALIDARG;

    // CreateTexture2D reads width * 16 bytes from every row start. A negative or
    // too-small pitch would make it read outside the caller's buffer, so reject it.
    // The comparison is done in 64 bits so width * 16 cannot overflow.
    constexpr long long kBytesPerTexel = 16;
    if (static_cast<long long>(stride) < static_cast<long long>(width) * kBytesPerTexel) return E_INVALIDARG;

    // Immutable source texture initialised directly from the caller's pixels.
    D3D11_TEXTURE2D_DESC srcDesc = {};
    srcDesc.Width = static_cast<UINT>(width);
    srcDesc.Height = static_cast<UINT>(height);
    srcDesc.MipLevels = 1;
    srcDesc.ArraySize = 1;
    srcDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
    srcDesc.SampleDesc.Count = 1;
    srcDesc.Usage = D3D11_USAGE_IMMUTABLE;
    srcDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;

    D3D11_SUBRESOURCE_DATA initData = {};
    initData.pSysMem = srcPixels;
    initData.SysMemPitch = static_cast<UINT>(stride);

    ComPtr<ID3D11Texture2D> pSrc;
    HRESULT hr = m_d3dDevice->CreateTexture2D(&srcDesc, &initData, &pSrc);
    if (FAILED(hr)) return hr;

    // Destination texture: written by the shader through a UAV, and also
    // bindable as a shader resource.
    D3D11_TEXTURE2D_DESC dstDesc = {};
    dstDesc.Width = srcDesc.Width;
    dstDesc.Height = srcDesc.Height;
    dstDesc.MipLevels = 1;
    dstDesc.ArraySize = 1;
    dstDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
    dstDesc.SampleDesc.Count = 1;
    dstDesc.Usage = D3D11_USAGE_DEFAULT;
    dstDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;

    ComPtr<ID3D11Texture2D> pDst;
    hr = m_d3dDevice->CreateTexture2D(&dstDesc, nullptr, &pDst);
    if (FAILED(hr)) return hr;

    ComPtr<ID3D11ShaderResourceView> pSRV;
    ComPtr<ID3D11UnorderedAccessView> pUAV;
    hr = m_d3dDevice->CreateShaderResourceView(pSrc.Get(), nullptr, &pSRV);
    if (FAILED(hr)) return hr;
    hr = m_d3dDevice->CreateUnorderedAccessView(pDst.Get(), nullptr, &pUAV);
    if (FAILED(hr)) return hr;

    // Upload the four shader parameters in the order the cbuffer declares them.
    D3D11_MAPPED_SUBRESOURCE mapped = {};
    hr = m_d3dContext->Map(m_toneMapConstantBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
    if (FAILED(hr)) return hr;

    const float params[4] = {
        settings.contentPeakScRgb,
        settings.displayPeakScRgb,
        settings.paperWhiteScRgb,
        settings.exposure
    };
    static_assert(sizeof(params) == 16, "ToneMapParams cbuffer is four packed floats");
    memcpy(mapped.pData, params, sizeof(params));
    m_d3dContext->Unmap(m_toneMapConstantBuffer.Get(), 0);

    // Bind, then dispatch one 8x8 thread group per tile (rounded up).
    m_d3dContext->CSSetShader(m_csToneMapHdrToHdr.Get(), nullptr, 0);
    ID3D11ShaderResourceView* srvs[] = { pSRV.Get() };
    m_d3dContext->CSSetShaderResources(0, 1, srvs);
    ID3D11UnorderedAccessView* uavs[] = { pUAV.Get() };
    m_d3dContext->CSSetUnorderedAccessViews(0, 1, uavs, nullptr);
    ID3D11Buffer* constantBuffers[] = { m_toneMapConstantBuffer.Get() };
    m_d3dContext->CSSetConstantBuffers(0, 1, constantBuffers);
    m_d3dContext->Dispatch((srcDesc.Width + 7) / 8, (srcDesc.Height + 7) / 8, 1);

    // Unbind everything so the compute stage does not keep these resources bound.
    ID3D11UnorderedAccessView* nullUAV[] = { nullptr };
    m_d3dContext->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr);
    ID3D11ShaderResourceView* nullSRV[] = { nullptr };
    m_d3dContext->CSSetShaderResources(0, 1, nullSRV);
    ID3D11Buffer* nullCB[] = { nullptr };
    m_d3dContext->CSSetConstantBuffers(0, 1, nullCB);
    m_d3dContext->CSSetShader(nullptr, nullptr, 0);

    // Hand the destination texture's reference to the caller.
    *outTexture = pDst.Detach();
    return S_OK;
}
9 changes: 9 additions & 0 deletions QuickView/ComputeEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ class ComputeEngine {
/// Input is expected to be RGBA float with scene-linear values where 1.0
/// represents SDR reference white.
/// </summary>
HRESULT ToneMapHdrToSdr(const uint8_t* srcPixels, int width, int height,
    int stride, const ToneMapSettings& settings,
    ID3D11Texture2D** outTexture);

/// <summary>
/// Tone map a linear HDR float buffer into HDR float on the GPU, applying roll-off for extreme highlights.
/// The source is uploaded as RGBA float (16 bytes per pixel); stride is the
/// distance in bytes between source rows. On success outTexture receives a
/// float RGBA texture whose reference the caller owns.
/// </summary>
HRESULT ToneMapHdrToHdr(const uint8_t* srcPixels, int width, int height,
    int stride, const ToneMapSettings& settings,
    ID3D11Texture2D** outTexture);
Expand All @@ -82,6 +89,8 @@ class ComputeEngine {
// Compiled compute shaders owned by the engine.
ComPtr<ID3D11ComputeShader> m_csFormatConvert;
ComPtr<ID3D11ComputeShader> m_csGenMips;
ComPtr<ID3D11ComputeShader> m_csToneMapHdrToSdr;
// HDR-to-HDR highlight roll-off shader (HLSL entry point "CSToneMapHDR"); created in CompileShaders.
ComPtr<ID3D11ComputeShader> m_csToneMapHdrToHdr;

// Constant buffer that ToneMapHdrToHdr maps, fills with four floats and binds at slot b0.
// NOTE(review): presumably shared with the HDR-to-SDR path — confirm against ToneMapHdrToSdr.
ComPtr<ID3D11Buffer> m_toneMapConstantBuffer;

// Helper: Compile Embedded Shaders
Expand Down
87 changes: 77 additions & 10 deletions QuickView/ImageLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4201,20 +4201,56 @@ HRESULT CImageLoader::LoadToMemory(LPCWSTR filePath, IWICBitmap** ppBitmap, std:
}
}

// 2. Convert to D2D Compatible Format (PBGRA32)
// 2. Convert to D2D Compatible Format (PBGRA32 or 128bppRGBAFloat for HDR)
ComPtr<IWICFormatConverter> converter;
hr = m_wicFactory->CreateFormatConverter(&converter);
if (FAILED(hr)) return hr;

WICPixelFormatGUID srcFormat;
hr = frame->GetPixelFormat(&srcFormat);

// Check if the source format is a high-precision/HDR format
bool isHighPrecision = false;
if (SUCCEEDED(hr)) {
if (IsEqualGUID(srcFormat, GUID_WICPixelFormat128bppRGBAFloat) ||
IsEqualGUID(srcFormat, GUID_WICPixelFormat128bppPRGBAFloat) ||
IsEqualGUID(srcFormat, GUID_WICPixelFormat128bppRGBFloat) ||
IsEqualGUID(srcFormat, GUID_WICPixelFormat64bppRGBAHalf) ||
IsEqualGUID(srcFormat, GUID_WICPixelFormat64bppPRGBAHalf) ||
IsEqualGUID(srcFormat, GUID_WICPixelFormat64bppRGBHalf) ||
IsEqualGUID(srcFormat, GUID_WICPixelFormat64bppRGBA) ||
IsEqualGUID(srcFormat, GUID_WICPixelFormat64bppPRGBA) ||
IsEqualGUID(srcFormat, GUID_WICPixelFormat64bppRGB) ||
IsEqualGUID(srcFormat, GUID_WICPixelFormat48bppRGB) ||
IsEqualGUID(srcFormat, GUID_WICPixelFormat48bppRGBHalf) ||
IsEqualGUID(srcFormat, GUID_WICPixelFormat32bppRGBE)) {
isHighPrecision = true;
}
}

WICPixelFormatGUID targetFormat = isHighPrecision ? GUID_WICPixelFormat128bppRGBAFloat : GUID_WICPixelFormat32bppPBGRA;

hr = converter->Initialize(
finalSource.Get(), // Use frame source
GUID_WICPixelFormat32bppPBGRA,
targetFormat,
WICBitmapDitherTypeNone,
nullptr,
0.f,
WICBitmapPaletteTypeMedianCut
);
if (FAILED(hr)) return hr;
if (FAILED(hr)) {
// Fallback to PBGRA if float conversion is not supported
targetFormat = GUID_WICPixelFormat32bppPBGRA;
hr = converter->Initialize(
finalSource.Get(),
targetFormat,
WICBitmapDitherTypeNone,
nullptr,
0.f,
WICBitmapPaletteTypeMedianCut
);
if (FAILED(hr)) return hr;
}

// 3. Force Decode to Memory
HRESULT hrBitmap = m_wicFactory->CreateBitmapFromSource(
Expand Down Expand Up @@ -6335,7 +6371,12 @@ HRESULT CImageLoader::LoadToMemoryPMR(LPCWSTR filePath, DecodedImage* pOutput, s
UINT w = 0, h = 0;
wicBitmap->GetSize(&w, &h);

UINT stride = w * 4;
WICPixelFormatGUID srcWicFmt;
wicBitmap->GetPixelFormat(&srcWicFmt);
bool isFloat = IsEqualGUID(srcWicFmt, GUID_WICPixelFormat128bppRGBAFloat);
int bpp = isFloat ? 16 : 4;

UINT stride = w * bpp;
size_t bufSize = (size_t)stride * h;

try {
Expand All @@ -6361,7 +6402,7 @@ HRESULT CImageLoader::LoadToMemoryPMR(LPCWSTR filePath, DecodedImage* pOutput, s
} else {
for (UINT y = 0; y < h; ++y) {
memcpy(pOutput->pixels.data() + (size_t)y * stride,
pData + (size_t)y * cbStride, stride);
pData + (size_t)y * cbStride, w * bpp);
}
}
pOutput->isValid = true;
Expand Down Expand Up @@ -10340,20 +10381,46 @@ HRESULT CImageLoader::LoadToFrame(LPCWSTR filePath, QuickView::RawImageFrame* ou
if (finalH < 1) finalH = 1;
}

WICPixelFormatGUID outWicFormat;
wicBitmap->GetPixelFormat(&outWicFormat);
bool isFloat = IsEqualGUID(outWicFormat, GUID_WICPixelFormat128bppRGBAFloat);
int bpp = isFloat ? 16 : 4;
QuickView::PixelFormat outPixelFormat = isFloat ? QuickView::PixelFormat::R32G32B32A32_FLOAT : QuickView::PixelFormat::BGRA8888;

// Allocate output buffer with aligned stride
int outStride = CalculateSIMDAlignedStride(finalW, 4);
int outStride = CalculateSIMDAlignedStride(finalW, bpp);
size_t outSize = static_cast<size_t>(outStride) * finalH;
uint8_t* pixels = AllocateBuffer(outSize);
if (!pixels) return E_OUTOFMEMORY;

if (needWicResize) {
// Resize directly from WIC memory lock
SIMDUtils::ResizeBilinear(wicData, wicWidth, wicHeight, wicStride,
pixels, finalW, finalH, outStride);
if (isFloat) {
// Very simple fallback for resizing float buffers if needed.
// Ideally SIMDUtils should support it, but for WIC fallback resize we can just nearest-neighbor or skip.
// Using a simple row/col mapping for floats:
float* dst = (float*)pixels;
float* src = (float*)wicData;
for (int y = 0; y < finalH; ++y) {
int srcY = y * wicHeight / finalH;
for (int x = 0; x < finalW; ++x) {
int srcX = x * wicWidth / finalW;
int dstIdx = y * (outStride/4) + x * 4;
int srcIdx = srcY * (wicStride/4) + srcX * 4;
dst[dstIdx+0] = src[srcIdx+0];
dst[dstIdx+1] = src[srcIdx+1];
dst[dstIdx+2] = src[srcIdx+2];
dst[dstIdx+3] = src[srcIdx+3];
}
}
} else {
SIMDUtils::ResizeBilinear(wicData, wicWidth, wicHeight, wicStride,
pixels, finalW, finalH, outStride);
}
} else {
// Copy row by row (handles stride mismatch)
for (UINT y = 0; y < wicHeight; ++y) {
memcpy(pixels + y * outStride, wicData + y * wicStride, wicWidth * 4);
memcpy(pixels + y * outStride, wicData + y * wicStride, wicWidth * bpp);
}
}

Expand All @@ -10362,7 +10429,7 @@ HRESULT CImageLoader::LoadToFrame(LPCWSTR filePath, QuickView::RawImageFrame* ou
outFrame->width = finalW;
outFrame->height = finalH;
outFrame->stride = outStride;
outFrame->format = PixelFormat::BGRA8888; // WIC always converts to BGRA
outFrame->format = outPixelFormat; // Handle float correctly
SetupDeleter(pixels);

// [v5.3] WIC Fallback Metadata Population
Expand Down
26 changes: 23 additions & 3 deletions QuickView/RenderEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,9 +386,28 @@ CRenderEngine::UploadRawFrameToGPU(const QuickView::RawImageFrame &frame,

D2D1_BITMAP_PROPERTIES1 props = GetDefaultBitmapProps(dxgiFormat, alphaMode);

if (frame.format == QuickView::PixelFormat::R32G32B32A32_FLOAT &&
!m_isAdvancedColor) {
if (m_computeEngine && m_computeEngine->IsAvailable()) {
if (frame.format == QuickView::PixelFormat::R32G32B32A32_FLOAT) {
if (m_isAdvancedColor) {
// Pure HDR Environment (Roll-off)
const QuickView::ToneMapSettings toneMapSettings = BuildToneMapSettings(frame, m_displayColorState);
if (m_computeEngine && m_computeEngine->IsAvailable() && toneMapSettings.contentPeakScRgb > toneMapSettings.displayPeakScRgb) {
ComPtr<ID3D11Texture2D> pTex;
if (SUCCEEDED(m_computeEngine->ToneMapHdrToHdr(
frame.pixels, static_cast<int>(frame.width),
static_cast<int>(frame.height), static_cast<int>(frame.stride),
toneMapSettings, &pTex))) {
ComPtr<IDXGISurface> dxgiSurface;
if (SUCCEEDED(pTex.As(&dxgiSurface))) {
return m_d2dContext->CreateBitmapFromDxgiSurface(
dxgiSurface.Get(), &props,
reinterpret_cast<ID2D1Bitmap1 **>(outBitmap));
}
}
}
// Otherwise, just fall through to standard upload (no tone mapping needed or fallback).
} else {
// SDR Environment (Fallback Tone Mapping)
if (m_computeEngine && m_computeEngine->IsAvailable()) {
ComPtr<ID3D11Texture2D> pTex;
const QuickView::ToneMapSettings toneMapSettings =
BuildToneMapSettings(frame, m_displayColorState);
Expand Down Expand Up @@ -445,6 +464,7 @@ CRenderEngine::UploadRawFrameToGPU(const QuickView::RawImageFrame &frame,
static_cast<UINT32>(frame.height)),
sdrPixels.data(), static_cast<UINT32>(frame.width * 4), &sdrProps,
reinterpret_cast<ID2D1Bitmap1 **>(outBitmap));
}
}

// [Optimization] Use GPU Compute for non-native format conversion
Expand Down
Loading