diff --git a/openspec/changes/2026-06-09-packed-hdr-decode/proposal.md b/openspec/changes/2026-06-09-packed-hdr-decode/proposal.md new file mode 100644 index 0000000..1e24e21 --- /dev/null +++ b/openspec/changes/2026-06-09-packed-hdr-decode/proposal.md @@ -0,0 +1,149 @@ +# Proposal: packed HDR format decode for remote texture export + +## Motivation + +Issue #236 named R11G11B10_FLOAT and R9G9B9E5_SHAREDEXP as formats to support. +PR #237 (621528f) scoped them out: `_decode_texture_png` rejects all non-Regular +`ResourceFormatType`s with `-32002 "format not supported for remote decode"`. + +Both formats are common HDR render-target and light-probe formats: +- R11G11B10_FLOAT is the standard G-buffer emission/radiance target in UE5, Unity HDRP, + and most modern engines. Its 32 bits-per-pixel with no sign makes it a first-class RT. +- R9G9B9E5_SHAREDEXP appears as HDR skybox / IBL texture storage. + +Both are closed-form bit-unpackable in numpy with no GPU round-trip, so they can follow +the same local-decode path already used for Regular Float formats. + +## Design + +### Entry point + +`_decode_texture_png` currently has a hard gate at the top: + +```python +if fmt.type != rd.ResourceFormatType.Regular: + return None +``` + +The fix adds two explicit branches **before** this gate, keyed on +`fmt.type == rd.ResourceFormatType.R11G11B10` and +`fmt.type == rd.ResourceFormatType.R9G9B9E5`. Each branch: +1. Length-checks `len(raw) != width * height * depth_lvl * 4` (4 bytes/pixel, fixed). +2. Reinterprets `raw` as `uint32` LE and extracts float32 RGB via numpy bitops. +3. Feeds the result into the existing Float display path: `nan_to_num`, `clip(0,1)`, + `_srgb_encode`, alpha=255 opaque, output RGBA PNG. + +The Regular gate is unchanged; every other non-Regular format still returns `None`. 
+ +### Unpack functions + +Two private helpers (in `_helpers.py` alongside the existing helpers): + +**`_unpack_r11g11b10(words: np.ndarray) -> np.ndarray`** + +Input: uint32 array shape `(N,)`. Output: float32 array shape `(N, 3)` — R, G, B. + +Bit extraction (all shifts on the uint32 word): +- R 11-bit: `words & 0x7FF` (bits [0:11)) +- G 11-bit: `(words >> 11) & 0x7FF` (bits [11:22)) +- B 10-bit: `(words >> 22) & 0x3FF` (bits [22:32)) + +For 11-bit component `x` (exp=5 bits, mant=6 bits, no sign): +- `exp = x >> 6`, `mant = x & 0x3F` +- exp == 0 → subnormal: `value = (mant / 64.0) * 2**-14` +- exp == 31 → Inf/NaN (handled by nan_to_num downstream) +- else → normal: `value = (1.0 + mant / 64.0) * 2**(exp - 15)` + +For 10-bit component `x` (exp=5 bits, mant=5 bits, no sign): +- `exp = x >> 5`, `mant = x & 0x1F` +- exp == 0 → subnormal: `value = (mant / 32.0) * 2**-14` +- exp == 31 → Inf/NaN +- else → normal: `value = (1.0 + mant / 32.0) * 2**(exp - 15)` + +Vectorised implementation: build `exp` and `mant` arrays, apply numpy `where` for the +three cases (subnormal / inf-nan / normal). The inf/nan case can emit `np.inf` or any +non-finite value — `nan_to_num` in the display path sanitises them. + +**`_unpack_r9g9b9e5(words: np.ndarray) -> np.ndarray`** + +Input: uint32 array shape `(N,)`. Output: float32 array shape `(N, 3)`. + +Bit extraction: +- R mantissa 9-bit: `words & 0x1FF` (bits [0:9)) +- G mantissa 9-bit: `(words >> 9) & 0x1FF` (bits [9:18)) +- B mantissa 9-bit: `(words >> 18) & 0x1FF` (bits [18:27)) +- Shared exponent 5-bit: `(words >> 27) & 0x1F` (bits [27:32)) + +Decode: `value_c = mant_c * 2.0**(exp - 24)` (equivalent to `mant_c / 512.0 * 2^(exp-15)`). +No Inf/NaN possible (the exponent has no reserved value in this format); shared exponent +E=31 is valid and just produces large values which clip to 1 in the display path. 
+ +### Integration into `_decode_texture_png` + +IMPORTANT (ordering): in the current code the `ResourceFormatType.Regular` gate +(`if fmt.type != rd.ResourceFormatType.Regular: return None`) comes FIRST, and the MSAA +guard (`if getattr(tex, "msSamp", 1) > 1: return None`) comes AFTER it. Packed formats are +non-Regular, so they are rejected by the Regular gate before ever reaching the MSAA guard. +The packed branch MUST therefore be inserted **before** the Regular gate, and it MUST: +(a) perform its own MSAA check (the existing guard is below the Regular gate and is +unreachable for non-Regular formats), and (b) compute `width`/`height`/`depth_lvl` +locally, because those locals are not yet defined this early in the function. + +Insert immediately after `fmt = tex.format` (and after the `if not raw: return None` +check), before the Regular gate: + +```python +# Packed HDR formats: 4 bytes/pixel, closed-form numpy decode. +if fmt.type in (rd.ResourceFormatType.R11G11B10, rd.ResourceFormatType.R9G9B9E5): + if getattr(tex, "msSamp", 1) > 1: + return None + width = max(1, tex.width >> mip) + height = max(1, tex.height >> mip) + depth_lvl = max(1, getattr(tex, "depth", 1) >> mip) + if len(raw) != width * height * depth_lvl * 4: + return None + words = np.frombuffer(raw, dtype=np.dtype(" np.ndarray` to + `src/rdc/handlers/_helpers.py` (place adjacent to `_decode_dtype`). + Vectorised numpy: extract R/G 11-bit and B 10-bit fields; apply subnormal / + normal / inf-nan cases via `np.where`; return float32 shape `(N, 3)`. + +- [x] Add `_unpack_r9g9b9e5(words: np.ndarray) -> np.ndarray` to the same file. + Extract R/G/B 9-bit mantissas and 5-bit shared exponent; decode as + `mant * 2.0**(exp - 24)`; return float32 shape `(N, 3)`. 
+ +## Phase B: hook into `_decode_texture_png` + +- [x] In `_decode_texture_png`, insert the packed-HDR branch **before** the + `ResourceFormatType.Regular` gate (the existing MSAA guard sits below that gate and is + unreachable for non-Regular formats, so it cannot be relied on): + - Guard on `fmt.type in (rd.ResourceFormatType.R11G11B10, rd.ResourceFormatType.R9G9B9E5)` + - Own MSAA check: `if getattr(tex, "msSamp", 1) > 1: return None` + - Compute `width`/`height`/`depth_lvl` locally (those locals are defined only after the + Regular gate in the current code) + - Length check: `len(raw) != width * height * depth_lvl * 4` → return None + - Reinterpret as `uint32` LE, reshape to `(depth_lvl * height, width)`, ravel, call the + appropriate unpack helper, reshape back to `(depth_lvl * height, width, 3)` + - Apply Float display path: `nan_to_num`, `clip`, `_srgb_encode`, alpha=255, RGBA PNG + +## Phase C: unit tests + +- [x] Add TC-1 through TC-14 (from test-plan.md) to + `tests/unit/test_tex_stats_handler.py`, following the `_remote_state` / `_handle_request` + pattern used by the existing remote decode tests. + - Use `struct.pack(")` to construct raw bytes for each test vector. + - Pixel assertions use `img.getpixel((0, 0))` on the decoded PNG. +- [x] TC-15 (MANDATORY): repurpose the existing + `test_tex_export_remote_packed_format_rejected` — it currently asserts R11G11B10 + (`type=13`) is rejected with `-32002`, which this change breaks. Swap its fixture to a + still-unsupported non-Regular packed type (e.g. `R5G6B5` type=14 or `R10G10B10A2` + type=12), keep the `-32002 "not supported"` assertion, and rename the test. + +## Phase D: verification + +- [x] Run `pixi run lint` — no new lint errors. +- [x] Run `pixi run test` — all existing tests pass; new TC-1 through TC-14 pass. +- [ ] Real-GPU verify step per test-plan.md section "Manual / real-GPU verification" + (or mark DEFERRED with a tracking comment if no suitable capture is available). 
diff --git a/openspec/changes/2026-06-09-packed-hdr-decode/test-plan.md b/openspec/changes/2026-06-09-packed-hdr-decode/test-plan.md new file mode 100644 index 0000000..fca5202 --- /dev/null +++ b/openspec/changes/2026-06-09-packed-hdr-decode/test-plan.md @@ -0,0 +1,185 @@ +# Test plan: packed HDR format decode + +All tests follow the pattern in `tests/unit/test_tex_stats_handler.py`: +`_remote_state(tex, raw, tmp_path)` + `_handle_request(rpc_request("tex_export", {...}), state)`. +Format fields use `rd.ResourceFormat(type=..., compByteWidth=4, compCount=3, compType=1)`. +- `rd.ResourceFormatType.R11G11B10 = 13` +- `rd.ResourceFormatType.R9G9B9E5 = 16` + +--- + +## Bit-vector construction reference + +### R11G11B10_FLOAT + +Per-pixel layout in a little-endian uint32: +- R 11-bit: bits [0:11) — 5-bit exponent (bits 6-10), 6-bit mantissa (bits 0-5), no sign. +- G 11-bit: bits [11:22). +- B 10-bit: bits [22:32) — 5-bit exponent (bits 27-31 of the full word), 5-bit mantissa. + +Decode of an 11-bit component `x`: +- exp = x >> 6, mant = x & 0x3F +- exp == 0: value = (mant / 64) * 2^-14 (subnormal) +- exp == 31: Inf (mant==0) or NaN (mant!=0) +- else: value = (1 + mant/64) * 2^(exp-15) + +Decode of the 10-bit B component `x`: +- exp = x >> 5, mant = x & 0x1F +- exp == 0: value = (mant / 32) * 2^-14 +- exp == 31: Inf/NaN +- else: value = (1 + mant/32) * 2^(exp-15) + +**Known-value uint32 words (LE):** + +| Color (R, G, B) | uint32 word | LE bytes | Notes | +|-----------------|-------------|----------|-------| +| (1.0, 0.5, 0.25) | `0x681C03C0` | `[0xC0,0x03,0x1C,0x68]` | R: exp=15 mant=0; G: exp=14 mant=0; B: exp=13 mant=0 | +| max finite (all ch) | `0xF7FDFFBF` | `[0xBF,0xFF,0xFD,0xF7]` | R,G: exp=30 mant=63; B: exp=30 mant=31 | +| Inf (all ch) | `0xF83E07C0` | `[0xC0,0x07,0x3E,0xF8]` | R,G: exp=31 mant=0; B: exp=31 mant=0 | +| NaN (all ch) | `0xF87E0FC1` | `[0xC1,0x0F,0x7E,0xF8]` | R,G: exp=31 mant=1; B: exp=31 mant=1 | +| subnormal (mant=1 all) | `0x00400801` | 
`[0x01,0x08,0x40,0x00]` | R,G: exp=0 mant=1; B: exp=0 mant=1 | + +### R9G9B9E5_SHAREDEXP + +Per-pixel layout in a little-endian uint32: +- R mantissa 9-bit: bits [0:9) +- G mantissa 9-bit: bits [9:18) +- B mantissa 9-bit: bits [18:27) +- Shared exponent 5-bit: bits [27:32) + +Decode: `value_c = mant_c * 2.0^(exp - 24)` (= `mant_c / 512 * 2^(exp-15)`). +No reserved exponent values; no Inf/NaN possible. + +**Known-value uint32 words (LE):** + +| Color (R, G, B) | uint32 word | LE bytes | Build (E, rm, gm, bm) | +|-----------------|-------------|----------|-----------------------| +| (1.0, 1.0, 1.0) | `0xC0040201` | `[0x01,0x02,0x04,0xC0]` | E=24, m=1 each: `1 * 2^0 = 1.0` | +| (1.0, 0.5, 0.25) | `0xB0040404` | `[0x04,0x04,0x04,0xB0]` | E=22, rm=4, gm=2, bm=1: `4*2^-2=1, 2*2^-2=0.5, 1*2^-2=0.25` | + +**Expected sRGB output bytes** (after clip + `_srgb_encode`): +- 1.0 → 255, 0.5 → 188, 0.25 → 137, 0.0 → 0 + +--- + +## R11G11B10_FLOAT unit tests + +**TC-1: happy path (1.0, 0.5, 0.25)** +- `fmt`: type=13, compByteWidth=4, compCount=3, compType=1, name="R11G11B10_FLOAT" +- `tex`: 1×1, msSamp=1 +- `raw`: `struct.pack(" +- Assert: `resp["result"]` present; PNG RGBA; pixel[0,0][0]==255, pixel[0,0][1]` ≈ 188 (±2), pixel[0,0][2]` ≈ 137 (±2), alpha==255 + +**TC-2: Inf clips to white** +- `raw`: `struct.pack(" -o /tmp/hdr_rt.png` for a draw event whose primary RT has one + of the packed formats. Verify: + - Command exits 0. + - The PNG file exists and opens in an image viewer showing a plausible HDR scene + (bright highlights clipped to white, not garbled noise). + - `file /tmp/hdr_rt.png` reports PNG, `identify /tmp/hdr_rt.png` (ImageMagick) reports + geometry matching the RT dimensions. + +4. Cross-check: use `SaveTexture` in local mode on the same event/resource. Compare the + two PNGs visually; they should be perceptually similar (same content, slight gamma + difference acceptable since local mode may use a different display mapping). + +5. 
If a capture is unavailable: fallback is unit vectors only (TC-1 through TC-14 above). + Mark the real-GPU step as DEFERRED and file a tracking comment in the PR. diff --git a/src/rdc/handlers/_helpers.py b/src/rdc/handlers/_helpers.py index 9a128e5..9a5a1f6 100644 --- a/src/rdc/handlers/_helpers.py +++ b/src/rdc/handlers/_helpers.py @@ -288,6 +288,58 @@ def _decode_dtype(rd: Any, comp_type: int, comp_byte_width: int) -> str | None: return table.get((comp_type, comp_byte_width)) +def _unpack_float_component(exp: Any, mant: Any, mant_bits: int) -> Any: + """Decode a no-sign mini-float component to float32 (vectorised). + + exp/mant are uint arrays of the same shape. ``mant_bits`` is the mantissa + width (6 for the 11-bit channels, 5 for the 10-bit channel); the exponent is + always 5 bits (bias 15, max value 31 reserved for Inf/NaN). + """ + import numpy as np + + scale = float(1 << mant_bits) + frac = mant.astype(np.float32) / np.float32(scale) + subnormal = frac * np.float32(2.0**-14) + normal = (np.float32(1.0) + frac) * np.exp2(exp.astype(np.float32) - np.float32(15)) + inf_nan = np.where(mant == 0, np.float32(np.inf), np.float32(np.nan)) + out = np.where(exp == 0, subnormal, normal) + out = np.where(exp == 31, inf_nan, out) + return out.astype(np.float32) + + +def _unpack_r11g11b10(words: Any) -> Any: + """Decode R11G11B10_FLOAT uint32 words to a float32 (N, 3) RGB array. + + R: bits [0:11) (5-bit exp, 6-bit mantissa), G: bits [11:22) (same layout), + B: bits [22:32) (5-bit exp, 5-bit mantissa). No sign; exponent bias 15. 
+ """ + import numpy as np + + r = words & np.uint32(0x7FF) + g = (words >> np.uint32(11)) & np.uint32(0x7FF) + b = (words >> np.uint32(22)) & np.uint32(0x3FF) + rv = _unpack_float_component(r >> np.uint32(6), r & np.uint32(0x3F), 6) + gv = _unpack_float_component(g >> np.uint32(6), g & np.uint32(0x3F), 6) + bv = _unpack_float_component(b >> np.uint32(5), b & np.uint32(0x1F), 5) + return np.stack([rv, gv, bv], axis=-1).astype(np.float32) + + +def _unpack_r9g9b9e5(words: Any) -> Any: + """Decode R9G9B9E5_SHAREDEXP uint32 words to a float32 (N, 3) RGB array. + + R/G/B 9-bit mantissas at [0:9), [9:18), [18:27); shared 5-bit exponent at + [27:32). value = mant * 2^(exp - 24). No reserved exponent, no Inf/NaN. + """ + import numpy as np + + rm = (words & np.uint32(0x1FF)).astype(np.float32) + gm = ((words >> np.uint32(9)) & np.uint32(0x1FF)).astype(np.float32) + bm = ((words >> np.uint32(18)) & np.uint32(0x1FF)).astype(np.float32) + exp = ((words >> np.uint32(27)) & np.uint32(0x1F)).astype(np.float32) + scale = np.exp2(exp - np.float32(24)) + return np.stack([rm * scale, gm * scale, bm * scale], axis=-1).astype(np.float32) + + def _decode_texture_png(rd: Any, tex: Any, raw: bytes, mip: int, *, is_depth: bool) -> bytes | None: """Decode tightly packed GetTextureData bytes into PNG bytes. @@ -323,6 +375,34 @@ def _decode_texture_png(rd: Any, tex: Any, raw: bytes, mip: int, *, is_depth: bo return None fmt = tex.format + + # Packed HDR formats: 4 bytes/pixel, closed-form numpy decode. Non-Regular, + # so they must be handled before the Regular gate (which would reject them); + # they carry their own MSAA guard and local dimension/length computation. 
+ if fmt.type in (rd.ResourceFormatType.R11G11B10, rd.ResourceFormatType.R9G9B9E5): + if getattr(tex, "msSamp", 1) > 1: + return None + width = max(1, tex.width >> mip) + height = max(1, tex.height >> mip) + depth_lvl = max(1, getattr(tex, "depth", 1) >> mip) + if len(raw) != width * height * depth_lvl * 4: + return None + words = np.frombuffer(raw, dtype=np.dtype(" 1: diff --git a/tests/unit/test_tex_stats_handler.py b/tests/unit/test_tex_stats_handler.py index 1400f07..e0a5aed 100644 --- a/tests/unit/test_tex_stats_handler.py +++ b/tests/unit/test_tex_stats_handler.py @@ -529,11 +529,11 @@ def test_tex_export_remote_uscaled_rejected(tmp_path: object) -> None: assert resp["error"]["code"] == -32002 -def test_tex_export_remote_packed_format_rejected(tmp_path: object) -> None: - # R11G11B10 is a packed (non-Regular) format -> reject. - fmt = rd.ResourceFormat( - name="R11G11B10_FLOAT", compByteWidth=4, compCount=3, compType=1, type=13 - ) +def test_tex_export_remote_unsupported_packed_format_rejected(tmp_path: object) -> None: + # R5G6B5 (type=14) is a still-unsupported packed (non-Regular) format -> reject. + # R11G11B10/R9G9B9E5 now decode, so this preserves rejection coverage for a + # packed type the change does not handle. 
+ fmt = rd.ResourceFormat(name="R5G6B5_UNORM", compByteWidth=2, compCount=3, compType=2, type=14) tex = rd.TextureDescription(resourceId=rd.ResourceId(57), width=1, height=1, format=fmt) state = _remote_state(tex, bytes(4), tmp_path) resp, _ = _handle_request(rpc_request("tex_export", {"id": 57}), state) @@ -651,6 +651,149 @@ def test_rt_overlay_remote_still_rejected() -> None: assert "remote mode" in resp["error"]["message"] +# --------------------------------------------------------------------------- +# Packed HDR formats: R11G11B10_FLOAT (type=13) and R9G9B9E5_SHAREDEXP (type=16) +# --------------------------------------------------------------------------- + + +def _r11g11b10_tex(res_id: int, **kw: object) -> rd.TextureDescription: + fmt = rd.ResourceFormat( + name="R11G11B10_FLOAT", compByteWidth=4, compCount=3, compType=1, type=13 + ) + return rd.TextureDescription(resourceId=rd.ResourceId(res_id), format=fmt, **kw) # type: ignore[arg-type] + + +def _r9g9b9e5_tex(res_id: int, **kw: object) -> rd.TextureDescription: + fmt = rd.ResourceFormat( + name="R9G9B9E5_SHAREDEXP", compByteWidth=4, compCount=3, compType=1, type=16 + ) + return rd.TextureDescription(resourceId=rd.ResourceId(res_id), format=fmt, **kw) # type: ignore[arg-type] + + +def test_tex_export_remote_r11g11b10_happy_path(tmp_path: object) -> None: + # (1.0, 0.5, 0.25): R exp=15 mant=0, G exp=14 mant=0, B exp=13 mant=0. + tex = _r11g11b10_tex(110, width=1, height=1) + state = _remote_state(tex, struct.pack(" None: + tex = _r11g11b10_tex(111, width=1, height=1) + state = _remote_state(tex, struct.pack(" None: + tex = _r11g11b10_tex(112, width=1, height=1) + state = _remote_state(tex, struct.pack(" None: + # exp=0 mant=1 for all channels -> ~9.5e-7, sRGB rounds to 0, no error. 
+ tex = _r11g11b10_tex(113, width=1, height=1) + state = _remote_state(tex, struct.pack(" None: + tex = _r11g11b10_tex(114, width=2, height=2) + state = _remote_state(tex, b"\x00" * 4, tmp_path) # should be 16 bytes + resp, _ = _handle_request(rpc_request("tex_export", {"id": 114}), state) + assert resp["error"]["code"] == -32002 + + +def test_tex_export_remote_r11g11b10_msaa_rejected(tmp_path: object) -> None: + tex = _r11g11b10_tex(115, width=1, height=1, msSamp=4) + state = _remote_state(tex, struct.pack(" None: + tex = _r11g11b10_tex(116, width=1, height=1, depth=2) + state = _remote_state(tex, struct.pack("<2I", 0x681C03C0, 0x00000000), tmp_path) + resp, _ = _handle_request(rpc_request("tex_export", {"id": 116}), state) + img = _read_png(resp["result"]["path"]) + assert img.size == (1, 2) + px0 = img.getpixel((0, 0)) + assert px0[0] == 255 + assert abs(px0[1] - 188) <= 2 + assert abs(px0[2] - 137) <= 2 + assert img.getpixel((0, 1)) == (0, 0, 0, 255) + + +def test_tex_export_remote_r9g9b9e5_white(tmp_path: object) -> None: + # E=24, mant=1 each -> 1.0 each channel. + tex = _r9g9b9e5_tex(160, width=1, height=1) + state = _remote_state(tex, struct.pack(" None: + # E=22, rm=4 gm=2 bm=1 -> 1.0, 0.5, 0.25. 
+ tex = _r9g9b9e5_tex(161, width=1, height=1) + state = _remote_state(tex, struct.pack(" None: + tex = _r9g9b9e5_tex(162, width=1, height=1) + state = _remote_state(tex, struct.pack(" None: + tex = _r9g9b9e5_tex(163, width=2, height=2) + state = _remote_state(tex, b"\x00" * 4, tmp_path) + resp, _ = _handle_request(rpc_request("tex_export", {"id": 163}), state) + assert resp["error"]["code"] == -32002 + + +def test_tex_export_remote_r9g9b9e5_3d_tiled(tmp_path: object) -> None: + tex = _r9g9b9e5_tex(164, width=1, height=1, depth=2) + state = _remote_state(tex, struct.pack("<2I", 0xC0040201, 0x00000000), tmp_path) + resp, _ = _handle_request(rpc_request("tex_export", {"id": 164}), state) + img = _read_png(resp["result"]["path"]) + assert img.size == (1, 2) + assert img.getpixel((0, 0)) == (255, 255, 255, 255) + assert img.getpixel((0, 1)) == (0, 0, 0, 255) + + +def test_tex_export_remote_r9g9b9e5_max_exp_clips_white(tmp_path: object) -> None: + # E=31, all mantissas=511 -> each channel = 65408.0 (finite), clips to white. + tex = _r9g9b9e5_tex(165, width=1, height=1) + state = _remote_state(tex, struct.pack("