diff --git a/.cargo/config.toml b/.cargo/config.toml index c3d08218fbd3..ae52362ef602 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -7,6 +7,9 @@ codegen = "run --package re_types_builder --" # Temporary solution while we wait for our own xtasks! run-wasm = "run --release --package run_wasm --" +[unstable] +codegen-backend = true +config-include = true [build] # Incremental compilation blows up the size of the target folder, and is also buggy. diff --git a/.cargo/cranelift.toml b/.cargo/cranelift.toml new file mode 100644 index 000000000000..edc22397888e --- /dev/null +++ b/.cargo/cranelift.toml @@ -0,0 +1,39 @@ +# Cranelift codegen backend for the dev-fast profile (nightly only). +# Crates with unsupported aarch64 intrinsics (CRC32, NEON) are overridden to compile with LLVM. +# This file is included via config-include from the pixi task and run configuration + +[profile.dev-fast] +codegen-backend = "cranelift" + +[profile.dev-fast.package.crc32fast] +codegen-backend = "llvm" + +[profile.dev-fast.package.zune-jpeg] +codegen-backend = "llvm" + +[profile.dev-fast.package.image] +codegen-backend = "llvm" + +[profile.dev-fast.package.re_sdk_types] +codegen-backend = "llvm" + +[profile.dev-fast.package.glam] +codegen-backend = "llvm" + +[profile.dev-fast.package.fearless_simd] +codegen-backend = "llvm" + +[profile.dev-fast.package.vello_common] +codegen-backend = "llvm" + +[profile.dev-fast.package.vello_cpu] +codegen-backend = "llvm" + +[profile.dev-fast.package.tiny-skia] +codegen-backend = "llvm" + +[profile.dev-fast.package.simd-adler32] +codegen-backend = "llvm" + +[profile.dev-fast.package.re_rav1d] +codegen-backend = "llvm" diff --git a/.gitattributes b/.gitattributes index f6126fea948a..83abb61c2de1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -10,4 +10,10 @@ pixi.lock linguist-generated=true **/snapshots/**/*.png filter=lfs diff=lfs merge=lfs -text **/*.h264 filter=lfs diff=lfs merge=lfs -text **/*.mp4 filter=lfs diff=lfs merge=lfs -text 
+landing/**/*.jpg filter=lfs diff=lfs merge=lfs -text +landing/**/*.jpeg filter=lfs diff=lfs merge=lfs -text +landing/**/*.png filter=lfs diff=lfs merge=lfs -text +landing/**/*.gif filter=lfs diff=lfs merge=lfs -text +landing/**/*.webp filter=lfs diff=lfs merge=lfs -text examples/assets/example.rrd filter=lfs diff=lfs merge=lfs -text +tests/assets/image/*.bin filter=lfs diff=lfs merge=lfs -text diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index ec4575895412..72f60699c39a 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,3 +1,11 @@ + + ### Related -- validate various custom conventions via `pixi run lint-rerun ` (not passing any file will check everything) +- Validate conventions via `pixi run lint-rerun ` (no file = check everything) ## Architecture overview @@ -84,22 +92,22 @@ crates/ └── viewer/ # Viewer UI and rendering ``` -For more details about the architecture see `ARCHITECTURE.md`. +More details in `ARCHITECTURE.md`. ### Type system hierarchy -The type system has three levels (generated from .fbs files): +Three levels (generated from .fbs files): 1. **Datatypes** (`rerun.datatypes.*`) - Basic types like Vec3D, Color 2. **Components** (`rerun.components.*`) - Named semantic wrappers (Position3D, Radius) 3. **Archetypes** (`rerun.archetypes.*`) - Collections of components (Points3D, Image) Each archetype specifies: -- Required components (must be provided) -- Recommended components (have good defaults) -- Optional components (purely optional) +- Required components (must provide) +- Recommended components (good defaults) +- Optional components -Example: `Points3D` archetype requires `positions`, recommends `colors` and `radii`, allows optional `labels`. +Example: `Points3D` requires `positions`, recommends `colors` and `radii`, optional `labels`. 
### Data flow @@ -115,11 +123,11 @@ Viewer (immediate mode rendering) ### Blueprint system -The blueprint is the viewer's configuration layer: -- Stored as a separate store (`re_entity_db`) with "blueprint" timeline +Viewer's configuration layer: +- Stored as separate store (`re_entity_db`) with "blueprint" timeline - Defines: view layout, visibility, per-entity overrides, view properties -- Uses the same type system as logged data -- Basic blueprint path hierarchy: `/viewport/`, `/view/{uuid}/`, `/container/{uuid}/` +- Uses same type system as logged data +- Path hierarchy: `/viewport/`, `/view/{uuid}/`, `/container/{uuid}/` ### Visualizers @@ -128,11 +136,15 @@ Each view type (Spatial3D, TimeSeries, etc.) has registered visualizers: - Execute per-frame: query data → process → generate render commands - Examples: Points3DVisualizer, LineStripsVisualizer, MeshVisualizer -The viewer uses **immediate mode**: every frame, query the store and re-render from scratch. +Viewer uses **immediate mode**: every frame, query store + re-render from scratch. + +## Documentation snippets + +See [`docs/snippets/README.md`](docs/snippets/README.md) for running, building, finding snippets. Config in [`docs/snippets/snippets.toml`](docs/snippets/snippets.toml). ## Python development workflow -Python uses a separate uv-managed .venv (not pixi's conda env): +Python uses separate uv-managed .venv (not pixi's conda env): ```bash pixi run py-build # Build rerun-sdk into .venv @@ -140,21 +152,45 @@ pixi run uvpy script.py # Run Python scripts via uv pixi run uv run script.py # Explicit uv run ``` -The `uv` wrapper script unsets `CONDA_PREFIX` to ensure isolation from pixi's environment. +`uv` wrapper unsets `CONDA_PREFIX` for isolation from pixi's env. ## Important notes -- **PyO3 Configuration**: If you see PyO3 config errors, run `pixi run ensure-pyo3-build-cfg` -- **git-lfs**: Required for test snapshots. 
Install with your package manager and run `git lfs install` -- **Immediate Mode**: The entire viewer is rendered from scratch each frame (no state management callbacks) -- **Arrow Native**: Data is stored, transmitted, and queried as Apache Arrow arrays -- **Multi-language**: Changes to .fbs files affect Rust, Python, and C++ simultaneously +- **PyO3 Configuration**: PyO3 config errors → run `pixi run ensure-pyo3-build-cfg` +- **git-lfs**: Required for test snapshots. Install + run `git lfs install` +- **Immediate Mode**: Entire viewer rendered from scratch each frame (no state management callbacks) +- **Arrow Native**: Data stored, transmitted, queried as Apache Arrow arrays +- **Multi-language**: .fbs changes affect Rust, Python, C++ simultaneously + +## Python docstring formatting + +Python API docs use **MkDocs + mkdocstrings** (NOT Sphinx). Never use reStructuredText (rST) in Python docstrings. Use markdown: + +- Cross-refs: `[`ClassName`][]` not `:class:`ClassName`` / `:func:` / `:meth:` +- Warnings: `!!! warning` (MkDocs admonition with indented body) not `.. warning::` +- Deprecation: use `@deprecated` decorator (mkdocstrings renders it), don't duplicate in docstring +- Code blocks: markdown fenced blocks, not `.. code-block::` +- Params: numpy-style (`Parameters`, `Returns` with `----------`) + +## Documentation system + +See [`docs/README.md`](docs/README.md) for full docs architecture. + +Docs span multiple sites: main docs at `rerun.io/docs` (from `docs/content/`), API refs for Python (MkDocs), C++ (Doxygen), JS (TypeDoc) at `ref.rerun.io/docs/{python,cpp,js}/`. 
+ +Key points: +- **`docs/content/reference/types/`** auto-generated by `pixi run codegen` from `.fbs` files - don't edit +- **`docs/content/reference/cli.md`** auto-generated by `pixi run man` - don't edit +- **Code snippets** in `docs/snippets/all/` with Python, Rust, C++ implementations +- `pixi run py-docs-serve` previews Python API docs locally +- `pixi run -e cpp cpp-docs` builds C++ docs ## Development references -- [`ARCHITECTURE.md`](ARCHITECTURE.md) - Detailed architecture documentation +- [`ARCHITECTURE.md`](ARCHITECTURE.md) - Detailed architecture docs - [`BUILD.md`](BUILD.md) - Full build instructions - [`CODE_STYLE.md`](CODE_STYLE.md) - Code style guidelines - [`CONTRIBUTING.md`](CONTRIBUTING.md) - Contribution guidelines -- [`DESIGN.md`](DESIGN.md) - Guidelines for UI design, covering GUI, CLI, documentation, log messages, etc -- [`rerun_py/README.md`](rerun_py/README.md) - Python SDK specific instructions +- [`DESIGN.md`](DESIGN.md) - UI design guidelines (GUI, CLI, docs, log messages) +- [`docs/README.md`](docs/README.md) - Documentation system (sites, builds, deployment) +- [`rerun_py/README.md`](rerun_py/README.md) - Python SDK instructions diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index bc40ce2fdf15..326cba7e2779 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -99,11 +99,11 @@ Of course, this will only take us so far. In the future we plan on caching queri Here is an overview of the crates included in the project: - - - - - + + + + + @@ -142,6 +142,7 @@ Update instructions: | re_component_ui | Provides UI editors for Rerun component data for registration with the Rerun Viewer component UI registry. | | re_dataframe_ui | Rich table widget over DataFusion. | | re_memory_view | Flamegraph visualization for memory usage trees | +| re_plot | Plot ui components shared between time series and bar chart view. | | re_recording_panel | The UI for the recording panel. 
| | re_redap_browser | The UI and communication to implement the in-viewer redap server browser. | | re_selection_panel | The UI for the selection panel. | @@ -152,6 +153,7 @@ Update instructions: | re_view_graph | A View that shows a graph (node-link diagram). | | re_view_map | A View that shows geospatial data on a map. | | re_view_spatial | Views that show entities in a 2D or 3D spatial relationship. | +| re_view_status | A view that shows status transitions as horizontal lanes over time. | | re_view_tensor | A View dedicated to visualizing tensors with arbitrary dimensionality. | | re_view_text_document | A simple View that shows a single text box. | | re_view_text_log | A View that shows text entries in a table and scrolls with the active time. | @@ -201,7 +203,8 @@ Update instructions: | -------------------- | ----------------------------------------------------------------- | | re_redap_client | Official client for the Rerun Data Protocol | | re_redap_tests | Official test suite for the Rerun Data Protocol | -| re_data_loader | Handles loading of Rerun data from file using data loader plugins | +| re_importer | Handles importing of Rerun data from file using importer plugins | +| re_parquet | Parquet file loading with configurable column grouping | | re_data_source | Handles loading of Rerun data from different sources | | re_grpc_client | Client for the legacy StoreHub API | | re_grpc_server | Server for the legacy StoreHub API | @@ -222,7 +225,6 @@ Update instructions: | Crate | Description | | -------------------- | ------------------------------------------------------------------------------------ | | re_analytics | Rerun's analytics SDK | -| re_arrow_combinators | Type-safe, composable transformations for Arrow arrays | | re_arrow_util | Helpers for working with arrow | | re_auth | Authentication and authorization helpers | | re_backoff | Simple backoff logic used for retries. 
| @@ -232,8 +234,8 @@ Update instructions: | re_crash_handler | Detect panics and signals, logging them and optionally sending them to analytics. | | re_error | Helpers for handling errors. | | re_format | Miscellaneous tools to format and parse numbers, durations, etc. | -| re_int_histogram | A histogram with `i64` keys and `u32` counts, supporting both sparse and dense uses. | -| re_lenses | Lenses are an API for extracting, transforming, and restructuring component data. | +| re_lenses | A collection of lenses for mapping component data to Rerun semantic types. | +| re_lenses_core | Core lens types and composable Arrow array transformations | | re_log | Helpers for setting up and doing text logging in the Rerun crates. | | re_log_channel | An in-memory channel of Rerun data messages | | re_mcap | Convert MCAP into Rerun-compatible data. | diff --git a/BUILD.md b/BUILD.md index 7e5587975f12..f50b63a6db8c 100644 --- a/BUILD.md +++ b/BUILD.md @@ -199,7 +199,7 @@ This can be useful for developer tools, e.g. for [setting up IntelliSense in VSC ## Building the docs -High-level documentation for Rerun can be found at [http://rerun.io/docs](http://rerun.io/docs). It is built from the separate repository [rerun-docs](https://github.com/rerun-io/rerun-docs). +High-level documentation for Rerun can be found at [https://rerun.io/docs](https://rerun.io/docs). It is built from the separate repository [rerun-docs](https://github.com/rerun-io/rerun-docs). - 🌊 [C++ API docs](https://ref.rerun.io/docs/cpp) are built with `doxygen` and hosted on GitHub. Use `pixi run -e cpp cpp-docs` to build them locally. For details on the C++ doc-system, see [Writing Docs](rerun_cpp/docs/writing_docs.md). - 🐍 [Python API docs](https://ref.rerun.io/docs/python) are built via `mkdocs` and hosted on GitHub. For details on the Python doc-system, see [Writing Docs](rerun_py/docs/writing_docs.md). 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 14f339e30acf..f43cb636a240 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,379 @@ # Rerun changelog +## Unreleased + +### ⚠️ Breaking changes +- `rerun rrd compact` renamed to `rerun rrd optimize` + +## [0.31.4](https://github.com/rerun-io/rerun/compare/0.31.3...0.31.4) - 2026-04-29 + +### 🔎 Details + +#### 🪳 Bug fixes +- Prompt login on whoami failure [a312a99](https://github.com/rerun-io/rerun/commit/a312a99) +- Ensure proper `on_new_store` book-keeping for all messages [029e245](https://github.com/rerun-io/rerun/commit/029e245) +- Fix visualizations not showing up when initial data was empty [c867040a](https://github.com/rerun-io/rerun/commit/c867040a) +- Fix `follow` not being propagated to `http` URLs with extensions [09d5f94c](https://github.com/rerun-io/rerun/commit/09d5f94c) + +#### 🕸️ Web +- Web view add open options [#12731](https://github.com/rerun-io/rerun/pull/12731) + +## [0.31.3](https://github.com/rerun-io/rerun/compare/0.31.2...0.31.3) - 2026-04-13 + +### 🔎 Details + +#### 🪳 Bug fixes +- Fix plot view time range ui [3922bea](https://github.com/rerun-io/rerun/commit/3922bea85919bc3ad1570a74a21ae8580e36ea66) + +#### 📚 Docs +- Clearer behavior for `CoordinateFrame("")` [4b5c2f0](https://github.com/rerun-io/rerun/commit/4b5c2f084fbe015ba30a33534fbfa2e48cad8eab) + +#### 🖼 UI improvements +- Spacebar toggles play/pause, never enables following [#12722](https://github.com/rerun-io/rerun/pull/12722) (thanks [@wolfd](https://github.com/wolfd)!) 
+ +#### 📦 Dependencies +- Update datafusion to 52.5.0 [87af7a9](https://github.com/rerun-io/rerun/commit/87af7a93bb031330679e7ecdca956b65a668ffbb) + + +## [0.31.2](https://github.com/rerun-io/rerun/compare/0.31.1...0.31.2) - 2026-04-08 + +### 🔎 Details + +#### 🪳 Bug fixes +- Fix MCAP CLI decoder identifier list [24f2604](https://github.com/rerun-io/rerun/commit/24f26047da9433924d7a00d7039be18cbe19c9ca) +- Fix bug where shapes defined with UI units were scaled incorrectly [9a28f8e](https://github.com/rerun-io/rerun/commit/9a28f8e089d30680bc14ab59653ca8d7b2e5d308) +- Fix off-by-one bug in video stream cache [e4eddc9](https://github.com/rerun-io/rerun/commit/e4eddc9a1ed798c60ae696e7e605c5a80279db30) +- Fix range queries on 3D points in `SpatialView3D` [213957c](https://github.com/rerun-io/rerun/commit/213957c5cd2dbbb7acfffd894310d7ec58ef7f5e) + +#### 📚 Docs +- Fix typo in micro-batching documentation [#12716](https://github.com/rerun-io/rerun/pull/12716) (thanks [@wolfd](https://github.com/wolfd)!) 
+ +#### 🕸️ Web +- Add progress bar to rerun-js and handle incomplete wasm downloads [476a178](https://github.com/rerun-io/rerun/commit/476a1780a52cf99b600d51a184e16028a05f6cce) +- Add rerun-js login setting and default to hiding the login button [3203577](https://github.com/rerun-io/rerun/commit/320357798161a7663dea5777bec8d73431585239) + +#### 🤷‍ Other +- Stop creating the search index into `docs.rs` [6afa84b](https://github.com/rerun-io/rerun/commit/6afa84bfaf501cacb46cbdf5a63ea626473cd1a1) + +## [0.31.1](https://github.com/rerun-io/rerun/compare/0.31.0...0.31.1) - 2026-03-31 + +### 🔎 Details + +#### 🪳 Bug fixes +- Fix our python 3.10 support [2ee1f4f](https://github.com/rerun-io/rerun/commit/2ee1f4f8f8d3651e6eefcbb632e14ed4662531f8) -## 0.30.0 - (UNRELEASED) - plot any scalar & on-demand streaming + +## [0.31.0](https://github.com/rerun-io/rerun/compare/0.30.2...0.31.0) - 2026-03-30 - component mappings & prettier primitives + +🧳 Migration guide: https://rerun.io/docs/reference/migration/migration-0-31 + +### ✨ Overview & highlights + +#### Component mappings + + +https://github.com/user-attachments/assets/18954263-1b34-4819-869d-02fa8117d6a3 + +You can now map components more generally. Want to display your mesh as a point cloud? Just add a point cloud visualizer and select the vertex positions as the source. + +#### Prettier primitives + + +https://github.com/user-attachments/assets/4e523454-4b3c-492b-a2a4-463f0f17ec51 + +Our 3D primitives got a new default look! + +#### Performance improvements + +- Optimization for point clouds, videos shared among views, data ingestion, and recordings with many entities. 
+ + +### ⚠️ Breaking changes + +- MCAP "layers" renamed to "decoders" [cf0a800](https://github.com/rerun-io/rerun/commit/cf0a800770845d4a88c0d5671b246c45c1f3507d) +- `rr.Server` and `rr.CatalogClient`: `address` parameter/method renamed to `url`; `rerun server --address` is now `--host` [#12402](https://github.com/rerun-io/rerun/pull/12402) +- `Entry.update(name=…)` is deprecated in favor of `Entry.set_name(…)` [#12370](https://github.com/rerun-io/rerun/pull/12370) + +🧳 Migration guide: https://rerun.io/docs/reference/migration/migration-0-31 + +### 🔎 Details + +#### 🪵 Log API +- Redesign Lens `Op` API to be `Selector`-based [d962bfe](https://github.com/rerun-io/rerun/commit/d962bfe1b77ebd20ed4c619622505fdda98a43db) +- Add cull mode for front/back face culling on `Mesh3D` [be5a50f](https://github.com/rerun-io/rerun/commit/be5a50ff0e7c2bd92db47cf38d16d4c3ff960b21) +- Add `frame_prefix` to `UrdfTree` for multi-robot URDF setups [8e27391](https://github.com/rerun-io/rerun/commit/8e273910d1fe92c536d33e9876e85f866b6166e2) + +#### 🐍 Python API +- Improve `DynamicArchetype` docs with example on how to use builtin batch types [2052af5](https://github.com/rerun-io/rerun/commit/2052af5526f429dfb882452bac69046f09a7104a) +- Narrow typing for dynamic archetype [7b0bfaf](https://github.com/rerun-io/rerun/commit/7b0bfafcff27884b59546eada88872dad51b411e) +- Fix wrong variable in error message for extra args in log() [#12674](https://github.com/rerun-io/rerun/pull/12674) (thanks [@jashshah999](https://github.com/jashshah999)!) 
+- Allow re-registering the same blueprint to a dataset [fd03bf0](https://github.com/rerun-io/rerun/commit/fd03bf01e118c4c1245e4d08c2e41563cf707d29) +- Fix `using_index_value` not accepting pyarrow data of the correct types [62b8ac3](https://github.com/rerun-io/rerun/commit/62b8ac3fdec5abad48e9dba3ecbbbc6219acfbf7) +- Fix passing color columns with one element in the python sdk [e312d36](https://github.com/rerun-io/rerun/commit/e312d36e37a2e2c1c0e5426e9a77876d15c3f834) +- Entity filter: do not include all properties if a single property is included [5d3a25f](https://github.com/rerun-io/rerun/commit/5d3a25f3c3c24cb242ef1a241dcd925d6443de9c) +- Support binary stream [0a05422](https://github.com/rerun-io/rerun/commit/0a05422ed4b1caa6a43593555dd2b30f0bbfc6dd) +- Reuse precomputed timelines dict in send_columns instead of re-iterating indexes [#12673](https://github.com/rerun-io/rerun/pull/12673) (thanks [@jashshah999](https://github.com/jashshah999)!) +- Add description back to python package [6ab7971](https://github.com/rerun-io/rerun/commit/6ab7971d88444509e84a2ede36d3b3dff142fa92) +- Add `save()` to `Recording` [4ab863a](https://github.com/rerun-io/rerun/commit/4ab863a68a0225f443a91d8c79b49b2cd1583480) +- Add `compress()` and `as_pil_image()` to `DepthImage` with PNG compression [bde6870](https://github.com/rerun-io/rerun/commit/bde68700b3e0e58061b8a6332f23c64a703a1e0b) +- Introduce `Chunk` API to the Python SDK [32eb891](https://github.com/rerun-io/rerun/commit/32eb8912030e573450c1f4f0c76e08a7c105e141) +- Adds `rr.logout` to Python SDK [97af60e](https://github.com/rerun-io/rerun/commit/97af60e47d168fc57d1de866753df9e96020f326) +- Disable cloud vector & full-text-search for now [3da7180](https://github.com/rerun-io/rerun/commit/3da7180f138adaca596ab551c00c961ba2f5fff4) +- Add ContentFilter for cleaner filter_contents [8ce6d13](https://github.com/rerun-io/rerun/commit/8ce6d134ea789acc4073d9ed3913ee9a18a03e8e) +- Add cloud provider details to end point 
[c8dd0c8](https://github.com/rerun-io/rerun/commit/c8dd0c8978dc9ef6d898b27a79630f3f36d9c17e) +- Add component descriptor helper to Python archetype classes [8a2ef31](https://github.com/rerun-io/rerun/commit/8a2ef31bbd0979716c95d38a748e584a698db62f) + +#### 🦀 Rust API +- Rust `BlueprintActivation` default now matches python behavior [6d06892](https://github.com/rerun-io/rerun/commit/6d06892d43c1aac31ec3756afbd1bdeae4da2c19) +- Update MSRV to 1.92 [52b51ea](https://github.com/rerun-io/rerun/commit/52b51ea68e30ecef4f259b0a2f65edaa3159f23d) + +#### 🪳 Bug fixes +- Fixes recording staying open when user logs out [80ab340](https://github.com/rerun-io/rerun/commit/80ab340688ae045b7201c6b89b4179ac154e5ddf) +- Selection panel: show all values at the latest time stamp [474dbe8](https://github.com/rerun-io/rerun/commit/474dbe8a5694fd4ff031b78a15c89628ed374d9f) +- Gracefully handle Rrd Manifest failures [900edfd](https://github.com/rerun-io/rerun/commit/900edfdfc81dc182b110296b8e06f41001d4ac1e) +- Fix docs urls being loaded as data sources [80e3ce6](https://github.com/rerun-io/rerun/commit/80e3ce680cfafb80d60e863d84b9c0de94e5f52f) +- Fix: fix clicking names of color maps [92529ca](https://github.com/rerun-io/rerun/commit/92529ca9ba227e5e629176da1ecd8dd29423866f) +- Fix rare ui id conflict in list item content [0966696](https://github.com/rerun-io/rerun/commit/09666960eafc6a8b6c215643d0b28f503dbc1c05) +- Fix drag'n'drop issue on web [2d97010](https://github.com/rerun-io/rerun/commit/2d9701073e5f6f7945c0b4107a2664bb851ac44a) +- Handle `?url=rerun+http://…` in web viewer [0a47b41](https://github.com/rerun-io/rerun/commit/0a47b41dba92e4462f5ecfbd710213522a8e51a1) +- Fix weird tooltip sizes in streams view [0d98570](https://github.com/rerun-io/rerun/commit/0d985702eb03c7518ced87139184239270a88b00) +- Fix showing empty label boxes for various 2d & 3d visualizations [9c547ce](https://github.com/rerun-io/rerun/commit/9c547ce5d1f70815f77c0bab084cfa69dcfeb956) +- Fix bounding box 
calculation for GLB/GLTF & DAE meshes [8382f14](https://github.com/rerun-io/rerun/commit/8382f1492d14b1e47aa8e42b3c609952d9d15bc1) +- Fix eye camera being affected by previous recording of the same application/dataset [1b9a60f](https://github.com/rerun-io/rerun/commit/1b9a60fc99799cd91665af102c26ff344bd227c6) +- Ignore NaN/Inf values for bounding box calculations used in 3d eye camera operations [3e26be7](https://github.com/rerun-io/rerun/commit/3e26be7f35ec6bab974367ecfda02cc8b505e80d) +- Ensure that share modals are always on top [8d8eb40](https://github.com/rerun-io/rerun/commit/8d8eb407f37312a075bb564ce2d90e998f6c75ae) +- Add zoom in limit to camera, because zooming in too far broke the view [4534001](https://github.com/rerun-io/rerun/commit/45340013b495c48d1f242c4c5ef23bfb062795e2) +- Bump `lz4_flex` to prevent web viewer crashes [9355dd8](https://github.com/rerun-io/rerun/commit/9355dd8dc1c40926b9ae55216952b62df6490284) +- Improve handling of texture creation errors [ea4283e](https://github.com/rerun-io/rerun/commit/ea4283e4bf85b670e170513ed1c4f843bfbb514a) +- Fix prematurely resetting to Welcome screen even if stream finished properly [46d86eb](https://github.com/rerun-io/rerun/commit/46d86ebd4b17a1c544b8ab9a8572f48d0bd8dfd2) +- Fix flipping chunks between loaded/unloaded when hovering time panel [0e51fa6](https://github.com/rerun-io/rerun/commit/0e51fa6ea3fd2e4adb8c40bb4d1ac98adef44de6) +- Fix unset/reset not being greyed out for previously cleared out values [45f6ed2](https://github.com/rerun-io/rerun/commit/45f6ed2cb9430b2354002e164fd14066818c7861) +- Fix default blueprint not being activated for Redap segments [e66cb92](https://github.com/rerun-io/rerun/commit/e66cb928f8b1d12826b201f90c0db7b07f772639) +- Don't close active recording when opening settings [ea86ae9](https://github.com/rerun-io/rerun/commit/ea86ae9baad9a3d480b228bf0930e1bf0258be90) +- Fix some AV1 videos not playing in the WebViewer 
[c062098](https://github.com/rerun-io/rerun/commit/c062098388e9738463be7936b37d5e5b0e1454d5) +- Fix `sensor_msgs::PointCloud2` MCAP parser for empty point clouds [#12684](https://github.com/rerun-io/rerun/pull/12684) (thanks [@Woodii1998](https://github.com/Woodii1998)!) +- Stop time control following if set time is called [acba7b3](https://github.com/rerun-io/rerun/commit/acba7b3bd76e2ae9f589987e7b293138a2844559) +- Support LeRobot feature names on the `ListArray` path [30a86e1](https://github.com/rerun-io/rerun/commit/30a86e1daa8f3f9b953fd5b40585c5ab4296ea94) + +#### 🌁 Viewer improvements +- Support custom bool types in plots [888a9e8](https://github.com/rerun-io/rerun/commit/888a9e8f75fb63a8b51c1ff6f87b9957f3fe0b93) +- Add util to compute column of `Transform3D` updates for URDF joints [#12666](https://github.com/rerun-io/rerun/pull/12666) +- Unified color swatches [c020c80](https://github.com/rerun-io/rerun/commit/c020c807258773ce5e770a2fc26276cff53b1bf3) +- Make more visualizer errors point to specific components that are in an invalid state [19b2c79](https://github.com/rerun-io/rerun/commit/19b2c79aad92d96c41e71288c9cd0c1e169b16cc) +- Add context menu to visualizer pills on time series view [136a2d1](https://github.com/rerun-io/rerun/commit/136a2d1c8fadc3ec5fde72888c616d4022ad4ead) +- Make many more required components ui editable [2f791ad](https://github.com/rerun-io/rerun/commit/2f791adb8b5f370703178c6718d44317d7efd3a3) +- Limit number of plots only for non-builtin components and increase the limit [0d66ceb](https://github.com/rerun-io/rerun/commit/0d66ceb08f20df4d007934f234568c2002d82517) +- Show _all_ visualizable scalars on time series add-visualizer menu [90bf07b](https://github.com/rerun-io/rerun/commit/90bf07bfd76193e91716cbeb1084eb02fb994e72) +- Stop warning on synthetic `oneof` protobuf fields [975d3ba](https://github.com/rerun-io/rerun/commit/975d3baee9b878decb5ff54fc2c2e30b6222f99c) +- Allow saving image previews in the selection panel 
[6e09d66](https://github.com/rerun-io/rerun/commit/6e09d6655e812839c4d96948a3d4b6417b65b73b) +- Support plotting components that contain `FixedSizeListArray` [53d2864](https://github.com/rerun-io/rerun/commit/53d286417bf228afe9987e8855b20caa0c846c67) +- Add `!` operator to `Selector` to assert non-`null` values [a53c683](https://github.com/rerun-io/rerun/commit/a53c683bb70594bb0fa83ebbd68b29d882817a33) +- Allow all visualizers to be created for any datatype match [5d5c2f2](https://github.com/rerun-io/rerun/commit/5d5c2f2f52f2f275c9528846fd5863986576986b) +- Fix overrides in blueprints created by a native viewer not showing up on the web viewer and vice versa [74a30f2](https://github.com/rerun-io/rerun/commit/74a30f2f08cd9b21205886f3325a45da04d9e103) +- Displays WorkOS organization when logged in [784c7b5](https://github.com/rerun-io/rerun/commit/784c7b5df379db71e4f435262f76e0cc579d2582) +- Use full recording schema in manifest ingestion [6589388](https://github.com/rerun-io/rerun/commit/6589388d770ba08a2078e48114fdaf878e8b030a) +- Change default fill mode for primitives [9d54afd](https://github.com/rerun-io/rerun/commit/9d54afddd516aa6ca2e6768f4929bec31d049d67) +- Fixes logout + login into a different account [15d18e7](https://github.com/rerun-io/rerun/commit/15d18e7bd230a9ac7029ca278d940c20f2b0dd51) +- Fix spamming errors when trying to show static scalars in the time series view [f9ca495](https://github.com/rerun-io/rerun/commit/f9ca49536b1d852e9bba7037f0a358c50c83ee70) +- Ensure that URDF primitives are colored correctly [25ee3cb](https://github.com/rerun-io/rerun/commit/25ee3cb27e5a5db80d9551614ab7a76634bfbcce) +- Add 5% vertical margin to time series plot bounds [5beb61e](https://github.com/rerun-io/rerun/commit/5beb61e5313ea41bb8ed6ced62aa0eeb6e896aec) +- Ignore outliers for focusing on 2d & 3d point clouds [b367ae9](https://github.com/rerun-io/rerun/commit/b367ae90de2c569b3259748df2e3c5bd4a63be85) +- Limit amount of heuristic lineseries from arbitrary sources 
[2f6d88f](https://github.com/rerun-io/rerun/commit/2f6d88f6f223ff26b4fe53eb68e61c7b99093dd6) +- Adds cancel button to login flow [900887f](https://github.com/rerun-io/rerun/commit/900887fdcb866ffcd50ded1af41e517b0edd5fc8) +- Add `magnification_filter` component to all image archetypes & add bicubic filtering [2c1ccee](https://github.com/rerun-io/rerun/commit/2c1cceebcdfcedfcdabf819f636c14344d0f54c0) +- Truncate strings in syntax highlighted arrow ui to 100 chars [d06fc95](https://github.com/rerun-io/rerun/commit/d06fc956bd9d005eda3a8f0cfc3d13ea3f3cb03a) +- Show login information on token mismatches in viewer [710ccd9](https://github.com/rerun-io/rerun/commit/710ccd9bebba0a40f42f34afabceec926a53cc56) +- Allow cpu->gpu transfer buffer to shrink again [4977603](https://github.com/rerun-io/rerun/commit/4977603f043889429a9ee58e57e34ff8421e4696) +- Fixes login in Safari [750f65f](https://github.com/rerun-io/rerun/commit/750f65f3fb97fe5c66f3de79134323f65bc87e5c) +- Add support for subtasks in LeRobot loader [464ff56](https://github.com/rerun-io/rerun/commit/464ff567d995bdf34945e650026dbf6ccde52a25) + +#### 🗄️ OSS server +- Allow OSS server to register existing segments to new datasets [d0b8f78](https://github.com/rerun-io/rerun/commit/d0b8f780d01447a158b22c16a438acd509e98288) + +#### 🚀 Performance improvements +- Remove collapsing time gaps for performance reasons [eb0b67a](https://github.com/rerun-io/rerun/commit/eb0b67af333256c9eafebe86f42e56923c077955) +- Share video players between views, de-duplicating video decoding work between pinholes and 2d views [64f1003](https://github.com/rerun-io/rerun/commit/64f100332a1c971cf0d9f339dcfa796535cbfdae) +- Support progressive ingest of the rrd manifest [9a67723](https://github.com/rerun-io/rerun/commit/9a67723fe6ee4bbd2a73cb3d2402501008c7f7d2) +- Performance improvements for many views + many entities [8f297fe](https://github.com/rerun-io/rerun/commit/8f297fec7324feca12be5c3b8eae05cf6c74b84e) +- Speed up many-entities (Refactor 
view class store subscriber) [e24407f](https://github.com/rerun-io/rerun/commit/e24407f936bd0ade4313e832a0939484a958fc0d) +- Much faster 3D point clouds [cb62f8c](https://github.com/rerun-io/rerun/commit/cb62f8c1b421bc79f28d429bfe83018c8703e908) +- Add `ChunkStoreDiff::SchemaAddition` and use it for heuristics [822ac41](https://github.com/rerun-io/rerun/commit/822ac413a502a2e34e9af0462e242660eb3b0d33) + +#### 🧑‍🏫 Examples +- Add any scalar example [ffd1687](https://github.com/rerun-io/rerun/commit/ffd168732b991ffb39ee67049a5f62681bff7e87) + +#### 📚 Docs +- Add generalized example (snippet + doc page) for component mappings [41cb42f](https://github.com/rerun-io/rerun/commit/41cb42f2add056d206bc931a0ae8fa340bf43058) +- Corrected the docs example for the `DynamicArchetype` [64d466d](https://github.com/rerun-io/rerun/commit/64d466d8bd2694939fff2341936d3e70191aea1c) +- Adding snippet showing how to register a dataset as a subset of an existing dataset [09814c0](https://github.com/rerun-io/rerun/commit/09814c0a83423f9efd3913c9314171b8fc8e309c) + +#### 🖼 UI improvements +- Visibility control from scalar visualizer list [5f03f15](https://github.com/rerun-io/rerun/commit/5f03f15a03ed5d9ce2627d0f9ccee6f08ef55a05) +- Make arrow values expandable if they don't fit [b30522c](https://github.com/rerun-io/rerun/commit/b30522c7e3f3f4e4e904f80a1bdeb7fe04eb6c30) +- Add scrolling to column popup [a9a00ad](https://github.com/rerun-io/rerun/commit/a9a00ad58e15028aed1ca4adfc429a478f82ac11) +- Allow to switch between recordings in chunk store browser [d449525](https://github.com/rerun-io/rerun/commit/d44952577dd6ff44392021d3ca4ea42702043d59) +- Don't flicker videos to black when there's unloaded samples [dc8c31f](https://github.com/rerun-io/rerun/commit/dc8c31ffa05034ec31fa704d9f7fbecaeea8e053) +- Show tooltip even when hovering play head in the time series view [d8bd248](https://github.com/rerun-io/rerun/commit/d8bd248376bd7c1440071b5210c2f8a63f566877) +- Show number of columns and rows 
in the recording ui [29a88f9](https://github.com/rerun-io/rerun/commit/29a88f9cf4b9fa68806ca2c30151e488bd4f9c03) +- Collapse multi-line labels if there are too many on screen [2916462](https://github.com/rerun-io/rerun/commit/2916462609f769bd8fe6b1cd33d4f3d7e46e73f2) +- Fix slow resizing of table columns [299969c](https://github.com/rerun-io/rerun/commit/299969cddc89bcbecde2ea184ab0f47e6fb849ae) +- Default to only showing active timeline in dataframe and log views [4df6c62](https://github.com/rerun-io/rerun/commit/4df6c62c4cd5c2f3296df64778df3909fefc39a6) +- Refactor chunk browser UI [5b6017b](https://github.com/rerun-io/rerun/commit/5b6017b596fb5de1bebc74b1f062a2a6cc51f6d1) +- Add cmd-K as secondary shortcut for command palette [6d5d512](https://github.com/rerun-io/rerun/commit/6d5d5120bfb421407391dfe54dc996b6361938bb) +- Add fade to scroll areas to indicate that scrolling is possible [c0b8685](https://github.com/rerun-io/rerun/commit/c0b868559c25054185609583ee3aa1eced5f6247) +- Better time range deselect UX [c947940](https://github.com/rerun-io/rerun/commit/c947940acf79fa0b0df60de6bd49bd19fad96034) +- Better legend ui on plot views (time series & bar chart) [bc7393b](https://github.com/rerun-io/rerun/commit/bc7393b88bbb52ddc14290ce8f261aa49d7774c0) + +#### 🕸️ Web +- Enable WebGPU rendering on Safari (MacOS Tahoe+ only) [76bd562](https://github.com/rerun-io/rerun/commit/76bd562982cc15572c743cc818dfe07836dd2b9a) +- Update wasm-bindgen to 0.2.108 [7314d4d](https://github.com/rerun-io/rerun/commit/7314d4d82f679928bea5a4b1c9133a12fc18e6a8) +- Fix(web-viewer): clean up loader when startup is interrupted [#12696](https://github.com/rerun-io/rerun/pull/12696) (thanks [@Woodii1998](https://github.com/Woodii1998)!) 
+ +#### 🧢 MCAP +- Protobuf schema evolution and optional field support in Lenses [75a6dbc](https://github.com/rerun-io/rerun/commit/75a6dbcaca457dc557fc1821bbb2deed65043a56) +- Improve Lenses errors and debug output [47596e0](https://github.com/rerun-io/rerun/commit/47596e0436de132662ce54753f975020a25e9ced) +- Support also "sec" & "nsec" in `TimeSpecToNanos` [f7eb4cd](https://github.com/rerun-io/rerun/commit/f7eb4cd091970b7350977b8f127afa92b160ddd3) +- Add a list of available layers to mcap convert cli [bc46df7](https://github.com/rerun-io/rerun/commit/bc46df75e5e81915a2a1e4f62a5c5ce7dd03431a) +- Add `--timestamp-offset-ns` option to MCAP CLI [787e6cc](https://github.com/rerun-io/rerun/commit/787e6cc087e57a831f2d4a9e906636a1f46ebf4d) +- Decode MCAP metadata records into `__properties` [0b43178](https://github.com/rerun-io/rerun/commit/0b43178d2c6198cb3fd801d104dc53f2f434fd15) +- Split `re_arrow_combinators` into `re_lenses` and `re_lenses_core` [a9f4ca3](https://github.com/rerun-io/rerun/commit/a9f4ca3776e75a7280a769fefb5cf076f3de477a) +- Rename MCAP `Layer` to `Decoder` [cf0a800](https://github.com/rerun-io/rerun/commit/cf0a800770845d4a88c0d5671b246c45c1f3507d) +- Default to UNIX epoch timestamp timeline in MCAP decoders [bb6aee5](https://github.com/rerun-io/rerun/commit/bb6aee556ffa66e67ca9427c9da67aa17bbe6889) +- Support protobuf messages with `Map` and arbitrary `oneof` fields [5ba2817](https://github.com/rerun-io/rerun/commit/5ba2817aec62a42c8504cd4774d4f483d55c227d) +- Create static timeline for /tf_static ROS 2 MCAP channels [959f77f](https://github.com/rerun-io/rerun/commit/959f77f5408414c0bbd2936ddcc7fe5d6cfcbf4b) +- Ignore empty MCAP channels [f0c99c5](https://github.com/rerun-io/rerun/commit/f0c99c5b743cf7cf341ca4c3375ebbb43cc44ade) +- Ignore 'rosbag2' metadata field [357aab8](https://github.com/rerun-io/rerun/commit/357aab8c821f6d51c33a1d49afbcef92848b3f85) +- Load URDF from `/robot_description` ROS 2 string topics in MCAP 
[426fcfc](https://github.com/rerun-io/rerun/commit/426fcfc2d699a7e721ed5d36e8b44eadeb737921) +- Make `Selector` evaluation `ArrayRef`-based [35dff31](https://github.com/rerun-io/rerun/commit/35dff31f0aeb15cb2067fcdd807ac2a25e9faeb5) +- Remove lenses that produce static columns [95ba113](https://github.com/rerun-io/rerun/commit/95ba113387f7416830b0057a627cf63bf5e5f8e8) + +#### 📦 Dependencies +- Upgrade `jsonwebtoken` to 10.3 [10a42b6](https://github.com/rerun-io/rerun/commit/10a42b6e8021463e2e8829b0915ff7fc2105cf57) + +#### 🤷‍ Other +- Add `--new` flag to always spawn a new viewer even if another one is already using the default port [874d3a8](https://github.com/rerun-io/rerun/commit/874d3a844b16322b9609b5b1490966657a191aa6) +- Add `rerun download` to download full recording from server [ac95098](https://github.com/rerun-io/rerun/commit/ac9509840b9024ac1538270a7b82d19da649fa11) +- Fix `rerun rrd stats` reporting identical compressed/uncompressed sizes [5ac9604](https://github.com/rerun-io/rerun/commit/5ac960404ffb3919353870a28e2e1cdf8d8d53d4) + +## [0.30.2](https://github.com/rerun-io/rerun/compare/0.30.1...0.30.2) - 2026-03-11 + +### ✨ Overview + +Among many other things, this patch addresses a security advisory ([SNYK-RUST-JSONWEBTOKEN-15189005](https://security.snyk.io/vuln/SNYK-RUST-JSONWEBTOKEN-15189005)) and adds a [new example](https://rerun.io/examples/robotics/any_scalar)! 
+ +### 🔎 Details + +#### 🐍 Python API +- Expose executable_name and executable_path in Python spawn() [#12685](https://github.com/rerun-io/rerun/pull/12685) + +#### 🪳 Bug fixes +- Handle `?url=rerun+http://…` in web viewer [5f2d65d](https://github.com/rerun-io/rerun/commit/5f2d65da30c83e8de0d2129feaff57f9461c3806) +- Fix weird tooltip sizes in streams view [92403bb](https://github.com/rerun-io/rerun/commit/92403bbac8a9fbfed94753929a9cf0633088dd00) + +#### 🚀 Performance improvements +- Share video players between views, de-duplicating video decoding work between pinholes and 2d views [ed236da](https://github.com/rerun-io/rerun/commit/ed236dadfc72a824d6abd34afa76a39bac1c518c) + +#### 🧑‍🏫 Examples +- Add any scalar example [3f58058](https://github.com/rerun-io/rerun/commit/3f580585a08c301dc304051dc9ee6f769fd8e819) + +#### 🖼 UI improvements +- Show tooltip even when hovering play head in the time series view [e44fbf0](https://github.com/rerun-io/rerun/commit/e44fbf0336c01bf7454a8edfd8ffa32c02f5d11e) + +#### 🧢 MCAP +- Add a list of available layers to mcap convert cli [89f18dc](https://github.com/rerun-io/rerun/commit/89f18dc6fc2953cf24c5449127bffae9e4cd06d2) +- Decode MCAP metadata records into `__properties` [cc8f1c2](https://github.com/rerun-io/rerun/commit/cc8f1c23e21a863f805759decf41febd2655f756) + +#### 📦 Dependencies +- Upgrade `jsonwebtoken` to 10.3 [312c3b8](https://github.com/rerun-io/rerun/commit/312c3b88e63ebd96411442dfd6ba7c30d5026059) + +#### 🧢 MCAP +- MCAP "layers" have been renamed to "decoders". The CLI flag `-l`/`--layer` is now `-d`/`--decoder`. + +## [0.30.1](https://github.com/rerun-io/rerun/compare/0.30.0...0.30.1) - 2026-03-04 + +### ✨ Overview & highlights + +This patch on top of [0.30.0](https://github.com/rerun-io/rerun/releases/0.30.0) comes not only with a random assortment of bug fixes +but also some small improvements to the Viewer. + +Most notably it's now possible to inspect values that were logged on the same timestamp! 
+image + +### 🔎 Details + +#### 🐍 Python API +- Allow re-registering the same blueprint to a dataset [cafbec9](https://github.com/rerun-io/rerun/commit/cafbec9b874aedad35e48da14a709f5205571aab) +- Fix `using_index_value` not accepting pyarrow data of the correct types [c59df09](https://github.com/rerun-io/rerun/commit/c59df09001094f6b494039ec13ab6cd35f97438e) + +#### 🪳 Bug fixes +- Gracefully handle Rrd Manifest failures [f14e343](https://github.com/rerun-io/rerun/commit/f14e343fc09a481e76f8898d8fbfe987a4161b80) +- Fix docs urls being loaded as data sources [3eb98db](https://github.com/rerun-io/rerun/commit/3eb98dbbb4ade0478ba333e05f02fb7bb942e989) +- Fix clicking names of color maps [700b590](https://github.com/rerun-io/rerun/commit/700b590dca0f658b750197f9be5b9bc3495b9dfd) +- Fix rare ui id conflict in list item content [352f7c8](https://github.com/rerun-io/rerun/commit/352f7c8b5b659fa815d3bc436c54b6d28ca05396) +- Fix drag'n'drop issue on web [956ed91](https://github.com/rerun-io/rerun/commit/956ed9185dd952bad4fc64b9ec8bad9980fa3b45) + +#### 🌁 Viewer improvements +- Selection panel: show all values at the latest time stamp [36ff7ff](https://github.com/rerun-io/rerun/commit/36ff7ff8c6c6ab1e03696a35fb9a181885ea3073) +- Limit number of plots only for non-builtin components and increase the limit [f4fb62b](https://github.com/rerun-io/rerun/commit/f4fb62b5c87a587fc2709c583734980a5b830b36) +- Show _all_ visualizable scalars on time series add-visualizer menu [0da70a5](https://github.com/rerun-io/rerun/commit/0da70a54338e9dedc37a56a943fe4e297925718c) +- Stop warning on synthetic `oneof` protobuf fields [84ee94e](https://github.com/rerun-io/rerun/commit/84ee94eb68b30eb48cec3ccbbfca2884a3b8b2f3) + +#### 📚 Docs +- Corrected the docs example for the `DynamicArchetype` [32c37e8](https://github.com/rerun-io/rerun/commit/32c37e855b9ad2a99f38f6d63b456fc4843de179) + +#### 🖼 UI improvements +- Make arrow values expandable if they don't fit 
[4edd93e](https://github.com/rerun-io/rerun/commit/4edd93e30d2457d90a344cf107ca96e79a3ba083) +- Add scrolling to column popup [7dbd933](https://github.com/rerun-io/rerun/commit/7dbd9339c70c03426a4f8fe54b8499750a7bbe9a) + +#### 🧢 MCAP +- Support also "sec" & "nsec" in `TimeSpecToNanos` [31c9a43](https://github.com/rerun-io/rerun/commit/31c9a4388d8ec3dbcd2469ebdd26190ae5459a3b) + +## [0.30.1](https://github.com/rerun-io/rerun/compare/0.30.0...0.30.1) - 2026-03-04 + +### ✨ Overview & highlights + +This patch on top of [0.30.0](https://github.com/rerun-io/rerun/releases/0.30.0) comes not only with a random assortment of bug fixes +but also some small improvements to the Viewer. + +Most notably it's now possible to inspect values that were logged on the same timestamp! +image + +### 🔎 Details + +#### 🐍 Python API +- Allow re-registering the same blueprint to a dataset [cafbec9](https://github.com/rerun-io/rerun/commit/cafbec9b874aedad35e48da14a709f5205571aab) +- Fix `using_index_value` not accepting pyarrow data of the correct types [c59df09](https://github.com/rerun-io/rerun/commit/c59df09001094f6b494039ec13ab6cd35f97438e) + +#### 🪳 Bug fixes +- Gracefully handle Rrd Manifest failures [f14e343](https://github.com/rerun-io/rerun/commit/f14e343fc09a481e76f8898d8fbfe987a4161b80) +- Fix docs urls being loaded as data sources [3eb98db](https://github.com/rerun-io/rerun/commit/3eb98dbbb4ade0478ba333e05f02fb7bb942e989) +- Fix clicking names of color maps [700b590](https://github.com/rerun-io/rerun/commit/700b590dca0f658b750197f9be5b9bc3495b9dfd) +- Fix rare ui id conflict in list item content [352f7c8](https://github.com/rerun-io/rerun/commit/352f7c8b5b659fa815d3bc436c54b6d28ca05396) +- Fix drag'n'drop issue on web [956ed91](https://github.com/rerun-io/rerun/commit/956ed9185dd952bad4fc64b9ec8bad9980fa3b45) + +#### 🌁 Viewer improvements +- Selection panel: show all values at the latest time stamp 
[36ff7ff](https://github.com/rerun-io/rerun/commit/36ff7ff8c6c6ab1e03696a35fb9a181885ea3073) +- Limit number of plots only for non-builtin components and increase the limit [f4fb62b](https://github.com/rerun-io/rerun/commit/f4fb62b5c87a587fc2709c583734980a5b830b36) +- Show _all_ visualizable scalars on time series add-visualizer menu [0da70a5](https://github.com/rerun-io/rerun/commit/0da70a54338e9dedc37a56a943fe4e297925718c) +- Stop warning on synthetic `oneof` protobuf fields [84ee94e](https://github.com/rerun-io/rerun/commit/84ee94eb68b30eb48cec3ccbbfca2884a3b8b2f3) + +#### 📚 Docs +- Corrected the docs example for the `DynamicArchetype` [32c37e8](https://github.com/rerun-io/rerun/commit/32c37e855b9ad2a99f38f6d63b456fc4843de179) + +#### 🖼 UI improvements +- Make arrow values expandable if they don't fit [4edd93e](https://github.com/rerun-io/rerun/commit/4edd93e30d2457d90a344cf107ca96e79a3ba083) +- Add scrolling to column popup [7dbd933](https://github.com/rerun-io/rerun/commit/7dbd9339c70c03426a4f8fe54b8499750a7bbe9a) + +#### 🧢 MCAP +- Support also "sec" & "nsec" in `TimeSpecToNanos` [31c9a43](https://github.com/rerun-io/rerun/commit/31c9a4388d8ec3dbcd2469ebdd26190ae5459a3b) + +## [0.30.0](https://github.com/rerun-io/rerun/compare/0.29.2...0.30.0) - 2026-02-25 - plot any scalar & on-demand streaming + +🧳 Migration guide: https://rerun.io/docs/reference/migration/migration-0-30 ### ✨ Overview & highlights @@ -45,9 +417,9 @@ To quickly navigate to the desired visualizer, each time series view now shows a For more details please refer to our documentation: -- [Customize views](https://rerun.io/docs/concepts/visualization/customize-views?speculative-link) -- [Plot any scalar](https://rerun.io/docs/howto/visualization/plot-any-scalar?speculative-link) -- [Component mappings outside of plotting](https://rerun.io/docs/howto/visualizations/component-mappings?speculative-link), shown on the example of a colored point cloud +- [Customize 
views](https://rerun.io/docs/concepts/visualization/customize-views) +- [Plot any scalar](https://rerun.io/docs/howto/visualization/plot-any-scalar) +- [Component mappings outside of plotting](https://rerun.io/docs/howto/visualization/component-mappings), shown on the example of a colored point cloud And finally, thanks to a contribution from [@vfilter](https://github.com/vfilter), the series lines visualizer now also supports different interpolation modes to render staircase (or step) functions: @@ -131,7 +503,8 @@ For more details, see the [custom visualizer example](https://github.com/rerun-i - **CLI**: `.rrd` files are no longer tailed by default - **SDK**: `SeriesVisible` component type has been removed -🧳 Migration guide: https://rerun.io/docs/reference/migration/migration-0-30?speculative-link +🧳 Migration guide: https://rerun.io/docs/reference/migration/migration-0-30 + ### 🔎 Details @@ -163,15 +536,15 @@ For more details, see the [custom visualizer example](https://github.com/rerun-i - Error when querying an unknown index [74a27df](https://github.com/rerun-io/rerun/commit/74a27dfb1c1257d6edbabbf42f28afe8bb043d2b) - Rename `rerun-sdk[datafusion]` to `rerun-sdk[dataplatform]` and add pandas dependency [b82cd06](https://github.com/rerun-io/rerun/commit/b82cd060273a20ce2a5b99b3b591bce654182710) - Add (and document) time range and selection support to `segment_url` [b2e7eff](https://github.com/rerun-io/rerun/commit/b2e7eff71b53e75d3a6d34c1804773ccc483f648) -- Improve `DynamicArchetype` docs with example on how to use builtin batch types [ccdfe29](https://github.com/rerun-io/rerun/commit/ccdfe2924e3e4b73379a13c4e273ca9b22d6aae8) #### 🦀 Rust API - Basic Rust & Python blueprint API for component mappings [c6d7409](https://github.com/rerun-io/rerun/commit/c6d7409bcd94402e219b4d31c682fef52eb3b340) - Move URDF joint transform computation to Rust [4e10aea](https://github.com/rerun-io/rerun/commit/4e10aeac7fd4213f20337081ea4c738bdf69c1f6) - Allow for custom visualizer 
with custom shader that integrates into existing view + example [3bf7120](https://github.com/rerun-io/rerun/commit/3bf71204daed7b1d218f6fb145c20d87d0215e36) -- Rust `BlueprintActivation` default now matches python behavior [3f85747](https://github.com/rerun-io/rerun/commit/3f8574733c537038d6aee67c58e2b4967f1d21ea) +- Improve feature-gating in Rust SDK (removes datafusion & co from the SDK deps) [#12659](https://github.com/rerun-io/rerun/pull/12659) (thanks [@paulzhng](https://github.com/paulzhng)!) #### 🪳 Bug fixes +- Fix first-person camera having zero speed in zero-sized scenes [#12535](https://github.com/rerun-io/rerun/pull/12535) (thanks [@Shivam-Bhardwaj](https://github.com/Shivam-Bhardwaj)!) - Fix heuristic for `target_frame` in 3D views for scenes with pinholes & named frames [3c678cc](https://github.com/rerun-io/rerun/commit/3c678cccb0e0e64bbaa2f02c6d6b0ed270bd319d) - Fix `sensor_msgs::PointCloud2` MCAP parser for small pointclouds [6491b95](https://github.com/rerun-io/rerun/commit/6491b955fb3d4d31f14ef0a6ac0d41398c3d3cf4) - Bug fix: allow copying selected text [4094a91](https://github.com/rerun-io/rerun/commit/4094a918637ad60efb27677701e0ae4d86e1b1a6) @@ -184,10 +557,12 @@ For more details, see the [custom visualizer example](https://github.com/rerun-i - Recover from failing to load RrdManifest [ad214ca](https://github.com/rerun-io/rerun/commit/ad214ca5d40112fcc3e460acb21283647801b679) - Fix transparent annotation classes not leading to transparent segmentation image [55578c3](https://github.com/rerun-io/rerun/commit/55578c35e314d284dfb82b0089ff26e26b2191e4) - Make the "copy" button work for components in the time panel tree [8f788a9](https://github.com/rerun-io/rerun/commit/8f788a9d92cd37bdff6e84dcf344d743ab3bb433) +- Fix deadlock when loading LeRobot datasets [#12652](https://github.com/rerun-io/rerun/pull/12652) - Fix NV12/YUYV ROS2 images being incorrectly loaded as depth 
[04beb77](https://github.com/rerun-io/rerun/commit/04beb777f4a49e6f720b09207fb5873ae22dacc0) - Don't include redo-buffer when saving blueprints [85cc4f9](https://github.com/rerun-io/rerun/commit/85cc4f96e00fb5b9b4a68e6249917832c9c7abc3) - Address issue where `.dae` with multiple triangle groups is not rendered [83e96bf](https://github.com/rerun-io/rerun/commit/83e96bf585dc7e9f708af6952d506e8013959078) - Don't reset video player on keyframe boundary for AV1 [3bcd9b8](https://github.com/rerun-io/rerun/commit/3bcd9b87d02b1af80e3bfb7c4ad42d6483f0d274) +- Fix mono8/mono16 image channel classification [#12660](https://github.com/rerun-io/rerun/pull/12660) - Set AR for wasm development build on macOS [7945601](https://github.com/rerun-io/rerun/commit/7945601bb1b162fc09e79640419a0216c9d14e8e) - Fix interactions going through popups to the timepanel [1efc8c1](https://github.com/rerun-io/rerun/commit/1efc8c131d4f13c3238972c7bcf7957e2685833e) - Fix arrow key stepping on sequence timeline [1c6a71c](https://github.com/rerun-io/rerun/commit/1c6a71c77be2619a3c7abb6eb5c22a6fbbdbf2be) @@ -197,6 +572,7 @@ For more details, see the [custom visualizer example](https://github.com/rerun-i - Extract `Selectors` from (nested) `StructArray` fields [8319663](https://github.com/rerun-io/rerun/commit/8319663a38d538adb20b2785c107d3394a969dce) - Video player fetching missing chunks and less memory usage with big gops [2596f4e](https://github.com/rerun-io/rerun/commit/2596f4ed342ef5fa5363354c82699d53783f7792) - Stop offering to visualize static scalar data as time series (would previously emitting a visualization error) [cc8c82c](https://github.com/rerun-io/rerun/commit/cc8c82c53e5bf9c7aad62f279f24de3537e268de) +- Source selector for visualizer components [#12548](https://github.com/rerun-io/rerun/pull/12548) - Implement `Selector` when resolving component mappings [81879fd](https://github.com/rerun-io/rerun/commit/81879fd3f0cdbe3938865b4fa8ad2ba7b383ff24) - New (simpler) heuristic for spawning 
time series views [53fc1fe](https://github.com/rerun-io/rerun/commit/53fc1fe6e938b08d0a4e5dc9a4ac32a920d10d50) - Show at most 20 time series plots per entity, hide legend for more than 20 plots [b103ed5](https://github.com/rerun-io/rerun/commit/b103ed5dcc16a1384ac63bf75e5596b3b31d0d06) @@ -220,11 +596,12 @@ For more details, see the [custom visualizer example](https://github.com/rerun-i - Visualizers can be added right from a view's selection [2e0f4cf](https://github.com/rerun-io/rerun/commit/2e0f4cf7154cfcaac206e8274bd682741218e691) - Make columns reorderable by entity in the dataframe view [e1e439e](https://github.com/rerun-io/rerun/commit/e1e439e0ceb4604334d4e33f00e68a90613e99e2) - Allow loading arbitrary data loader files (mcaps, pngs, ...) via http urls [a02a22b](https://github.com/rerun-io/rerun/commit/a02a22b90a2c91b3c9b979c76b8ec83bd5b67477) +- Drop cached videos once all referencing episodes are loaded [#12653](https://github.com/rerun-io/rerun/pull/12653) - Add auto-scroll feature and time indicator to the dataframe view [514f1f3](https://github.com/rerun-io/rerun/commit/514f1f32de4e0e87ecd45ce624145f9625b185e5) +- Add `InterpolationMode` component for step function rendering [#12657](https://github.com/rerun-io/rerun/pull/12657) (thanks [@vfilter](https://github.com/vfilter)!) 
- Allow loading extensionless http urls via magic bytes detection [daf7a35](https://github.com/rerun-io/rerun/commit/daf7a35ebd5b25a3b53c7fda9d4b6c77dee0e705) - Fixes performance issue of too many time series plots [a74b382](https://github.com/rerun-io/rerun/commit/a74b382b9ce0b413f14d96d4a2f264e1f4b2abe8) - Support `(U)Int16` in time series plots [6bb58e4](https://github.com/rerun-io/rerun/commit/6bb58e489ba76c1744599e222180f9a358720933) -- Support custom bool types in plots [fe8d955](https://github.com/rerun-io/rerun/commit/fe8d955ab3053d5aa388455a474abf8bcb469dda) #### 🗄️ OSS server - Test handling of schema conflict and make OSS server compliant [c618910](https://github.com/rerun-io/rerun/commit/c6189106664d039a28312f1ac3007de99c979dc5) @@ -242,6 +619,8 @@ For more details, see the [custom visualizer example](https://github.com/rerun-i - Make handling of out of order video chunks much faster [f050460](https://github.com/rerun-io/rerun/commit/f050460c8b37c83beb0a0c33cf504d79e2aeb206) #### 🧑‍🏫 Examples +- Rerun to LeRobot export example [#12541](https://github.com/rerun-io/rerun/pull/12541) +- Add ROS TF example [#12603](https://github.com/rerun-io/rerun/pull/12603) - Add webpage example [e2fd7f6](https://github.com/rerun-io/rerun/commit/e2fd7f65fa0b264f68a61935aba33ef54ff20ab4) #### 📚 Docs @@ -252,7 +631,6 @@ For more details, see the [custom visualizer example](https://github.com/rerun-i - Add documentation for converting custom data to rrd using log/send_column [f8cf13c](https://github.com/rerun-io/rerun/commit/f8cf13c6686b0dd06e518ec0b8ebe24017bc75ab) - Update MCAP message support documentation [a77922a](https://github.com/rerun-io/rerun/commit/a77922a46338cc1037d5c97ef9680e9879c86f2f) - Add layer identifier "foxglove" to `rerun mcap convert` [1436027](https://github.com/rerun-io/rerun/commit/14360275483603361d0c05ce522046344b226fed) -- Add generalized example (snippet + doc page) for component mappings 
[82da40f](https://github.com/rerun-io/rerun/commit/82da40f5959d238b1f426f802e16be1ab1782810) #### 🖼 UI improvements - Show that other timelines have data on timeline loader [47bf28f](https://github.com/rerun-io/rerun/commit/47bf28f7377bf09230ddf38151db43c58cfdc3fa) @@ -279,10 +657,7 @@ For more details, see the [custom visualizer example](https://github.com/rerun-i - Add `rerun auth logout` [7b3ae54](https://github.com/rerun-io/rerun/commit/7b3ae543dc0de39cf382aefc435f420de99ef223) - `rerun rrd split` [9bde24f](https://github.com/rerun-io/rerun/commit/9bde24fb5860f507bec155da377f40c1b8b0359e) - Add `--follow` option to explicitly follow files and URLs [c34a84b](https://github.com/rerun-io/rerun/commit/c34a84b5382620033182bb41719d1e1de74f10c6) -- base changelog update [250bb52](https://github.com/rerun-io/rerun/commit/250bb5248b2178d02e656381af274e646ac6d239) -- remove speculative links [c1ddfd6](https://github.com/rerun-io/rerun/commit/c1ddfd6f676ca91b32f79c9e100aa2c8972fe502) -- Bump versions to 0.30.0-rc.1 [b7fa785](https://github.com/rerun-io/rerun/commit/b7fa78566ae4ca71775e3e521aa28f2465c72f8f) -- fill in breaking changes + migration guide link [1555e63](https://github.com/rerun-io/rerun/commit/1555e63aea63308f4bae6ae10dc6d620bd28b016) + ## [0.29.2](https://github.com/rerun-io/rerun/compare/0.29.1...0.29.2) - 2026-02-13 - Bug fixes and documentation update @@ -1318,7 +1693,7 @@ rr.log("video_stream", rr.VideoStream(codec=rr.VideoCodec.H264, sample=bytes(pac ``` For now, we only handle H.264, but support for more codecs is on the roadmap. -Learn more on the updated [video reference page](https://rerun.io/docs/reference/video). +Learn more on the updated [video reference page](https://rerun.io/docs/concepts/logging-and-ingestion/video). 
#### 😎 Light mode @@ -1964,7 +2339,7 @@ New help texts for all our views: #### APIs * 🔄 [Much easier partial updates of archetypes](https://rerun.io/docs/howto/logging/send-partial-updates) -* 📊 [Greatly improved ease of use of `send_columns`](https://rerun.io/docs/howto/logging/send-columns) +* 📊 [Greatly improved ease of use of `send_columns`](https://rerun.io/docs/howto/logging-and-ingestion/send-columns) * ⏱️ Python notebooks & JS can now control the timeline and panel states (see last section of [this notebook](https://github.com/rerun-io/rerun/blob/0.22.0/examples/python/notebook/cube.ipynb)) * 📝 Lots of [new snippets](https://github.com/rerun-io/rerun/blob/0.22.0/docs/snippets/INDEX.md) for demonstrating partial updates & custom data logging in Python/C++/Rust @@ -2294,14 +2669,14 @@ Read our 🧳 migration guide for more detailed information: https://rerun.io/do https://github.com/user-attachments/assets/553b6d88-143d-4cf9-a4bc-6b620534ab95 📖 Release blogpost: https://rerun.io/blog/maps -🧳 Migration guide: http://rerun.io/docs/reference/migration/migration-0-20 +🧳 Migration guide: https://rerun.io/docs/reference/migration/migration-0-20 ### ✨ Overview & highlights * 🗺️ There is now an map view! * 🎬 Native viewer now supports H.264 video if ffmpeg is installed. * 📽️ Videos now load a lot faster use less RAM. * 📂 Improvements to the existing `Open` (Viewer) & `log_file` (SDK) workflows, and addition of a new `Import` workflow. - * Blueprints can now easily be [re-used across different applications, recordings and SDKs](https://rerun.io/docs/concepts/blueprints) + * Blueprints can now easily be [re-used across different applications, recordings and SDKs](https://rerun.io/docs/concepts/visualization/blueprints) * The new `Import` feature allows you to drag-and-drop any data into an existing recording, directly in the viewer. * ☰ Dataframe queries are now streamed, reducing memory usage. 
* 💊 Add [capsule archetype](https://rerun.io/docs/reference/types/archetypes/capsules3d). @@ -2313,7 +2688,7 @@ https://github.com/user-attachments/assets/553b6d88-143d-4cf9-a4bc-6b620534ab95 * 🐍 Python 3.8 is being deprecated * 🔌 `connect` & `serve` got deprecated in favor of `connect_tcp` & `serve_web` * 🎨 In Python, lists of numbers without type information are now assumed to be packed integer color representations, unless the length is exactly 3 or 4 -🧳 Migration guide: http://rerun.io/docs/reference/migration/migration-0-20 +🧳 Migration guide: https://rerun.io/docs/reference/migration/migration-0-20 ### 🔎 Details @@ -2433,7 +2808,7 @@ This release fixes an error thrown when the web viewer is closed. 📖 Release blogpost: https://rerun.io/blog/dataframe -🧳 Migration guide: http://rerun.io/docs/reference/migration/migration-0-19 +🧳 Migration guide: https://rerun.io/docs/reference/migration/migration-0-19 ### ✨ Overview & highlights This release introduces two powerful features: a dataframe API (and view), as well as video support. @@ -2441,7 +2816,7 @@ This release introduces two powerful features: a dataframe API (and view), as we #### ☰ Dataframe API We now have an API for querying the contents of an .rrd file. This integrates with popular packages such as [Pandas](https://pandas.pydata.org), [Polars](https://pola.rs), and [DuckDB](https://duckdb.org). -You can read more in [the Dataframe API how-to guide](https://rerun.io/docs/howto/dataframe-api). +You can read more in [the Dataframe API how-to guide](https://rerun.io/docs/howto/query-and-transform/get-data-out). We have also added a matching dataframe view inside the Rerun Viewer. Read more [here](https://rerun.io/docs/reference/types/views/dataframe_view). @@ -2451,14 +2826,14 @@ Rerun now supports logging MP4 videos using the new [`AssetVideo`](https://rerun This can greatly reduce bandwidth and storage requirements. 
While the web viewer supports a variety of codecs, the native viewer supports only the AV1 codec for the moment, but we plan to support H.264 in the near future as well. -Read more about our video supports (and its limits) [in our video docs](https://rerun.io/docs/reference/video). +Read more about our video supports (and its limits) [in our video docs](https://rerun.io/docs/concepts/logging-and-ingestion/video). ### ⚠️ Breaking changes * 🗾 Blueprint files (.rbl) from previous Rerun versions will no longer load _automatically_ * 🐧 Linux: Rerun now requires glibc 2.17+ * 🦀 Rust: The minimum supported Rust version is now 1.79 -🧳 Migration guide: http://rerun.io/docs/reference/migration/migration-0-19 +🧳 Migration guide: https://rerun.io/docs/reference/migration/migration-0-19 ### 🔎 Details @@ -2577,8 +2952,8 @@ Read more about our video supports (and its limits) [in our video docs](https:// https://github.com/user-attachments/assets/95380a64-df05-4f85-b40a-0c6b8ec8d5cf -* 📖 Release blogpost: http://rerun.io/blog/column-chunks -* 🧳 Migration guide: http://rerun.io/docs/reference/migration/migration-0-18 +* 📖 Release blogpost: https://rerun.io/blog/column-chunks +* 🧳 Migration guide: https://rerun.io/docs/reference/migration/migration-0-18 ### ✨ Overview & highlights @@ -2729,7 +3104,7 @@ _All four tetrahedron meshes on this screen share the same vertices and are inst * `ImageEncoded` is deprecated and replaced with [`EncodedImage`](https://rerun.io/docs/reference/types/archetypes/encoded_image) (JPEG, PNG, …) and [`Image`](https://rerun.io/docs/reference/types/archetypes/image) (NV12, YUY2, …) * [`DepthImage`](https://rerun.io/docs/reference/types/archetypes/depth_image) and [`SegmentationImage`](https://rerun.io/docs/reference/types/archetypes/segmentation_image) are no longer encoded as a tensors, and expects its shape in `[width, height]` order -🧳 Migration guide: http://rerun.io/docs/reference/migration/migration-0-18 +🧳 Migration guide: 
https://rerun.io/docs/reference/migration/migration-0-18 ### 🔎 Details @@ -2895,7 +3270,7 @@ https://github.com/rerun-io/rerun/assets/49431240/1c75b816-7e3e-4882-9ee6-ba124c 📖 Release blogpost: https://rerun.io/blog/blueprint-overrides -🧳 Migration guide: http://rerun.io/docs/reference/migration/migration-0-17 +🧳 Migration guide: https://rerun.io/docs/reference/migration/migration-0-17 ### ✨ Overview & highlights @@ -2917,7 +3292,7 @@ https://github.com/rerun-io/rerun/assets/49431240/1c75b816-7e3e-4882-9ee6-ba124c * [Vista driving world model](https://rerun.io/examples/generative-vision/vista) * [Stereo Vision SLAM](https://rerun.io/examples/3d-reconstruction/stereo_vision_slam) * [Neural field notebook](https://rerun.io/examples/integrations/notebook_neural_field_2d) -* 🛠️ Improved the logging API with many new and updated archetypes and components (see [migration guide](http://rerun.io/docs/reference/migration/migration-0-17)) +* 🛠️ Improved the logging API with many new and updated archetypes and components (see [migration guide](https://rerun.io/docs/reference/migration/migration-0-17)) * 🖼️ `TensorView` is now fully configurable from blueprint code * 🎛️ Revamped selection panel UI * 🚚 Much work is being done under-the-hood to migrate our data-store to "chunks" (aka units of batched data). More on this in the next release! @@ -2928,7 +3303,7 @@ https://github.com/rerun-io/rerun/assets/49431240/1c75b816-7e3e-4882-9ee6-ba124c * `HalfSizes3D` has been renamed to [`HalfSize3D`](https://rerun.io/docs/reference/types/components/half_size3d) * `.rrd` files from older versions won't load in Rerun 0.17 -🧳 Migration guide: http://rerun.io/docs/reference/migration/migration-0-17 +🧳 Migration guide: https://rerun.io/docs/reference/migration/migration-0-17 ### 🔎 Details @@ -3073,19 +3448,19 @@ https://github.com/rerun-io/rerun/assets/3312232/475468bd-e012-4837-b2b4-b47fa97 * 🟦 Customize views in code: We started exposing some view properties in the blueprint! 
* 📋 Included are: * Visible time ranges - * check [this new how-to guide](https://www.rerun.io/docs/howto/fixed-window-plot) & example that demonstrates this with plots + * check [this new how-to guide](https://rerun.io/docs/howto/fixed-window-plot) & example that demonstrates this with plots * Time Series legend & y-axis configuration * 2D & 3D View background color * 2D View bounds - * 📚 learn more on the [new view blueprint doc pages](https://www.rerun.io/docs/reference/types/views) + * 📚 learn more on the [new view blueprint doc pages](https://rerun.io/docs/reference/types/views) * 🚀 …more to come in the future! * 🕰️ Deprecated `timeless` in favor of new `static` logging * Except for the name change, they behave similarly in _most_ use cases. Unlike with timeless, static data… * …can't be mixed with non-static data on the same component. * …will override previous static data and not keep old data in memory. - * Check out our [migration guide](https://www.rerun.io/docs/reference/migration/migration-0-16). + * Check out our [migration guide](https://rerun.io/docs/reference/migration/migration-0-16). * 🖼️ 2D View's pan & zoom got redone, it's now a free canvas without any scroll bar -* 🤖 Added [an example](https://www.rerun.io/examples/robotics/ros2_bridge) to use Rerun with ROS2. +* 🤖 Added [an example](https://rerun.io/examples/robotics/ros2_bridge) to use Rerun with ROS2. As always there's a lot going on under the hood: * 🚚 We streamlined our development processes & CI and examples. @@ -3241,7 +3616,7 @@ As always there's a lot going on under the hood: ## [0.15.0](https://github.com/rerun-io/rerun/compare/0.14.1...0.15.0) - Blueprints from Python - 2024-04-09 -The biggest news is the ability to create a _blueprint_ via the Python logging API. Check out our [associated blog post](https://www.rerun.io/blog/blueprint-part-one) for more information. +The biggest news is the ability to create a _blueprint_ via the Python logging API. 
Check out our [associated blog post](https://rerun.io/blog/blueprint-part-one) for more information. ```py import rerun.blueprint as rrb @@ -3286,15 +3661,15 @@ Blueprints are currently only supported in the Python API, with C++ and Rust sup ### ✨ Overview & highlights -- 🟦 Configure the layout and content of space views from Python [(docs)](https://www.rerun.io/docs/howto/configure-viewer-through-code) -- 🖧 More powerful and flexible data loaders [(docs)](https://www.rerun.io/docs/reference/data-loaders) +- 🟦 Configure the layout and content of space views from Python [(docs)](https://rerun.io/docs/howto/configure-viewer-through-code) +- 🖧 More powerful and flexible data loaders [(docs)](https://rerun.io/docs/reference/data-loaders) - 🖵 Improved UI for managing recordings and applications - 💾 Save and load blueprint files in the viewer - 🎨 Configurable background color for 3D Space Views [#5443](https://github.com/rerun-io/rerun/pull/5443) - 💪 Linux ARM64 support [#5489](https://github.com/rerun-io/rerun/pull/5489) [#5503](https://github.com/rerun-io/rerun/pull/5503) [#5511](https://github.com/rerun-io/rerun/pull/5511) - 🖼️ Show examples in the welcome page - 🖱️ Improve context-menu when right-clicking items in the blueprint panel and streams tree -- ❌ Remove `InstanceKey` from our logging APIs [#5395](https://github.com/rerun-io/rerun/pull/5395) ([migration guide](https://www.rerun.io/docs/reference/migration/migration-0-15)) +- ❌ Remove `InstanceKey` from our logging APIs [#5395](https://github.com/rerun-io/rerun/pull/5395) ([migration guide](https://rerun.io/docs/reference/migration/migration-0-15)) - ❌ Remove groups from blueprints panel [#5326](https://github.com/rerun-io/rerun/pull/5326) ### 🔎 Details @@ -3538,12 +3913,12 @@ We're still ramping up for programmable blueprints (soon!), but meanwhile enjoy ### ✨ Overview & highlights This release focuses on scalar time series -- both from a performance and UI perspectives. 
-Check out our [associated blog post](https://www.rerun.io/blog/fast-plots) for more information. +Check out our [associated blog post](https://rerun.io/blog/fast-plots) for more information. - 📈 Rerun can now visualize many time series in the kHz range in real-time: - The new query cache optimizes data access, improving query performance by 20-50x - Sub-pixel aggregation prevents unnecessary overdraw when rendering plots, improving rendering time by 30-120x - - [Points](https://www.rerun.io/docs/reference/types/archetypes/points3d), [lines](https://www.rerun.io/docs/reference/types/archetypes/line_strips3d), [arrows](https://www.rerun.io/docs/reference/types/archetypes/arrows3d) and [boxes](https://www.rerun.io/docs/reference/types/archetypes/boxes3d) all benefit from query caching too to a lesser extent, yielding 2-5x performance improvements + - [Points](https://rerun.io/docs/reference/types/archetypes/points3d), [lines](https://rerun.io/docs/reference/types/archetypes/line_strips3d), [arrows](https://rerun.io/docs/reference/types/archetypes/arrows3d) and [boxes](https://rerun.io/docs/reference/types/archetypes/boxes3d) all benefit from query caching too to a lesser extent, yielding 2-5x performance improvements - 🖼 UI overrides: - The new `Scalar`, `SeriesLine` & `SeriesPoint` archetypes allow for customizing plots both at logging and visualization time @@ -3570,7 +3945,7 @@ Check out our [associated blog post](https://www.rerun.io/blog/fast-plots) for m As well as a lot of miscellaneous bug fixes and usability improvements: see details below. -Check out our [migration guide](https://www.rerun.io/docs/reference/migration/migration-0-13). +Check out our [migration guide](https://rerun.io/docs/reference/migration/migration-0-13). ### 🔎 Details @@ -3742,12 +4117,12 @@ Check out our [migration guide](https://www.rerun.io/docs/reference/migration/mi

### ✨ Overview & highlights -- 🌁 The Rerun Viewer now supports a plugin system for creating [arbitrary external data loaders](https://www.rerun.io/docs/reference/data-loaders/overview). +- 🌁 The Rerun Viewer now supports a plugin system for creating [arbitrary external data loaders](https://rerun.io/docs/concepts/logging-and-ingestion/data-loaders/overview). - 🕸️ More built-in examples are now available in the viewer. - 🐍 The Python SDK now works with Python-3.12. - 📘 Blueprint containers can now be selected and modified. - 🚀 In the native viewer, space views are now evaluated in parallel for improved performance. -- 🧑‍🏫 Support and guide for [sharing a recording across multiple processes](https://www.rerun.io/docs/howto/shared-recordings). +- 🧑‍🏫 Support and guide for [sharing a recording across multiple processes](https://rerun.io/docs/howto/shared-recordings). - 📁 Entity-paths allowed characters and escaping are now more file-like [#4476](https://github.com/rerun-io/rerun/pull/4476): - There is no need for " quotes around path parts, instead we now use \ to escape special characters. - You need to escape any character that isn't alphabetical, numeric, ., -, or _. @@ -3986,13 +4361,13 @@ This is a small release primarily to tie up some loose ends for our C++ SDK. ## [0.10.0](https://github.com/rerun-io/rerun/compare/0.9.1...0.10.0) - C++ SDK - 2023-10-30 -[Rerun](https://www.rerun.io/) is an easy-to-use visualization toolbox for computer vision and robotics. +[Rerun](https://rerun.io/) is an easy-to-use visualization toolbox for computer vision and robotics. * Python: `pip install rerun-sdk` * Rust: `cargo add rerun` and `cargo install rerun-cli --locked` * Online demo: -Release blog post: +Release blog post: ### ✨ Overview & highlights * The C++ SDK is finally here! 
@@ -4074,7 +4449,7 @@ Release blog post: ## [0.9.1](https://github.com/rerun-io/rerun/compare/0.9.0...0.9.1) - Bug fixes and performance improvements - 2023-10-12 -[Rerun](https://www.rerun.io/) is an easy-to-use visualization toolbox for computer vision and robotics. +[Rerun](https://rerun.io/) is an easy-to-use visualization toolbox for computer vision and robotics. * Python: `pip install rerun-sdk` * Rust: `cargo add rerun` and `cargo install rerun-cli` @@ -4135,7 +4510,7 @@ Release blog post: ## [0.9.0](https://github.com/rerun-io/rerun/compare/0.8.2...0.9.0) - New logging API - 2023-10-05 -[Rerun](https://www.rerun.io/) is an easy-to-use visualization toolbox for computer vision and robotics. +[Rerun](https://rerun.io/) is an easy-to-use visualization toolbox for computer vision and robotics. * Python: `pip install rerun-sdk` * Rust: `cargo add rerun` and `cargo install rerun-cli` @@ -4148,7 +4523,7 @@ This API is code-generated from a common definition, meaning the Python and Rust This will let us more easily extend and improve the API going forward. It is also the basis for our C++ API, which is coming in Rerun 0.10.0. -Read [the migration guide](https://www.rerun.io/docs/reference/migration-0-9) for details! +Read [the migration guide](https://rerun.io/docs/reference/migration-0-9) for details! 0.9.0 Welcome Screen @@ -4365,7 +4740,7 @@ Other highlights: ## [0.8.0](https://github.com/rerun-io/rerun/compare/0.7.0...0.8.0) - Infrastructure investments and more transform improvements - 2023-07-27 -[Rerun](https://www.rerun.io/) is an easy-to-use visualization toolbox for computer vision and robotics. +[Rerun](https://rerun.io/) is an easy-to-use visualization toolbox for computer vision and robotics. * Python: `pip install rerun-sdk` * Rust: `cargo add rerun` and `cargo install rerun-cli` @@ -4381,7 +4756,7 @@ Other highlights: - The visualizer can now show coordinate arrows for all affine transforms within the view. 
[#2577](https://github.com/rerun-io/rerun/pull/2577) - Linestrips and oriented bounding boxes can now be logged via batch APIs in python. - See: `log_linestrips_2d`, `log_linestrips_3d`, [#2822](https://github.com/rerun-io/rerun/pull/2822) and `log_obbs` [#2823](https://github.com/rerun-io/rerun/pull/2823) - - Rust users that build their own Viewer applications can now add fully custom Space Views. Find more information [here](https://www.rerun.io/docs/howto/extend/extend-ui#custom-space-views-classes). + - Rust users that build their own Viewer applications can now add fully custom Space Views. Find more information [here](https://rerun.io/docs/howto/extend/extend-ui#custom-space-views-classes). - New optional `flush_timeout` specifies how long Rerun will wait if a TCP stream is disconnected during a flush. [#2821](https://github.com/rerun-io/rerun/pull/2821) - In Rust, `RecordingStream::connect` now requires `flush_timeout` specified as an `Option`. - To keep default behavior, this can be specified using the `rerun::default_flush_time()` helper. @@ -4392,7 +4767,7 @@ Other highlights: - This can be used as an alternative to the previous `MsgSender::with_time` APIs. - The Rerun SDK now defaults to 8ms long microbatches instead of 50ms. This makes the default behavior more suitable for use-cases like real-time video feeds. [#2220](https://github.com/rerun-io/rerun/pull/2220) - - Check out [the microbatching docs](https://www.rerun.io/docs/reference/sdk/micro-batching) for more information + - Check out [the microbatching docs](https://rerun.io/docs/reference/sdk/micro-batching) for more information on fine-tuning the micro-batching behavior. - The web viewer now incremental loads `.rrd` files when streaming over HTTP. [#2412](https://github.com/rerun-io/rerun/pull/2412) @@ -4723,7 +5098,7 @@ here's a smaller release packed with useful improvements 🎉 - ⚠️ BREAKING: You must now call `rr.init` if you want logging to work. 
- ⚠️ BREAKING: `set_enabled` has been removed. In order to disable logging at runtime, call `set_global_data_recording(None)`. - See also [the doc section on this topic](https://www.rerun.io/docs/reference/sdk/logging-controls#dynamically-turn-logging-onoff). + See also [the doc section on this topic](https://rerun.io/docs/reference/sdk/logging-controls#dynamically-turn-logging-onoff). - `log_mesh_file`: accept either path or bytes [#2098](https://github.com/rerun-io/rerun/pull/2098) - Add `draw_order` to 2D primitives [#2138](https://github.com/rerun-io/rerun/pull/2138) - Add `rr.version()` [#2084](https://github.com/rerun-io/rerun/pull/2084) @@ -4736,7 +5111,7 @@ here's a smaller release packed with useful improvements 🎉 #### 🦀 Rust SDK - ⚠️ BREAKING: `set_enabled` has been removed. In order to disable logging at runtime, create a no-op recording via `RecordingStream::disabled()`. - See also [the doc section on this topic](https://www.rerun.io/docs/reference/sdk/logging-controls#dynamically-turn-logging-onoff). + See also [the doc section on this topic](https://rerun.io/docs/reference/sdk/logging-controls#dynamically-turn-logging-onoff). - ⚠️ BREAKING: `Session` has been replaced by `RecordingStream` [#1983](https://github.com/rerun-io/rerun/pull/1983) - ⚠️ BREAKING: `native_viewer` is now an opt-in feature of the `rerun` library [#2064](https://github.com/rerun-io/rerun/pull/2064) - Rust SDK: bring back support for implicit splats [#2059](https://github.com/rerun-io/rerun/pull/2059) @@ -4926,15 +5301,15 @@ https://user-images.githubusercontent.com/2910679/233411525-1ceb2790-7f18-400a-b This new release adds MVP support for embedding Rerun in Jupyter notebooks, and brings significant performance improvements across all layers of the stack. 
* Rerun can now be embedded in Jupyter notebooks - * Tested with Jupyter Notebook Classic, Jupyter Lab, VSCode & Google Colab; checkout our [How-to guide](https://www.rerun.io/docs/howto/notebook) + * Tested with Jupyter Notebook Classic, Jupyter Lab, VSCode & Google Colab; checkout our [How-to guide](https://rerun.io/docs/howto/notebook) * Try it out live on [Google Colab](https://colab.research.google.com/drive/1R9I7s4o6wydQC_zkybqaSRFTtlEaked_?usp=sharing) * All colormapping tasks are now done directly on the GPU * This yields _very significant_ performance improvements for colormapping heavy workload (e.g. segmentation) - * Try it out in our new [`segment_anything` example](https://www.rerun.io/examples/video-image/segment_anything_model) that shows off the latest models from Meta AI + * Try it out in our new [`segment_anything` example](https://rerun.io/examples/video-image/segment_anything_model) that shows off the latest models from Meta AI * GPU picking & hovering now works with all of our primitives, including meshes & depth clouds * This fixes all the shortcomings of the previous CPU-based system * Rerun's automatic backprojection of depth textures ("depth clouds") is now feature complete - * Try it out in our updated [`nyud` example](https://www.rerun.io/examples/robotics/rgbd) + * Try it out in our updated [`nyud` example](https://rerun.io/examples/robotics/rgbd) * Our datastore has been completely revamped to more closely match our latest data model * This yields _very significant_ performance improvements for workloads with many events * Checkout [this post](https://github.com/rerun-io/rerun/issues/1619#issuecomment-1511046649) for a detailed walkthrough of the changes @@ -5302,7 +5677,7 @@ Other highlights: Notably, we previously used a version of the `time` crate which had a security issue (CVE-2020-26235), thanks @mpizenberg for helping out! 
* Print more information & troubleshooting info on crash -Meanwhile, we did a bunch of improvements to our manual. If you had trouble running Rerun so far, check our updated [troubleshooting](https://www.rerun.io/docs/getting-started/troubleshooting) page (and as always, please [open an issue](https://github.com/rerun-io/rerun/issues/new/choose) if something doesn't work). +Meanwhile, we did a bunch of improvements to our manual. If you had trouble running Rerun so far, check our updated [troubleshooting](https://rerun.io/docs/overview/installing-rerun/troubleshooting) page (and as always, please [open an issue](https://github.com/rerun-io/rerun/issues/new/choose) if something doesn't work). ⚠️ BREAKING: old `.rrd` files no longer load ⚠️ diff --git a/CLAUDE.md b/CLAUDE.md index c4694ca0f5d0..74d24d762a82 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -47,7 +47,7 @@ We use `pixi` for task management and dependency installation. Check `pixi.toml` ### Type definition flow ``` -.fbs files (definitions/) → pixi run codegen → Generated code (Rust/Python/C++) +.fbs files (definitions/) → pixi run codegen → Generated code (Rust/Python/C++) + docs (docs/content/reference/types/) ``` - Type definitions live in `crates/store/re_sdk_types/definitions/rerun/` @@ -152,6 +152,29 @@ The `uv` wrapper script unsets `CONDA_PREFIX` to ensure isolation from pixi's en - **Arrow Native**: Data is stored, transmitted, and queried as Apache Arrow arrays - **Multi-language**: Changes to .fbs files affect Rust, Python, and C++ simultaneously +## Python docstring formatting + +Python API docs are built with **MkDocs + mkdocstrings** (NOT Sphinx). Never use reStructuredText (rST) syntax in Python docstrings or documentation. Use markdown instead: + +- **Cross-references:** Use ``[`ClassName`][]`` (mkdocstrings syntax), NOT `` :class:`ClassName` `` / `:func:` / `:meth:` (rST roles) +- **Warnings/notes:** Use MkDocs admonitions (`!!! warning` with indented body), NOT `.. 
warning::` (rST directives) +- **Deprecation notices:** Use the `@deprecated` decorator (mkdocstrings renders it automatically). Do NOT duplicate in the docstring with `.. deprecated::` or `**Deprecated:**` +- **Code blocks:** Use markdown fenced blocks (`` ``` ``), NOT `.. code-block::` +- **Parameter docs:** Use numpy-style sections (`Parameters`, `Returns` with `----------`), which is what the codebase already uses + +## Documentation system + +See [`docs/README.md`](docs/README.md) for the full documentation architecture. + +The docs span multiple sites: the main docs at `rerun.io/docs` (built from `docs/content/`), plus API reference sites for Python (MkDocs), C++ (Doxygen), and JS (TypeDoc) at `ref.rerun.io/docs/{python,cpp,js}/`. + +Key things to know: +- **`docs/content/reference/types/`** is auto-generated by `pixi run codegen` from `.fbs` files - do not edit directly +- **`docs/content/reference/cli.md`** is auto-generated by `pixi run man` - do not edit directly +- **Code snippets** live in `docs/snippets/all/` with implementations in Python, Rust, and C++ +- `pixi run py-docs-serve` previews Python API docs locally +- `pixi run -e cpp cpp-docs` builds C++ docs + ## Development references - [`ARCHITECTURE.md`](ARCHITECTURE.md) - Detailed architecture documentation @@ -159,4 +182,5 @@ The `uv` wrapper script unsets `CONDA_PREFIX` to ensure isolation from pixi's en - [`CODE_STYLE.md`](CODE_STYLE.md) - Code style guidelines - [`CONTRIBUTING.md`](CONTRIBUTING.md) - Contribution guidelines - [`DESIGN.md`](DESIGN.md) - Guidelines for UI design, covering GUI, CLI, documentation, log messages, etc +- [`docs/README.md`](docs/README.md) - Documentation system (sites, builds, deployment) - [`rerun_py/README.md`](rerun_py/README.md) - Python SDK specific instructions diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 71141da8aa6c..5dfee1e0f4ee 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,5 +1,5 @@ # Contributing to Rerun -This is written for anyone who 
wants to contribute to the Rerun repository. +This guide is for anyone who wants to contribute to the Rerun repository. ## See also @@ -11,9 +11,9 @@ This is written for anyone who wants to contribute to the Rerun repository. * [`RELEASES.md`](RELEASES.md) ## What to contribute -* **Examples**: We welcome any examples you would like to add. Follow the pattern of the existing examples in the [`examples/`](examples) folder. -* Report bugs and features requests at . -* You can also look at our [`good first issue` tag](https://github.com/rerun-io/rerun/labels/good%20first%20issue). +* **Examples**: We welcome any examples you would like to add. Follow the pattern of existing examples in the [`examples/`](examples) folder. +* Report bugs and feature requests at . +* Look at our [`good first issue` tag](https://github.com/rerun-io/rerun/labels/good%20first%20issue). * We track things we would like implemented in 3rd party crates [here](https://github.com/rerun-io/opensource/issues/1). Note that maintainers do not have infinite time, and reviews take a lot of it. @@ -29,121 +29,101 @@ You can discuss these changes by: * Pinging one of the Rerun maintainers on our [Discord](https://discord.gg/PXtCgFBSmH) > [!NOTE] -> PRs containing large changes which were not discussed previously may be closed without comment. +> PRs containing large undiscussed changes may be closed without comment. ## Pull requests We use [Trunk Based Development](https://trunkbaseddevelopment.com/), which means we encourage small, short-lived branches. -Open draft PR:s to get some early feedback on your work until you feel it is ready for a proper review. -Do not make PR:s from your own `main` branch, as that makes it difficult for reviewers to add their own fixes. -Add any improvements to the branch as new commits instead of rebasing to make it easier for reviewers to follow the progress (add images if possible!). +* Open draft PRs early to get feedback before a full review. 
+* Don't PR from your own `main` branch — it makes it hard for reviewers to add fixes. +* Add improvements as new commits rather than rebasing, so reviewers can follow progress (add images if possible!). +* All PRs are merged with [`Squash and Merge`](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/incorporating-changes-from-a-pull-request/about-pull-request-merges#squash-and-merge-your-commits), so you don't need a clean commit history on feature branches. Prefer new commits over rebasing — force-pushing discourages collaboration. -All PR:s are merged with [`Squash and Merge`](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/incorporating-changes-from-a-pull-request/about-pull-request-merges#squash-and-merge-your-commits), meaning they all get squashed to just one commit on the `main` branch. This means you don't need to keep a clean commit history on your feature branches. In fact, it is preferable to add new commits to a branch rather than rebasing or squashing. For one, it makes it easier to track progress on a branch, but rebasing and force-pushing also discourages collaboration on a branch. +Our CI will [record binary sizes](https://build.rerun.io/graphs/sizes.html) and run [benchmarks](https://build.rerun.io/graphs/crates.html) on each merged PR. -Our CI will [record various binary sizes](https://build.rerun.io/graphs/sizes.html) and run [some benchmarks](https://build.rerun.io/graphs/crates.html) on each merged PR. - -Pull requests from external contributors require approval for CI runs. This can be done manually, by clicking the `Approve and run` button: +Pull requests from external contributors require approval for CI runs. 
Click the `Approve and run` button: ![Image showing the approve and run button](https://github.com/rerun-io/rerun/assets/1665677/ead5c04f-df02-4f20-9093-37cfce097b44) -Members of the `rerun-io` organization and collaborators in the `rerun-io/rerun` repository may enable auto-approval of workflow runs for a single PR by commenting with `@rerun-bot approve`: +Members of the `rerun-io` organization can enable auto-approval for a single PR by commenting with `@rerun-bot approve`: ![PR comment with the text `@rerun-bot approve`](https://github.com/rerun-io/rerun/assets/1665677/b5f07f3f-ea95-44a4-8eb7-f07c905f96c3) ### Labeling of PRs & changelog generation -Members of the `rerun-io` organization _have_ to add label PRs since they're part of how we generate [changelogs](https://github.com/rerun-io/rerun/blob/main/CHANGELOG.md). - -If `include in changelog` is present, the **title** of the PR will be used as a line in the detailed section of the changelog. -Therefore, make sure that it's informative & concise without any additional context other than the category. -If a PR title should not be part of the changelog, you _have_ to label the PR with `exclude from changelog`. +Org members _must_ label their PRs — labels are how we generate [changelogs](https://github.com/rerun-io/rerun/blob/main/CHANGELOG.md). -The category of the changelog entry is governed by additional labels of which you have to provide at least one. -The exact list may change over time, see the [CI job](./.github/workflows/labels.yml) checking for it for a full list. - -Beyond changelog categorization, it's encouraged to add too many rather than too few labels as they help with search. +* `include in changelog`: The PR **title** will be used as a changelog entry. Keep it informative and concise. +* `exclude from changelog`: Required if the PR shouldn't appear in the changelog. +* At least one category label is required. See the [CI job](./.github/workflows/labels.yml) for the current list. 
+* When in doubt, add more labels rather than fewer — they help with search. #### What should go to the changelog? -Whether a PR should be added to the changelog isn't always clear cut, but it's advised to err on the side of -adding too many entries rather than too few! -Generally, whenever you believe that this is a value-add for a user browsing the detailed changelog, add it. -Also, credit where credit is due, be more generous adding contributions from outside the `rerun-io` org to the changelog! +Err on the side of including entries — if it adds value for a user browsing the changelog, add it. +Be generous with external contributions — credit where credit is due! -We typically don't include: -pure refactors, testing, CI fixes, fixes for bugs that showed up since the last release, minor doc changes (like typos) etc. +We typically don't include: pure refactors, testing, CI fixes, fixes for bugs introduced since the last release, minor doc changes (typos, etc.). #### Other special labels * `deploy docs`: - PRs marked with this will automatically be cherry-picked to the `docs-latest` branch which then will kick off a rebuild of the public [doc page](https://www.rerun.io/docs) - Use this for landing doc fixes that are relevant to the latest release. + Cherry-picked to `docs-latest`, triggering a rebuild of the [doc page](https://rerun.io/docs). + Use this for doc fixes relevant to the latest release. * `do-not-merge`: - Will fail the CI unconditionally. - Useful e.g. for opening PRs that target branches other than `main` which you want to rebase prior to merging - or when you're still waiting on a test result. - Alternatively, you can also use checkboxes in the PR description - any unticked checkboxes will make the ci fail automatically ✨ + Fails CI unconditionally. Useful for PRs targeting non-`main` branches or awaiting test results.
+ Alternatively, unticked checkboxes in the PR description will also fail CI ✨ ## Contributing to CI -Every CI job would in its ideal state consist of only a single `pixi` (or similar) script invocation that works locally as-is. +Every CI job should ideally be a single `pixi` (or similar) script invocation that works locally as-is. -This approach has a number of benefits: -- Instead of Bash embedded in YAML, scripts may be written in an Actual Programming Language™ -- Significantly lower iteration times when working on CI -- Ability to perform a job manually in case the CI fails +Benefits: +- Scripts in a real programming language instead of Bash embedded in YAML +- Much lower iteration times when working on CI +- Ability to manually re-run a job when CI fails -Additionally, always output any artifacts produced by CI to GCS instead of the GHA artifact storage. This can be a serious lifesaver when something breaks, as it allows anyone to download the output of a script and continue from where it failed, instead of being forced to start over from scratch. +Always output artifacts to GCS instead of GHA artifact storage. This lets anyone download the output of a script and continue from where it failed. -Here are some guidelines to follow when writing such scripts: +### CI script guidelines -Local-first means easy for contributors to run. +Scripts should be local-first and easy for contributors to run. -The following should be documented in each script: +Each script should document: - Dependencies - Files and directories - Environment variables - Usage examples -Inputs should be passed in explicitly via arguments, and use sane defaults. If an input has a default value, it should be documented in its description. - -Every input should be checked as early as possible. 
This includes: -- Checking if authentication credentials are valid -- Validating inputs and parsing into more specific types where possible: - - Numeric ranges - - String character sets/encodings - - Length limits - - Date formats - - etc. -- Checking that input file paths are valid and the files they point to exist +Pass inputs explicitly via arguments with sane defaults. Validate inputs as early as possible: auth credentials, numeric ranges, string formats, file path existence, etc. -Input and output file paths should also accept GCS paths (`gs://bucket/blob/path`) and stdin/stdout (`-`), if it makes sense. +Support GCS paths (`gs://bucket/blob/path`) and stdin/stdout (`-`) for file I/O where it makes sense. -Be extra descriptive in error messages, it may be the only piece of information someone debugging a CI failure has available to figure out what went wrong. Print frequently to hint at what is going on and display progress to the user. +Write descriptive error messages — they may be the only info someone has when debugging a CI failure. Print frequently to show progress. -Environment variables should only be used for authentication with external services and configuring output (e.g. disabling color). Many SDKs support some form of persistent/default authentication, and scripts should take advantage of this where possible. For example, GCP has [Application Default Credentials](https://cloud.google.com/docs/authentication/client-libraries). +Use environment variables only for auth and output config (e.g. disabling color). Prefer SDK default auth where possible (e.g. GCP [Application Default Credentials](https://cloud.google.com/docs/authentication/client-libraries)). -If the script performs destructive or otherwise irreversible actions, then it should support a `--dry-run` option if possible. +Support `--dry-run` for destructive or irreversible actions. ### Adding dependencies -Be thoughtful when adding dependencies. 
Each new dependency is a liability which lead to increased compile times, a bigger binary, more code that can break, a larger attack surface, etc. Sometimes it is better to write a hundred lines of code than to add a new dependency. +Be thoughtful when adding dependencies. Each one adds compile time, binary size, potential breakage, and attack surface. Sometimes 100 lines of code is better than a new dependency. -Whenever you add a new dependency in a PR, make sure you motivate it: -* Why use the dependency instead of rolling our own? -* Why this dependency instead of another? +When adding a dependency in a PR, motivate it: +* Why use this dependency instead of rolling our own? +* Why this one over alternatives? -For Rust, make sure you use `default-features = false` if it makes sense, to minimize the amount of new code that is pulled in. +For Rust, use `default-features = false` where it makes sense to minimize new code pulled in. -When reviewing a PR, always check the diff of `Cargo.lock` (it is collapsed by default in GitHub 😤). +When reviewing a PR, always check the `Cargo.lock` diff (collapsed by default in GitHub 😤). -For a guide on picking good dependencies, see . +Guide for picking good dependencies: . -Any full `cargo update` should be its own stand-alone PR. Make sure you include the output of it in the commit message. +A full `cargo update` should be its own stand-alone PR. Include the output in the commit message. ## Structure -The main crates are found in the [`crates/`](crates) folder, with examples in the [`examples/`](examples) folder. +Main crates are in [`crates/`](crates), examples in [`examples/`](examples). To get an overview of the crates, read their documentation with: @@ -160,67 +140,54 @@ cargo run -p rerun -- --help ## Tests There are various kinds of automated tests throughout the repository. -If not noted otherwise, all tests run automated on CI, however their frequency (per PR, on `main`, nightly, etc.) and platform coverage may vary. 
+Unless noted otherwise, all tests run on CI, though their frequency (per PR, on `main`, nightly) and platform coverage may vary. ### Rust tests ```sh cargo test --all-targets --all-features ``` -or alternatively (if you've [installed cargo nextest](https://nexte.st/)): +or with [cargo nextest](https://nexte.st/): ```sh cargo nextest run --all-targets --all-features cargo test --all-features --doc ``` -Runs both unit & integration tests for Rust crates, including the Rerun viewer. - -Tests are written using the standard `#[test]` attribute. +Runs unit & integration tests for all Rust crates, including the viewer. +Tests use the standard `#[test]` attribute. #### `insta` snapshot tests -Some of the tests in the `rerun` family of crates are [`insta`](https://docs.rs/insta/latest/insta/) snapshot tests. -These tests work by comparing a textual output of a test against a checked-in reference. - -They run as part of the regular Rust test suite, no extra action is required to include them in a test run. +Some tests use [`insta`](https://docs.rs/insta/latest/insta/) snapshot tests, which compare textual output against checked-in references. They run as part of the regular test suite. -If the output of them changes (either intentionally or not), they will fail, and you can review the results by running `cargo insta review` (you first need to install it with `cargo install cargo-insta`). +If output changes, they will fail. Review results with `cargo insta review` (install: `cargo install cargo-insta`). #### Image comparison tests -Some of the tests in the `rerun` family of crates are image comparison tests. -These tests work by rendering an image and then comparing it with a checked-in reference image. - -They run as part of the regular Rust test suite, no extra action is required to include them in a test run. +Some tests render an image and compare it against a checked-in reference image. They run as part of the regular test suite. 
-Comparison tests are driven by [egui_kittest](https://github.com/emilk/egui/tree/master/crates/egui_kittest)'s `Harness::snapshot` method. -Typically, we use [TestContext](./crates/viewer/re_test_context/src/lib.rs) in order to mock -relevant parts of the Rerun viewer. +These are driven by [egui_kittest](https://github.com/emilk/egui/tree/master/crates/egui_kittest)'s `Harness::snapshot` method. +We typically use [TestContext](./crates/viewer/re_test_context/src/lib.rs) to mock relevant parts of the viewer. ##### Comparing results & updating images -Each run of the comparison tests will produce new images that are saved to the comparison images. -(typically at `/snapshots`) +Each test run produces new images (typically at `/snapshots`). +On failure, a `diff.png` is added highlighting all differences. +To update references, run with `UPDATE_SNAPSHOTS=1`. -Upon failure, additionally `diff.png` file is added that highlights all differences between the reference and the new image. -In order to update reference with the new image, run with `UPDATE_SNAPSHOTS=1` environment variable set. +Use `pixi run snapshots` to compare results of all failed tests visually in Rerun. +You can also update from a failed CI run using `./scripts/update_snapshots_from_ci.sh`. +Inspect PR diffs (including failed comparisons) via https://rerun-io.github.io/kitdiff/?url=. -Use `pixi run snapshots` to compare the results of all failed tests in Rerun. - -For best practices & unexpected sources of image differences refer to the [egui_kittest README](https://github.com/emilk/egui/tree/master/crates/egui_kittest#snapshot-testing). +For best practices and unexpected sources of image differences, see the [egui_kittest README](https://github.com/emilk/egui/tree/master/crates/egui_kittest#snapshot-testing). ##### Rendering backend -Just like for drawing the viewer itself, drawing for comparison tests requires a `wgpu` compatible driver. -As of writing comparison tests are only run via Vulkan & Metal. 
-For CI / headless environments we a recent version `llvmpipe` for software rendering on Linux & Windows. -On MacOS we use [`SwiftShader`](https://github.com/google/swiftshader/). - -⚠️ Unfortunately, `SwiftShader`'s MSAA & texture filtering differs drastically from `llvmpipe` and -other native renderers which is why we use a lot higher comparison thresholds on Mac. --> **DO NOT** use images generated on MacOS CI as reference image, prefer those produced by our Linux runner. +Image comparison tests require a `wgpu`-compatible driver. Currently they run on Vulkan & Metal. +For CI / headless environments, we use lavapipe (`llvmpipe`) for software rendering on all platforms. +On macOS, we use a custom static build from [`rerun-io/lavapipe-build`](https://github.com/rerun-io/lavapipe-build). -For details on how to set this up refer to the [CI setup](./.github/workflows/reusable_checks_rust.yml). +For setup details, see the [CI workflow](./.github/workflows/reusable_checks_rust.yml). ### Python tests @@ -229,8 +196,7 @@ For details on how to set this up refer to the [CI setup](./.github/workflows/re pixi run py-test ``` -The Python SDK is tested using [`pytest`](https://docs.pytest.org/). -Tests are located in the [./rerun_py/tests/](./rerun_py/tests/) folder. +Uses [`pytest`](https://docs.pytest.org/). Tests are in [./rerun_py/tests/](./rerun_py/tests/). ### C++ tests @@ -238,8 +204,7 @@ Tests are located in the [./rerun_py/tests/](./rerun_py/tests/) folder. pixi run cpp-test ``` -The C++ SDK is tested using [`catch2`](https://github.com/catchorg/Catch2). -Tests are located in the [./rerun_cpp/tests/](./rerun_cpp/tests/) folder. +Uses [`catch2`](https://github.com/catchorg/Catch2). Tests are in [./rerun_cpp/tests/](./rerun_cpp/tests/). ### Snippet comparison tests @@ -248,9 +213,7 @@ Tests are located in the [./rerun_cpp/tests/](./rerun_cpp/tests/) folder. 
pixi run uvpy docs/snippets/compare_snippet_output.py ``` -More details in the [README.md](./docs/snippets/README.md). - -Makes sure all of the snippets in the [snippets/](./docs/snippets/) folder are working and yield the same output in all of the supported languages, unless configured otherwise in the [snippets.toml](./docs/snippets/snippets.toml) file. +Verifies that all [snippets](./docs/snippets/) produce the same output across languages, unless configured otherwise in [snippets.toml](./docs/snippets/snippets.toml). More details in [README.md](./docs/snippets/README.md). ### Release checklists @@ -258,54 +221,45 @@ Makes sure all of the snippets in the [snippets/](./docs/snippets/) folder are w pixi run uv run tests/python/release_checklist/main.py ``` -More details in the [README.md](./tests/python/release_checklist/README.md). - -A set of **manual** checklist-style tests that should be run prior to each release. -Introduction of new release checklists should be avoided as they add a lot of friction to the release process, -and failures are easy to be missed. +A set of **manual** checklist-style tests run prior to each release. Avoid adding new ones — they add friction and failures are easy to miss. More details in [README.md](./tests/python/release_checklist/README.md). ### Other ad-hoc manual tests -There's various additional test scenes located at [./tests/cpp/](./tests/cpp/), [./tests/python/](./tests/python/) and [./tests/rust/](./tests/rust/). -We generally build those as a CI step, but they are run only irregularly. -See respective readme files for more details. +Additional test scenes in [./tests/cpp/](./tests/cpp/), [./tests/python/](./tests/python/), and [./tests/rust/](./tests/rust/). +These are built on CI but run only irregularly. See respective READMEs for details. ## Tools -We use the [`pixi`](https://pixi.sh/) for managing dev-tool versioning, download and task running. To see available tasks, use `pixi task list`. 
+We use [`pixi`](https://pixi.sh/) for dev-tool versioning, downloads, and task running. See available tasks with `pixi task list`. + +We use [cargo deny](https://github.com/EmbarkStudios/cargo-deny) to check our dependency tree for copyleft licenses, duplicate dependencies, and [rustsec advisories](https://rustsec.org/advisories). Configure in `deny.toml`, run with `cargo deny check`. -We use [cargo deny](https://github.com/EmbarkStudios/cargo-deny) to check our dependency tree for copy-left licenses, duplicate dependencies and [rustsec advisories](https://rustsec.org/advisories). You can configure it in `deny.toml`. Usage: `cargo deny check` -Configure your editor to run `cargo fmt` on save. Also configure it to strip trailing whitespace, and to end each file with a newline. Settings for VSCode can be found in the `.vscode` folder and should be applied automatically. If you are using another editor, consider adding good setting to this repository! +Configure your editor to run `cargo fmt` on save, strip trailing whitespace, and end each file with a newline. VSCode settings in `.vscode/` should apply automatically. If you use a different editor, consider adding good settings to this repository! -Depending on the changes you made run `cargo test --all-targets --all-features`, `pixi run py-test` and `pixi run -e cpp cpp-test` locally. -For details see [the test section above](#tests). +Run relevant tests locally depending on your changes: `cargo test --all-targets --all-features`, `pixi run py-test`, `pixi run -e cpp cpp-test`. See [Tests](#tests) for details. -It is not strictly required, but we recommend [`cargo nextest`](https://nexte.st/) for running Rust tests as it is significantly faster than `cargo test` and yields much more readable output. -Note however, that as of writing `cargo nextest` does not yet support doc tests, those need to be run with `cargo test`. 
+We recommend [`cargo nextest`](https://nexte.st/) for running Rust tests — it's faster than `cargo test` with better output. Note that it doesn't support doc tests yet; run those with `cargo test`. ### Linting -Prior to pushing changes to a PR, at a minimum, you should always run `pixi run fast-lint`. This is designed to run -in a few seconds for repeated runs and should catch the more trivial issues to avoid wasting CI time. +Before pushing, always run `pixi run fast-lint`. It takes seconds on repeated runs and catches trivial issues before wasting CI time. ### Hooks -We recommend adding the Rerun pre-push hook to your local checkout, which among other-things will run -`pixi run fast-lint` for you. +We recommend installing the Rerun pre-push hook, which runs `pixi run fast-lint` for you. -To install the hooks, simply copy them into the `.git/hooks` directory of your local checkout. +Copy it into your local `.git/hooks`: ``` cp hooks/pre-push .git/hooks/pre-push chmod +x .git/hooks/pre-push ``` -or if you prefer you can configure git to use this directory as the hooks directory: +or configure git to use the hooks directory directly: ``` git config core.hooksPath hooks ``` ### Optional -You can use [bacon](https://github.com/Canop/bacon) to automatically check your code on each save. For instance, running just `bacon` will re-run `cargo clippy` each time you change a Rust file. See [`bacon.toml`](bacon.toml) for more. - -You can set up [`sccache`](https://github.com/mozilla/sccache) to speed up re-compilation (e.g. when switching branches). You can control the size of the cache with `export SCCACHE_CACHE_SIZE="256G"`. +* [bacon](https://github.com/Canop/bacon) — automatically re-runs `cargo clippy` on save. See [`bacon.toml`](bacon.toml). +* [`sccache`](https://github.com/mozilla/sccache) — speeds up recompilation (e.g. when switching branches). Set cache size: `export SCCACHE_CACHE_SIZE="256G"`. 
### Other -You can view higher log levels with `export RUST_LOG=trace`. -Debug logging is automatically enabled for the viewer as long as you're running inside the `rerun` checkout. +View higher log levels with `export RUST_LOG=trace`. +Debug logging is automatically enabled for the viewer when running inside the `rerun` checkout. diff --git a/Cargo.lock b/Cargo.lock index 07a8c9de44a3..d049c42e43aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -58,47 +58,48 @@ dependencies = [ [[package]] name = "accesskit" -version = "0.21.1" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf203f9d3bd8f29f98833d1fbef628df18f759248a547e7e01cfbf63cda36a99" +checksum = "5351dcebb14b579ccab05f288596b2ae097005be7ee50a7c3d4ca9d0d5a66f6a" dependencies = [ "enumn", "serde", + "uuid", ] [[package]] name = "accesskit_atspi_common" -version = "0.14.1" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29f73a9b855b6f4af4962a94553ef0c092b80cf5e17038724d5e30945d036f69" +checksum = "842fd8203e6dfcf531d24f5bac792088edfba7d6b35844fead191603fb32a260" dependencies = [ "accesskit", "accesskit_consumer", "atspi-common", + "phf 0.13.1", "serde", - "thiserror 1.0.69", "zvariant", ] [[package]] name = "accesskit_consumer" -version = "0.30.1" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdd06f5fea9819250fffd4debf926709f3593ac22f8c1541a2573e5ee0ca01cd" +checksum = "53cf47daed85312e763fbf85ceca136e0d7abc68e0a7e12abe11f48172bc3b10" dependencies = [ "accesskit", - "hashbrown 0.15.5", + "hashbrown 0.16.1", ] [[package]] name = "accesskit_macos" -version = "0.22.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fbaf15815f39084e0cb24950c232f0e3634702c2dfbf182ae3b4919a4a1d45" +checksum = "534bc3fdc89a64a1db3c46b33c198fde2b7c3c7d094e5809c8c8bf2970c18243" dependencies = [ "accesskit", "accesskit_consumer", - 
"hashbrown 0.15.5", + "hashbrown 0.16.1", "objc2 0.5.2", "objc2-app-kit 0.2.2", "objc2-foundation 0.2.2", @@ -106,9 +107,9 @@ dependencies = [ [[package]] name = "accesskit_unix" -version = "0.17.1" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64926a930368d52d95422b822ede15014c04536cabaa2394f99567a1f4788dc6" +checksum = "90e549dd7c6562b6a2ea807b44726e6241707db054a817dc4c7e2b8d3b39bfac" dependencies = [ "accesskit", "accesskit_atspi_common", @@ -124,23 +125,23 @@ dependencies = [ [[package]] name = "accesskit_windows" -version = "0.29.1" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "792991159fa9ba57459de59e12e918bb90c5346fea7d40ac1a11f8632b41e63a" +checksum = "eff7009f1a532e917d66970a1e80c965140c6cfbbabbdde3d64e5431e6c78e21" dependencies = [ "accesskit", "accesskit_consumer", - "hashbrown 0.15.5", + "hashbrown 0.16.1", "static_assertions", - "windows 0.61.3", - "windows-core 0.61.2", + "windows", + "windows-core", ] [[package]] name = "accesskit_winit" -version = "0.29.1" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd9db0ea66997e3f4eae4a5f2c6b6486cf206642639ee629dbbb860ace1dec87" +checksum = "1fe9a94394896352cc4660ca2288bd4ef883d83238853c038b44070c8f134313" dependencies = [ "accesskit", "accesskit_macos", @@ -156,7 +157,7 @@ version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" dependencies = [ - "gimli 0.32.3", + "gimli", ] [[package]] @@ -173,7 +174,7 @@ checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "const-random", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "serde", "version_check", @@ -217,7 +218,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ef6978589202a00cd7e118380c448a08b6ed394c3a8df3a430d0898e3a42d046" dependencies = [ "android-properties", - "bitflags 2.9.4", + "bitflags 2.11.0", "cc", "cesu8", "jni", @@ -254,7 +255,7 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "animated_urdf" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "clap", @@ -314,9 +315,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "approx" @@ -327,15 +328,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "arbitrary" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" -dependencies = [ - "derive_arbitrary", -] - [[package]] name = "arboard" version = "3.6.1" @@ -345,7 +337,7 @@ dependencies = [ "clipboard-win", "image", "log", - "objc2 0.6.3", + "objc2 0.6.4", "objc2-app-kit 0.3.2", "objc2-core-foundation", "objc2-core-graphics", @@ -364,32 +356,31 @@ checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" [[package]] name = "argh" -version = "0.1.13" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ff18325c8a36b82f992e533ece1ec9f9a9db446bd1c14d4f936bac88fcd240" +checksum = "d32c2462e89541e6687e684d97310015d64a0627b61106fc472156a38f61cd1e" dependencies = [ "argh_derive", "argh_shared", - "rust-fuzzy-search", ] [[package]] name = "argh_derive" -version = "0.1.13" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb7b2b83a50d329d5d8ccc620f5c7064028828538bdf5646acd60dc1f767803" +checksum = 
"ccc2a031b364bd099fed016feb1ccfca2c3549d63c16f330cfc40b27b7692231" dependencies = [ "argh_shared", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "argh_shared" -version = "0.1.13" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a464143cc82dedcdc3928737445362466b7674b5db4e2eb8e869846d6d84f4f6" +checksum = "5b9abea17ef74821d1d3490aee9e0749d731445d965b7512308b2aa00c90079e" dependencies = [ "serde", ] @@ -414,9 +405,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c26b57282a08ae92f727497805122fec964c6245cfa0e13f0e75452eaf3bc41f" +checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" dependencies = [ "arrow-arith", "arrow-array", @@ -436,23 +427,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" dependencies = [ "ahash", "arrow-buffer", @@ -461,30 +452,34 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.16.0", - "num", + "hashbrown 0.16.1", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "57.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed61d9d73eda8df9e3014843def37af3050b5080a9acbe108f045a316d5a0be" +checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", + "arrow-ord", "arrow-schema", "arrow-select", "atoi", @@ -493,15 +488,15 @@ dependencies = [ "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-csv" -version = "56.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa95b96ce0c06b4d33ac958370db8c0d31e88e54f9d6e08b0353d18374d9f991" +checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" dependencies = [ "arrow-array", "arrow-cast", @@ -514,21 +509,22 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" dependencies = [ "arrow-array", "arrow-buffer", @@ -536,15 +532,15 @@ dependencies = [ "arrow-schema", 
"arrow-select", "flatbuffers", - "lz4_flex 0.11.5", + "lz4_flex 0.12.1", "zstd", ] [[package]] name = "arrow-json" -version = "56.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d747573390905905a2dc4c5a61a96163fe2750457f90a04ee2a88680758c79" +checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" dependencies = [ "arrow-array", "arrow-buffer", @@ -553,20 +549,22 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.11.4", + "indexmap", + "itoa", "lexical-core", "memchr", - "num", - "serde", + "num-traits", + "ryu", + "serde_core", "serde_json", "simdutf8", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" dependencies = [ "arrow-array", "arrow-buffer", @@ -577,9 +575,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d924b32e96f8bb74d94cd82bd97b313c432fcb0ea331689ef9e7c6b8be4b258" +checksum = "d18c442b4c266aaf3d7f7dd40fd7ae058cef7f113b00ff0cd8256e1e218ec544" dependencies = [ "arrow-array", "arrow-data", @@ -589,9 +587,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" dependencies = [ "arrow-array", "arrow-buffer", @@ -602,34 +600,34 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" dependencies = [ - "bitflags 2.9.4", - "serde", + "bitflags 2.11.0", + "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" dependencies = [ "ahash", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" dependencies = [ "arrow-array", "arrow-buffer", @@ -637,7 +635,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -711,15 +709,11 @@ version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ - "bzip2 0.5.2", "flate2", "futures-core", "memchr", "pin-project-lite", "tokio", - "xz2", - "zstd", - "zstd-safe", ] [[package]] @@ -758,7 +752,7 @@ dependencies = [ "futures-lite", "parking", "polling", - "rustix 1.1.2", + "rustix 1.1.4", "slab", "windows-sys 0.61.2", ] @@ -789,7 +783,7 @@ dependencies = [ "cfg-if", "event-listener", "futures-lite", - "rustix 1.1.2", + "rustix 1.1.4", ] [[package]] @@ -800,7 +794,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -815,7 +809,7 @@ dependencies = [ "cfg-if", "futures-core", 
"futures-io", - "rustix 1.1.2", + "rustix 1.1.4", "signal-hook-registry", "slab", "windows-sys 0.61.2", @@ -840,7 +834,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -857,7 +851,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -901,25 +895,24 @@ checksum = "49c98dba06b920588de7d63f6acc23f1e6a9fade5fd6198e564506334fb5a4f5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "atspi" -version = "0.25.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c83247582e7508838caf5f316c00791eee0e15c0bf743e6880585b867e16815c" +checksum = "c77886257be21c9cd89a4ae7e64860c6f0eefca799bb79127913052bd0eefb3d" dependencies = [ "atspi-common", - "atspi-connection", "atspi-proxies", ] [[package]] name = "atspi-common" -version = "0.9.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33dfc05e7cdf90988a197803bf24f5788f94f7c94a69efa95683e8ffe76cfdfb" +checksum = "20c5617155740c98003016429ad13fe43ce7a77b007479350a9f8bf95a29f63d" dependencies = [ "enumflags2", "serde", @@ -931,23 +924,11 @@ dependencies = [ "zvariant", ] -[[package]] -name = "atspi-connection" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4193d51303d8332304056ae0004714256b46b6635a5c556109b319c0d3784938" -dependencies = [ - "atspi-common", - "atspi-proxies", - "futures-lite", - "zbus", -] - [[package]] name = "atspi-proxies" -version = "0.9.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2eebcb9e7e76f26d0bcfd6f0295e1cd1e6f33bedbc5698a971db8dc43d7751c" +checksum = "2230e48787ed3eb4088996eab66a32ca20c0b67bbd4fd6cdfe79f04f1f04c9fc" 
dependencies = [ "atspi-common", "serde", @@ -975,9 +956,9 @@ dependencies = [ [[package]] name = "axum" -version = "0.8.6" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18ed336352031311f4e0b4dd2ff392d4fbb370777c9d18d7fc9d7359f73871" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ "axum-core", "bytes", @@ -1027,9 +1008,9 @@ dependencies = [ [[package]] name = "az" -version = "1.2.1" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b7e4c2464d97fe331d41de9d5db0def0a96f4d823b8b32a2efd503578988973" +checksum = "be5eb007b7cacc6c660343e96f650fedf4b5a77512399eb952ca6642cf8d13f7" [[package]] name = "backtrace" @@ -1043,7 +1024,7 @@ dependencies = [ "miniz_oxide", "object", "rustc-demangle", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -1124,18 +1105,21 @@ dependencies = [ [[package]] name = "bit-set" -version = "0.8.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +checksum = "34ddef2995421ab6a5c779542c81ee77c115206f4ad9d5a8e05f4ff49716a3dd" dependencies = [ "bit-vec", ] [[package]] name = "bit-vec" -version = "0.8.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" +checksum = "b71798fca2c1fe1086445a7258a4bc81e6e49dcd24c8d0dd9a1e57395b603f51" +dependencies = [ + "serde", +] [[package]] name = "bitflags" @@ -1145,12 +1129,12 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.4" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = 
"843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" dependencies = [ "bytemuck", - "serde", + "serde_core", ] [[package]] @@ -1202,12 +1186,6 @@ dependencies = [ "constant_time_eq", ] -[[package]] -name = "block" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" - [[package]] name = "block-buffer" version = "0.10.4" @@ -1232,7 +1210,7 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdeb9d870516001442e364c5220d3574d2da8dc765554b4a617230d33fa58ef5" dependencies = [ - "objc2 0.6.3", + "objc2 0.6.4", ] [[package]] @@ -1250,7 +1228,7 @@ dependencies = [ [[package]] name = "blueprint" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "clap", "rerun", @@ -1258,7 +1236,7 @@ dependencies = [ [[package]] name = "blueprint_stocks" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "chrono", @@ -1291,7 +1269,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -1340,9 +1318,9 @@ checksum = "7575182f7272186991736b70173b0ea045398f984bf5ebbb3804736ce1330c9d" [[package]] name = "bytemuck" -version = "1.24.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" dependencies = [ "bytemuck_derive", ] @@ -1355,7 +1333,7 @@ checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -1385,34 +1363,6 @@ dependencies = [ "bytes", ] -[[package]] -name = "bzip2" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" -dependencies = [ - "bzip2-sys", -] - -[[package]] -name = "bzip2" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bea8dcd42434048e4f7a304411d9273a411f647446c1234a65ce0554923f4cff" -dependencies = [ - "libbz2-rs-sys", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.13+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" -dependencies = [ - "cc", - "pkg-config", -] - [[package]] name = "cacache" version = "13.1.0" @@ -1446,7 +1396,7 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b99da2f8558ca23c71f4fd15dc57c906239752dd27ff3c00a1d56b685b7cbfec" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "log", "polling", "rustix 0.38.44", @@ -1468,33 +1418,13 @@ dependencies = [ [[package]] name = "camino" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "276a59bf2b2c967788139340c9f0c5b12d7fd6630315c15c217e559de85d2609" +checksum = "e629a66d692cb9ff1a1c664e41771b3dcaf961985a9774c0eb0bd1b51cf60a48" dependencies = [ "serde_core", ] -[[package]] -name = "cargo-manifest" -version = "0.19.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d8af896b707212cd0e99c112a78c9497dd32994192a463ed2f7419d29bd8c6" -dependencies = [ - "serde", - "thiserror 2.0.17", - "toml 0.8.23", -] - -[[package]] -name = "cargo-platform" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84982c6c0ae343635a3a4ee6dedef965513735c8b183caa7289fa6e27399ebd4" -dependencies = [ - "serde", -] - [[package]] name = "cargo-platform" version = "0.3.1" @@ -1516,49 +1446,18 @@ dependencies = [ "wasm-bindgen-cli-support", ] -[[package]] -name = "cargo-util-schemas" -version = "0.8.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dc1a6f7b5651af85774ae5a34b4e8be397d9cf4bc063b7e6dbd99a841837830" -dependencies = [ - "semver", - "serde", - "serde-untagged", - "serde-value", - "thiserror 2.0.17", - "toml 0.8.23", - "unicode-xid", - "url", -] - -[[package]] -name = "cargo_metadata" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cfca2aaa699835ba88faf58a06342a314a950d2b9686165e038286c30316868" -dependencies = [ - "camino", - "cargo-platform 0.2.0", - "cargo-util-schemas", - "semver", - "serde", - "serde_json", - "thiserror 2.0.17", -] - [[package]] name = "cargo_metadata" -version = "0.23.0" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "981a6f317983eec002839b90fae7411a85621410ae591a9cab2ecf5cb5744873" +checksum = "ef987d17b0a113becdd19d3d0022d04d7ef41f9efe4f3fb63ac44ba61df3ade9" dependencies = [ "camino", - "cargo-platform 0.3.1", + "cargo-platform", "semver", "serde", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -1619,9 +1518,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "cfg_aliases" @@ -1640,16 +1539,16 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", "js-sys", "num-traits", "serde", "wasm-bindgen", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -1706,9 +1605,9 @@ dependencies = [ [[package]] name = "clap" 
-version = "4.5.48" +version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2134bb3ea021b78629caa971416385309e0131b351b25e01dc16fb54e1b5fae" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" dependencies = [ "clap_builder", "clap_derive", @@ -1716,9 +1615,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.48" +version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2ba64afa3c0a6df7fa517765e31314e983f51dda798ffba27b988194fb65dc9" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" dependencies = [ "anstream", "anstyle", @@ -1728,21 +1627,21 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.47" +version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "clap_lex" -version = "0.7.5" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" [[package]] name = "clean-path" @@ -1761,7 +1660,7 @@ dependencies = [ [[package]] name = "clock" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "clap", @@ -1770,13 +1669,13 @@ dependencies = [ [[package]] name = "codespan-reporting" -version = "0.12.0" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe6d2e5af09e8c8ad56c969f2157a3d4238cebc7c55f0a517728c38f7b200f81" +checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681" dependencies = [ "serde", "termcolor", - "unicode-width 
0.1.14", + "unicode-width 0.2.2", ] [[package]] @@ -1822,9 +1721,9 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.2.1" +version = "7.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" dependencies = [ "crossterm", "unicode-segmentation", @@ -1838,7 +1737,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f76990911f2267d837d9d0ad060aa63aaad170af40904b29461734c339030d4d" dependencies = [ "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -1900,7 +1799,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "once_cell", "tiny-keccak", ] @@ -1954,13 +1853,42 @@ dependencies = [ [[package]] name = "convert_case" -version = "0.6.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +checksum = "affbf0190ed2caf063e3def54ff444b449371d55c58e513a95ab98eca50adb49" dependencies = [ "unicode-segmentation", ] +[[package]] +name = "cookie" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747" +dependencies = [ + "percent-encoding", + "time", + "version_check", +] + +[[package]] +name = "cookie_store" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fc4bff745c9b4c7fb1e97b25d13153da2bc7796260141df62378998d070207f" +dependencies = [ + "cookie", + "document-features", + "idna", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "time", + "url", +] + [[package]] name = "core-foundation" version 
= "0.9.4" @@ -1995,7 +1923,7 @@ checksum = "c07782be35f9e1140080c6b96f0d44b739e2278479f64e02fdab4e32dfd8b081" dependencies = [ "bitflags 1.3.2", "core-foundation 0.9.4", - "core-graphics-types 0.1.3", + "core-graphics-types", "foreign-types", "libc", ] @@ -2011,17 +1939,6 @@ dependencies = [ "libc", ] -[[package]] -name = "core-graphics-types" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d44a101f213f6c4cdc1853d4b78aef6db6bdfa3468798cc1d9912f4735013eb" -dependencies = [ - "bitflags 2.9.4", - "core-foundation 0.10.1", - "libc", -] - [[package]] name = "core_extensions" version = "1.5.3" @@ -2150,6 +2067,16 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-skiplist" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df29de440c58ca2cc6e587ec3d22347551a32435fbde9d2bff64e78a9ffa151b" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -2162,11 +2089,11 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "crossterm_winapi", "document-features", "parking_lot", - "rustix 1.1.2", + "rustix 1.1.4", "winapi", ] @@ -2224,7 +2151,7 @@ checksum = "f27ae1dd37df86211c42e150270f82743308803d90a6f6e6651cd730d5e1732f" [[package]] name = "custom_callback" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "bincode", "mimalloc", @@ -2235,8 +2162,8 @@ dependencies = [ ] [[package]] -name = "custom_data_loader" -version = "0.30.0-alpha.1+dev" +name = "custom_importer" +version = "0.32.0-alpha.1" dependencies = [ "crossbeam", "re_build_tools", @@ -2246,7 +2173,7 @@ dependencies = [ [[package]] name = "custom_store_subscriber" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ 
"re_build_tools", "rerun", @@ -2254,7 +2181,7 @@ dependencies = [ [[package]] name = "custom_view" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "mimalloc", "rerun", @@ -2262,7 +2189,7 @@ dependencies = [ [[package]] name = "custom_visualizer" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "bytemuck", "mimalloc", @@ -2302,7 +2229,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -2313,7 +2240,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -2344,7 +2271,7 @@ checksum = "5c297a1c74b71ae29df00c3e22dd9534821d60eb9af5a0192823fa2acea70c2a" [[package]] name = "dataframe_query" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "itertools 0.14.0", "rerun", @@ -2353,25 +2280,23 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" +checksum = "7541353e77dc7262b71ca27be07d8393661737e3a73b5d1b1c6f7d814c64fa2a" dependencies = [ "arrow", - "arrow-ipc", "arrow-schema", "async-trait", "bytes", - "bzip2 0.6.0", "chrono", "datafusion-catalog", "datafusion-catalog-listing", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-arrow", "datafusion-datasource-csv", "datafusion-datasource-json", - "datafusion-datasource-parquet", "datafusion-execution", "datafusion-expr", "datafusion-expr-common", @@ -2388,29 +2313,25 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "datafusion-sql", - "flate2", "futures", "itertools 0.14.0", "log", "object_store", "parking_lot", - "parquet", - "rand 0.9.2", + "rand 0.9.3", "regex", "sqlparser", "tempfile", "tokio", "url", "uuid", - "xz2", 
- "zstd", ] [[package]] name = "datafusion-catalog" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621" +checksum = "9997731f90fa5398ef831ad0e69600f92c861b79c0d38bd1a29b6f0e3a0ce4c8" dependencies = [ "arrow", "async-trait", @@ -2423,7 +2344,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", - "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -2434,9 +2354,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83" +checksum = "2b30a3dd50dec860c9559275c8d97d9de602e611237a6ecfbda0b3b63b872352" dependencies = [ "arrow", "async-trait", @@ -2446,35 +2366,33 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", - "datafusion-session", "futures", + "itertools 0.14.0", "log", "object_store", - "tokio", ] [[package]] name = "datafusion-common" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" +checksum = "d551054acec0398ca604512310b77ce05c46f66e54b54d48200a686e385cca4e" dependencies = [ "ahash", "arrow", "arrow-ipc", - "base64 0.22.1", "chrono", "half", - "hashbrown 0.14.5", - "indexmap 2.11.4", + "hashbrown 0.16.1", + "indexmap", "libc", "log", "object_store", "parquet", "paste", - "recursive", "sqlparser", "tokio", "web-time", @@ -2482,9 +2400,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" +checksum = "567d40e285f5b79f8737b576605721cd6c1133b5d2b00bdbd5d9838d90d0812f" dependencies = [ "futures", "log", @@ -2493,15 +2411,13 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" +checksum = "27d2668f51b3b30befae2207472569e37807fdedd1d14da58acc6f8ca6257eae" dependencies = [ "arrow", - "async-compression", "async-trait", "bytes", - "bzip2 0.6.0", "chrono", "datafusion-common", "datafusion-common-runtime", @@ -2512,38 +2428,54 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", - "flate2", "futures", "glob", "itertools 0.14.0", "log", "object_store", - "parquet", - "rand 0.9.2", - "tempfile", + "rand 0.9.3", "tokio", - "tokio-util", "url", - "xz2", - "zstd", +] + +[[package]] +name = "datafusion-datasource-arrow" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e02e1b3e3a8ec55f1f62de4252b0407c8567363d056078769a197e24fc834a0f" +dependencies = [ + "arrow", + "arrow-ipc", + "async-trait", + "bytes", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + "object_store", + "tokio", ] [[package]] name = "datafusion-datasource-csv" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825" +checksum = "b559d7bf87d4f900f847baba8509634f838d9718695389e903604cdcccdb01f3" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", 
"datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -2555,49 +2487,44 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6" +checksum = "250e2d7591ba8b638f063854650faa40bca4e8bd4059b2ece8836f6388d02db4" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "object_store", - "serde_json", "tokio", ] [[package]] name = "datafusion-datasource-parquet" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133" +checksum = "0b043149f2c3557ca94abc58de40f68a8d412ff53365c06126ed234f8596399d" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", - "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", @@ -2607,24 +2534,24 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand 0.9.2", "tokio", ] [[package]] name = "datafusion-doc" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" +checksum = "b9496cb0db222dbb9a3735760ceca7fc56f35e1d5502c38d0caa77a81e9c1f6a" [[package]] name = "datafusion-execution" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" +checksum = "dc45d23c516ed8d3637751e44e09e21b45b3f58b473c802dddd1f1ad4fe435ff" dependencies = [ "arrow", "async-trait", + "chrono", "dashmap", "datafusion-common", "datafusion-expr", @@ -2632,16 +2559,16 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand 0.9.2", + "rand 0.9.3", "tempfile", "url", ] [[package]] name = "datafusion-expr" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" +checksum = "63dd30526d2db4fda6440806a41e4676334a94bc0596cc9cc2a0efed20ef2c44" dependencies = [ "arrow", "async-trait", @@ -2652,53 +2579,61 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.11.4", + "indexmap", + "itertools 0.14.0", "paste", - "recursive", "serde_json", "sqlparser", ] [[package]] name = "datafusion-expr-common" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" +checksum = "1b486b5f6255d40976b88bb83813b0d035a8333e0ec39864824e78068cf42fa6" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.11.4", + "indexmap", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-ffi" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ddb7c4e645df080c27dad13a198d191da328dd1c98e198664a7a0f64b335cc" +checksum = 
"26b04181cffefd632e57acfc233ed239626863682dd8bb30ab366293f441bba8" dependencies = [ "abi_stable", "arrow", "arrow-schema", "async-ffi", "async-trait", - "datafusion", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", "datafusion-functions-aggregate-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", "datafusion-proto", "datafusion-proto-common", + "datafusion-session", "futures", "log", - "prost 0.13.5", + "prost", "semver", "tokio", ] [[package]] name = "datafusion-functions" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" +checksum = "07356c94118d881130dd0ffbff127540407d969c8978736e324edcd6c41cd48f" dependencies = [ "arrow", "arrow-buffer", @@ -2706,6 +2641,7 @@ dependencies = [ "blake2", "blake3", "chrono", + "chrono-tz", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -2716,7 +2652,8 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", - "rand 0.9.2", + "num-traits", + "rand 0.9.3", "regex", "sha2", "unicode-segmentation", @@ -2725,9 +2662,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" +checksum = "b644f9cf696df9233ce6958b9807666d78563b56f923267474dd6c07795f1f8f" dependencies = [ "ahash", "arrow", @@ -2746,9 +2683,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" +checksum = "c1de2deaaabe8923ce9ea9f29c47bbb4ee14f67ea2fe1ab5398d9bbebcf86e56" dependencies = [ "ahash", 
"arrow", @@ -2759,9 +2696,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" +checksum = "552f8d92e4331ee91d23c02d12bb6acf32cbfd5215117e01c0fb63cd4b15af1a" dependencies = [ "arrow", "arrow-ord", @@ -2769,6 +2706,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", @@ -2781,9 +2719,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981" +checksum = "970fd0cdd3df8802b9a9975ff600998289ba9d46682a4f7285cba4820c9ada78" dependencies = [ "arrow", "async-trait", @@ -2797,9 +2735,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" +checksum = "40b4c21a7c8a986a1866c0a87ab756d0bbf7b5f41f306009fa2d9af79c52ed31" dependencies = [ "arrow", "datafusion-common", @@ -2815,9 +2753,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" +checksum = "b1210ad73b8b3211aeaf4a42bef9bd7a2b7fce3ec119a478831f18c6ff7f7b93" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2825,20 +2763,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.3.0" +version = "52.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" +checksum = "aaa566a963013a38681ad82a727a654bc7feb19632426aea8c3412d415d200c5" dependencies = [ - "datafusion-expr", + "datafusion-doc", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "datafusion-optimizer" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" +checksum = "ff9aa82b240252a88dee118372f9b9757c545ab9e53c0736bebab2e7da0ef1f2" dependencies = [ "arrow", "chrono", @@ -2846,19 +2784,18 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.11.4", + "indexmap", "itertools 0.14.0", "log", - "recursive", "regex", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" +checksum = "7d48022b8af9988c1d852644f9e8b5584c490659769a550c5e8d39457a1da0a5" dependencies = [ "ahash", "arrow", @@ -2868,20 +2805,20 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", - "hashbrown 0.14.5", - "indexmap 2.11.4", + "hashbrown 0.16.1", + "indexmap", "itertools 0.14.0", - "log", "parking_lot", "paste", - "petgraph 0.8.2", + "petgraph 0.8.3", + "tokio", ] [[package]] name = "datafusion-physical-expr-adapter" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0" +checksum = "ae7a8abc0b4fe624000972a9b145b30b7f1b680bffaa950ea53f78d9b21c27c3" dependencies = [ "arrow", "datafusion-common", @@ -2894,23 +2831,26 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = 
"50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" +checksum = "147253ca3e6b9d59c162de64c02800973018660e13340dd1886dd038d17ac429" dependencies = [ "ahash", "arrow", + "chrono", "datafusion-common", "datafusion-expr-common", - "hashbrown 0.14.5", + "hashbrown 0.16.1", + "indexmap", "itertools 0.14.0", + "parking_lot", ] [[package]] name = "datafusion-physical-optimizer" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c" +checksum = "689156bb2282107b6239db8d7ef44b4dab10a9b33d3491a0c74acac5e4fedd72" dependencies = [ "arrow", "datafusion-common", @@ -2922,34 +2862,32 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", - "log", - "recursive", ] [[package]] name = "datafusion-physical-plan" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" +checksum = "68253dc0ee5330aa558b2549c9b0da5af9fc17d753ae73022939014ad616fc28" dependencies = [ "ahash", "arrow", "arrow-ord", "arrow-schema", "async-trait", - "chrono", "datafusion-common", "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", + "datafusion-functions", "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", "half", - "hashbrown 0.14.5", - "indexmap 2.11.4", + "hashbrown 0.16.1", + "indexmap", "itertools 0.14.0", "log", "parking_lot", @@ -2959,39 +2897,49 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a7df9f606892e6af45763d94d210634eec69b9bb6ced5353381682ff090028a3" +checksum = "3f5ab57d0b5a368258fff1d828f1619a10541fa5c4ec4930a383deb3a23204c8" dependencies = [ "arrow", "chrono", - "datafusion", + "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", - "datafusion-expr", - "datafusion-proto-common", + "datafusion-datasource", + "datafusion-datasource-arrow", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-table", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-proto-common", "object_store", - "prost 0.13.5", + "prost", ] [[package]] name = "datafusion-proto-common" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4b14f288ca4ef77743d9672cafecf3adfffff0b9b04af9af79ecbeaaf736901" +checksum = "bd21d2c804802ca4b1719191dfe8e3d0860686649de6375ddc9237f85beb82b3" dependencies = [ "arrow", "datafusion-common", - "prost 0.13.5", + "prost", ] [[package]] name = "datafusion-pruning" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" +checksum = "0fcad240a54d0b1d3e8f668398900260a53122d522b2102ab57218590decacd6" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -3004,41 +2952,31 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" +checksum = "f58e83a68bb67007a8fcbf005c44cefe441270c7ee7f6dee10c0e0109b556f6d" dependencies = [ - "arrow", "async-trait", - "dashmap", "datafusion-common", - "datafusion-common-runtime", 
"datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", "parking_lot", - "tokio", ] [[package]] name = "datafusion-sql" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" +checksum = "be53e9eb55db0fbb8980bb6d87f2435b0524acf4c718ed54a57cabbb299b2ab3" dependencies = [ "arrow", "bigdecimal", + "chrono", "datafusion-common", "datafusion-expr", - "indexmap 2.11.4", + "indexmap", "log", - "recursive", "regex", "sqlparser", ] @@ -3073,17 +3011,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "derive_arbitrary" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.106", -] - [[package]] name = "devserver_lib" version = "0.4.2" @@ -3092,9 +3019,9 @@ checksum = "edf215dbb8cb1409cca7645aaed35f9e39fb0a21855bba1ac48bc0334903bf66" [[package]] name = "dify" -version = "0.7.4" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11217d469eafa3b809ad84651eb9797ccbb440b4a916d5d85cb1b994e89787f6" +checksum = "90ce0fb972943b4e88cd03b8f92953df0c71bb05e0bde8e5b684895d808013cc" dependencies = [ "anyhow", "colored", @@ -3116,7 +3043,7 @@ dependencies = [ [[package]] name = "dimos-viewer" -version = "0.30.1" +version = "0.32.0-alpha.1" dependencies = [ "bincode", "clap", @@ -3171,10 +3098,10 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89a09f22a6c6069a18470eb92d2298acf25463f14256d24778e1230d789a2aec" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "block2 0.6.2", "libc", - "objc2 0.6.3", + "objc2 0.6.4", ] [[package]] @@ -3185,32 +3112,32 @@ checksum = 
"97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "dlib" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "330c60081dcc4c72131f8eb70510f1ac07223e5d4163db481a04a0befcffa412" +checksum = "ab8ecd87370524b461f8557c119c405552c396ed91fc0a8eec68679eab26f94a" dependencies = [ - "libloading 0.7.4", + "libloading 0.8.9", ] [[package]] name = "dna" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "itertools 0.14.0", - "rand 0.9.2", + "rand 0.9.3", "rerun", ] [[package]] name = "document-features" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95249b50c6c185bee49034bcb378a49dc2b5dff0be90ff6616d31d64febab05d" +checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" dependencies = [ "litrs", ] @@ -3241,8 +3168,9 @@ checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04" [[package]] name = "ecolor" -version = "0.33.3" -source = "git+https://github.com/emilk/egui.git?branch=main#fd257b2e95972f2bfe08cbde710f248076a08ee6" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb993f120d46ca077a18f166175b94c6edeb994d812cbd07a4a03cfced2713c" dependencies = [ "bytemuck", "color-hex", @@ -3258,8 +3186,9 @@ checksum = "18aade80d5e09429040243ce1143ddc08a92d7a22820ac512610410a4dd5214f" [[package]] name = "eframe" -version = "0.33.3" -source = "git+https://github.com/emilk/egui.git?branch=main#fd257b2e95972f2bfe08cbde710f248076a08ee6" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fd6562766ec6b921232ceb960acdb405f91971f59c17604840fd9485fee0dc3" dependencies = [ "ahash", "bytemuck", @@ -3274,9 +3203,9 @@ dependencies = [ "image", "js-sys", "log", - "objc2 0.5.2", - "objc2-app-kit 
0.2.2", - "objc2-foundation 0.2.2", + "objc2 0.6.4", + "objc2-app-kit 0.3.2", + "objc2-foundation 0.3.2", "parking_lot", "percent-encoding", "pollster", @@ -3296,13 +3225,14 @@ dependencies = [ [[package]] name = "egui" -version = "0.33.3" -source = "git+https://github.com/emilk/egui.git?branch=main#fd257b2e95972f2bfe08cbde710f248076a08ee6" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b71aaacbe69e214aebf7b8d5eacd6256d0d2ca5ff9525c28d1dd7f377f430a9" dependencies = [ "accesskit", "ahash", "backtrace", - "bitflags 2.9.4", + "bitflags 2.11.0", "emath", "epaint", "log", @@ -3316,8 +3246,9 @@ dependencies = [ [[package]] name = "egui-wgpu" -version = "0.33.3" -source = "git+https://github.com/emilk/egui.git?branch=main#fd257b2e95972f2bfe08cbde710f248076a08ee6" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "111e54ac54bf9d767ce8970a454f720878875bca1f2e63f781c722e98f466971" dependencies = [ "ahash", "bytemuck", @@ -3326,7 +3257,7 @@ dependencies = [ "epaint", "log", "profiling", - "thiserror 2.0.17", + "thiserror 2.0.18", "type-map", "web-time", "wgpu", @@ -3335,17 +3266,18 @@ dependencies = [ [[package]] name = "egui-winit" -version = "0.33.3" -source = "git+https://github.com/emilk/egui.git?branch=main#fd257b2e95972f2bfe08cbde710f248076a08ee6" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ed6af7dad7dd045b77731fe6dc291b835489a104d3cd2428a945e71e2265df3" dependencies = [ "accesskit_winit", "arboard", "bytemuck", "egui", "log", - "objc2 0.5.2", - "objc2-foundation 0.2.2", - "objc2-ui-kit", + "objc2 0.6.4", + "objc2-foundation 0.3.2", + "objc2-ui-kit 0.3.2", "profiling", "raw-window-handle", "serde", @@ -3357,9 +3289,9 @@ dependencies = [ [[package]] name = "egui_animation" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a3db554dd3784f469d804f7dc25d1b14a2e00f1608d7af60218ccbced720a6e8" +checksum = "dd9bc6d586df44e01b90715eec1eb8d73a61c3c3f554edffb01eb0894a8107ef" dependencies = [ "egui", "hello_egui_utils", @@ -3368,9 +3300,9 @@ dependencies = [ [[package]] name = "egui_commonmark" -version = "0.22.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5246a4e9b83c345ec8230933bd0dca16d1c3c11db0edd4fd9c1a90683240b49" +checksum = "55df531ff51161b3c6212e0ee2166b370f150254bf4448a8a15c3d26fec87958" dependencies = [ "egui", "egui_commonmark_backend", @@ -3380,9 +3312,9 @@ dependencies = [ [[package]] name = "egui_commonmark_backend" -version = "0.22.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3cff846279556f57af8ea606f2e4ceaf83e60b81db014c126dfb926fa06c75b" +checksum = "fe04399ca5a2196965833a2918e50400449721fd9350e31ae7d84d6690859437" dependencies = [ "egui", "egui_extras", @@ -3391,9 +3323,9 @@ dependencies = [ [[package]] name = "egui_dnd" -version = "0.14.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f535b8df7ca89f781954feaa505899c8955b550074ecafcaa3c040f67aec3d46" +checksum = "51a348b3fdbc048c4241aaa2865255e1fdebbc0099324ded8c5b534e598e600c" dependencies = [ "egui", "egui_animation", @@ -3403,8 +3335,9 @@ dependencies = [ [[package]] name = "egui_extras" -version = "0.33.3" -source = "git+https://github.com/emilk/egui.git?branch=main#fd257b2e95972f2bfe08cbde710f248076a08ee6" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8844ef47969c00cc186e06e9721ac952758c0e5cd5f579468037f19c7f49566d" dependencies = [ "ahash", "egui", @@ -3420,8 +3353,9 @@ dependencies = [ [[package]] name = "egui_glow" -version = "0.33.3" -source = "git+https://github.com/emilk/egui.git?branch=main#fd257b2e95972f2bfe08cbde710f248076a08ee6" +version = "0.34.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c5dea15932abd0a953fefc8d8490002204808a2e087147aac48608eea9ef79" dependencies = [ "bytemuck", "egui", @@ -3436,8 +3370,9 @@ dependencies = [ [[package]] name = "egui_kittest" -version = "0.33.3" -source = "git+https://github.com/emilk/egui.git?branch=main#fd257b2e95972f2bfe08cbde710f248076a08ee6" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1077ec995dbc754f22afcca9bbab1329071737d749a953e7b430d9b825d40c32" dependencies = [ "dify", "eframe", @@ -3449,15 +3384,15 @@ dependencies = [ "pollster", "serde", "tempfile", - "toml 0.8.23", + "toml", "wgpu", ] [[package]] name = "egui_plot" -version = "0.34.1" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67fc9b427a837264e55381a5cade6e28fe83ac5b165a61b9c888548c732a9c95" +checksum = "d7bd66213736bf9a9a53dc4888570b9194fc0db906507517a7fcc787e888ac47" dependencies = [ "ahash", "egui", @@ -3466,9 +3401,9 @@ dependencies = [ [[package]] name = "egui_table" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f2cf21af68301c187bfd9c89a35f13bf2cbbcc78587f6d7ba3c5b36259337ba" +checksum = "8512decdd471a2b6106d0b42cc0662f0e94b0ca8f21bc1b0075f455f58901010" dependencies = [ "egui", "serde", @@ -3477,9 +3412,9 @@ dependencies = [ [[package]] name = "egui_tiles" -version = "0.14.1" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef184e589f0a80560bd3b63017634642d1ba112a8a8d9b29341f7cafd04601f" +checksum = "08e570b77f6cce3292eba4aee9b9c08cf11dfc68430f4dc9613d939628498647" dependencies = [ "ahash", "egui", @@ -3490,9 +3425,9 @@ dependencies = [ [[package]] name = "ehttp" -version = "0.6.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04499d3c719edecfad5c9b46031726c8540905d73be6d7e4f9788c4a298da908" +checksum 
= "b2f1b93eb2e039aaff63ce07cca59bd1dca02f2ce30075a17b619d2c42f56efc" dependencies = [ "async-channel", "document-features", @@ -3500,7 +3435,7 @@ dependencies = [ "js-sys", "serde", "serde_json", - "ureq", + "ureq 3.3.0", "wasm-bindgen", "wasm-bindgen-futures", "wasm-streams", @@ -3515,8 +3450,9 @@ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "emath" -version = "0.33.3" -source = "git+https://github.com/emilk/egui.git?branch=main#fd257b2e95972f2bfe08cbde710f248076a08ee6" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8490360fc83eb3a2b20aba120457f7d91c3042d7c628e4f90b43a97b6adf1255" dependencies = [ "bytemuck", "serde", @@ -3561,7 +3497,7 @@ checksum = "f282cfdfe92516eb26c2af8589c274c7c17681f5ecc03c18255fe741c6aa64eb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -3582,7 +3518,7 @@ checksum = "67c78a4d8fdf9953a5c9d458f9efe940fd97a0cab0941c075a813ac594733827" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -3593,7 +3529,7 @@ checksum = "2f9ed6b3789237c8a0c1c505af1c7eb2c560df6186f01b098c3a1064ea532f38" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -3614,23 +3550,23 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "env_filter" -version = "0.1.3" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +checksum = "7a1c3cc8e57274ec99de65301228b537f1e4eedc1b8e0f9411c6caac8ae7308f" dependencies = [ "log", ] [[package]] name = "env_logger" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +checksum = 
"b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d" dependencies = [ "anstream", "anstyle", @@ -3641,14 +3577,16 @@ dependencies = [ [[package]] name = "epaint" -version = "0.33.3" -source = "git+https://github.com/emilk/egui.git?branch=main#fd257b2e95972f2bfe08cbde710f248076a08ee6" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d76f9fdbf72eaf0dc3198d90e7bff267b45866f660f8cf49caeb9dcab93342e0" dependencies = [ "ahash", "bytemuck", "ecolor", "emath", "epaint_default_fonts", + "font-types", "log", "nohash-hasher", "parking_lot", @@ -3657,13 +3595,15 @@ dependencies = [ "self_cell", "serde", "skrifa", + "smallvec", "vello_cpu", ] [[package]] name = "epaint_default_fonts" -version = "0.33.3" -source = "git+https://github.com/emilk/egui.git?branch=main#fd257b2e95972f2bfe08cbde710f248076a08ee6" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a1a65f42685d25419a67fd3175e11dfecdd268b0478aaba76785b0a896884e0" [[package]] name = "equivalent" @@ -3671,17 +3611,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" -[[package]] -name = "erased-serde" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "259d404d09818dec19332e31d94558aeb442fea04c817006456c24b5460bbd4b" -dependencies = [ - "serde", - "serde_core", - "typeid", -] - [[package]] name = "errno" version = "0.3.14" @@ -3736,7 +3665,7 @@ dependencies = [ [[package]] name = "extend_viewer_ui" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "mimalloc", "rerun", @@ -3744,9 +3673,9 @@ dependencies = [ [[package]] name = "fallible-iterator" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" 
+checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" [[package]] name = "fast-float2" @@ -3786,9 +3715,9 @@ dependencies = [ [[package]] name = "ffmpeg-sidecar" -version = "2.2.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "869812b38b887a5fb7291e5551677476c4696a395ae6676955f42f01e6a5d1c1" +checksum = "f076483fb6efcf02e4abcf3e9388d30123346f85b9a96e8fe834718951b945ed" dependencies = [ "anyhow", ] @@ -3801,9 +3730,9 @@ checksum = "0399f9d26e5191ce32c498bebd31e7a3ceabc2745f0ac54af3f335126c3f24b3" [[package]] name = "fixed" -version = "1.29.0" +version = "1.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707070ccf8c4173548210893a0186e29c266901b71ed20cd9e2ca0193dfe95c3" +checksum = "c566da967934c6c7ee0458a9773de9b2a685bd2ce26a3b28ddfc740e640182f5" dependencies = [ "az", "bytemuck", @@ -3832,11 +3761,11 @@ checksum = "c1671b620ba6e60c11c62b0ea5fec4f8621991e7b1229fa13c010a2cd04e4342" [[package]] name = "flatbuffers" -version = "25.9.23" +version = "25.12.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" +checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "rustc_version", ] @@ -3856,6 +3785,12 @@ name = "float-cmp" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" + +[[package]] +name = "float-cmp" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8" dependencies = [ "num-traits", ] @@ -3880,11 +3815,12 @@ checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" [[package]] name = "font-types" -version = "0.10.1" +version = "0.11.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "39a654f404bbcbd48ea58c617c2993ee91d1cb63727a37bf2323a4edeed1b8c5" +checksum = "b1e4d2d0cf79d38430cc9dc9aadec84774bff2e1ba30ae2bf6c16cfce9385a23" dependencies = [ "bytemuck", + "serde", ] [[package]] @@ -3905,7 +3841,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -3950,12 +3886,12 @@ dependencies = [ [[package]] name = "fsst" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "295735676bb13caa4d42e0ec4a9f683f2c879570b22925128288bf363c703a8b" +checksum = "7ae4126c38f86d37d5479295c135a1b81688b6c799d6c39d44b1855f9a0e712c" dependencies = [ "arrow-array", - "rand 0.9.2", + "rand 0.9.3", ] [[package]] @@ -3975,9 +3911,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -3990,9 +3926,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -4000,15 +3936,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = 
"7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -4017,9 +3953,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-lite" @@ -4036,32 +3972,32 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -4071,7 +4007,6 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] @@ -4086,16 +4021,17 @@ dependencies = [ [[package]] name = "generator" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2" +checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" dependencies = [ "cc", "cfg-if", "libc", "log", "rustversion", - "windows 0.61.3", + "windows-link", + "windows-result", ] [[package]] @@ -4110,9 +4046,9 @@ dependencies = [ [[package]] name = "geo-types" -version = "0.7.16" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62ddb1950450d67efee2bbc5e429c68d052a822de3aad010d28b351fbb705224" +checksum = "24f8647af4005fa11da47cd56252c6ef030be8fa97bdbf355e7dfb6348f0a82c" dependencies = [ "approx", "num-traits", @@ -4140,40 +4076,42 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", - "wasi 0.14.7+wasi-0.2.4", + "r-efi 5.3.0", + 
"wasip2", "wasm-bindgen", ] [[package]] -name = "gimli" -version = "0.26.2" +name = "getrandom" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ - "fallible-iterator", - "indexmap 1.9.3", - "stable_deref_trait", + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", ] [[package]] @@ -4181,6 +4119,11 @@ name = "gimli" version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" +dependencies = [ + "fallible-iterator", + "indexmap", + "stable_deref_trait", +] [[package]] name = "gl_generator" @@ -4195,9 +4138,9 @@ dependencies = [ [[package]] name = "glam" -version = "0.30.8" +version = "0.30.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e12d847aeb25f41be4c0ec9587d624e9cd631bc007a8fd7ce3f5851e064c6460" +checksum = "19fc433e8437a212d1b6f1e68c7824af3aed907da60afa994e7f542d18d12aa9" dependencies = [ "bytemuck", "serde_core", @@ -4224,9 +4167,9 @@ dependencies = [ [[package]] name = "glow" -version = "0.16.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5e5ea60d70410161c8bf5da3fdfeaa1c72ed2c15f8bbb9d19fe3a4fad085f08" +checksum = "29038e1c483364cc6bb3cf78feee1816002e127c331a1eec55a4d202b9e1adb5" dependencies = [ "js-sys", "slotmap", @@ -4258,7 +4201,7 @@ dependencies = [ "inflections", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -4279,7 +4222,7 @@ version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "12124de845cacfebedff80e877bb37b5b75c34c5a4c89e47e1cdd67fb6041325" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "cfg_aliases", "cgl", "dispatch2", @@ -4287,7 +4230,7 @@ dependencies = [ 
"glutin_glx_sys", "glutin_wgl_sys", "libloading 0.8.9", - "objc2 0.6.3", + "objc2 0.6.4", "objc2-app-kit 0.3.2", "objc2-core-foundation", "objc2-foundation 0.3.2", @@ -4339,35 +4282,18 @@ dependencies = [ "gl_generator", ] -[[package]] -name = "gpu-alloc" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbcd2dba93594b227a1f57ee09b8b9da8892c34d55aa332e034a228d0fe6a171" -dependencies = [ - "bitflags 2.9.4", - "gpu-alloc-types", -] - -[[package]] -name = "gpu-alloc-types" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98ff03b468aa837d70984d55f5d3f846f6ec31fe34bbb97c4f85219caeee1ca4" -dependencies = [ - "bitflags 2.9.4", -] - [[package]] name = "gpu-allocator" -version = "0.27.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c151a2a5ef800297b4e79efa4f4bec035c5f51d5ae587287c9b952bdf734cacd" +checksum = "51255ea7cfaadb6c5f1528d43e92a82acb2b96c43365989a28b2d44ee38f8795" dependencies = [ + "ash", + "hashbrown 0.16.1", "log", "presser", - "thiserror 1.0.69", - "windows 0.58.0", + "thiserror 2.0.18", + "windows", ] [[package]] @@ -4376,7 +4302,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b89c83349105e3732062a895becfc71a8f921bb71ecbbdd8ff99263e3b53a0ca" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "gpu-descriptor-types", "hashbrown 0.15.5", ] @@ -4387,12 +4313,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdf242682df893b86f33a73828fb09ca4b2d3bb6cc95249707fc684d27484b91" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", ] [[package]] name = "graph_lattice" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "clap", @@ -4412,7 +4338,7 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap 2.11.4", + "indexmap", "slab", "tokio", "tokio-util", @@ 
-4434,32 +4360,22 @@ dependencies = [ [[package]] name = "half" -version = "2.6.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "bytemuck", "cfg-if", "crunchy", "num-traits", + "zerocopy 0.8.27", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "allocator-api2", - "serde", -] [[package]] name = "hashbrown" @@ -4474,13 +4390,15 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" dependencies = [ "allocator-api2", "equivalent", "foldhash 0.2.0", + "serde", + "serde_core", ] [[package]] @@ -4491,9 +4409,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hello_egui_utils" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5d09c2c7f3aa61624b1bec320be9029f30e06769f92e39ac99a6d6e01024ae8" +checksum = "c34bfd8bff6f6df43b0b73ed7949a7aff0c98c2c1bd4c2f2771f5f2f6d98ced0" dependencies = [ "concat-idents", "egui", @@ -4534,13 +4452,22 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df" +[[package]] +name = "hmac" 
+version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + [[package]] name = "home" -version = "0.5.11" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4551,12 +4478,11 @@ checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -4755,7 +4681,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.62.2", + "windows-core", ] [[package]] @@ -4855,9 +4781,9 @@ dependencies = [ [[package]] name = "id-arena" -version = "2.2.1" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" dependencies = [ "rayon", ] @@ -4912,11 +4838,11 @@ checksum = "edcd27d72f2f071c64249075f42e205ff93c9a4c5f6c6da53e79ed9f9832c285" [[package]] name = "incremental_logging" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "clap", - "rand 0.9.2", + "rand 0.9.3", "rerun", ] @@ -4928,31 +4854,21 @@ checksum = "d9f1a0777d972970f204fdf8ef319f1f4f8459131636d7e3c96c5d59570d0fa6" [[package]] name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - -[[package]] -name = "indexmap" -version = "2.11.4" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "serde", "serde_core", ] [[package]] name = "indicatif" -version = "0.18.0" +version = "0.18.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a646d946d06bedbbc4cac4c218acf4bbf2d87757a784857025f4d447e4e1cd" +checksum = "25470f23803092da7d239834776d653104d551bc4d7eacaf31e6837854b8e9eb" dependencies = [ "console 0.16.1", "portable-atomic", @@ -4988,7 +4904,7 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f37dccff2791ab604f9babef0ba14fbe0be30bd368dc541e2b08d07c8aa908f3" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "inotify-sys", "libc", ] @@ -5004,9 +4920,9 @@ dependencies = [ [[package]] name = "insta" -version = "1.43.2" +version = "1.46.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fdb647ebde000f43b5b53f773c30cf9b0cb4300453208713fa38b2c70935a0" +checksum = "e82db8c87c7f1ccecb34ce0c24399b8a73081427f3c7c50a5d597925356115e4" dependencies = [ "console 0.15.11", "globset", @@ -5016,6 +4932,7 @@ dependencies = [ "regex", "serde", "similar", + "tempfile", "walkdir", ] @@ -5025,17 +4942,6 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" -[[package]] -name = "io-uring" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" 
-dependencies = [ - "bitflags 2.9.4", - "cfg-if", - "libc", -] - [[package]] name = "ipnet" version = "2.11.0" @@ -5123,9 +5029,9 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jiff" -version = "0.2.15" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -5133,20 +5039,20 @@ dependencies = [ "log", "portable-atomic", "portable-atomic-util", - "serde", + "serde_core", "wasm-bindgen", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] name = "jiff-static" -version = "0.2.15" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -5192,7 +5098,7 @@ version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "libc", ] @@ -5204,10 +5110,12 @@ checksum = "00810f1d8b74be64b13dbf3db89ac67740615d6c891f0e7b6179326533011a07" [[package]] name = "js-sys" -version = "0.3.77" +version = "0.3.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "2e04e2ef80ce82e13552136fabeef8a5ed1f985a96805761cbb9a2c34e7664d9" dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] @@ -5226,7 +5134,7 @@ dependencies = [ "nom 8.0.0", "num-traits", "ordered-float 5.1.0", - "rand 0.9.2", + "rand 0.9.3", "ryu", "serde", "serde_json", 
@@ -5234,15 +5142,16 @@ dependencies = [ [[package]] name = "jsonwebtoken" -version = "9.3.1" +version = "10.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" +checksum = "0529410abe238729a60b108898784df8984c87f6054c9c4fcacc47e4803c1ce1" dependencies = [ "base64 0.22.1", + "getrandom 0.2.17", "js-sys", - "ring", "serde", "serde_json", + "signature", ] [[package]] @@ -5264,13 +5173,12 @@ checksum = "e2db585e1d738fc771bf08a151420d3ed193d9d895a36df7f6f8a9456b911ddc" [[package]] name = "kittest" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01fd6dd2cce251a360101038acb9334e3a50cd38cd02fefddbf28aa975f043c8" +checksum = "90ceaa75eb0036a32b6b9833962eb18137449e9817e2e586006471925b727fd5" dependencies = [ "accesskit", "accesskit_consumer", - "parking_lot", ] [[package]] @@ -5305,9 +5213,9 @@ dependencies = [ [[package]] name = "kurbo" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce9729cc38c18d86123ab736fd2e7151763ba226ac2490ec092d1dd148825e32" +checksum = "7564e90fe3c0d5771e1f0bc95322b21baaeaa0d9213fa6a0b61c99f8b17b3bfb" dependencies = [ "arrayvec", "euclid", @@ -5316,9 +5224,9 @@ dependencies = [ [[package]] name = "lance" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71555813afa19d7eadfb8adf20609d0c62bbc39e18c66fddc2f7e8ee5d7a107f" +checksum = "45c38e7c7c448a77203b50c05f5bf308cadb3fe074e7dde22e4302206ea44d7a" dependencies = [ "arrow", "arrow-arith", @@ -5335,6 +5243,7 @@ dependencies = [ "byteorder", "bytes", "chrono", + "crossbeam-skiplist", "dashmap", "datafusion", "datafusion-expr", @@ -5362,16 +5271,18 @@ dependencies = [ "object_store", "permutation", "pin-project", - "prost 0.13.5", - "prost-types 0.13.5", - "rand 0.9.2", + "prost", + "prost-types", + "rand 
0.9.3", "roaring", + "semver", "serde", "serde_json", "snafu", "tantivy", "tokio", "tokio-stream", + "tokio-util", "tracing", "url", "uuid", @@ -5379,29 +5290,31 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40766c050b3295fe4be49d79f80fe217a0b5f8626561bf077a864817ba72e9f3" +checksum = "174bae71821e5535a594f9ecd64b07e6ffe729498a5d27a0ca926b4ff2714664" dependencies = [ "arrow-array", "arrow-buffer", "arrow-cast", "arrow-data", + "arrow-ord", "arrow-schema", "arrow-select", "bytes", - "getrandom 0.2.16", + "futures", + "getrandom 0.2.17", "half", "jsonb", "num-traits", - "rand 0.9.2", + "rand 0.9.3", ] [[package]] name = "lance-bitpacking" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e50c8911ea56acf2294d88259cc7208649502e2582b85e8bfc9ec8b958b1957" +checksum = "4494187b4244fa56c8cf911d7358e5322fa1cf7d8f6a213b3155a4139eb556b1" dependencies = [ "arrayref", "paste", @@ -5410,9 +5323,9 @@ dependencies = [ [[package]] name = "lance-core" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "621efcb8ef8ecd4790f91db3bef8a3882692ba42a30cbeb4ad5fcb0ab5bdd2b4" +checksum = "ce0a5d4427c42f7d9302771bb2aa474b09316a8919633abad0030c4d58013e89" dependencies = [ "arrow-array", "arrow-buffer", @@ -5425,6 +5338,7 @@ dependencies = [ "datafusion-sql", "deepsize", "futures", + "itertools 0.13.0", "lance-arrow", "libc", "log", @@ -5433,8 +5347,8 @@ dependencies = [ "num_cpus", "object_store", "pin-project", - "prost 0.13.5", - "rand 0.9.2", + "prost", + "rand 0.9.3", "roaring", "serde_json", "snafu", @@ -5448,9 +5362,9 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"63f138b828abf6a571ad4fab179532d3a383275df9ee5005ad34d86b22bb9fd7" +checksum = "61c6151ee46a35886ac6c804f870de2992c36de4198da1c64d806529f246d2d7" dependencies = [ "arrow", "arrow-array", @@ -5471,7 +5385,8 @@ dependencies = [ "lance-datagen", "log", "pin-project", - "prost 0.13.5", + "prost", + "prost-build", "snafu", "tokio", "tracing", @@ -5479,9 +5394,9 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24bff460ae73d5a722f993afa0309d3c14848b0695a2021c0e1bd70de134f24a" +checksum = "3d130ec0426173daeb14a6032d18a1eb8c1f8858dca3f42735f0d2c7dde3c47f" dependencies = [ "arrow", "arrow-array", @@ -5491,16 +5406,17 @@ dependencies = [ "futures", "half", "hex", - "rand 0.9.2", + "rand 0.9.3", + "rand_distr 0.5.1", "rand_xoshiro", "random_word", ] [[package]] name = "lance-encoding" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3cdeae2eb2ec5f4dc20c65332cb68d54bb81cde9ee79fd5b92fbacf1c554888" +checksum = "965281449dc6b47d4669e11572f7b2a5e0e6b206dc1f21bce0f3b5f7dc533ece" dependencies = [ "arrow-arith", "arrow-array", @@ -5523,10 +5439,10 @@ dependencies = [ "log", "lz4", "num-traits", - "prost 0.13.5", - "prost-build 0.13.5", - "prost-types 0.13.5", - "rand 0.9.2", + "prost", + "prost-build", + "prost-types", + "rand 0.9.3", "snafu", "strum", "tokio", @@ -5537,9 +5453,9 @@ dependencies = [ [[package]] name = "lance-file" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3baf6dcef3fa2dcdcad17a22e075a7040dfada4b03179aaf54e0fe86c44e88eb" +checksum = "4c8230f9a2e63170eef3d4f8f7576593a73ff8a2f8d5a75400e511e74d4d308f" dependencies = [ "arrow-arith", "arrow-array", @@ -5561,9 +5477,9 @@ dependencies = [ "log", "num-traits", "object_store", - "prost 0.13.5", - "prost-build 0.13.5", - "prost-types 0.13.5", + "prost", + 
"prost-build", + "prost-types", "snafu", "tokio", "tracing", @@ -5571,9 +5487,9 @@ dependencies = [ [[package]] name = "lance-index" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b77e902c163f86e3721e93968c31f72f398bdf50b86bbf8603202413d45e6c6c" +checksum = "950a0bc24e01044fc86260c54e810f2d051660d89caa727234f09b252446c425" dependencies = [ "arrow", "arrow-arith", @@ -5614,15 +5530,17 @@ dependencies = [ "ndarray", "num-traits", "object_store", - "prost 0.13.5", - "prost-build 0.13.5", - "prost-types 0.13.5", - "rand 0.9.2", + "prost", + "prost-build", + "prost-types", + "rand 0.9.3", "rand_distr 0.5.1", + "rangemap", "rayon", "roaring", "serde", "serde_json", + "smallvec", "snafu", "tantivy", "tempfile", @@ -5634,9 +5552,9 @@ dependencies = [ [[package]] name = "lance-io" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7efd718983fb15257aa58496eb070d6296b78ef4b738a3a0bd7519e2fe918105" +checksum = "023188a98822bc87c87726893fbd6c1deea2dccf5c4214051b789b6047bdc2a4" dependencies = [ "arrow", "arrow-arith", @@ -5653,6 +5571,7 @@ dependencies = [ "chrono", "deepsize", "futures", + "http", "lance-arrow", "lance-core", "lance-namespace", @@ -5660,11 +5579,11 @@ dependencies = [ "object_store", "path_abs", "pin-project", - "prost 0.13.5", - "rand 0.9.2", + "prost", + "rand 0.9.3", "serde", - "shellexpand", "snafu", + "tempfile", "tokio", "tracing", "url", @@ -5672,9 +5591,9 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5209810d4b1da1db73b9058a20c6f4dca098b6af75c565793f27caaf3b1b5cec" +checksum = "1bc4b0c9892f985dac4d27c058d21b6adfda5c73a7aaf697a92f300f36995ded" dependencies = [ "arrow-array", "arrow-buffer", @@ -5685,14 +5604,14 @@ dependencies = [ "lance-arrow", "lance-core", "num-traits", - "rand 0.9.2", + "rand 
0.9.3", ] [[package]] name = "lance-namespace" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4af29a44105e773cb7f466c44ed73e71d2d3636a6d7803c55d5452de94d11d54" +checksum = "9c8aab88d6c91b045ac3d3967c73b547d9fed5559a91a907f3f4586d0f2d6cc6" dependencies = [ "arrow", "async-trait", @@ -5704,9 +5623,9 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.0.18" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea349999bcda4eea53fc05d334b3775ec314761e6a706555c777d7a29b18d19" +checksum = "df9008f9825066088178c10599130c8bb0b9c79a39a479e8c51201620c43864a" dependencies = [ "reqwest", "serde", @@ -5717,9 +5636,9 @@ dependencies = [ [[package]] name = "lance-table" -version = "0.38.3" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e72b3d7617368b43849b309a8ee1357154f00757a7e99eaed775a9832b0004a2" +checksum = "7511e4e951d3938316b16f1a64360cfae0b44d22d9c5aca971ed7b5bc83caea7" dependencies = [ "arrow", "arrow-array", @@ -5738,12 +5657,13 @@ dependencies = [ "lance-io", "log", "object_store", - "prost 0.13.5", - "prost-build 0.13.5", - "prost-types 0.13.5", - "rand 0.9.2", + "prost", + "prost-build", + "prost-types", + "rand 0.9.3", "rangemap", "roaring", + "semver", "serde", "serde_json", "snafu", @@ -5765,9 +5685,15 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "lenses" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "arrow", @@ -5837,17 +5763,11 @@ dependencies = [ "lexical-util", ] -[[package]] -name = 
"libbz2-rs-sys" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" - [[package]] name = "libc" -version = "0.2.176" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" [[package]] name = "libloading" @@ -5866,7 +5786,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" dependencies = [ "cfg-if", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -5891,7 +5811,7 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1580801010e535496706ba011c15f8532df6b42297d2e471fec38ceadd8c0638" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "libc", "redox_syscall 0.5.18", ] @@ -5925,9 +5845,9 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" @@ -5937,9 +5857,9 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" [[package]] name = "litrs" -version = "0.4.2" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5e54036fe321fd421e10d732f155734c4e4afd610dd556d9a82833ab3ee0bed" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" [[package]] name = "lock_api" @@ -5953,9 +5873,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "log-once" @@ -5968,20 +5888,20 @@ dependencies = [ [[package]] name = "log_benchmark" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "clap", "emath", "glam", - "rand 0.9.2", + "rand 0.9.3", "re_tracing", "rerun", ] [[package]] name = "log_file" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "clap", @@ -6010,7 +5930,7 @@ dependencies = [ "quote", "regex-syntax", "rustc_version", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -6050,7 +5970,7 @@ version = "0.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1dc47f592c06f33f8e3aea9591776ec7c9f9e4124778ff8a3c3b87159f7e593" dependencies = [ - "hashbrown 0.16.0", + "hashbrown 0.16.1", ] [[package]] @@ -6080,31 +6000,26 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" -dependencies = [ - "twox-hash", -] +checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" [[package]] name = "lz4_flex" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" +checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" dependencies = [ "twox-hash", ] [[package]] -name = "lzma-sys" -version = "0.1.20" +name = "lz4_flex" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +checksum = 
"db9a0d582c2874f68138a16ce1867e0ffde6c0bb0a0df85e1f36d04146db488a" dependencies = [ - "cc", - "libc", - "pkg-config", + "twox-hash", ] [[package]] @@ -6119,19 +6034,10 @@ dependencies = [ ] [[package]] -name = "malloc_buf" -version = "0.0.6" +name = "matchers" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" -dependencies = [ - "libc", -] - -[[package]] -name = "matchers" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" dependencies = [ "regex-automata", ] @@ -6157,9 +6063,9 @@ dependencies = [ [[package]] name = "mcap" -version = "0.23.3" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "900ec4fb152ab00bd02fe030787593dfd661b7711f7917ac732b85c22c773387" +checksum = "43908ab970f3a880b02834055a1e04221a3056f442a65ae9111f63e550e7daa5" dependencies = [ "bimap", "binrw", @@ -6211,9 +6117,9 @@ dependencies = [ [[package]] name = "memmap2" -version = "0.9.8" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" dependencies = [ "libc", ] @@ -6237,21 +6143,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "metal" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00c15a6f673ff72ddcc22394663290f870fb224c1bfce55734a75c414150e605" -dependencies = [ - "bitflags 2.9.4", - "block", - "core-graphics-types 0.2.0", - "foreign-types", - "log", - "objc", - "paste", -] - [[package]] name = "miette" version = "5.10.0" @@ -6272,7 +6163,7 @@ checksum = 
"49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -6323,7 +6214,7 @@ dependencies = [ [[package]] name = "minimal" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "rerun", ] @@ -6336,7 +6227,7 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "minimal_options" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "clap", @@ -6346,7 +6237,7 @@ dependencies = [ [[package]] name = "minimal_serve" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "rerun", ] @@ -6375,7 +6266,7 @@ checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", "log", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "windows-sys 0.59.0", ] @@ -6435,27 +6326,27 @@ checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" [[package]] name = "naga" -version = "27.0.0" +version = "29.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12b2e757b11b47345d44e7760e45458339bc490463d9548cd8651c53ae523153" +checksum = "aa2630921705b9b01dcdd0b6864b9562ca3c1951eecd0f0c4f5f04f61e412647" dependencies = [ "arrayvec", "bit-set", - "bitflags 2.9.4", + "bitflags 2.11.0", "cfg-if", "cfg_aliases", "codespan-reporting", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "hexf-parse", - "indexmap 2.11.4", + "indexmap", "libm", "log", "num-traits", "once_cell", "rustc-hash 1.1.0", "spirv", - "thiserror 2.0.17", + "thiserror 2.0.18", "unicode-ident", ] @@ -6495,7 +6386,7 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3f42e7bbe13d351b6bead8286a43aac9534b82bd3cc43e47037f012ebfd62d4" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "jni-sys", "log", "ndk-sys", @@ -6531,7 +6422,7 @@ version = "0.30.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "cfg-if", "cfg_aliases", "libc", @@ -6569,7 +6460,7 @@ version = "8.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d3d07927151ff8575b7087f245456e549fea62edf0ec4e565a5ee50c8402bc3" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "crossbeam-channel", "fsevent-sys", "inotify", @@ -6590,9 +6481,9 @@ checksum = "5e0826a989adedc2a244799e823aece04662b66609d96af8dff7ac6df9a8925d" [[package]] name = "ntapi" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" dependencies = [ "winapi", ] @@ -6606,20 +6497,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - [[package]] name = "num-bigint" version = "0.4.6" @@ -6653,7 +6530,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -6665,17 +6542,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - [[package]] name = "num-rational" version = "0.4.2" @@ -6727,14 +6593,14 @@ dependencies = [ "proc-macro-crate", "proc-macro2", 
"quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "numpy" -version = "0.25.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29f1dee9aa8d3f6f8e8b9af3803006101bb3653866ef056d530d53ae68587191" +checksum = "9b2dba356160b54f5371b550575b78130a54718b4c6e46b3f33a6da74a27e78b" dependencies = [ "libc", "ndarray", @@ -6746,15 +6612,6 @@ dependencies = [ "rustc-hash 2.1.1", ] -[[package]] -name = "objc" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" -dependencies = [ - "malloc_buf", -] - [[package]] name = "objc-sys" version = "0.3.5" @@ -6773,9 +6630,9 @@ dependencies = [ [[package]] name = "objc2" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c2599ce0ec54857b29ce62166b0ed9b4f6f1a70ccc9a71165b6154caca8c05" +checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f" dependencies = [ "objc2-encode", ] @@ -6786,12 +6643,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4e89ad9e3d7d297152b17d39ed92cd50ca8063a89a9fa569046d41568891eff" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "block2 0.5.1", "libc", "objc2 0.5.2", - "objc2-core-data 0.2.2", - "objc2-core-image 0.2.2", + "objc2-core-data", + "objc2-core-image", "objc2-foundation 0.2.2", "objc2-quartz-core 0.2.2", ] @@ -6802,19 +6659,12 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d49e936b501e5c5bf01fda3a9452ff86dc3ea98ad5f283e1455153142d97518c" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "block2 0.6.2", - "libc", - "objc2 0.6.3", - "objc2-cloud-kit 0.3.2", - "objc2-core-data 0.3.2", + "objc2 0.6.4", "objc2-core-foundation", "objc2-core-graphics", - "objc2-core-image 0.3.2", - "objc2-core-text", - "objc2-core-video", 
"objc2-foundation 0.3.2", - "objc2-quartz-core 0.3.2", ] [[package]] @@ -6823,24 +6673,13 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74dd3b56391c7a0596a295029734d3c1c5e7e510a4cb30245f8221ccea96b009" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "block2 0.5.1", "objc2 0.5.2", "objc2-core-location", "objc2-foundation 0.2.2", ] -[[package]] -name = "objc2-cloud-kit" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73ad74d880bb43877038da939b7427bba67e9dd42004a18b809ba7d87cee241c" -dependencies = [ - "bitflags 2.9.4", - "objc2 0.6.3", - "objc2-foundation 0.3.2", -] - [[package]] name = "objc2-contacts" version = "0.2.2" @@ -6858,32 +6697,21 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "617fbf49e071c178c0b24c080767db52958f716d9eabdf0890523aeae54773ef" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "block2 0.5.1", "objc2 0.5.2", "objc2-foundation 0.2.2", ] -[[package]] -name = "objc2-core-data" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b402a653efbb5e82ce4df10683b6b28027616a2715e90009947d50b8dd298fa" -dependencies = [ - "bitflags 2.9.4", - "objc2 0.6.3", - "objc2-foundation 0.3.2", -] - [[package]] name = "objc2-core-foundation" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "dispatch2", - "objc2 0.6.3", + "objc2 0.6.4", ] [[package]] @@ -6892,9 +6720,9 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e022c9d066895efa1345f8e33e584b9f958da2fd4cd116792e15e07e4720a807" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "dispatch2", - "objc2 0.6.3", + "objc2 0.6.4", "objc2-core-foundation", "objc2-io-surface", ] @@ 
-6908,17 +6736,7 @@ dependencies = [ "block2 0.5.1", "objc2 0.5.2", "objc2-foundation 0.2.2", - "objc2-metal", -] - -[[package]] -name = "objc2-core-image" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d563b38d2b97209f8e861173de434bd0214cf020e3423a52624cd1d989f006" -dependencies = [ - "objc2 0.6.3", - "objc2-foundation 0.3.2", + "objc2-metal 0.2.2", ] [[package]] @@ -6933,31 +6751,6 @@ dependencies = [ "objc2-foundation 0.2.2", ] -[[package]] -name = "objc2-core-text" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde0dfb48d25d2b4862161a4d5fcc0e3c24367869ad306b0c9ec0073bfed92d" -dependencies = [ - "bitflags 2.9.4", - "objc2 0.6.3", - "objc2-core-foundation", - "objc2-core-graphics", -] - -[[package]] -name = "objc2-core-video" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d425caf1df73233f29fd8a5c3e5edbc30d2d4307870f802d18f00d83dc5141a6" -dependencies = [ - "bitflags 2.9.4", - "objc2 0.6.3", - "objc2-core-foundation", - "objc2-core-graphics", - "objc2-io-surface", -] - [[package]] name = "objc2-encode" version = "4.1.0" @@ -6970,7 +6763,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ee638a5da3799329310ad4cfa62fbf045d5f56e3ef5ba4149e7452dcf89d5a8" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "block2 0.5.1", "dispatch", "libc", @@ -6983,8 +6776,19 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272" dependencies = [ - "bitflags 2.9.4", - "objc2 0.6.3", + "bitflags 2.11.0", + "block2 0.6.2", + "objc2 0.6.4", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" 
+dependencies = [ + "libc", "objc2-core-foundation", ] @@ -6994,8 +6798,8 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "180788110936d59bab6bd83b6060ffdfffb3b922ba1396b312ae795e1de9d81d" dependencies = [ - "bitflags 2.9.4", - "objc2 0.6.3", + "bitflags 2.11.0", + "objc2 0.6.4", "objc2-core-foundation", ] @@ -7017,23 +6821,35 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0cba1276f6023976a406a14ffa85e1fdd19df6b0f737b063b95f6c8c7aadd6" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "block2 0.5.1", "objc2 0.5.2", "objc2-foundation 0.2.2", ] +[[package]] +name = "objc2-metal" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0125f776a10d00af4152d74616409f0d4a2053a6f57fa5b7d6aa2854ac04794" +dependencies = [ + "bitflags 2.11.0", + "block2 0.6.2", + "objc2 0.6.4", + "objc2-foundation 0.3.2", +] + [[package]] name = "objc2-quartz-core" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e42bee7bff906b14b167da2bac5efe6b6a07e6f7c0a21a7308d40c960242dc7a" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "block2 0.5.1", "objc2 0.5.2", "objc2-foundation 0.2.2", - "objc2-metal", + "objc2-metal 0.2.2", ] [[package]] @@ -7042,9 +6858,11 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96c1358452b371bf9f104e21ec536d37a650eb10f7ee379fff67d2e08d537f1f" dependencies = [ - "bitflags 2.9.4", - "objc2 0.6.3", + "bitflags 2.11.0", + "objc2 0.6.4", + "objc2-core-foundation", "objc2-foundation 0.3.2", + "objc2-metal 0.3.2", ] [[package]] @@ -7063,12 +6881,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8bb46798b20cd6b91cbd113524c490f1686f4c4e8f49502431415f3512e2b6f" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "block2 0.5.1", "objc2 0.5.2", - "objc2-cloud-kit 
0.2.2", - "objc2-core-data 0.2.2", - "objc2-core-image 0.2.2", + "objc2-cloud-kit", + "objc2-core-data", + "objc2-core-image", "objc2-core-location", "objc2-foundation 0.2.2", "objc2-link-presentation", @@ -7078,6 +6896,18 @@ dependencies = [ "objc2-user-notifications", ] +[[package]] +name = "objc2-ui-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d87d638e33c06f577498cbcc50491496a3ed4246998a7fbba7ccb98b1e7eab22" +dependencies = [ + "bitflags 2.11.0", + "objc2 0.6.4", + "objc2-core-foundation", + "objc2-foundation 0.3.2", +] + [[package]] name = "objc2-uniform-type-identifiers" version = "0.2.2" @@ -7095,7 +6925,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76cfcbf642358e8689af64cee815d139339f3ed8ad05103ed5eaf73db8d84cb3" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "block2 0.5.1", "objc2 0.5.2", "objc2-core-location", @@ -7126,7 +6956,7 @@ dependencies = [ "itertools 0.14.0", "parking_lot", "percent-encoding", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "url", @@ -7137,13 +6967,13 @@ dependencies = [ [[package]] name = "objectron" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "clap", "glam", - "prost 0.14.1", - "prost-build 0.14.1", + "prost", + "prost-build", "protoc-prebuilt", "re_build_tools", "rerun", @@ -7200,7 +7030,7 @@ dependencies = [ "futures-sink", "js-sys", "pin-project-lite", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", ] @@ -7241,9 +7071,9 @@ dependencies = [ "opentelemetry-http", "opentelemetry-proto", "opentelemetry_sdk", - "prost 0.14.1", + "prost", "reqwest", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tonic", "tracing", @@ -7257,7 +7087,7 @@ checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f" dependencies = [ "opentelemetry", "opentelemetry_sdk", - "prost 0.14.1", + "prost", "tonic", "tonic-prost", ] @@ -7273,8 +7103,8 
@@ dependencies = [ "futures-util", "opentelemetry", "percent-encoding", - "rand 0.9.2", - "thiserror 2.0.17", + "rand 0.9.3", + "thiserror 2.0.18", "tokio", "tokio-stream", ] @@ -7303,15 +7133,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "ordered-float" -version = "4.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" -dependencies = [ - "num-traits", -] - [[package]] name = "ordered-float" version = "5.1.0" @@ -7374,14 +7195,14 @@ dependencies = [ "petgraph 0.6.5", "redox_syscall 0.5.18", "smallvec", - "windows-link 0.2.1", + "windows-link", ] [[package]] name = "parquet" -version = "56.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b56b41d1bd36aae415e42f91cae70ee75cf6cba74416b14dce3e958d5990ec" +checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ "ahash", "arrow-array", @@ -7398,13 +7219,13 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.15.5", - "lz4_flex 0.11.5", - "num", + "hashbrown 0.16.1", + "lz4_flex 0.12.1", "num-bigint", + "num-integer", + "num-traits", "object_store", "paste", - "ring", "seq-macro", "simdutf8", "snap", @@ -7467,13 +7288,13 @@ checksum = "132dca9b868d927b35b5dd728167b2dee150eb1ad686008fc71ccb298b776fca" [[package]] name = "peniko" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3c76095c9a636173600478e0373218c7b955335048c2bcd12dc6a79657649d8" +checksum = "9a2b6aadb221872732e87d465213e9be5af2849b0e8cc5300a8ba98fffa2e00a" dependencies = [ "bytemuck", "color", - "kurbo 0.12.0", + "kurbo 0.13.0", "linebender_resource_handle", "smallvec", ] @@ -7520,7 +7341,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -7540,28 +7361,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset 0.4.2", - "indexmap 2.11.4", -] - -[[package]] -name = "petgraph" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" -dependencies = [ - "fixedbitset 0.5.7", - "indexmap 2.11.4", + "indexmap", ] [[package]] name = "petgraph" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset 0.5.7", "hashbrown 0.15.5", - "indexmap 2.11.4", + "indexmap", "serde", ] @@ -7571,7 +7382,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ - "phf_macros", + "phf_macros 0.11.3", "phf_shared 0.11.3", ] @@ -7584,6 +7395,17 @@ dependencies = [ "phf_shared 0.12.1", ] +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros 0.13.1", + "phf_shared 0.13.1", + "serde", +] + [[package]] name = "phf_generator" version = "0.11.3" @@ -7594,20 +7416,43 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared 0.13.1", +] + [[package]] name = "phf_macros" version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" dependencies = [ - 
"phf_generator", + "phf_generator 0.11.3", "phf_shared 0.11.3", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", "unicase", ] +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator 0.13.1", + "phf_shared 0.13.1", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "phf_shared" version = "0.11.3" @@ -7627,6 +7472,15 @@ dependencies = [ "siphasher", ] +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + [[package]] name = "pico-args" version = "0.5.0" @@ -7650,14 +7504,14 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "pin-project-lite" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "pin-utils" @@ -7684,11 +7538,11 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "plot_dashboard_stress" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "clap", - "rand 0.9.2", + "rand 0.9.3", "rand_distr 0.5.1", "re_log", "rerun", @@ -7729,7 +7583,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe55bbee2b70d1c1e58d8340eda9a80c5ce11fb9b1bc10b5fc1575c490d38fa9" dependencies = [ "byteorder", - "indexmap 2.11.4", + "indexmap", "peg", ] @@ -7769,7 +7623,7 @@ dependencies = [ "concurrent-queue", "hermit-abi", "pin-project-lite", - 
"rustix 1.1.2", + "rustix 1.1.4", "windows-sys 0.61.2", ] @@ -7781,9 +7635,9 @@ checksum = "2f3a9f18d041e6d0e102a0a46750538147e5e8992d3b4873aaafee2520b00ce3" [[package]] name = "portable-atomic" -version = "1.11.1" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" @@ -7831,7 +7685,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -7840,14 +7694,14 @@ version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" dependencies = [ - "toml_edit 0.23.6", + "toml_edit", ] [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] @@ -7869,7 +7723,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52717f9a02b6965224f95ca2a81e2e0c5c43baacd28ca057577988930b6c3d5b" dependencies = [ "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -7892,122 +7746,69 @@ checksum = "9adf1691c04c0a5ff46ff8f262b58beb07b0dbb61f96f9f54f6cbd82106ed87f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", -] - -[[package]] -name = "prost" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" -dependencies = [ - "bytes", - "prost-derive 0.13.5", + "syn 2.0.117", ] 
[[package]] name = "prost" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", - "prost-derive 0.14.1", -] - -[[package]] -name = "prost-build" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" -dependencies = [ - "heck", - "itertools 0.14.0", - "log", - "multimap", - "once_cell", - "petgraph 0.7.1", - "prettyplease", - "prost 0.13.5", - "prost-types 0.13.5", - "regex", - "syn 2.0.106", - "tempfile", + "prost-derive", ] [[package]] name = "prost-build" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac6c3320f9abac597dcbc668774ef006702672474aad53c6d596b62e487b40b1" +checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", "itertools 0.14.0", "log", "multimap", - "once_cell", - "petgraph 0.7.1", + "petgraph 0.8.3", "prettyplease", - "prost 0.14.1", - "prost-types 0.14.1", + "prost", + "prost-types", "regex", - "syn 2.0.106", + "syn 2.0.117", "tempfile", ] [[package]] name = "prost-derive" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" -dependencies = [ - "anyhow", - "itertools 0.14.0", - "proc-macro2", - "quote", - "syn 2.0.106", -] - -[[package]] -name = "prost-derive" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools 0.14.0", "proc-macro2", 
"quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "prost-reflect" -version = "0.16.2" +version = "0.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89a3ac73ec9a9118131a4594c9d336631a07852220a1d0ae03ee36b04503a063" +checksum = "b89455ef41ed200cafc47c76c552ee7792370ac420497e551f16123a9135f76e" dependencies = [ "logos", - "prost 0.14.1", - "prost-types 0.14.1", + "prost", + "prost-types", ] [[package]] name = "prost-types" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ - "prost 0.13.5", -] - -[[package]] -name = "prost-types" -version = "0.14.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" -dependencies = [ - "prost 0.14.1", + "prost", ] [[package]] @@ -8016,19 +7817,10 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d85d4641fe3b8c6e853dfd09fe35379bc6b6e66bd692ac29ed4f7087de69ed5" dependencies = [ - "ureq", + "ureq 2.12.1", "zip 0.6.6", ] -[[package]] -name = "psm" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" -dependencies = [ - "cc", -] - [[package]] name = "puffin" version = "0.19.1" @@ -8040,7 +7832,7 @@ dependencies = [ "byteorder", "cfg-if", "itertools 0.10.5", - "lz4_flex 0.11.5", + "lz4_flex 0.11.6", "once_cell", "parking_lot", "serde", @@ -8065,16 +7857,16 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "memchr", "unicase", ] [[package]] name = 
"pyo3" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8970a78afe0628a3e3430376fc5fd76b6b45c4d43360ffd6cdd40bdde72b682a" +checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" dependencies = [ "chrono", "indoc", @@ -8090,19 +7882,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458eb0c55e7ece017adeba38f2248ff3ac615e53660d7c71a238d7d2a01c7598" +checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" dependencies = [ - "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7114fe5457c61b276ab77c5055f206295b812608083644a5c5b2640c3102565c" +checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" dependencies = [ "libc", "pyo3-build-config", @@ -8110,27 +7901,27 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8725c0a622b374d6cb051d11a0983786448f7785336139c3c94f5aa6bef7e50" +checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "pyo3-macros-backend" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4109984c22491085343c05b0dbc54ddc405c3cf7b4374fc533f5c3313a572ccc" +checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" dependencies = [ "heck", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -8166,7 +7957,7 @@ dependencies = [ "rustc-hash 2.1.1", "rustls", "socket2 0.5.10", - "thiserror 
2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -8174,20 +7965,20 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.12" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ "bytes", - "getrandom 0.3.3", + "getrandom 0.3.4", "lru-slab", - "rand 0.9.2", + "rand 0.9.3", "ring", "rustc-hash 2.1.1", "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.17", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -8209,9 +8000,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.41" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -8222,6 +8013,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "radium" version = "0.7.0" @@ -8241,9 +8038,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +checksum = "7ec095654a25171c2124e9e3393a930bddbffdc939556c914957a4c3e0a87166" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", @@ -8275,7 +8072,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", ] [[package]] @@ -8284,7 +8081,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", ] [[package]] @@ -8304,7 +8101,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" dependencies = [ "num-traits", - "rand 0.9.2", + "rand 0.9.3", ] [[package]] @@ -8325,7 +8122,7 @@ dependencies = [ "ahash", "brotli", "paste", - "rand 0.9.2", + "rand 0.9.3", "unicase", ] @@ -8347,7 +8144,7 @@ version = "11.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", ] [[package]] @@ -8356,9 +8153,21 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" +[[package]] +name = "raw-window-metal" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40d213455a5f1dc59214213c7330e074ddf8114c9a42411eb890c767357ce135" +dependencies = [ + "objc2 0.6.4", + "objc2-core-foundation", + "objc2-foundation 0.3.2", + "objc2-quartz-core 0.3.2", +] + [[package]] name = "raw_mesh" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "bytes", @@ -8395,7 +8204,7 @@ dependencies = [ [[package]] name = "re_analytics" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "crossbeam", "directories", @@ -8408,27 +8217,15 @@ dependencies = [ "serde", "serde_json", "sha2", - "thiserror 2.0.17", + "thiserror 2.0.18", "url", "uuid", "web-sys", ] -[[package]] -name = 
"re_arrow_combinators" -version = "0.30.0-alpha.1+dev" -dependencies = [ - "arrow", - "insta", - "re_arrow_util", - "re_log", - "thiserror 2.0.17", - "vec1", -] - [[package]] name = "re_arrow_ui" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "arrow", "egui", @@ -8445,7 +8242,7 @@ dependencies = [ [[package]] name = "re_arrow_util" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "arrow", @@ -8458,32 +8255,36 @@ dependencies = [ "re_tracing", "re_tuid", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "re_auth" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "async-trait", "base64 0.22.1", "directories", "ehttp", - "getrandom 0.3.3", + "getrandom 0.2.17", + "getrandom 0.3.4", + "hmac", "http", "indicatif", "jiff", "js-sys", "jsonwebtoken", "parking_lot", - "rand 0.9.2", + "rand 0.9.3", "re_analytics", "re_log", + "ring", "saturating_cast", "serde", "serde_json", "sha2", - "thiserror 2.0.17", + "signature", + "thiserror 2.0.18", "tiny_http", "tokio", "tonic", @@ -8497,11 +8298,11 @@ dependencies = [ [[package]] name = "re_backoff" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "js-sys", - "rand 0.9.2", + "rand 0.9.3", "tokio", "wasm-bindgen-futures", "web-sys", @@ -8509,7 +8310,7 @@ dependencies = [ [[package]] name = "re_blueprint_tree" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "egui", "egui_kittest", @@ -8536,7 +8337,7 @@ dependencies = [ [[package]] name = "re_build_info" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "re_byte_size", "serde", @@ -8544,10 +8345,10 @@ dependencies = [ [[package]] name = "re_build_tools" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", - "cargo_metadata 0.23.0", + "cargo_metadata", "glob", "jiff", "regex-lite", @@ -8558,9 +8359,10 @@ 
dependencies = [ [[package]] name = "re_byte_size" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "arrow", + "ecolor", "glam", "half", "insta", @@ -8571,7 +8373,7 @@ dependencies = [ [[package]] name = "re_capabilities" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "document-features", "egui", @@ -8581,14 +8383,14 @@ dependencies = [ [[package]] name = "re_case" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "convert_case", ] [[package]] name = "re_chunk" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", @@ -8600,7 +8402,7 @@ dependencies = [ "insta", "itertools 0.14.0", "nohash-hasher", - "rand 0.9.2", + "rand 0.9.3", "re_arrow_util", "re_byte_size", "re_error", @@ -8614,13 +8416,13 @@ dependencies = [ "re_tuid", "re_types_core", "similar-asserts", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", ] [[package]] name = "re_chunk_store" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", @@ -8633,7 +8435,7 @@ dependencies = [ "mimalloc", "nohash-hasher", "parking_lot", - "rand 0.9.2", + "rand 0.9.3", "re_arrow_util", "re_byte_size", "re_chunk", @@ -8647,14 +8449,14 @@ dependencies = [ "re_types_core", "saturating_cast", "similar-asserts", - "tap", - "thiserror 2.0.17", + "tempfile", + "thiserror 2.0.18", "web-time", ] [[package]] name = "re_chunk_store_ui" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "arrow", "egui", @@ -8673,7 +8475,7 @@ dependencies = [ [[package]] name = "re_component_fallbacks" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "re_log_types", "re_sdk_types", @@ -8682,7 +8484,7 @@ dependencies = [ [[package]] name = "re_component_ui" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "arrow", "egui", @@ -8692,7 +8494,6 @@ dependencies = [ "egui_plot", "itertools 0.14.0", 
"nohash-hasher", - "re_arrow_util", "re_data_ui", "re_format", "re_log", @@ -8709,7 +8510,7 @@ dependencies = [ [[package]] name = "re_context_menu" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "egui", "egui_tiles", @@ -8730,7 +8531,7 @@ dependencies = [ [[package]] name = "re_crash_handler" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "backtrace", "econtext", @@ -8741,61 +8542,17 @@ dependencies = [ "re_build_info", ] -[[package]] -name = "re_data_loader" -version = "0.30.0-alpha.1+dev" -dependencies = [ - "ahash", - "anyhow", - "arrow", - "cfg-if", - "crossbeam", - "image", - "indexmap 2.11.4", - "insta", - "itertools 0.14.0", - "mcap", - "memmap2 0.9.8", - "notify", - "parking_lot", - "parquet", - "rayon", - "re_arrow_combinators", - "re_arrow_util", - "re_build_info", - "re_chunk", - "re_chunk_store", - "re_crash_handler", - "re_error", - "re_lenses", - "re_log", - "re_log_channel", - "re_log_encoding", - "re_log_types", - "re_mcap", - "re_quota_channel", - "re_sdk_types", - "re_tracing", - "re_video", - "serde", - "serde_json", - "thiserror 2.0.17", - "urdf-rs", - "walkdir", -] - [[package]] name = "re_data_source" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "ehttp", "itertools 0.14.0", "rayon", - "re_data_loader", - "re_error", "re_format", "re_grpc_client", + "re_importer", "re_log", "re_log_channel", "re_log_encoding", @@ -8810,7 +8567,7 @@ dependencies = [ [[package]] name = "re_data_ui" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", @@ -8822,7 +8579,6 @@ dependencies = [ "jiff", "re_arrow_ui", "re_arrow_util", - "re_byte_size", "re_capabilities", "re_chunk_store", "re_entity_db", @@ -8846,7 +8602,7 @@ dependencies = [ [[package]] name = "re_dataframe" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "arrow", @@ -8872,7 +8628,7 @@ dependencies = [ [[package]] name = 
"re_dataframe_ui" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "arrow", @@ -8889,16 +8645,16 @@ dependencies = [ "itertools 0.14.0", "jiff", "ordered-float 5.1.0", - "parking_lot", "re_arrow_util", - "re_chunk_store", "re_component_ui", "re_dataframe", "re_format", "re_log", "re_log_types", "re_mutex", + "re_protos", "re_quota_channel", + "re_redap_client", "re_sdk_types", "re_sorbet", "re_test_context", @@ -8910,13 +8666,13 @@ dependencies = [ "serde", "static_assertions", "strum", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", ] [[package]] name = "re_datafusion" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "arrow", @@ -8926,63 +8682,69 @@ dependencies = [ "datafusion", "futures", "futures-util", - "getrandom 0.3.3", - "log", + "getrandom 0.3.4", + "http", + "opentelemetry", + "opentelemetry-proto", "parking_lot", + "re_analytics", "re_arrow_util", + "re_backoff", "re_dataframe", "re_log", + "re_log_encoding", "re_log_types", "re_perf_telemetry", "re_protos", "re_redap_client", "re_sorbet", "re_uri", + "reqwest", "tokio", "tokio-stream", "tonic", + "tonic-prost", + "tonic-web-wasm-client", "tracing", "wasm-bindgen-futures", + "web-time", ] [[package]] name = "re_dev_tools" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "argh", "camino", - "cargo_metadata 0.23.0", - "crossbeam", + "cargo_metadata", "glob", "indicatif", "itertools 0.14.0", "rayon", "re_build_tools", "re_format", - "re_quota_channel", "roxmltree", - "rustdoc-json", - "rustdoc-types", "serde", "serde_json", "tempfile", - "toml 0.9.8", - "ureq", + "toml", + "ureq 3.3.0", "url", "wasm-bindgen-cli-support", ] [[package]] name = "re_entity_db" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", "arrow", "document-features", "emath", - "indexmap 2.11.4", + "indexmap", + "insta", "itertools 0.14.0", "nohash-hasher", "poll-promise", @@ 
-8992,7 +8754,6 @@ dependencies = [ "re_chunk", "re_chunk_store", "re_format", - "re_int_histogram", "re_log", "re_log_channel", "re_log_encoding", @@ -9007,20 +8768,21 @@ dependencies = [ "similar-asserts", "static_assertions", "tap", - "thiserror 2.0.17", + "thiserror 2.0.18", + "vec1", "web-time", ] [[package]] name = "re_error" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", ] [[package]] name = "re_format" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "half", "itertools 0.14.0", @@ -9030,11 +8792,12 @@ dependencies = [ [[package]] name = "re_grpc_client" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "async-stream", "crossbeam", "re_chunk", + "re_error", "re_log", "re_log_channel", "re_log_encoding", @@ -9044,7 +8807,7 @@ dependencies = [ "re_sorbet", "re_tracing", "re_uri", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-stream", "tonic", @@ -9055,7 +8818,7 @@ dependencies = [ [[package]] name = "re_grpc_server" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "async-stream", @@ -9063,6 +8826,7 @@ dependencies = [ "parking_lot", "re_byte_size", "re_chunk", + "re_error", "re_format", "re_log", "re_log_channel", @@ -9082,32 +8846,68 @@ dependencies = [ "tonic", "tonic-web", "tower-http", + "wildmatch", ] [[package]] -name = "re_int_histogram" -version = "0.30.0-alpha.1+dev" +name = "re_importer" +version = "0.32.0-alpha.1" dependencies = [ - "criterion", + "ahash", + "anyhow", + "arrow", + "cfg-if", + "crossbeam", + "image", + "indexmap", "insta", - "mimalloc", - "re_byte_size", + "itertools 0.14.0", + "mcap", + "memmap2 0.9.10", + "notify", + "parking_lot", + "parquet", + "rayon", + "re_arrow_util", + "re_build_info", + "re_chunk", + "re_chunk_store", + "re_crash_handler", + "re_error", + "re_format", + "re_lenses", + "re_lenses_core", "re_log", - "smallvec", - "static_assertions", + "re_log_channel", + 
"re_log_encoding", + "re_log_types", + "re_mcap", + "re_parquet", + "re_quota_channel", + "re_sdk_types", + "re_tracing", + "re_video", + "serde", + "serde_json", + "thiserror 2.0.18", + "urdf-rs", + "walkdir", ] [[package]] name = "re_integration_test" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ + "arrow", "egui", "egui_kittest", "egui_tiles", + "futures", "insta", "ndarray", "parking_lot", "re_build_info", + "re_dataframe_ui", "re_log_encoding", "re_protos", "re_redap_client", @@ -9130,23 +8930,36 @@ dependencies = [ [[package]] name = "re_lenses" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" +dependencies = [ + "arrow", + "re_lenses_core", + "re_log", + "re_sdk_types", +] + +[[package]] +name = "re_lenses_core" +version = "0.32.0-alpha.1" dependencies = [ + "ahash", "arrow", + "insta", "itertools 0.14.0", "nohash-hasher", - "re_arrow_combinators", "re_arrow_util", + "re_byte_size", "re_chunk", + "re_log", "re_log_types", "re_sdk_types", - "thiserror 2.0.17", + "thiserror 2.0.18", "vec1", ] [[package]] name = "re_log" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "crossbeam", "env_filter", @@ -9161,7 +8974,7 @@ dependencies = [ [[package]] name = "re_log_channel" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "camino", "crossbeam", @@ -9173,12 +8986,12 @@ dependencies = [ "re_tracing", "re_uri", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "re_log_encoding" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "arrow", "bytes", @@ -9188,7 +9001,7 @@ dependencies = [ "insta", "itertools 0.14.0", "js-sys", - "lz4_flex 0.12.0", + "lz4_flex 0.13.0", "mimalloc", "parking_lot", "re_arrow_util", @@ -9205,7 +9018,8 @@ dependencies = [ "re_types_core", "sha2", "similar-asserts", - "thiserror 2.0.17", + "tempfile", + "thiserror 2.0.18", "tokio", "tokio-stream", "tracing", @@ -9218,7 +9032,7 @@ dependencies = [ 
[[package]] name = "re_log_types" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "arrow", @@ -9248,41 +9062,43 @@ dependencies = [ "serde", "similar-asserts", "static_assertions", - "thiserror 2.0.17", + "thiserror 2.0.18", "typenum", "uuid", "web-time", + "xxhash-rust", ] [[package]] name = "re_mcap" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", "arrow", "byteorder", "cdr-encoding", - "crossbeam", "insta", "mcap", "prost-reflect", + "re_arrow_util", "re_chunk", "re_log", "re_log_types", "re_ros_msg", "re_sdk_types", "re_tracing", + "regex-lite", "saturating_cast", "serde", "serde_bytes", "strum", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "re_memory" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "backtrace", @@ -9296,6 +9112,7 @@ dependencies = [ "re_quota_channel", "re_tracing", "saturating_cast", + "serde", "smallvec", "sysinfo", "wasm-bindgen", @@ -9304,7 +9121,7 @@ dependencies = [ [[package]] name = "re_memory_view" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "eframe", "egui", @@ -9329,16 +9146,32 @@ dependencies = [ [[package]] name = "re_mutex" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "cfg-if", "parking_lot", "re_log", ] +[[package]] +name = "re_parquet" +version = "0.32.0-alpha.1" +dependencies = [ + "anyhow", + "arrow", + "bytes", + "parquet", + "re_chunk", + "re_log", + "re_log_types", + "re_sdk_types", + "re_tracing", + "thiserror 2.0.18", +] + [[package]] name = "re_perf_telemetry" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", @@ -9367,17 +9200,28 @@ dependencies = [ "tracing-tracy", ] +[[package]] +name = "re_plot" +version = "0.32.0-alpha.1" +dependencies = [ + "ahash", + "egui", + "indexmap", + "re_ui", +] + [[package]] name = "re_protos" -version = "0.30.0-alpha.1+dev" +version = 
"0.32.0-alpha.1" dependencies = [ "arrow", "http", "jiff", - "lz4_flex 0.12.0", + "lz4_flex 0.13.0", + "opentelemetry", "pin-project-lite", - "prost 0.14.1", - "prost-types 0.14.1", + "prost", + "prost-types", "pyo3", "re_arrow_util", "re_build_info", @@ -9389,7 +9233,7 @@ dependencies = [ "re_tuid", "re_types_core", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", "tonic", "tonic-prost", "tower", @@ -9399,7 +9243,7 @@ dependencies = [ [[package]] name = "re_protos_builder" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "camino", "re_log", @@ -9408,7 +9252,7 @@ dependencies = [ [[package]] name = "re_query" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", @@ -9421,7 +9265,7 @@ dependencies = [ "nohash-hasher", "parking_lot", "paste", - "rand 0.9.2", + "rand 0.9.3", "re_arrow_util", "re_byte_size", "re_chunk", @@ -9436,12 +9280,12 @@ dependencies = [ "re_types_core", "seq-macro", "similar-asserts", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "re_quota_channel" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "crossbeam", "parking_lot", @@ -9460,7 +9304,7 @@ dependencies = [ "assert_matches", "atomig", "av-data", - "bitflags 2.9.4", + "bitflags 2.11.0", "cc", "cfg-if", "libc", @@ -9476,7 +9320,7 @@ dependencies = [ [[package]] name = "re_recording_panel" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "egui", @@ -9499,7 +9343,7 @@ dependencies = [ [[package]] name = "re_redap_browser" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "cfg-if", @@ -9534,7 +9378,7 @@ dependencies = [ [[package]] name = "re_redap_client" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "arrow", @@ -9542,11 +9386,13 @@ dependencies = [ "futures", "itertools 0.14.0", "jiff", + "opentelemetry", "re_arrow_util", "re_auth", "re_backoff", "re_byte_size", 
"re_chunk", + "re_error", "re_format", "re_log", "re_log_channel", @@ -9556,7 +9402,7 @@ dependencies = [ "re_protos", "re_uri", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-stream", "tonic", @@ -9569,18 +9415,17 @@ dependencies = [ [[package]] name = "re_redap_tests" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "arrow", "async-trait", - "chrono", "datafusion", "futures", "insta", "itertools 0.14.0", "lance", - "prost-types 0.14.1", + "prost-types", "re_arrow_util", "re_chunk", "re_chunk_store", @@ -9600,20 +9445,21 @@ dependencies = [ [[package]] name = "re_renderer" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", - "bitflags 2.9.4", + "bitflags 2.11.0", "bytemuck", "cfg_aliases", "clean-path", + "criterion", "crossbeam", "dae-parser", "document-features", "ecolor", "enumset", - "getrandom 0.3.3", + "getrandom 0.3.4", "glam", "gltf", "half", @@ -9642,20 +9488,20 @@ dependencies = [ "smallvec", "static_assertions", "stl_io", - "thiserror 2.0.17", + "thiserror 2.0.18", "tobj", "type-map", "unindent", "walkdir", "wasm-bindgen", - "wasm-bindgen-futures", "web-sys", "wgpu", + "windows-core", ] [[package]] name = "re_renderer_examples" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", @@ -9666,7 +9512,7 @@ dependencies = [ "itertools 0.14.0", "macaw", "pollster", - "rand 0.9.2", + "rand 0.9.3", "re_log", "re_renderer", "wasm-bindgen-futures", @@ -9678,24 +9524,24 @@ dependencies = [ [[package]] name = "re_ros_msg" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "re_rvl" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "byteorder", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "re_sdk" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies 
= [ "ahash", "arrow", @@ -9708,15 +9554,15 @@ dependencies = [ "nohash-hasher", "parking_lot", "percent-encoding", - "re_arrow_combinators", "re_build_info", "re_build_tools", "re_byte_size", "re_chunk", - "re_data_loader", "re_grpc_client", "re_grpc_server", + "re_importer", "re_lenses", + "re_lenses_core", "re_log", "re_log_channel", "re_log_encoding", @@ -9731,7 +9577,7 @@ dependencies = [ "serde", "serde_json", "similar-asserts", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "uuid", "webbrowser", @@ -9739,7 +9585,7 @@ dependencies = [ [[package]] name = "re_sdk_types" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "array-init", "arrow", @@ -9751,7 +9597,7 @@ dependencies = [ "glam", "half", "image", - "indexmap 2.11.4", + "indexmap", "infer", "itertools 0.14.0", "macaw", @@ -9760,7 +9606,7 @@ dependencies = [ "ndarray", "nohash-hasher", "ply-rs-bw", - "rand 0.9.2", + "rand 0.9.3", "re_byte_size", "re_error", "re_format", @@ -9774,21 +9620,20 @@ dependencies = [ "serde", "similar-asserts", "smallvec", - "thiserror 2.0.17", + "thiserror 2.0.18", "tiff", "uuid", ] [[package]] name = "re_selection_panel" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "arrow", "egui", "egui_tiles", "itertools 0.14.0", "nohash-hasher", - "re_arrow_combinators", "re_case", "re_chunk", "re_chunk_store", @@ -9797,6 +9642,7 @@ dependencies = [ "re_data_ui", "re_entity_db", "re_format", + "re_lenses_core", "re_log", "re_log_types", "re_sdk_types", @@ -9815,7 +9661,7 @@ dependencies = [ [[package]] name = "re_server" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", @@ -9824,7 +9670,6 @@ dependencies = [ "bincode", "bytes", "cfg-if", - "chrono", "clap", "datafusion", "ehttp", @@ -9837,6 +9682,7 @@ dependencies = [ "lance-index", "lance-linalg", "nohash-hasher", + "opentelemetry", "parking_lot", "re_arrow_util", "re_build_info", @@ -9857,14 +9703,13 @@ dependencies = [ 
"re_types_core", "serde", "tempfile", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-stream", "tokio-util", "tonic", "tonic-web", "tower", - "tower-http", "tower-service", "tracing", "url", @@ -9872,12 +9717,13 @@ dependencies = [ [[package]] name = "re_sorbet" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "arrow", "itertools 0.14.0", "nohash-hasher", "re_arrow_util", + "re_byte_size", "re_log", "re_log_types", "re_tracing", @@ -9885,21 +9731,21 @@ dependencies = [ "re_types_core", "semver", "strum", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", "web-time", ] [[package]] name = "re_span" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "num-traits", ] [[package]] name = "re_string_interner" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "nohash-hasher", @@ -9911,7 +9757,7 @@ dependencies = [ [[package]] name = "re_test_context" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", @@ -9930,7 +9776,6 @@ dependencies = [ "re_log_channel", "re_log_encoding", "re_log_types", - "re_memory", "re_redap_client", "re_renderer", "re_sdk_types", @@ -9944,7 +9789,7 @@ dependencies = [ [[package]] name = "re_test_viewport" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "egui", @@ -9961,11 +9806,11 @@ dependencies = [ [[package]] name = "re_tf" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "arrow", - "bitflags 2.9.4", + "bitflags 2.11.0", "criterion", "glam", "insta", @@ -9978,31 +9823,33 @@ dependencies = [ "re_chunk_store", "re_entity_db", "re_log", + "re_log_encoding", "re_log_types", "re_mutex", "re_sdk_types", "re_tracing", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "re_time_panel" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ + "ahash", "anyhow", "criterion", "egui", "insta", 
"itertools 0.14.0", "nohash-hasher", - "rand 0.9.2", + "rand 0.9.3", + "re_byte_size", "re_chunk", "re_chunk_store", "re_context_menu", "re_data_ui", "re_entity_db", "re_format", - "re_int_histogram", "re_log", "re_log_encoding", "re_log_types", @@ -10019,8 +9866,9 @@ dependencies = [ [[package]] name = "re_tracing" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ + "parking_lot", "puffin", "puffin_http", "re_log", @@ -10030,13 +9878,13 @@ dependencies = [ [[package]] name = "re_tuid" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "bytemuck", "criterion", "document-features", - "getrandom 0.3.3", - "rand 0.9.2", + "getrandom 0.3.4", + "rand 0.9.3", "re_byte_size", "re_log", "serde", @@ -10045,14 +9893,14 @@ dependencies = [ [[package]] name = "re_types" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "re_sdk_types", ] [[package]] name = "re_types_builder" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "camino", @@ -10074,20 +9922,20 @@ dependencies = [ "re_tracing", "regex-lite", "serde", - "syn 2.0.106", + "syn 2.0.117", "tempfile", - "toml 0.9.8", + "toml", "unindent", "xshell", ] [[package]] name = "re_types_core" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "arrow", - "bitflags 2.9.4", + "bitflags 2.11.0", "bytemuck", "criterion", "document-features", @@ -10104,12 +9952,12 @@ dependencies = [ "re_tuid", "serde", "similar-asserts", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "re_ui" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", @@ -10120,18 +9968,18 @@ dependencies = [ "egui_extras", "egui_kittest", "egui_tiles", - "getrandom 0.3.3", + "getrandom 0.3.4", "itertools 0.14.0", "jiff", "notify", "num-traits", - "objc2-app-kit 0.3.2", "parking_lot", - "rand 0.9.2", + "rand 0.9.3", "raw-window-handle", "re_analytics", 
"re_build_tools", "re_entity_db", + "re_error", "re_format", "re_log", "re_log_types", @@ -10150,29 +9998,32 @@ dependencies = [ [[package]] name = "re_uri" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ + "percent-encoding", "re_log", "re_log_types", "re_tuid", "serde", "static_assertions", - "thiserror 2.0.17", + "thiserror 2.0.18", "url", ] [[package]] name = "re_video" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "bit-vec", + "bytemuck", "cfg_aliases", "criterion", "cros-codecs", "econtext", "ffmpeg-sidecar", "h264-reader", + "image", "indicatif", "itertools 0.14.0", "js-sys", @@ -10191,7 +10042,7 @@ dependencies = [ "scuffle-bytes-util", "serde", "smallvec", - "thiserror 2.0.17", + "thiserror 2.0.18", "wasm-bindgen", "wasm-bindgen-futures", "web-sys", @@ -10200,17 +10051,16 @@ dependencies = [ [[package]] name = "re_view" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "egui", "glam", "itertools 0.14.0", "nohash-hasher", - "re_arrow_combinators", - "re_arrow_util", "re_chunk_store", "re_entity_db", + "re_lenses_core", "re_log", "re_log_types", "re_query", @@ -10221,19 +10071,21 @@ dependencies = [ "re_ui", "re_viewer_context", "re_viewport_blueprint", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "re_view_bar_chart" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ + "ahash", "arrow", "egui", "egui_plot", "re_chunk_store", - "re_entity_db", + "re_format", "re_log_types", + "re_plot", "re_sdk_types", "re_test_context", "re_test_viewport", @@ -10246,7 +10098,7 @@ dependencies = [ [[package]] name = "re_view_dataframe" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "arrow", @@ -10275,7 +10127,7 @@ dependencies = [ [[package]] name = "re_view_graph" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "egui", @@ -10302,7 +10154,7 @@ 
dependencies = [ [[package]] name = "re_view_map" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "bytemuck", "egui", @@ -10327,12 +10179,12 @@ dependencies = [ [[package]] name = "re_view_spatial" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", "arrow", - "bitflags 2.9.4", + "bitflags 2.11.0", "bytemuck", "egui", "glam", @@ -10373,13 +10225,30 @@ dependencies = [ "saturating_cast", "serde", "smallvec", - "thiserror 2.0.17", + "thiserror 2.0.18", "vec1", ] +[[package]] +name = "re_view_status" +version = "0.32.0-alpha.1" +dependencies = [ + "egui", + "re_chunk_store", + "re_log_types", + "re_sdk_types", + "re_test_context", + "re_test_viewport", + "re_tracing", + "re_ui", + "re_view", + "re_viewer_context", + "re_viewport_blueprint", +] + [[package]] name = "re_view_tensor" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "bytemuck", @@ -10399,13 +10268,13 @@ dependencies = [ "re_view", "re_viewer_context", "re_viewport_blueprint", - "thiserror 2.0.17", + "thiserror 2.0.18", "wgpu", ] [[package]] name = "re_view_text_document" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "egui", "egui_commonmark", @@ -10423,7 +10292,7 @@ dependencies = [ [[package]] name = "re_view_text_log" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "egui", "egui_extras", @@ -10445,19 +10314,24 @@ dependencies = [ [[package]] name = "re_view_time_series" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ + "ahash", "arrayvec", "egui", "egui_plot", + "glam", "itertools 0.14.0", + "macaw", "nohash-hasher", "rayon", + "re_byte_size", "re_chunk_store", "re_component_ui", "re_format", "re_log", "re_log_types", + "re_plot", "re_query", "re_renderer", "re_sdk_types", @@ -10474,7 +10348,7 @@ dependencies = [ [[package]] name = "re_viewer" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" 
dependencies = [ "ahash", "anyhow", @@ -10495,6 +10369,7 @@ dependencies = [ "itertools 0.14.0", "jiff", "js-sys", + "num-traits", "parking_lot", "poll-promise", "rayon", @@ -10510,13 +10385,13 @@ dependencies = [ "re_chunk_store_ui", "re_component_fallbacks", "re_component_ui", - "re_data_loader", "re_data_source", "re_data_ui", "re_dataframe_ui", "re_entity_db", "re_error", "re_format", + "re_importer", "re_log", "re_log_channel", "re_log_encoding", @@ -10533,6 +10408,7 @@ dependencies = [ "re_sdk_types", "re_selection_panel", "re_sorbet", + "re_string_interner", "re_test_context", "re_test_viewport", "re_time_panel", @@ -10541,11 +10417,13 @@ dependencies = [ "re_ui", "re_uri", "re_video", + "re_view", "re_view_bar_chart", "re_view_dataframe", "re_view_graph", "re_view_map", "re_view_spatial", + "re_view_status", "re_view_tensor", "re_view_text_document", "re_view_text_log", @@ -10562,7 +10440,7 @@ dependencies = [ "strum_macros", "tap", "tempfile", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-stream", "url", @@ -10575,13 +10453,12 @@ dependencies = [ [[package]] name = "re_viewer_context" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", "arrow", - "bit-vec", - "bitflags 2.9.4", + "bitflags 2.11.0", "bytemuck", "camino", "crossbeam", @@ -10595,16 +10472,15 @@ dependencies = [ "half", "home", "image", - "indexmap 2.11.4", + "indexmap", "itertools 0.14.0", "linked-hash-map", "macaw", "ndarray", "nohash-hasher", "parking_lot", - "rand 0.9.2", + "rand 0.9.3", "rayon", - "re_arrow_combinators", "re_arrow_ui", "re_arrow_util", "re_byte_size", @@ -10615,6 +10491,7 @@ dependencies = [ "re_entity_db", "re_error", "re_format", + "re_lenses_core", "re_log", "re_log_channel", "re_log_encoding", @@ -10640,7 +10517,7 @@ dependencies = [ "smallvec", "static_assertions", "strum_macros", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "uuid", "vec1", @@ -10651,7 +10528,7 @@ dependencies = [ [[package]] name = 
"re_viewport" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "egui", @@ -10667,7 +10544,6 @@ dependencies = [ "re_sdk_types", "re_tracing", "re_ui", - "re_view", "re_viewer_context", "re_viewport_blueprint", "web-time", @@ -10675,7 +10551,7 @@ dependencies = [ [[package]] name = "re_viewport_blueprint" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "arrow", @@ -10685,7 +10561,7 @@ dependencies = [ "itertools 0.14.0", "mimalloc", "nohash-hasher", - "rand 0.9.2", + "rand 0.9.3", "re_chunk", "re_chunk_store", "re_entity_db", @@ -10700,52 +10576,32 @@ dependencies = [ "re_viewer_context", "slotmap", "smallvec", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "re_web_viewer_server" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "document-features", "re_analytics", "re_build_tools", "re_log", - "thiserror 2.0.17", + "thiserror 2.0.18", "tiny_http", - "zip 2.4.2", + "zip 8.2.0", ] [[package]] name = "read-fonts" -version = "0.35.0" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717cf23b488adf64b9d711329542ba34de147df262370221940dfabc2c91358" +checksum = "7b634fabf032fab15307ffd272149b622260f55974d9fad689292a5d33df02e5" dependencies = [ "bytemuck", "font-types", ] -[[package]] -name = "recursive" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" -dependencies = [ - "recursive-proc-macro-impl", - "stacker", -] - -[[package]] -name = "recursive-proc-macro-impl" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" -dependencies = [ - "quote", - "syn 2.0.106", -] - [[package]] name = "redox_syscall" version = "0.4.1" @@ -10761,7 +10617,7 @@ version = "0.5.18" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", ] [[package]] @@ -10770,9 +10626,9 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "libredox", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -10792,26 +10648,26 @@ checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "reflink-copy" -version = "0.1.26" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78c81d000a2c524133cc00d2f92f019d399e57906c3b7119271a2495354fe895" +checksum = "13362233b147e57674c37b802d216b7c5e3dcccbed8967c84f0d8d223868ae27" dependencies = [ "cfg-if", "libc", - "rustix 1.1.2", - "windows 0.61.3", + "rustix 1.1.4", + "windows", ] [[package]] name = "regex" -version = "1.11.3" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -10821,9 +10677,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.11" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -10832,9 +10688,9 @@ dependencies = [ [[package]] name = "regex-lite" -version = "0.1.7" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "943f41321c63ef1c92fd763bfe054d2668f7f225a5c29f0105903dc2fc04ba30" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" [[package]] name = "regex-syntax" @@ -10922,7 +10778,7 @@ dependencies = [ [[package]] name = "rerun" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "anyhow", @@ -10933,11 +10789,12 @@ dependencies = [ "crossbeam", "document-features", "env_filter", - "indexmap 2.11.4", + "indexmap", "indicatif", "itertools 0.14.0", "jiff", "log", + "parking_lot", "puffin", "rayon", "re_analytics", @@ -10983,7 +10840,7 @@ dependencies = [ [[package]] name = "rerun-cli" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "document-features", "mimalloc", @@ -10997,8 +10854,8 @@ dependencies = [ ] [[package]] -name = "rerun-loader-rust-file" -version = "0.30.0-alpha.1+dev" +name = "rerun-importer-rust-file" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "argh", @@ -11007,7 +10864,7 @@ dependencies = [ [[package]] name = "rerun_c" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "ahash", "arrow", @@ -11024,8 +10881,9 @@ dependencies = [ [[package]] name = "rerun_py" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ + "anyhow", "arrow", "bytes", "chrono", @@ -11038,25 +10896,33 @@ dependencies = [ "infer", "itertools 0.14.0", "jiff", + "memmap2 0.9.10", "mimalloc", "numpy", "parking_lot", "pyo3", "pyo3-build-config", - "rand 0.9.2", + "rand 0.9.3", "re_arrow_util", "re_auth", "re_build_info", "re_build_tools", + "re_byte_size", "re_chunk", "re_chunk_store", "re_datafusion", + "re_error", + "re_format", "re_grpc_client", "re_grpc_server", + "re_importer", + "re_lenses_core", "re_log", "re_log_encoding", "re_log_types", + "re_mcap", "re_memory", + "re_parquet", "re_perf_telemetry", "re_protos", "re_quota_channel", @@ -11072,7 +10938,7 @@ dependencies = [ "rustls", "strum", "strum_macros", - "thiserror 
2.0.17", + "thiserror 2.0.18", "tokio", "tokio-stream", "tonic", @@ -11122,7 +10988,7 @@ dependencies = [ "js-sys", "libc", "log", - "objc2 0.6.3", + "objc2 0.6.4", "objc2-app-kit 0.3.2", "objc2-core-foundation", "objc2-foundation 0.3.2", @@ -11152,7 +11018,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted", "windows-sys 0.52.0", @@ -11160,9 +11026,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.10.12" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e8d2cfa184d94d0726d650a9f4a1be7f9b76ac9fdb954219878dc00c1c1e7b" +checksum = "8ba9ce64a8f45d7fc86358410bb1a82e8c987504c0d4900e9141d69a9f26c885" dependencies = [ "bytemuck", "byteorder", @@ -11170,14 +11036,15 @@ dependencies = [ [[package]] name = "ron" -version = "0.11.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db09040cc89e461f1a265139777a2bde7f8d8c67c4936f700c63ce3e2904d468" +checksum = "fd490c5b18261893f14449cbd28cb9c0b637aebf161cd77900bfdedaff21ec32" dependencies = [ - "base64 0.22.1", - "bitflags 2.9.4", + "bitflags 2.11.0", + "once_cell", "serde", "serde_derive", + "typeid", "unicode-ident", ] @@ -11189,19 +11056,13 @@ checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" [[package]] name = "run_wasm" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "cargo-run-wasm", "pico-args", "webbrowser", ] -[[package]] -name = "rust-fuzzy-search" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a157657054ffe556d8858504af8a672a054a6e0bd9e8ee531059100c0fa11bb2" - [[package]] name = "rust-stemmers" version = "1.2.0" @@ -11239,37 +11100,13 @@ dependencies = [ "semver", ] -[[package]] -name = "rustdoc-json" -version = "0.9.7" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab44348a3493c8a852182d0da3e6d92fe340dd099a745652f276ebbb2d34a330" -dependencies = [ - "cargo-manifest", - "cargo_metadata 0.21.0", - "serde", - "thiserror 2.0.17", - "toml 0.8.23", - "tracing", -] - -[[package]] -name = "rustdoc-types" -version = "0.56.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27bf787c529efe523ed9eb6dcdbaa5954d34329f08d5c243fce928441826ca90" -dependencies = [ - "serde", - "serde_derive", -] - [[package]] name = "rustix" version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "errno", "libc", "linux-raw-sys 0.4.15", @@ -11278,22 +11115,22 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "errno", "libc", - "linux-raw-sys 0.11.0", + "linux-raw-sys 0.12.1", "windows-sys 0.61.2", ] [[package]] name = "rustls" -version = "0.23.32" +version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd3c25631629d034ce7cd9940adc9d45762d46de2b0f57193c4443b92c6d4d40" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ "log", "once_cell", @@ -11328,9 +11165,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.7" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10b3f4191e8a80e6b43eebabfac91e5dcecebb27a71f04e820c47ec41d314bf" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" dependencies = [ "ring", 
"rustls-pki-types", @@ -11421,7 +11258,7 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -11470,28 +11307,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde-untagged" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9faf48a4a2d2693be24c6289dbe26552776eb7737074e6722891fadbe6c5058" -dependencies = [ - "erased-serde", - "serde", - "serde_core", - "typeid", -] - -[[package]] -name = "serde-value" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" -dependencies = [ - "ordered-float 2.10.1", - "serde", -] - [[package]] name = "serde-wasm-bindgen" version = "0.6.5" @@ -11542,20 +11357,20 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "serde_json" -version = "1.0.145" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -11577,23 +11392,14 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "serde_spanned" -version = "0.6.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" -dependencies = [ - "serde", -] - -[[package]] -name = "serde_spanned" 
-version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e24345aa0fe688594e73770a5f6d1b216508b4f93484c0026d521acd30134392" +checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776" dependencies = [ "serde_core", ] @@ -11654,20 +11460,11 @@ dependencies = [ [[package]] name = "shared_recording" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "rerun", ] -[[package]] -name = "shellexpand" -version = "3.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1fdf65dd6331831494dd616b30351c38e96e45921a27745cf98490458b90bb" -dependencies = [ - "dirs", -] - [[package]] name = "shlex" version = "1.3.0" @@ -11683,6 +11480,15 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "rand_core 0.6.4", +] + [[package]] name = "simd-adler32" version = "0.3.7" @@ -11747,9 +11553,9 @@ dependencies = [ [[package]] name = "skrifa" -version = "0.37.0" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c31071dedf532758ecf3fed987cdb4bd9509f900e026ab684b4ecb81ea49841" +checksum = "7fbdfe3d2475fbd7ddd1f3e5cf8288a30eb3e5f95832829570cd88115a7434ac" dependencies = [ "bytemuck", "read-fonts", @@ -11763,9 +11569,9 @@ checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" [[package]] name = "slotmap" -version = "1.0.7" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbff4acf519f630b3a3ddcfaea6c06b42174d9a44bc70c620e9ed1649d58b82a" +checksum = "bdd58c3c93c3d278ca835519292445cb4b0d4dc59ccfdf7ceadaab3f8aeb4038" dependencies = [ "serde", "version_check", @@ -11786,13 +11592,13 @@ version = "0.19.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "3457dea1f0eb631b4034d61d4d8c32074caa6cd1ab2d59f2327bd8461e2c0016" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "calloop", "calloop-wayland-source", "cursor-icon", "libc", "log", - "memmap2 0.9.8", + "memmap2 0.9.10", "rustix 0.38.44", "thiserror 1.0.69", "wayland-backend", @@ -11827,23 +11633,23 @@ dependencies = [ [[package]] name = "snafu" -version = "0.8.9" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" +checksum = "d1d4bced6a69f90b2056c03dcff2c4737f98d6fb9e0853493996e1d253ca29c6" dependencies = [ "snafu-derive", ] [[package]] name = "snafu-derive" -version = "0.8.9" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" +checksum = "54254b8531cafa275c5e096f62d48c81435d1015405a91198ddb11e967301d40" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -11854,12 +11660,12 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "snippets" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "crossbeam", "itertools 0.14.0", "ndarray", - "rand 0.9.2", + "rand 0.9.3", "rand_distr 0.5.1", "re_build_tools", "rerun", @@ -11888,28 +11694,27 @@ dependencies = [ [[package]] name = "spawn_viewer" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "rerun", ] [[package]] name = "spirv" -version = "0.3.0+sdk-1.3.268.0" +version = "0.4.0+sdk-1.4.341.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eda41003dc44290527a59b13432d4a0379379fa074b70174882adfbdfd917844" +checksum = "d9571ea910ebd84c86af4b3ed27f9dbdc6ad06f17c5f96146b2b671e2976744f" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", ] [[package]] 
name = "sqlparser" -version = "0.58.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", - "recursive", "sqlparser_derive", ] @@ -11921,7 +11726,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -11947,25 +11752,19 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" -[[package]] -name = "stacker" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "windows-sys 0.59.0", -] - [[package]] name = "static_assertions" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "status_example" +version = "0.32.0-alpha.1" +dependencies = [ + "rerun", +] + [[package]] name = "std_prelude" version = "0.2.12" @@ -11974,7 +11773,7 @@ checksum = "8207e78455ffdf55661170876f88daf85356e4edd54e0a3dbc79586ca1e50cbe" [[package]] name = "stdio" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "rerun", ] @@ -11987,12 +11786,12 @@ checksum = "e51f1e89f093f99e7432c491c382b88a6860a5adbe6bf02574bf0a08efff1978" [[package]] name = "stl_io" -version = "0.8.5" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff2e145168af9fef3b518ac0c6f9849c407b3df8a28582ced9f1fda510aa34c" +checksum = "da63e75b86345156b191c021b3ce2a13b973941ecdb8c70d6f00cbbfe0076ed7" dependencies = [ "byteorder", - 
"float-cmp", + "float-cmp 0.10.0", ] [[package]] @@ -12001,7 +11800,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6637bab7722d379c8b41ba849228d680cc12d0a45ba1fa2b48f2a30577a06731" dependencies = [ - "float-cmp", + "float-cmp 0.9.0", ] [[package]] @@ -12029,7 +11828,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -12067,9 +11866,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.106" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -12093,21 +11892,21 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "sysinfo" -version = "0.30.13" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3" +checksum = "92ab6a2f8bfe508deb3c6406578252e491d299cbbf3bc0529ecc3313aee4a52f" dependencies = [ - "cfg-if", - "core-foundation-sys", "libc", + "memchr", "ntapi", - "once_cell", - "windows 0.52.0", + "objc2-core-foundation", + "objc2-io-kit", + "windows", ] [[package]] @@ -12141,9 +11940,9 @@ dependencies = [ "levenshtein_automata", "log", "lru 0.12.5", - "lz4_flex 0.11.5", + "lz4_flex 0.11.6", "measure_time", - "memmap2 0.9.8", + "memmap2 0.9.10", "once_cell", "oneshot", "rayon", @@ -12162,7 +11961,7 @@ dependencies = [ "tantivy-stacker", "tantivy-tokenizer-api", "tempfile", - "thiserror 2.0.17", + "thiserror 2.0.18", "time", "uuid", "winapi", @@ -12276,20 +12075,20 @@ checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c" [[package]] name = "tempfile" -version = 
"3.23.0" +version = "3.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom 0.4.2", "once_cell", - "rustix 1.1.2", + "rustix 1.1.4", "windows-sys 0.61.2", ] [[package]] name = "template" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "rerun", ] @@ -12305,17 +12104,17 @@ dependencies = [ [[package]] name = "test_data_density_graph" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", - "rand 0.9.2", + "rand 0.9.3", "re_log", "rerun", ] [[package]] name = "test_image_memory" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "mimalloc", "re_format", @@ -12323,9 +12122,19 @@ dependencies = [ "rerun", ] +[[package]] +name = "test_label_compaction" +version = "0.32.0-alpha.1" +dependencies = [ + "anyhow", + "clap", + "re_log", + "rerun", +] + [[package]] name = "test_out_of_order_transforms" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "clap", @@ -12336,7 +12145,7 @@ dependencies = [ [[package]] name = "test_ui_wakeup" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "anyhow", "clap", @@ -12355,11 +12164,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.17", + "thiserror-impl 2.0.18", ] [[package]] @@ -12370,18 +12179,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] 
[[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -12554,32 +12363,29 @@ dependencies = [ [[package]] name = "tokio" -version = "1.47.1" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" dependencies = [ - "backtrace", "bytes", - "io-uring", "libc", "mio", "pin-project-lite", "signal-hook-registry", - "slab", "socket2 0.6.0", "tokio-macros", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] name = "tokio-macros" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -12594,9 +12400,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" dependencies = [ "futures-core", "pin-project-lite", @@ -12618,9 +12424,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.16" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = 
"9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -12631,27 +12437,14 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" -dependencies = [ - "indexmap 2.11.4", - "serde", - "serde_spanned 0.6.9", - "toml_datetime 0.6.11", - "toml_edit 0.22.27", -] - -[[package]] -name = "toml" -version = "0.9.8" +version = "1.0.6+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dc8b1fb61449e27716ec0e1bdf0f6b8f3e8f6b05391e8497b8b6d7804ea6d8" +checksum = "399b1124a3c9e16766831c6bba21e50192572cdd98706ea114f9502509686ffc" dependencies = [ - "indexmap 2.11.4", + "indexmap", "serde_core", - "serde_spanned 1.0.3", - "toml_datetime 0.7.3", + "serde_spanned", + "toml_datetime 1.0.0+spec-1.1.0", "toml_parser", "toml_writer", "winnow", @@ -12659,68 +12452,48 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.11" +version = "0.7.5+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" dependencies = [ - "serde", + "serde_core", ] [[package]] name = "toml_datetime" -version = "0.7.3" +version = "1.0.0+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" +checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e" dependencies = [ "serde_core", ] -[[package]] -name = "toml_edit" -version = "0.22.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" -dependencies = [ - "indexmap 2.11.4", - "serde", - "serde_spanned 0.6.9", - 
"toml_datetime 0.6.11", - "toml_write", - "winnow", -] - [[package]] name = "toml_edit" version = "0.23.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3effe7c0e86fdff4f69cdd2ccc1b96f933e24811c5441d44904e8683e27184b" dependencies = [ - "indexmap 2.11.4", - "toml_datetime 0.7.3", + "indexmap", + "toml_datetime 0.7.5+spec-1.1.0", "toml_parser", "winnow", ] [[package]] name = "toml_parser" -version = "1.0.4" +version = "1.0.9+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" +checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" dependencies = [ "winnow", ] -[[package]] -name = "toml_write" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" - [[package]] name = "toml_writer" -version = "1.0.4" +version = "1.0.6+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df8b2b54733674ad286d16267dcfc7a71ed5c776e4ac7aa3c3e2561f7c637bf2" +checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607" [[package]] name = "tonic" @@ -12763,7 +12536,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -12773,7 +12546,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" dependencies = [ "bytes", - "prost 0.14.1", + "prost", "tonic", ] @@ -12785,10 +12558,10 @@ checksum = "b4a16cba4043dc3ff43fcb3f96b4c5c154c64cbd18ca8dce2ab2c6a451d058a2" dependencies = [ "prettyplease", "proc-macro2", - "prost-build 0.14.1", - "prost-types 0.14.1", + "prost-build", + "prost-types", "quote", - "syn 2.0.106", + "syn 2.0.117", "tempfile", "tonic-build", ] @@ -12827,7 +12600,7 @@ dependencies = [ "httparse", "js-sys", 
"pin-project", - "thiserror 2.0.17", + "thiserror 2.0.18", "tonic", "tower-service", "wasm-bindgen", @@ -12838,13 +12611,13 @@ dependencies = [ [[package]] name = "tower" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", - "indexmap 2.11.4", + "indexmap", "pin-project-lite", "slab", "sync_wrapper", @@ -12857,11 +12630,11 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "bytes", "futures-util", "http", @@ -12888,9 +12661,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "log", "pin-project-lite", @@ -12900,20 +12673,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", "valuable", @@ -12932,16 +12705,13 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.32.0" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e6e5658463dd88089aba75c7791e1d3120633b1bfde22478b28f625a9bb1b8e" +checksum = "1ac28f2d093c6c477eaa76b23525478f38de514fa9aeb1285738d4b97a9552fc" dependencies = [ "js-sys", "opentelemetry", - "opentelemetry_sdk", - "rustversion", "smallvec", - "thiserror 2.0.17", "tracing", "tracing-core", "tracing-log", @@ -12961,9 +12731,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.20" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" dependencies = [ "matchers", "nu-ansi-term", @@ -13009,7 +12779,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c5f7c95348f20c1c913d72157b3c6dee6ea3e30b3d19502c5a7f6d3f160dacbf" dependencies = [ "cc", - "windows-targets 0.52.6", + "windows-targets 0.48.5", ] [[package]] @@ -13044,9 +12814,9 @@ dependencies = [ "http", "httparse", "log", - "rand 0.9.2", + "rand 0.9.3", "sha1", - "thiserror 2.0.17", + "thiserror 2.0.18", "utf-8", ] @@ -13056,7 +12826,7 @@ version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" dependencies = [ - "rand 0.9.2", + "rand 0.9.3", ] [[package]] @@ -13074,6 +12844,12 @@ version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" +[[package]] +name = "typed-path" +version = "0.12.3" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e28f89b80c87b8fb0cf04ab448d5dd0dd0ade2f8891bae878de66a75a28600e" + [[package]] name = "typeid" version = "1.0.3" @@ -13178,27 +12954,57 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" dependencies = [ "base64 0.22.1", - "flate2", "log", "once_cell", "rustls", "rustls-pki-types", - "serde", - "serde_json", "url", "webpki-roots 0.26.11", ] +[[package]] +name = "ureq" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea7109cdcd5864d4eeb1b58a1648dc9bf520360d7af16ec26d0a9354bafcfc0" +dependencies = [ + "base64 0.22.1", + "cookie_store", + "flate2", + "log", + "percent-encoding", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "ureq-proto", + "utf8-zero", + "webpki-roots 1.0.2", +] + +[[package]] +name = "ureq-proto" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e994ba84b0bd1b1b0cf92878b7ef898a5c1760108fe7b6010327e274917a808c" +dependencies = [ + "base64 0.22.1", + "http", + "httparse", + "log", +] + [[package]] name = "url" -version = "2.5.7" +version = "2.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" dependencies = [ "form_urlencoded", "idna", "percent-encoding", "serde", + "serde_derive", ] [[package]] @@ -13241,6 +13047,12 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" +[[package]] +name = "utf8-zero" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8c0a043c9540bae7c578c88f91dda8bd82e59ae27c21baca69c8b191aaf5a6e" + [[package]] name = 
"utf8_iter" version = "1.0.4" @@ -13255,13 +13067,13 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.18.1" +version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.4.2", "js-sys", - "serde", + "serde_core", "wasm-bindgen", ] @@ -13283,12 +13095,13 @@ dependencies = [ [[package]] name = "vello_common" -version = "0.0.4" +version = "0.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a235ba928b3109ad9e7696270edb09445a52ae1c7c08e6d31a19b1cdd6cbc24a" +checksum = "1bd1a4c633ce09e7d713df1a6e036644a125e15e0c169cfb5180ddf5836ca04b" dependencies = [ "bytemuck", "fearless_simd", + "hashbrown 0.16.1", "log", "peniko", "skrifa", @@ -13297,11 +13110,12 @@ dependencies = [ [[package]] name = "vello_cpu" -version = "0.0.4" +version = "0.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0bd1fcf9c1814f17a491e07113623d44e3ec1125a9f3401f5e047d6d326da21" +checksum = "0162bfe48aabf6a9fdcd401b628c7d9f260c2cbabb343c70a65feba6f7849edc" dependencies = [ "bytemuck", + "hashbrown 0.16.1", "vello_common", ] @@ -13313,7 +13127,7 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "viewer_callbacks" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" dependencies = [ "mimalloc", "rerun", @@ -13331,53 +13145,54 @@ dependencies = [ [[package]] name = "walkers" -version = "0.50.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a443c518c081ef29fafc4c53c32c6f8cba953b019854080214cad1e6d47c297" +checksum = "1c81bab51dc24106d35edce41958ed16e3a0c1c0b45f40685ffd3f4b5691490c" dependencies = [ "bytes", "egui", "egui_extras", 
"futures", "geo-types", + "getrandom 0.2.17", "http-cache-reqwest", "image", "log", "lru 0.16.3", "reqwest", "reqwest-middleware", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "wasm-bindgen-futures", ] [[package]] name = "walrus" -version = "0.23.3" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6481311b98508f4bc2d0abbfa5d42172e7a54b4b24d8f15e28b0dc650be0c59f" +checksum = "e151599d689dac80e85c66a7cfa6ffd1b2ab79220517f9161040a87a5041aee3" dependencies = [ "anyhow", - "gimli 0.26.2", + "gimli", "id-arena", "leb128", "log", "rayon", "walrus-macro", - "wasm-encoder", - "wasmparser", + "wasm-encoder 0.245.1", + "wasmparser 0.245.1", ] [[package]] name = "walrus-macro" -version = "0.22.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ad39ff894c43c9649fa724cdde9a6fc50b855d517ef071a93e5df82fe51d3" +checksum = "1a9b0525d7ea6e5f906aca581a172e5c91b4c595290dfa8ad4a2bc9ffef33b44" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -13396,100 +13211,69 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] -name = "wasi" -version = "0.14.7+wasi-0.2.4" +name = "wasip2" +version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "wasip2", + "wit-bindgen 0.46.0", ] [[package]] -name = "wasip2" -version = "1.0.1+wasi-0.2.4" +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies 
= [ - "wit-bindgen", + "wit-bindgen 0.51.0", ] [[package]] name = "wasm-bindgen" -version = "0.2.100" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "0551fc1bb415591e3372d0bc4780db7e587d84e2a7e79da121051c5c4b89d0b0" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn 2.0.106", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-cli-support" -version = "0.2.100" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e1a4a49abe9cd6f762fc65fac2ef5732afeeb66be369d2f71a85b165a533cf" +checksum = "56574baa147c7433657bdfa369f32850d98d928be100828a7b7babb266b31f0e" dependencies = [ "anyhow", "base64 0.22.1", + "leb128", "log", "rustc-demangle", "serde", "serde_json", - "tempfile", "walrus", - "wasm-bindgen-externref-xform", - "wasm-bindgen-multi-value-xform", "wasm-bindgen-shared", - "wasm-bindgen-threads-xform", - "wasm-bindgen-wasm-conventions", - "wasm-bindgen-wasm-interpreter", -] - -[[package]] -name = "wasm-bindgen-externref-xform" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "940542c5cdbe96c35f98b5da5c65fb9d18df55a0cb1d81fc5ca4acc4fda4d61c" -dependencies = [ - "anyhow", - "walrus", - "wasm-bindgen-wasm-conventions", + "wasmparser 0.245.1", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.50" +version = "0.4.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +checksum = 
"03623de6905b7206edd0a75f69f747f134b7f0a2323392d664448bf2d3c5d87e" dependencies = [ - "cfg-if", "js-sys", - "once_cell", "wasm-bindgen", - "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "7fbdf9a35adf44786aecd5ff89b4563a90325f9da0923236f6104e603c7e86be" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -13497,80 +13281,56 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "dca9693ef2bab6d4e6707234500350d8dad079eb508dca05530c85dc3a529ff2" dependencies = [ + "bumpalo", "proc-macro2", "quote", - "syn 2.0.106", - "wasm-bindgen-backend", + "syn 2.0.117", "wasm-bindgen-shared", ] -[[package]] -name = "wasm-bindgen-multi-value-xform" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64b5ad2e97adde0c3e4369c38e0dbaee329ad8f6cc2ee8e01d1d0b13bd8b14cf" -dependencies = [ - "anyhow", - "walrus", - "wasm-bindgen-wasm-conventions", -] - [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "39129a682a6d2d841b6c429d0c51e5cb0ed1a03829d8b3d1e69a011e62cb3d3b" dependencies = [ "unicode-ident", ] [[package]] -name = "wasm-bindgen-threads-xform" -version = "0.2.100" +name = "wasm-encoder" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cbdf2d55a50f7edc9dd9aecae7a3a40e9736fda851bd8816f98a86167c8c277" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" 
dependencies = [ - "anyhow", - "walrus", - "wasm-bindgen-wasm-conventions", + "leb128fmt", + "wasmparser 0.244.0", ] [[package]] -name = "wasm-bindgen-wasm-conventions" -version = "0.2.100" +name = "wasm-encoder" +version = "0.245.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c24fcaa34d2d84407122cfb1d3f37c3586756cf462be18e049b49245a16c08" +checksum = "3f9dca005e69bf015e45577e415b9af8c67e8ee3c0e38b5b0add5aa92581ed5c" dependencies = [ - "anyhow", - "leb128", - "log", - "walrus", - "wasmparser", + "leb128fmt", + "wasmparser 0.245.1", ] [[package]] -name = "wasm-bindgen-wasm-interpreter" -version = "0.2.100" +name = "wasm-metadata" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33f24921401faadd6944206f9d6837d07bbb5ff766ed51ad34528089f66550e0" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ "anyhow", - "log", - "walrus", - "wasm-bindgen-wasm-conventions", -] - -[[package]] -name = "wasm-encoder" -version = "0.214.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff694f02a8d7a50b6922b197ae03883fbf18cdb2ae9fbee7b6148456f5f44041" -dependencies = [ - "leb128", + "indexmap", + "wasm-encoder 0.244.0", + "wasmparser 0.244.0", ] [[package]] @@ -13588,14 +13348,25 @@ dependencies = [ [[package]] name = "wasmparser" -version = "0.214.0" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5309c1090e3e84dad0d382f42064e9933fdaedb87e468cc239f0eabea73ddcb6" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ - "ahash", - "bitflags 2.9.4", - "hashbrown 0.14.5", - "indexmap 2.11.4", + "bitflags 2.11.0", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "wasmparser" +version = "0.245.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4f08c9adee0428b7bddf3890fc27e015ac4b761cc608c822667102b8bfd6995e" +dependencies = [ + "bitflags 2.11.0", + "hashbrown 0.16.1", + "indexmap", "semver", "serde", ] @@ -13608,7 +13379,7 @@ checksum = "673a33c33048a5ade91a6b139580fa174e19fb0d23f396dca9fa15f2e1e49b35" dependencies = [ "cc", "downcast-rs 1.2.1", - "rustix 1.1.2", + "rustix 1.1.4", "scoped-tls", "smallvec", "wayland-sys", @@ -13620,8 +13391,8 @@ version = "0.31.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c66a47e840dc20793f2264eb4b3e4ecb4b75d91c0dd4af04b456128e0bdd449d" dependencies = [ - "bitflags 2.9.4", - "rustix 1.1.2", + "bitflags 2.11.0", + "rustix 1.1.4", "wayland-backend", "wayland-scanner", ] @@ -13632,7 +13403,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "625c5029dbd43d25e6aa9615e88b829a5cad13b2819c4ae129fdbb7c31ab4c7e" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "cursor-icon", "wayland-backend", ] @@ -13654,7 +13425,7 @@ version = "0.32.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "efa790ed75fbfd71283bd2521a1cfdc022aabcc28bdcff00851f9e4ae88d9901" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "wayland-backend", "wayland-client", "wayland-scanner", @@ -13666,7 +13437,7 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fd38cdad69b56ace413c6bcc1fbf5acc5e2ef4af9d5f8f1f9570c0c83eae175" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "wayland-backend", "wayland-client", "wayland-protocols", @@ -13679,7 +13450,7 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1cb6cdc73399c0e06504c437fe3cf886f25568dd5454473d565085b36d6a8bbf" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "wayland-backend", "wayland-client", "wayland-protocols", @@ -13699,9 +13470,9 @@ dependencies = [ [[package]] name = "wayland-sys" -version = "0.31.7" +version = 
"0.31.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34949b42822155826b41db8e5d0c1be3a2bd296c747577a43a3e6daefc296142" +checksum = "374f6b70e8e0d6bf9461a32988fd553b59ff630964924dad6e4a4eb6bd538d17" dependencies = [ "dlib", "log", @@ -13711,9 +13482,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.77" +version = "0.3.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +checksum = "cd70027e39b12f0849461e08ffc50b9cd7688d942c1c8e3c7b22273236b4dd0a" dependencies = [ "js-sys", "wasm-bindgen", @@ -13731,15 +13502,15 @@ dependencies = [ [[package]] name = "webbrowser" -version = "1.0.5" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aaf4f3c0ba838e82b4e5ccc4157003fb8c324ee24c058470ffb82820becbde98" +checksum = "3f00bb839c1cf1e3036066614cbdcd035ecf215206691ea646aa3c60a24f68f2" dependencies = [ "core-foundation 0.10.1", "jni", "log", "ndk-context", - "objc2 0.6.3", + "objc2 0.6.4", "objc2-foundation 0.3.2", "url", "web-sys", @@ -13771,16 +13542,17 @@ checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3" [[package]] name = "wgpu" -version = "27.0.1" +version = "29.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe68bac7cde125de7a731c3400723cadaaf1703795ad3f4805f187459cd7a77" +checksum = "72c239a9a747bbd379590985bac952c2e53cb19873f7072b3370c6a6a8e06837" dependencies = [ "arrayvec", - "bitflags 2.9.4", + "bitflags 2.11.0", + "bytemuck", "cfg-if", "cfg_aliases", "document-features", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "js-sys", "log", "naga", @@ -13800,19 +13572,19 @@ dependencies = [ [[package]] name = "wgpu-core" -version = "27.0.1" +version = "29.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d654c0b6c6335edfca18c11bdaed964def641b8e9997d3a495a2ff4077c922" +checksum = 
"1e80ac6cf1895df6342f87d975162108f9d98772a0d74bc404ab7304ac29469e" dependencies = [ "arrayvec", "bit-set", "bit-vec", - "bitflags 2.9.4", + "bitflags 2.11.0", "bytemuck", "cfg_aliases", "document-features", - "hashbrown 0.16.0", - "indexmap 2.11.4", + "hashbrown 0.16.1", + "indexmap", "log", "naga", "once_cell", @@ -13822,114 +13594,135 @@ dependencies = [ "raw-window-handle", "rustc-hash 1.1.0", "smallvec", - "thiserror 2.0.17", + "thiserror 2.0.18", "wgpu-core-deps-apple", "wgpu-core-deps-emscripten", "wgpu-core-deps-wasm", "wgpu-core-deps-windows-linux-android", "wgpu-hal", + "wgpu-naga-bridge", "wgpu-types", ] [[package]] name = "wgpu-core-deps-apple" -version = "27.0.0" +version = "29.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0772ae958e9be0c729561d5e3fd9a19679bcdfb945b8b1a1969d9bfe8056d233" +checksum = "43acd053312501689cd92a01a9638d37f3e41a5fd9534875efa8917ee2d11ac0" dependencies = [ "wgpu-hal", ] [[package]] name = "wgpu-core-deps-emscripten" -version = "27.0.0" +version = "29.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b06ac3444a95b0813ecfd81ddb2774b66220b264b3e2031152a4a29fda4da6b5" +checksum = "ef043bf135cc68b6f667c55ff4e345ce2b5924d75bad36a47921b0287ca4b24a" dependencies = [ "wgpu-hal", ] [[package]] name = "wgpu-core-deps-wasm" -version = "27.0.0" +version = "29.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b1027dcf3b027a877e44819df7ceb0e2e98578830f8cd34cd6c3c7c2a7a50b7" +checksum = "2f7b75e72f49035f000dd5262e4126242e92a090a4fd75931ecfe7e60784e6fa" dependencies = [ "wgpu-hal", ] [[package]] name = "wgpu-core-deps-windows-linux-android" -version = "27.0.0" +version = "29.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71197027d61a71748e4120f05a9242b2ad142e3c01f8c1b47707945a879a03c3" +checksum = "725d5c006a8c02967b6d93ef04f6537ec4593313e330cfe86d9d3f946eb90f28" dependencies = [ "wgpu-hal", ] [[package]] 
name = "wgpu-hal" -version = "27.0.2" +version = "29.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2618a2d6b8a5964ecc1ac32a5db56cb3b1e518725fcd773fd9a782e023453f2b" +checksum = "89a47aef47636562f3937285af4c44b4b5b404b46577471411cc5313a921da7e" dependencies = [ "android_system_properties", "arrayvec", "ash", "bit-set", - "bitflags 2.9.4", - "block", + "bitflags 2.11.0", + "block2 0.6.2", "bytemuck", "cfg-if", "cfg_aliases", - "core-graphics-types 0.2.0", "glow", "glutin_wgl_sys", - "gpu-alloc", "gpu-allocator", "gpu-descriptor", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "js-sys", "khronos-egl", "libc", "libloading 0.8.9", "log", - "metal", "naga", "ndk-sys", - "objc", + "objc2 0.6.4", + "objc2-core-foundation", + "objc2-foundation 0.3.2", + "objc2-metal 0.3.2", + "objc2-quartz-core 0.3.2", "once_cell", - "ordered-float 4.6.0", + "ordered-float 5.1.0", "parking_lot", "portable-atomic", "portable-atomic-util", "profiling", "range-alloc", "raw-window-handle", + "raw-window-metal", "renderdoc-sys", "smallvec", - "thiserror 2.0.17", + "thiserror 2.0.18", "wasm-bindgen", + "wayland-sys", "web-sys", + "wgpu-naga-bridge", + "wgpu-types", + "windows", + "windows-core", +] + +[[package]] +name = "wgpu-naga-bridge" +version = "29.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4684f4410da0cf95a4cb63bb5edaac022461dedb6adf0b64d0d9b5f6890d51" +dependencies = [ + "naga", "wgpu-types", - "windows 0.58.0", - "windows-core 0.58.0", ] [[package]] name = "wgpu-types" -version = "27.0.1" +version = "29.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afdcf84c395990db737f2dd91628706cb31e86d72e53482320d368e52b5da5eb" +checksum = "ec2675540fb1a5cfa5ef122d3d5f390e2c75711a0b946410f2d6ac3a0f77d1f6" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "bytemuck", "js-sys", "log", - "thiserror 2.0.17", + "raw-window-handle", "web-sys", ] +[[package]] +name = "wildmatch" +version = 
"2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29333c3ea1ba8b17211763463ff24ee84e41c78224c16b001cd907e663a38c68" + [[package]] name = "winapi" version = "0.3.9" @@ -13963,79 +13756,23 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" -dependencies = [ - "windows-core 0.52.0", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" -dependencies = [ - "windows-core 0.58.0", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows" -version = "0.61.3" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" +checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" dependencies = [ "windows-collections", - "windows-core 0.61.2", + "windows-core", "windows-future", - "windows-link 0.1.3", "windows-numerics", ] [[package]] name = "windows-collections" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" -dependencies = [ - "windows-core 0.61.2", -] - -[[package]] -name = "windows-core" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-core" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" 
-dependencies = [ - "windows-implement 0.58.0", - "windows-interface 0.58.0", - "windows-result 0.2.0", - "windows-strings 0.1.0", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-core" -version = "0.61.2" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" dependencies = [ - "windows-implement 0.60.2", - "windows-interface 0.59.3", - "windows-link 0.1.3", - "windows-result 0.3.4", - "windows-strings 0.4.2", + "windows-core", ] [[package]] @@ -14044,35 +13781,24 @@ version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ - "windows-implement 0.60.2", - "windows-interface 0.59.3", - "windows-link 0.2.1", - "windows-result 0.4.1", - "windows-strings 0.5.1", + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", ] [[package]] name = "windows-future" -version = "0.2.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" +checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" dependencies = [ - "windows-core 0.61.2", - "windows-link 0.1.3", + "windows-core", + "windows-link", "windows-threading", ] -[[package]] -name = "windows-implement" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.106", -] - [[package]] name = "windows-implement" version = "0.60.2" @@ -14081,18 +13807,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - 
"syn 2.0.106", -] - -[[package]] -name = "windows-interface" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -14103,15 +13818,9 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] -[[package]] -name = "windows-link" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" - [[package]] name = "windows-link" version = "0.2.1" @@ -14120,30 +13829,12 @@ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-numerics" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" -dependencies = [ - "windows-core 0.61.2", - "windows-link 0.1.3", -] - -[[package]] -name = "windows-result" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-result" -version = "0.3.4" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" dependencies = [ - "windows-link 0.1.3", + "windows-core", + "windows-link", ] [[package]] @@ -14152,26 +13843,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link 
0.2.1", -] - -[[package]] -name = "windows-strings" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" -dependencies = [ - "windows-result 0.2.0", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-strings" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" -dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] @@ -14180,7 +13852,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -14225,7 +13897,7 @@ version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -14280,7 +13952,7 @@ version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ - "windows-link 0.2.1", + "windows-link", "windows_aarch64_gnullvm 0.53.1", "windows_aarch64_msvc 0.53.1", "windows_i686_gnu 0.53.1", @@ -14293,11 +13965,11 @@ dependencies = [ [[package]] name = "windows-threading" -version = "0.1.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" +checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] @@ -14482,14 +14154,14 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winit" -version = "0.30.12" 
+version = "0.30.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c66d4b9ed69c4009f6321f762d6e61ad8a2389cd431b97cb1e146812e9e6c732" +checksum = "a6755fa58a9f8350bd1e472d4c3fcc25f824ec358933bba33306d0b63df5978d" dependencies = [ "ahash", "android-activity", "atomic-waker", - "bitflags 2.9.4", + "bitflags 2.11.0", "block2 0.5.1", "bytemuck", "calloop", @@ -14501,12 +14173,12 @@ dependencies = [ "dpi", "js-sys", "libc", - "memmap2 0.9.8", + "memmap2 0.9.10", "ndk", "objc2 0.5.2", "objc2-app-kit 0.2.2", "objc2-foundation 0.2.2", - "objc2-ui-kit", + "objc2-ui-kit 0.2.2", "orbclient", "percent-encoding", "pin-project", @@ -14546,6 +14218,94 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + 
"quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.0", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder 0.244.0", + "wasm-metadata", + "wasmparser 0.244.0", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser 0.244.0", +] + [[package]] name = "writeable" version = "0.6.1" @@ -14605,7 +14365,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d039de8032a9a8856a6be89cea3e5d12fdd82306ab7c94d74e6deab2460651c5" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.11.0", "dlib", "log", "once_cell", @@ -14651,15 +14411,6 @@ version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - [[package]] name = "yoke" version = "0.8.0" @@ -14680,7 +14431,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", "synstructure", ] @@ -14735,7 +14486,7 @@ checksum = "dc6821851fa840b708b4cbbaf6241868cabc85a2dc22f426361b0292bfc0b836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 
2.0.117", "zbus-lockstep", "zbus_xml", "zvariant", @@ -14750,7 +14501,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", "zbus_names", "zvariant", "zvariant_utils", @@ -14808,7 +14559,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -14819,7 +14570,7 @@ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -14839,7 +14590,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", "synstructure", ] @@ -14879,7 +14630,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", ] [[package]] @@ -14896,18 +14647,15 @@ dependencies = [ [[package]] name = "zip" -version = "2.4.2" +version = "8.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50" +checksum = "b680f2a0cd479b4cff6e1233c483fdead418106eae419dc60200ae9850f6d004" dependencies = [ - "arbitrary", "crc32fast", - "crossbeam-utils", - "displaydoc", "flate2", - "indexmap 2.11.4", + "indexmap", "memchr", - "thiserror 2.0.17", + "typed-path", "zopfli", ] @@ -14917,6 +14665,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f06ae92f42f5e5c42443fd094f245eb656abf56dd7cce9b8b263236565e00f2" +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + [[package]] name = "zopfli" version = "0.8.3" @@ -14995,7 +14749,7 @@ dependencies = [ "proc-macro-crate", 
"proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.117", "zvariant_utils", ] @@ -15008,6 +14762,6 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.106", + "syn 2.0.117", "winnow", ] diff --git a/Cargo.toml b/Cargo.toml index 69cdfbb65b13..6e7969a460e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,7 @@ include = [ license = "MIT OR Apache-2.0" repository = "https://github.com/rerun-io/rerun" rust-version = "1.92" -version = "0.30.0-alpha.1+dev" +version = "0.32.0-alpha.1" [workspace.metadata.cargo-shear] ignored = [ @@ -45,10 +45,12 @@ ignored = [ "re_renderer_examples", # used for specific targets or features - "chrono", # TODO(#11368) "home", "profiling", - "wayland-sys", + "windows-core", +] +ignored-paths = [ + "docs/snippets/all/**/*.rs", # complains about unlinked files for our codegen input. ] [workspace.dependencies] @@ -58,104 +60,106 @@ ignored = [ # re_log_types 0.3.0-alpha.0, NOT 0.3.0-alpha.4 even though it is newer and semver-compatible. # crates/build: -re_build_info = { path = "crates/build/re_build_info", version = "=0.30.0-alpha.1", default-features = false } -re_build_tools = { path = "crates/build/re_build_tools", version = "=0.30.0-alpha.1", default-features = false } -re_dev_tools = { path = "crates/build/re_dev_tools", version = "=0.30.0-alpha.1", default-features = false } -re_protos_builder = { path = "crates/build/re_protos_builder", version = "=0.30.0-alpha.1", default-features = false } -re_types_builder = { path = "crates/build/re_types_builder", version = "=0.30.0-alpha.1", default-features = false } +re_build_info = { path = "crates/build/re_build_info", version = "=0.32.0-alpha.1", default-features = false } +re_build_tools = { path = "crates/build/re_build_tools", version = "=0.32.0-alpha.1", default-features = false } +re_dev_tools = { path = "crates/build/re_dev_tools", version = "=0.32.0-alpha.1", default-features = false } +re_protos_builder = { path = "crates/build/re_protos_builder", version = 
"=0.32.0-alpha.1", default-features = false } +re_types_builder = { path = "crates/build/re_types_builder", version = "=0.32.0-alpha.1", default-features = false } # crates/store: -re_arrow_combinators = { path = "crates/store/re_arrow_combinators", version = "=0.30.0-alpha.1", default-features = false } -re_chunk = { path = "crates/store/re_chunk", version = "=0.30.0-alpha.1", default-features = false } -re_chunk_store = { path = "crates/store/re_chunk_store", version = "=0.30.0-alpha.1", default-features = false } -re_data_loader = { path = "crates/store/re_data_loader", version = "=0.30.0-alpha.1", default-features = false } -re_data_source = { path = "crates/store/re_data_source", version = "=0.30.0-alpha.1", default-features = false } -re_dataframe = { path = "crates/store/re_dataframe", version = "=0.30.0-alpha.1", default-features = false } -re_datafusion = { path = "crates/store/re_datafusion", version = "=0.30.0-alpha.1", default-features = false } -re_entity_db = { path = "crates/store/re_entity_db", version = "=0.30.0-alpha.1", default-features = false } -re_grpc_client = { path = "crates/store/re_grpc_client", version = "=0.30.0-alpha.1", default-features = false } -re_grpc_server = { path = "crates/store/re_grpc_server", version = "=0.30.0-alpha.1", default-features = false } -re_lenses = { path = "crates/store/re_lenses", version = "=0.30.0-alpha.1", default-features = false } -re_log_channel = { path = "crates/store/re_log_channel", version = "=0.30.0-alpha.1", default-features = false } -re_log_encoding = { path = "crates/store/re_log_encoding", version = "=0.30.0-alpha.1", default-features = false } -re_log_types = { path = "crates/store/re_log_types", version = "=0.30.0-alpha.1", default-features = false } -re_mcap = { path = "crates/store/re_mcap", version = "=0.30.0-alpha.1", default-features = false } -re_protos = { path = "crates/store/re_protos", version = "=0.30.0-alpha.1", default-features = false } -re_query = { path = 
"crates/store/re_query", version = "=0.30.0-alpha.1", default-features = false } -re_redap_client = { path = "crates/store/re_redap_client", version = "=0.30.0-alpha.1", default-features = false } -re_redap_tests = { path = "crates/store/re_redap_tests", version = "=0.30.0-alpha.1", default-features = false } -re_sdk_types = { path = "crates/store/re_sdk_types", version = "=0.30.0-alpha.1", default-features = false } -re_server = { path = "crates/store/re_server", version = "=0.30.0-alpha.1", default-features = false } -re_sorbet = { path = "crates/store/re_sorbet", version = "=0.30.0-alpha.1", default-features = false } -re_tf = { path = "crates/store/re_tf", version = "=0.30.0-alpha.1", default-features = false } -re_types_core = { path = "crates/store/re_types_core", version = "=0.30.0-alpha.1", default-features = false } -re_uri = { path = "crates/store/re_uri", version = "=0.30.0-alpha.1", default-features = false } +re_lenses_core = { path = "crates/store/re_lenses_core", version = "=0.32.0-alpha.1", default-features = false } +re_chunk = { path = "crates/store/re_chunk", version = "=0.32.0-alpha.1", default-features = false } +re_chunk_store = { path = "crates/store/re_chunk_store", version = "=0.32.0-alpha.1", default-features = false } +re_importer = { path = "crates/store/re_importer", version = "=0.32.0-alpha.1", default-features = false } +re_data_source = { path = "crates/store/re_data_source", version = "=0.32.0-alpha.1", default-features = false } +re_dataframe = { path = "crates/store/re_dataframe", version = "=0.32.0-alpha.1", default-features = false } +re_datafusion = { path = "crates/store/re_datafusion", version = "=0.32.0-alpha.1", default-features = false } +re_entity_db = { path = "crates/store/re_entity_db", version = "=0.32.0-alpha.1", default-features = false } +re_grpc_client = { path = "crates/store/re_grpc_client", version = "=0.32.0-alpha.1", default-features = false } +re_grpc_server = { path = "crates/store/re_grpc_server", version 
= "=0.32.0-alpha.1", default-features = false } +re_lenses = { path = "crates/store/re_lenses", version = "=0.32.0-alpha.1", default-features = false } +re_log_channel = { path = "crates/store/re_log_channel", version = "=0.32.0-alpha.1", default-features = false } +re_log_encoding = { path = "crates/store/re_log_encoding", version = "=0.32.0-alpha.1", default-features = false } +re_log_types = { path = "crates/store/re_log_types", version = "=0.32.0-alpha.1", default-features = false } +re_mcap = { path = "crates/store/re_mcap", version = "=0.32.0-alpha.1", default-features = false } +re_parquet = { path = "crates/store/re_parquet", version = "=0.32.0-alpha.1", default-features = false } +re_protos = { path = "crates/store/re_protos", version = "=0.32.0-alpha.1", default-features = false } +re_query = { path = "crates/store/re_query", version = "=0.32.0-alpha.1", default-features = false } +re_redap_client = { path = "crates/store/re_redap_client", version = "=0.32.0-alpha.1", default-features = false } +re_redap_tests = { path = "crates/store/re_redap_tests", version = "=0.32.0-alpha.1", default-features = false } +re_sdk_types = { path = "crates/store/re_sdk_types", version = "=0.32.0-alpha.1", default-features = false } +re_server = { path = "crates/store/re_server", version = "=0.32.0-alpha.1", default-features = false } +re_sorbet = { path = "crates/store/re_sorbet", version = "=0.32.0-alpha.1", default-features = false } +re_tf = { path = "crates/store/re_tf", version = "=0.32.0-alpha.1", default-features = false } +re_types_core = { path = "crates/store/re_types_core", version = "=0.32.0-alpha.1", default-features = false } +re_uri = { path = "crates/store/re_uri", version = "=0.32.0-alpha.1", default-features = false } # crates/top: -re_sdk = { path = "crates/top/re_sdk", version = "=0.30.0-alpha.1", default-features = false } -rerun = { path = "crates/top/rerun", version = "=0.30.0-alpha.1", default-features = false } -rerun_c = { path = 
"crates/top/rerun_c", version = "=0.30.0-alpha.1", default-features = false } -rerun-cli = { path = "crates/top/rerun-cli", version = "=0.30.0-alpha.1", default-features = false } +re_sdk = { path = "crates/top/re_sdk", version = "=0.32.0-alpha.1", default-features = false } +rerun = { path = "crates/top/rerun", version = "=0.32.0-alpha.1", default-features = false } +rerun_c = { path = "crates/top/rerun_c", version = "=0.32.0-alpha.1", default-features = false } +rerun-cli = { path = "crates/top/rerun-cli", version = "=0.32.0-alpha.1", default-features = false } # crates/utils: -re_analytics = { path = "crates/utils/re_analytics", version = "=0.30.0-alpha.1", default-features = false } -re_arrow_util = { path = "crates/utils/re_arrow_util", version = "=0.30.0-alpha.1", default-features = false } -re_auth = { path = "crates/utils/re_auth", version = "=0.30.0-alpha.1", default-features = false } -re_backoff = { path = "crates/utils/re_backoff", version = "=0.30.0-alpha.1", default-features = false } -re_byte_size = { path = "crates/utils/re_byte_size", version = "=0.30.0-alpha.1", default-features = false } -re_capabilities = { path = "crates/utils/re_capabilities", version = "=0.30.0-alpha.1", default-features = false } -re_case = { path = "crates/utils/re_case", version = "=0.30.0-alpha.1", default-features = false } -re_crash_handler = { path = "crates/utils/re_crash_handler", version = "=0.30.0-alpha.1", default-features = false } -re_error = { path = "crates/utils/re_error", version = "=0.30.0-alpha.1", default-features = false } -re_format = { path = "crates/utils/re_format", version = "=0.30.0-alpha.1", default-features = false } -re_int_histogram = { path = "crates/utils/re_int_histogram", version = "=0.30.0-alpha.1", default-features = false } -re_log = { path = "crates/utils/re_log", version = "=0.30.0-alpha.1", default-features = false } -re_memory = { path = "crates/utils/re_memory", version = "=0.30.0-alpha.1", default-features = false } -re_mutex = { 
path = "crates/utils/re_mutex", version = "=0.30.0-alpha.1", default-features = false } -re_perf_telemetry = { path = "crates/utils/re_perf_telemetry", version = "=0.30.0-alpha.1", default-features = false } -re_quota_channel = { path = "crates/utils/re_quota_channel", version = "=0.30.0-alpha.1", default-features = false } -re_ros_msg = { path = "crates/utils/re_ros_msg", version = "=0.30.0-alpha.1", default-features = false } -re_rvl = { path = "crates/utils/re_rvl", version = "=0.30.0-alpha.1", default-features = false } -re_span = { path = "crates/utils/re_span", version = "=0.30.0-alpha.1", default-features = false } -re_string_interner = { path = "crates/utils/re_string_interner", version = "=0.30.0-alpha.1", default-features = false } -re_tracing = { path = "crates/utils/re_tracing", version = "=0.30.0-alpha.1", default-features = false } -re_tuid = { path = "crates/utils/re_tuid", version = "=0.30.0-alpha.1", default-features = false } -re_video = { path = "crates/utils/re_video", version = "=0.30.0-alpha.1", default-features = false } +re_analytics = { path = "crates/utils/re_analytics", version = "=0.32.0-alpha.1", default-features = false } +re_arrow_util = { path = "crates/utils/re_arrow_util", version = "=0.32.0-alpha.1", default-features = false } +re_auth = { path = "crates/utils/re_auth", version = "=0.32.0-alpha.1", default-features = false } +re_backoff = { path = "crates/utils/re_backoff", version = "=0.32.0-alpha.1", default-features = false } +re_byte_size = { path = "crates/utils/re_byte_size", version = "=0.32.0-alpha.1", default-features = false } +re_capabilities = { path = "crates/utils/re_capabilities", version = "=0.32.0-alpha.1", default-features = false } +re_case = { path = "crates/utils/re_case", version = "=0.32.0-alpha.1", default-features = false } +re_crash_handler = { path = "crates/utils/re_crash_handler", version = "=0.32.0-alpha.1", default-features = false } +re_error = { path = "crates/utils/re_error", version = 
"=0.32.0-alpha.1", default-features = false } +re_format = { path = "crates/utils/re_format", version = "=0.32.0-alpha.1", default-features = false } +re_log = { path = "crates/utils/re_log", version = "=0.32.0-alpha.1", default-features = false } +re_memory = { path = "crates/utils/re_memory", version = "=0.32.0-alpha.1", default-features = false } +re_mutex = { path = "crates/utils/re_mutex", version = "=0.32.0-alpha.1", default-features = false } +re_perf_telemetry = { path = "crates/utils/re_perf_telemetry", version = "=0.32.0-alpha.1", default-features = false } +re_quota_channel = { path = "crates/utils/re_quota_channel", version = "=0.32.0-alpha.1", default-features = false } +re_ros_msg = { path = "crates/utils/re_ros_msg", version = "=0.32.0-alpha.1", default-features = false } +re_rvl = { path = "crates/utils/re_rvl", version = "=0.32.0-alpha.1", default-features = false } +re_span = { path = "crates/utils/re_span", version = "=0.32.0-alpha.1", default-features = false } +re_string_interner = { path = "crates/utils/re_string_interner", version = "=0.32.0-alpha.1", default-features = false } +re_tracing = { path = "crates/utils/re_tracing", version = "=0.32.0-alpha.1", default-features = false } +re_tuid = { path = "crates/utils/re_tuid", version = "=0.32.0-alpha.1", default-features = false } +re_video = { path = "crates/utils/re_video", version = "=0.32.0-alpha.1", default-features = false } # crates/viewer: -re_arrow_ui = { path = "crates/viewer/re_arrow_ui", version = "=0.30.0-alpha.1", default-features = false } -re_blueprint_tree = { path = "crates/viewer/re_blueprint_tree", version = "=0.30.0-alpha.1", default-features = false } -re_chunk_store_ui = { path = "crates/viewer/re_chunk_store_ui", version = "=0.30.0-alpha.1", default-features = false } -re_component_fallbacks = { path = "crates/viewer/re_component_fallbacks", version = "=0.30.0-alpha.1", default-features = false } -re_component_ui = { path = "crates/viewer/re_component_ui", version = 
"=0.30.0-alpha.1", default-features = false } -re_context_menu = { path = "crates/viewer/re_context_menu", version = "=0.30.0-alpha.1", default-features = false } -re_data_ui = { path = "crates/viewer/re_data_ui", version = "=0.30.0-alpha.1", default-features = false } -re_dataframe_ui = { path = "crates/viewer/re_dataframe_ui", version = "=0.30.0-alpha.1", default-features = false } -re_memory_view = { path = "crates/viewer/re_memory_view", version = "=0.30.0-alpha.1", default-features = false } -re_recording_panel = { path = "crates/viewer/re_recording_panel", version = "=0.30.0-alpha.1", default-features = false } -re_redap_browser = { path = "crates/viewer/re_redap_browser", version = "=0.30.0-alpha.1", default-features = false } -re_renderer = { path = "crates/viewer/re_renderer", version = "=0.30.0-alpha.1", default-features = false } -re_renderer_examples = { path = "crates/viewer/re_renderer_examples", version = "=0.30.0-alpha.1", default-features = false } -re_selection_panel = { path = "crates/viewer/re_selection_panel", version = "=0.30.0-alpha.1", default-features = false } -re_test_context = { path = "crates/viewer/re_test_context", version = "=0.30.0-alpha.1", default-features = false } -re_test_viewport = { path = "crates/viewer/re_test_viewport", version = "=0.30.0-alpha.1", default-features = false } -re_time_panel = { path = "crates/viewer/re_time_panel", version = "=0.30.0-alpha.1", default-features = false } -re_ui = { path = "crates/viewer/re_ui", version = "=0.30.0-alpha.1", default-features = false } -re_view = { path = "crates/viewer/re_view", version = "=0.30.0-alpha.1", default-features = false } -re_view_bar_chart = { path = "crates/viewer/re_view_bar_chart", version = "=0.30.0-alpha.1", default-features = false } -re_view_dataframe = { path = "crates/viewer/re_view_dataframe", version = "=0.30.0-alpha.1", default-features = false } -re_view_graph = { path = "crates/viewer/re_view_graph", version = "=0.30.0-alpha.1", default-features = 
false } -re_view_map = { path = "crates/viewer/re_view_map", version = "=0.30.0-alpha.1", default-features = false } -re_view_spatial = { path = "crates/viewer/re_view_spatial", version = "=0.30.0-alpha.1", default-features = false } -re_view_tensor = { path = "crates/viewer/re_view_tensor", version = "=0.30.0-alpha.1", default-features = false } -re_view_text_document = { path = "crates/viewer/re_view_text_document", version = "=0.30.0-alpha.1", default-features = false } -re_view_text_log = { path = "crates/viewer/re_view_text_log", version = "=0.30.0-alpha.1", default-features = false } -re_view_time_series = { path = "crates/viewer/re_view_time_series", version = "=0.30.0-alpha.1", default-features = false } -re_viewer = { path = "crates/viewer/re_viewer", version = "=0.30.0-alpha.1", default-features = false } -re_viewer_context = { path = "crates/viewer/re_viewer_context", version = "=0.30.0-alpha.1", default-features = false } -re_viewport = { path = "crates/viewer/re_viewport", version = "=0.30.0-alpha.1", default-features = false } -re_viewport_blueprint = { path = "crates/viewer/re_viewport_blueprint", version = "=0.30.0-alpha.1", default-features = false } -re_web_viewer_server = { path = "crates/viewer/re_web_viewer_server", version = "=0.30.0-alpha.1", default-features = false } +re_arrow_ui = { path = "crates/viewer/re_arrow_ui", version = "=0.32.0-alpha.1", default-features = false } +re_blueprint_tree = { path = "crates/viewer/re_blueprint_tree", version = "=0.32.0-alpha.1", default-features = false } +re_chunk_store_ui = { path = "crates/viewer/re_chunk_store_ui", version = "=0.32.0-alpha.1", default-features = false } +re_component_fallbacks = { path = "crates/viewer/re_component_fallbacks", version = "=0.32.0-alpha.1", default-features = false } +re_component_ui = { path = "crates/viewer/re_component_ui", version = "=0.32.0-alpha.1", default-features = false } +re_context_menu = { path = "crates/viewer/re_context_menu", version = 
"=0.32.0-alpha.1", default-features = false } +re_data_ui = { path = "crates/viewer/re_data_ui", version = "=0.32.0-alpha.1", default-features = false } +re_dataframe_ui = { path = "crates/viewer/re_dataframe_ui", version = "=0.32.0-alpha.1", default-features = false } +re_memory_view = { path = "crates/viewer/re_memory_view", version = "=0.32.0-alpha.1", default-features = false } +re_plot = { path = "crates/viewer/re_plot", version = "=0.32.0-alpha.1", default-features = false } +re_recording_panel = { path = "crates/viewer/re_recording_panel", version = "=0.32.0-alpha.1", default-features = false } +re_redap_browser = { path = "crates/viewer/re_redap_browser", version = "=0.32.0-alpha.1", default-features = false } +re_renderer = { path = "crates/viewer/re_renderer", version = "=0.32.0-alpha.1", default-features = false } +re_renderer_examples = { path = "crates/viewer/re_renderer_examples", version = "=0.32.0-alpha.1", default-features = false } +re_selection_panel = { path = "crates/viewer/re_selection_panel", version = "=0.32.0-alpha.1", default-features = false } +re_test_context = { path = "crates/viewer/re_test_context", version = "=0.32.0-alpha.1", default-features = false } +re_test_viewport = { path = "crates/viewer/re_test_viewport", version = "=0.32.0-alpha.1", default-features = false } +re_time_panel = { path = "crates/viewer/re_time_panel", version = "=0.32.0-alpha.1", default-features = false } +re_ui = { path = "crates/viewer/re_ui", version = "=0.32.0-alpha.1", default-features = false } +re_view = { path = "crates/viewer/re_view", version = "=0.32.0-alpha.1", default-features = false } +re_view_bar_chart = { path = "crates/viewer/re_view_bar_chart", version = "=0.32.0-alpha.1", default-features = false } +re_view_dataframe = { path = "crates/viewer/re_view_dataframe", version = "=0.32.0-alpha.1", default-features = false } +re_view_graph = { path = "crates/viewer/re_view_graph", version = "=0.32.0-alpha.1", default-features = false } 
+re_view_map = { path = "crates/viewer/re_view_map", version = "=0.32.0-alpha.1", default-features = false } +re_view_spatial = { path = "crates/viewer/re_view_spatial", version = "=0.32.0-alpha.1", default-features = false } +re_view_status = { path = "crates/viewer/re_view_status", version = "=0.32.0-alpha.1", default-features = false } +re_view_tensor = { path = "crates/viewer/re_view_tensor", version = "=0.32.0-alpha.1", default-features = false } +re_view_text_document = { path = "crates/viewer/re_view_text_document", version = "=0.32.0-alpha.1", default-features = false } +re_view_text_log = { path = "crates/viewer/re_view_text_log", version = "=0.32.0-alpha.1", default-features = false } +re_view_time_series = { path = "crates/viewer/re_view_time_series", version = "=0.32.0-alpha.1", default-features = false } +re_viewer = { path = "crates/viewer/re_viewer", version = "=0.32.0-alpha.1", default-features = false } +re_viewer_context = { path = "crates/viewer/re_viewer_context", version = "=0.32.0-alpha.1", default-features = false } +re_viewport = { path = "crates/viewer/re_viewport", version = "=0.32.0-alpha.1", default-features = false } +re_viewport_blueprint = { path = "crates/viewer/re_viewport_blueprint", version = "=0.32.0-alpha.1", default-features = false } +re_web_viewer_server = { path = "crates/viewer/re_web_viewer_server", version = "=0.32.0-alpha.1", default-features = false } # Rerun crates in other repos: re_mp4 = "0.4.0" @@ -167,34 +171,34 @@ dav1d = { package = "re_rav1d", version = "0.1.3", default-features = false } # dav1d = { version = "0.10.3" } # Requires separate install of `dav1d` library. Fast in debug builds. Useful for development. 
# core egui-crates: -ecolor = "0.33.3" -eframe = { version = "0.33.3", default-features = false, features = [ +ecolor = "0.34.0" +eframe = { version = "0.34.0", default-features = false, features = [ "accesskit", "default_fonts", "wayland", "x11", ] } -egui = { version = "0.33.3", features = ["callstack", "color-hex", "rayon"] } -egui_extras = { version = "0.33.3", features = ["http", "image", "serde", "svg"] } -egui_kittest = { version = "0.33.3", features = ["wgpu", "snapshot", "eframe"] } -egui-wgpu = "0.33.3" -emath = "0.33.3" +egui = { version = "0.34.0", features = ["callstack", "color-hex", "rayon"] } +egui_extras = { version = "0.34.0", features = ["http", "image", "serde", "svg"] } +egui_kittest = { version = "0.34.0", features = ["wgpu", "snapshot", "eframe"] } +egui-wgpu = "0.34.0" +emath = "0.34.0" # other egui crates: -egui_commonmark = { version = "0.22.0", default-features = false } -egui_dnd = { version = "0.14.0" } -egui_plot = "0.34.1" # https://github.com/emilk/egui_plot -egui_table = "0.7.0" # https://github.com/rerun-io/egui_table -egui_tiles = "0.14.1" # https://github.com/rerun-io/egui_tiles -walkers = "0.50.0" +egui_commonmark = { version = "0.23.0", default-features = false } +egui_dnd = { version = "0.15.0" } +egui_plot = "0.35.0" # https://github.com/emilk/egui_plot +egui_table = "0.8.0" # https://github.com/rerun-io/egui_table +egui_tiles = "0.15.0" # https://github.com/rerun-io/egui_tiles +walkers = "0.53.0" # All of our direct external dependencies should be found here: ahash = "0.8" -anyhow = { version = "1.0", default-features = false } -argh = "0.1.13" +anyhow = { version = "1.0.102", default-features = false } +argh = "0.1.15" arrayvec = "0.7" array-init = "2.1" -arrow = { version = "56.1", default-features = false, features = [ +arrow = { version = "57.3.0", default-features = false, features = [ # NOTE: Similar to `datafusion`, we enable many features on a workspace level # to avoid re-compilation when changing compile targets. 
"ffi", @@ -203,96 +207,99 @@ arrow = { version = "56.1", default-features = false, features = [ ] } async-stream = "0.3" async-trait = "0.1.89" -axum = "0.8.6" +axum = "0.8.8" backtrace = "0.3" base64 = "0.22" bincode = "1.3" -bit-vec = "0.8" -bitflags = { version = "2.9", features = ["bytemuck"] } -bytemuck = { version = "1.24", features = ["extern_crate_alloc"] } +bit-vec = "0.9" +bitflags = { version = "2.11", features = ["bytemuck"] } +bytemuck = { version = "1.25", features = ["extern_crate_alloc"] } byteorder = "1.5.0" bytes = "1.11.1" -camino = "1.2" -cargo_metadata = "0.23.0" +camino = "1.2.2" +cargo_metadata = "0.23.1" cargo-run-wasm = "0.4.0" cdr-encoding = "0.10.2" -cfg_aliases = "0.2.0" -cfg-if = "1.0" -chrono = { version = "0.4.42", default-features = false } # Needed for datafusion, see `re_datafusion`'s Cargo.toml +cfg_aliases = "0.2.1" +cfg-if = "1.0.4" +chrono = { version = "0.4.44", default-features = false } # Needed for datafusion, see `re_datafusion`'s Cargo.toml clang-format = "0.3" -clap = { version = "4.5", features = ["derive"] } -clean-path = "0.2.0" +clap = { version = "4.5.60", features = ["derive"] } +clean-path = "0.2.1" colored = "2.2" # Old b/c of dify -comfy-table = { version = "7.2", default-features = false } +comfy-table = { version = "7.2.2", default-features = false } console_error_panic_hook = "0.1.7" const_format = "0.2.35" -convert_case = "0.6.0" -criterion = "0.5.0" +convert_case = "0.11.0" +criterion = "0.5.1" cros-codecs = "0.0.6" -crossbeam = "0.8.0" +crossbeam = "0.8.4" dae-parser = "0.11.0" -datafusion = { version = "50.1.0", default-features = false, features = [ +datafusion = { version = "52.5.0", default-features = false, features = [ # NOTE: we enable the same features everywhere # because otherwise we will recompile datafusion all the time based on our current compile target. 
- # The features here are the same as in https://github.com/lance-format/lance/blob/v0.38.0/Cargo.toml#L99-L107 + # The features here are the same as in https://github.com/lance-format/lance/blob/v3.0.0/Cargo.toml#L116-L123 # This is very hacky, and I don't like it. "crypto_expressions", "datetime_expressions", "encoding_expressions", "nested_expressions", "regex_expressions", + "sql", "string_expressions", "unicode_expressions", ] } -datafusion-ffi = "50.1.0" +datafusion-ffi = "52.5.0" directories = "6.0" -document-features = "0.2.11" +document-features = "0.2.12" econtext = "0.2.0" # Prints error contexts on crashes -ehttp = "0.6.0" +ehttp = "0.7.1" enumset = "1.1.10" -env_filter = { version = "0.1.3", default-features = false } -env_logger = { version = "0.11.8", default-features = false } -ffmpeg-sidecar = { version = "2.2.0", default-features = false } -fixed = { version = "1.29", default-features = false } +env_filter = { version = "1.0.0", default-features = false } +env_logger = { version = "0.11.9", default-features = false } +ffmpeg-sidecar = { version = "2.4.0", default-features = false } +fixed = { version = "1.30", default-features = false } fjadra = "0.2.1" -flatbuffers = "25.9.23" -futures = "0.3.31" -futures-util = "0.3.31" -getrandom = "0.3.3" -glam = { version = "0.30.8", features = ["debug-glam-assert", "serde"] } +flatbuffers = "25.12.19" +futures = "0.3.32" +futures-util = "0.3.32" +getrandom = "0.3.4" +getrandom02 = { package = "getrandom", version = "0.2.17" } +glam = { version = "0.30.10", features = ["debug-glam-assert", "serde"] } glob = "0.3.3" gltf = "1.4" h264-reader = "0.8.0" -half = { version = "2.6.0", features = ["bytemuck"] } -hexasphere = "16.0.0" -home = "0.5.11" -http = "1.3.1" +half = { version = "2.7.1", features = ["bytemuck"] } +hexasphere = "16.0.0" # Update in tandem with glam +hmac = "0.12.1" +home = "0.5.12" +http = "1.4.0" http-body = "1.0.1" image = { version = "0.25.6", default-features = false, features = ["jpeg", 
"png"] } indent = "0.1.1" -indexmap = { version = "2.11", features = [ +indexmap = { version = "2.13", features = [ # indexmap version chosen to align with other dependencies "std", "serde", ] } -indicatif = "0.18.0" # Progress bar +indicatif = "0.18.4" # Progress bar infer = "0.16.0" # infer MIME type by checking the magic number signaturefer MIME type by checking the magic number signature -insta = "1.43" +insta = "1.46" itertools = "0.14.0" -jiff = { version = "0.2.15", features = ["js"] } -js-sys = "0.3.77" -jsonwebtoken = { version = "9.3", default-features = false } -lance = { version = "0.38.2", default-features = false } # When you update this, also update the list of features enabled for `datafusion` (~50 lines up) -lance-index = { version = "0.38.2", default-features = false } -lance-linalg = { version = "0.38.2", default-features = false } -libc = "0.2.176" +jiff = { version = "0.2.23", features = ["js"] } +js-sys = "0.3.94" +jsonwebtoken = { version = "10.3", default-features = false } +lance = { version = "3.0.0", default-features = false } # When you update this, also update the list of features enabled for `datafusion` (~50 lines up) +lance-index = { version = "3.0.0", default-features = false } +lance-linalg = { version = "3.0.0", default-features = false } +libc = "0.2.182" linked-hash-map = { version = "0.5.6", default-features = false } -log = "0.4.28" +log = "0.4.29" log-once = "0.4.1" -lz4_flex = "0.12" +lz4_flex = "0.13" macaw = "0.30.0" -mcap = "0.23.3" -memmap2 = "0.9.8" +mcap = "0.24.0" +memmap2 = "0.9.10" memory-stats = "1.2" mimalloc = { version = "0.1.48", features = ["v3"] } mime_guess2 = "2.3" # infer MIME type by file extension, and map mime to file extension @@ -304,38 +311,38 @@ nohash-hasher = "0.2.0" notify = { version = "8.2", features = ["macos_kqueue"] } num-derive = "0.4.2" num-traits = "0.2.19" -numpy = "0.25.0" -objc2-app-kit = "0.3.2" +numpy = "0.26.0" opentelemetry = { version = "0.31.0", features = ["metrics"] } 
-opentelemetry-appender-tracing = "0.31.0" +opentelemetry-appender-tracing = "0.31.1" opentelemetry-http = "0.31.0" opentelemetry-otlp = { version = "0.31.0", features = ["gzip-tonic"] } +opentelemetry-proto = { version = "0.31.0", default-features = false } opentelemetry_sdk = { version = "0.31.0", features = ["rt-tokio"] } ordered-float = "5.1.0" parking_lot = { version = "0.12.5", features = ["serde"] } -parquet = { version = "56.1", default-features = false } +parquet = { version = "57.3.0", default-features = false } paste = "1.0" pathdiff = "0.2.3" percent-encoding = "2.3" pico-args = "0.5.0" -pin-project-lite = "0.2.16" +pin-project-lite = "0.2.17" ply-rs-bw = { version = "=3.0.0", default-features = false } # ply-rs-bw has released semver breaking changes in patch releases before. Fool me once, shame on you… poll-promise = "0.3.0" pollster = "0.4.0" prettyplease = "0.2.37" -proc-macro2 = { version = "1.0", default-features = false } +proc-macro2 = { version = "1.0.106", default-features = false } profiling = { version = "1.0.17", default-features = false } prometheus-client = "0.24.0" -prost = "0.14.1" -prost-build = "0.14.1" -prost-reflect = "0.16.1" -prost-types = "0.14.1" +prost = "0.14.3" +prost-build = "0.14.3" +prost-reflect = "0.16.3" +prost-types = "0.14.3" protoc-prebuilt = "0.3.0" puffin = "0.19.1" puffin_http = "0.16.1" -pyo3 = "0.25.1" -pyo3-build-config = "0.25.1" -quote = "1.0" +pyo3 = "0.26.0" +pyo3-build-config = "0.26.0" +quote = "1.0.45" rand = { version = "0.9.2", default-features = false, features = [ "small_rng", "std", @@ -344,17 +351,17 @@ rand = { version = "0.9.2", default-features = false, features = [ rand_distr = { version = "0.5.1", default-features = false, features = ["std"] } raw-window-handle = "0.6.2" rayon = "1.11" -regex-lite = "0.1.7" +regex-lite = "0.1.9" +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } rexif = "0.7.5" rfd = { version = "0.17.2", default-features = false, features = 
["xdg-portal"] } -ron = { version = "0.11.0", features = ["integer128"] } +ron = { version = "0.12.0", features = ["integer128"] } roxmltree = "0.20.0" -rustdoc-json = "0.9.7" -rustdoc-types = "0.56.0" -rustls = { version = "0.23.32", default-features = false } +ring = "0.17.14" +rustls = { version = "0.23.37", default-features = false } saturating_cast = "0.1" scuffle-av1 = "0.1.4" -scuffle-bytes-util = "0.1.4" +scuffle-bytes-util = "0.1.5" semver = "1.0.27" seq-macro = "0.3.6" serde = { version = "1.0", features = ["derive"] } @@ -362,37 +369,38 @@ serde_bytes = "0.11.19" serde_json = { version = "1.0", default-features = false, features = ["std"] } serde-wasm-bindgen = "0.6.5" sha2 = "0.10.9" +signature = { version = "2.2", features = ["std"] } similar-asserts = "1.7.0" -slotmap = { version = "1.0.7", features = ["serde"] } +slotmap = { version = "1.1.1", features = ["serde"] } smallvec = { version = "1.15", features = ["const_generics", "union"] } static_assertions = "1.1" -stl_io = "0.8.5" +stl_io = "0.10.0" strum = { version = "0.26.3", features = ["derive"] } # need to update re_rav1d first strum_macros = "0.26.4" # need to update re_rav1d first sublime_fuzzy = "0.7.0" syn = "2.0" -sysinfo = { version = "0.30.13", default-features = false } +sysinfo = { version = "0.38.4", default-features = false } tap = "1.0.1" -tempfile = "3.23" -thiserror = "2.0.17" +tempfile = "3.26" +thiserror = "2.0.18" tiff = "0.9.1" tiny_http = { version = "0.12.0", default-features = false } tobj = "4.0" -tokio = { version = "1.47.1", default-features = false } -tokio-stream = "0.1.17" -tokio-util = { version = "0.7.16", default-features = false } -toml = { version = "0.9.8", default-features = false } +tokio = { version = "1.50.0", default-features = false } +tokio-stream = "0.1.18" +tokio-util = { version = "0.7.18", default-features = false } +toml = { version = "1.0.6", default-features = false } tonic = { version = "0.14.2", default-features = false } tonic-prost = { version = 
"0.14.2", default-features = false } tonic-prost-build = { version = "0.14.2", default-features = false } tonic-web = "0.14.2" tonic-web-wasm-client = "0.8.0" -tower = "0.5.2" -tower-http = "0.6.6" +tower = "0.5.3" +tower-http = "0.6.8" tower-service = "0.3.3" -tracing = "0.1.41" -tracing-opentelemetry = "0.32.0" -tracing-subscriber = { version = "0.3.20", features = ["tracing-log", "fmt", "env-filter"] } +tracing = "0.1.44" +tracing-opentelemetry = "0.32.1" +tracing-subscriber = { version = "0.3.22", features = ["tracing-log", "fmt", "env-filter"] } tracing-tracy = { version = "0.11.4", default-features = false, features = [ "broadcast", "callstack-inlines", @@ -406,11 +414,12 @@ type-map = "0.5.1" typenum = "1.19" unindent = "0.2.4" urdf-rs = "0.9.0" -ureq = "2.12.1" -url = "2.5" -uuid = { version = "1.18", features = ["serde", "v4", "js"] } +ureq = "3.3.0" +url = "2.5.8" +uuid = { version = "1.21", features = ["serde", "v4", "js"] } vec1 = { version = "1.12", features = ["serde", "smallvec-v1"] } walkdir = "2.5" +wildmatch = "2.6.1" # TODO(#8766): `rerun_js/web-viewer/build-wasm.mjs` is HIGHLY sensitive to changes in `wasm-bindgen`. # Whenever updating `wasm-bindgen`, update this and the narrower dependency specifications in # `crates/viewer/re_viewer/Cargo.toml`, and make sure that notebooks still work: @@ -420,17 +429,20 @@ walkdir = "2.5" # Do not make this an `=` dependency, because that may break Rust users’ builds when a newer # version is released, even if they are not building the web viewer. # For details see https://github.com/rerun-io/rerun/issues/8766 -wasm-bindgen = "0.2.100" # ⚠️ read above notice before touching this! -wasm-bindgen-cli-support = "=0.2.100" # ⚠️ read above notice before touching this! -wasm-bindgen-futures = "0.4.50" -wayland-sys = "0.31.7" -web-sys = "0.3.77" +wasm-bindgen = "0.2.117" # ⚠️ read above notice before touching this! +wasm-bindgen-cli-support = "0.2.117" # ⚠️ read above notice before touching this! 
+wasm-bindgen-futures = "0.4.67" +web-sys = "0.3.94" +wayland-sys = "0.31.9" web-time = "1.1.0" -webbrowser = "1.0" -winit = { version = "0.30.12", default-features = false } +webbrowser = "1.1" +windows-core = { version = "0.62", default-features = false, features = [ + "std", +] } # Ensure `std` is enabled so `windows-result::Error` impls `core::error::Error` (needed by wgpu-hal gles on Windows) +winit = { version = "0.30.13", default-features = false } # TODO(andreas): Try to get rid of `fragile-send-sync-non-atomic-wasm`. This requires re_renderer being aware of single-thread restriction on resources. # See also https://gpuweb.github.io/gpuweb/explainer/#multithreading-transfer (unsolved part of the Spec as of writing!) -wgpu = { version = "27.0.1", default-features = false, features = [ +wgpu = { version = "29.0.1", default-features = false, features = [ # Backends (see https://docs.rs/wgpu/latest/wgpu/#feature-flags) "gles", "metal", @@ -448,8 +460,8 @@ wgpu = { version = "27.0.1", default-features = false, features = [ "fragile-send-sync-non-atomic-wasm", ] } xshell = "0.2.7" -xxhash-rust = { version = "0.8", features = ["xxh32"] } -zip = { version = "2.1", default-features = false, features = ["deflate"] } +xxhash-rust = { version = "0.8", features = ["xxh32", "xxh64"] } +zip = { version = "8.2", default-features = false, features = ["deflate"] } # --------------------------------------------------------------------------------- [profile] @@ -487,7 +499,7 @@ debug = false "re_component_ui".debug = true "re_context_menu".debug = true "re_crash_handler".debug = true -"re_data_loader".debug = true +"re_importer".debug = true "re_data_source".debug = true "re_data_ui".debug = true "re_dataframe_ui".debug = true @@ -499,7 +511,6 @@ debug = false "re_format".debug = true "re_grpc_client".debug = true "re_grpc_server".debug = true -"re_int_histogram".debug = true "re_integration_test".debug = true "re_log_channel".debug = true "re_log_encoding".debug = true @@ 
-539,6 +550,7 @@ debug = false "re_view_graph".debug = true "re_view_map".debug = true "re_view_spatial".debug = true +"re_view_status".debug = true "re_view_tensor".debug = true "re_view_text_document".debug = true "re_view_text_log".debug = true @@ -602,6 +614,27 @@ opt-level = 0 # we often debug egui via Rerun debug = true # enable debug symbols for build scripts +## Dev-fast (Cranelift) + +# A fast-compiling dev profile optimized for ultimate compile speed using the Cranelift backend. +# Fast compile profile using the Cranelift codegen backend. +# The codegen-backend settings are in .cargo/cranelift.toml; the unstable features they need are enabled in .cargo/config.toml (requires nightly). +# Usage: pixi run rerun-build-fast +# Requires: rustup component add rustc-codegen-cranelift-preview --toolchain nightly +[profile.dev-fast] +inherits = "dev" +debug = false +strip = "debuginfo" + +[profile.dev-fast.package."*"] +opt-level = 1 + +# AV1 decoder is unusable without optimization. +[profile.dev-fast.package.re_rav1d] +opt-level = 3 +debug-assertions = false + + # --------------------------------------------------------------------------------- @@ -678,10 +711,12 @@ flat_map_option = "warn" float_cmp_const = "warn" fn_params_excessive_bools = "warn" fn_to_numeric_cast_any = "warn" +format_push_string = "warn" from_iter_instead_of_collect = "warn" get_unwrap = "warn" if_let_mutex = "warn" ignore_without_reason = "warn" +ignored_unit_patterns = "warn" implicit_clone = "warn" implied_bounds_in_impls = "warn" imprecise_flops = "warn" @@ -728,6 +763,7 @@ match_wildcard_for_single_variants = "warn" mem_forget = "warn" mismatching_type_param_order = "warn" missing_enforced_import_renames = "warn" +missing_fields_in_debug = "warn" missing_safety_doc = "warn" mixed_attributes_style = "warn" mut_mut = "warn" @@ -823,38 +859,11 @@ iter_over_hash_type = "allow" let_underscore_untyped = "allow" missing_assert_message = "allow" missing_errors_doc = "allow" -ref_option = "allow" +ref_option = "warn" significant_drop_tightening = "allow" # An
update of parking_lot made this trigger in a lot of places. TODO(emilk): fix those places unnecessary_debug_formatting = "allow" [patch.crates-io] -# https://github.com/rerun-io/arrow-datafusion/pull/1 - workaround for https://github.com/rerun-io/rerun/issues/9440 : -# datafusion = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-catalog = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-catalog-listing = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-common = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-common-runtime = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-datasource = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-doc = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-execution = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-expr = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-expr-common = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-functions = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-ffi = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-functions-aggregate = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-functions-aggregate-common = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-functions-nested = { git =
"https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-functions-table = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-functions-window = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-functions-window-common = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-macros = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-optimizer = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-physical-expr = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-physical-expr-common = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-physical-optimizer = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-physical-plan = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } -# datafusion-sql = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "emilk/patch-duration" } - # Try to avoid patching crates! It prevents us from publishing the crates on crates.io. # If you do patch always prefer to patch to the trunk branch of the upstream repo (i.e. `main`, `master`, …). # If that is not possible, patch to a branch that has a PR open on the upstream repo. @@ -863,13 +872,41 @@ unnecessary_debug_formatting = "allow" # Prefer patching with `branch` over `rev` and let `Cargo.lock` handle the commit hash. # That makes it easy to update with `cargo update -p $CRATE`.
-ecolor = { git = "https://github.com/emilk/egui.git", branch = "main" } -eframe = { git = "https://github.com/emilk/egui.git", branch = "main" } -egui = { git = "https://github.com/emilk/egui.git", branch = "main" } -egui_extras = { git = "https://github.com/emilk/egui.git", branch = "main" } -egui_kittest = { git = "https://github.com/emilk/egui.git", branch = "main" } -egui-wgpu = { git = "https://github.com/emilk/egui.git", branch = "main" } -emath = { git = "https://github.com/emilk/egui.git", branch = "main" } +# datafusion = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-catalog = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-catalog-listing = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-common = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-common-runtime = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-datasource = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-doc = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-execution = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-expr = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-expr-common = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-functions = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-ffi = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# 
datafusion-functions-aggregate = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-functions-aggregate-common = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-functions-nested = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-functions-table = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-functions-window = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-functions-window-common = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-macros = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-optimizer = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-physical-expr = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-physical-expr-common = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-physical-optimizer = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-physical-plan = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } +# datafusion-sql = { git = "https://github.com/rerun-io/arrow-datafusion.git", branch = "tsaucer/52.3.0-inner-ffi" } + +# ecolor = { git = "https://github.com/emilk/egui.git", branch = "main" } +# eframe = { git = "https://github.com/emilk/egui.git", branch = "main" } +# egui = { git = "https://github.com/emilk/egui.git", branch = "main" } +# egui_extras = { git = "https://github.com/emilk/egui.git", branch = "main" } +# 
egui_kittest = { git = "https://github.com/emilk/egui.git", branch = "main" } +# egui-wgpu = { git = "https://github.com/emilk/egui.git", branch = "main" } +# emath = { git = "https://github.com/emilk/egui.git", branch = "main" } + +# kittest = { git = "https://github.com/rerun-io/kittest.git", rev = 'ce7a2f3b12c36021889b50bdff671cec8016b0fb' } # Useful while developing: # ecolor = { path = "../../egui/crates/ecolor" } @@ -881,10 +918,10 @@ emath = { git = "https://github.com/emilk/egui.git", branch = "main" } # emath = { path = "../../egui/crates/emath" } # wgpu = { path = "../../wgpu/wgpu" } -#egui_plot = { git = "https://github.com/emilk/egui_plot.git", branch = "legend-id-keying" } -#egui_plot = { path = "../../../egui_plot/egui_plot" } +# egui_plot = { git = "https://github.com/emilk/egui_plot.git", branch = "legend-id-keying" } +# egui_plot = { path = "../../../egui_plot/egui_plot" } -# egui_tiles = { git = "https://github.com/rerun-io/egui_tiles", branch = "emilk/update-egui" } +# egui_tiles = { git = "https://github.com/rerun-io/egui_tiles", branch = "main" } # egui_tiles = { path = "../egui_tiles" } # egui_table = { git = "https://github.com/rerun-io/egui_table", branch = "main" } @@ -893,9 +930,8 @@ emath = { git = "https://github.com/emilk/egui.git", branch = "main" } # egui_dnd = { git = "https://github.com/rerun-io/hello_egui.git", branch = "emilk/egui-0.33.0" } # egui_commonmark = { git = "https://github.com/rerun-io/egui_commonmark.git", branch = "lucas/update-egui-main" } - # egui_commonmark = { path = "../../forks/egui_commonmark/egui_commonmark" } -# walkers = { git = "https://github.com/rerun-io/walkers", branch = "emilk/rust-1.90" } # https://github.com/podusowski/walkers/pull/394 +# walkers = { git = "https://github.com/rerun-io/walkers", branch = "emilk/egui-0.34" } # dav1d = { path = "/home/cmc/dev/rerun-io/rav1d", package = "re_rav1d", version = "0.1.1" } diff --git a/README.md b/README.md index 80858356a552..61f9a9043725 100644 --- 
a/README.md +++ b/README.md @@ -19,12 +19,12 @@ It's used in areas like robotics, spatial and embodied AI, generative media, ind Rerun is easy to use! Use the Rerun SDK (available for C++, Python and Rust) to log data like images, tensors, point clouds, and text. Logs are streamed to the Rerun Viewer for live visualization or to file for later use. -You can also query the logged data through [our dataframe API](https://rerun.io/docs/howto/dataframe-api). +You can also query the logged data through [our dataframe API](https://rerun.io/docs/howto/query-and-transform/get-data-out). [Get started](#getting-started) in minutes – no account needed. * [Run the Rerun Viewer in your browser](https://www.rerun.io/viewer) -* [Read about what Rerun is and who it is for](https://www.rerun.io/docs/getting-started/what-is-rerun) +* [Read about what Rerun is and who it is for](https://www.rerun.io/docs/overview/what-is-rerun) ### A short taste ```py @@ -71,14 +71,14 @@ You should now be able to run `rerun --help` in any terminal. ### Documentation -- 📚 [High-level docs](http://rerun.io/docs) +- 📚 [High-level docs](https://rerun.io/docs) - ⏃ [Loggable Types](https://www.rerun.io/docs/reference/types) -- ⚙️ [Examples](http://rerun.io/examples) +- ⚙️ [Examples](https://rerun.io/examples) - 📖 [Code snippets](./docs/snippets/INDEX.md) - 🌊 [C++ API docs](https://ref.rerun.io/docs/cpp) - 🐍 [Python API docs](https://ref.rerun.io/docs/python) - 🦀 [Rust API docs](https://docs.rs/rerun/) -- ⁉️ [Troubleshooting](https://www.rerun.io/docs/getting-started/troubleshooting) +- ⁉️ [Troubleshooting](https://www.rerun.io/docs/overview/installing-rerun/troubleshooting) ## Status @@ -88,9 +88,6 @@ _Expect breaking changes!_ Some shortcomings: * [The viewer slows down when there are too many entities](https://github.com/rerun-io/rerun/issues/7115) -* The data you want to visualize must fit in RAM - - See for how to bound memory use. - - We plan on having a disk-based data store some time in the future. 
* [Multi-million point clouds can be slow](https://github.com/rerun-io/rerun/issues/1136) @@ -129,13 +126,13 @@ Of course, Rerun is useful for much more than just robots. Any time you have any ### Rerun vs. Rviz When coming from pure visualization tools like [RViz](https://docs.ros.org/en/rolling/Tutorials/Intermediate/RViz/RViz-Main.html), you might be used to seeing the latest data only. -Rerun is more than a pure visualization solution, it provides a platform for multimodal data with a powerful visualizer, storage model and query engine (see also: [*"What is Rerun?"*](https://rerun.io/docs/getting-started/what-is-rerun)). +Rerun is more than a pure visualization solution, it provides a platform for multimodal data with a powerful visualizer, storage model and query engine (see also: [*"What is Rerun?"*](https://rerun.io/docs/overview/what-is-rerun)). In robotics, you can use Rerun e.g. to record test runs, manage and query training data, visually debug live streams or recordings (also from third-party formats like [MCAP](https://rerun.io/docs/howto/logging-and-ingestion/mcap)) and much more. So while Rerun makes your data streams visualizable in the viewer, integrating Rerun logging into your robotics applications also opens up the door for leveraging Rerun's broader capabilities. If you are only interested in visualization, the Rerun viewer has powerful features like the ability to go back in time thanks to its time-aware in-memory database. -You can adjust the size of this buffer to your needs (see [here](https://rerun.io/docs/howto/limit-ram)), e.g. to a smaller size if you want to use Rerun as an RViz replacement in long-running or memory-constrained applications. +You can adjust the size of this buffer to your needs (see [here](https://rerun.io/docs/howto/visualization/limit-ram)), e.g. to a smaller size if you want to use Rerun as an RViz replacement in long-running or memory-constrained applications. 
## Business model diff --git a/RELEASES.md b/RELEASES.md index 3e1e44017e93..85175f676b3c 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -91,6 +91,11 @@ Do this once you have prepared your patch-release branch and it's ready for test ### 3. If this is a patch release, cherry-pick commits for inclusion in the release into the branch In GitHub we have a `consider-patch` label that we put on PRs that we might want to include in the release. +The fastest way to get an overview of all the patch candidate PRs from both repositories and their corresponding commit hashes is to run this script: + +``` +uv run scripts/fetch_patch_candidates.py +``` When done, run [`cargo semver-checks`](https://github.com/obi1kenobi/cargo-semver-checks) to check that we haven't introduced any semver breaking changes. @@ -176,3 +181,8 @@ Make sure the `consider-patch` label on GitHub is up-to-date. For a full release Summarize your experience with the release process to our [Release Postmortems](https://www.notion.so/rerunio/Release-Postmortems-271b24554b1980589770df810d2e4ed5) Notion page. Create tickets if you think we can improve the process, put them into the `Actionable items` section. + +### 10. Clean up PR labels + +`uv run scripts/fetch_patch_candidates.py` will show a warning for `consider-patch`-labeled PRs that have been merged before a release. +Make sure to remove the label from PRs that are already part of a release. diff --git a/clippy.toml b/clippy.toml index a135020488a8..b44a8e82337a 100644 --- a/clippy.toml +++ b/clippy.toml @@ -72,6 +72,8 @@ disallowed-methods = [ { path = "egui::Ui::spinner", reason = "Use `ui.loading_indicator` from `re_ui::UiEx" }, { path = "glam::Vec2::normalize", reason = "normalize() can create NaNs. Use try_normalize or normalize_or_zero" }, { path = "glam::Vec3::normalize", reason = "normalize() can create NaNs. Use try_normalize or normalize_or_zero" }, + { path = "macaw::BoundingBox::from_points", reason = "from_points propagates NaN/inf. 
Use re_renderer::util::bounding_box_from_points instead" }, + { path = "opentelemetry::metrics::HistogramBuilder::with_boundaries", reason = "We have automatic bucketing - with_boundaries has no effect" }, { path = "sha1::Digest::new", reason = "SHA1 is cryptographically broken" }, { path = "std::env::temp_dir", reason = "Use the tempdir crate instead" }, { path = "std::panic::catch_unwind", reason = "We compile with `panic = 'abort'`" }, @@ -110,6 +112,8 @@ doc-valid-idents = [ "GitHub", "GLB", "GLTF", + "GoP", # Group of Pictures - common video term + "GoPs", # Group of Pictures - common video term "iOS", "IPv4", "IPv6", diff --git a/crates/build/re_build_tools/src/lib.rs b/crates/build/re_build_tools/src/lib.rs index 7e247dd3783e..cd8816d93b58 100644 --- a/crates/build/re_build_tools/src/lib.rs +++ b/crates/build/re_build_tools/src/lib.rs @@ -32,13 +32,6 @@ pub use self::rustfmt::rustfmt_str; // ------------------ -/// Should we export the build datetime for developers in the workspace? -/// -/// It will be visible in analytics, in the viewer's about-menu, and with `rerun --version`. -/// -/// To do so accurately may incur unnecessary recompiles, so only turn this on if you really need it. -const EXPORT_BUILD_TIME_FOR_DEVELOPERS: bool = false; - /// Should we export the current git hash/branch for developers in the workspace? /// /// It will be visible in analytics, in the viewer's about-menu, and with `rerun --version`. @@ -131,11 +124,11 @@ pub fn export_build_info_vars_for_crate(crate_name: &str) { let export_datetime = match environment { Environment::PublishingCrates | Environment::RerunCI | Environment::CondaBuild => true, - Environment::DeveloperInWorkspace => EXPORT_BUILD_TIME_FOR_DEVELOPERS, - // Datetime won't always be accurate unless we rebuild as soon as a dependency changes, - // and we don't want to add that burden to our users. - Environment::UsedAsDependency => false, + // and we don't want to add that burden to our users. 
Thus, it's off by default. + Environment::DeveloperInWorkspace | Environment::UsedAsDependency => { + is_tracked_env_var_set("RERUN_ADD_BUILD_TIME_TO_BUILD_INFO") + } }; let export_git_info = match environment { diff --git a/crates/build/re_dev_tools/Cargo.toml b/crates/build/re_dev_tools/Cargo.toml index 19bc6668eea4..c2ef447487c7 100644 --- a/crates/build/re_dev_tools/Cargo.toml +++ b/crates/build/re_dev_tools/Cargo.toml @@ -22,21 +22,17 @@ all-features = true [dependencies] re_build_tools.workspace = true re_format.workspace = true -re_quota_channel.workspace = true # External anyhow.workspace = true argh.workspace = true camino.workspace = true -crossbeam.workspace = true cargo_metadata.workspace = true glob.workspace = true indicatif.workspace = true itertools.workspace = true rayon.workspace = true roxmltree.workspace = true -rustdoc-json.workspace = true -rustdoc-types.workspace = true serde = { workspace = true, features = ["derive"] } serde_json.workspace = true tempfile.workspace = true diff --git a/crates/build/re_dev_tools/src/build_examples/notebook.rs b/crates/build/re_dev_tools/src/build_examples/notebook.rs index bd90cfbdea35..85e8ce320a98 100644 --- a/crates/build/re_dev_tools/src/build_examples/notebook.rs +++ b/crates/build/re_dev_tools/src/build_examples/notebook.rs @@ -40,7 +40,7 @@ impl Notebook { let mut num_failed = 0; for result in results { match result { - Ok(_) => {} + Ok(()) => {} Err(err) => { eprintln!("{err}"); num_failed += 1; diff --git a/crates/build/re_dev_tools/src/build_examples/rrd.rs b/crates/build/re_dev_tools/src/build_examples/rrd.rs index be90f3bd7c05..e97344f043a8 100644 --- a/crates/build/re_dev_tools/src/build_examples/rrd.rs +++ b/crates/build/re_dev_tools/src/build_examples/rrd.rs @@ -114,19 +114,19 @@ impl Example { wait_for_output(cmd, &self.name, progress)?; } - // Now run compaction on the result: + // Now run optimization on the result: let final_rrd_path = output_dir.join(&self.name).with_extension("rrd"); let 
mut cmd = Command::new("python3"); cmd.arg("-m").arg("rerun"); cmd.arg("rrd"); - cmd.arg("compact"); + cmd.arg("optimize"); // Small chunks for better streaming: cmd.arg("--max-bytes").arg((128 * 1024).to_string()); cmd.arg(&initial_rrd_path); cmd.arg("-o").arg(&final_rrd_path); - wait_for_output(cmd, &format!("{} compaction", self.name), progress)?; + wait_for_output(cmd, &format!("{} optimization", self.name), progress)?; Ok(final_rrd_path) } diff --git a/crates/build/re_dev_tools/src/build_examples/snippets.rs b/crates/build/re_dev_tools/src/build_examples/snippets.rs index 28e748f43661..a459c0f29077 100644 --- a/crates/build/re_dev_tools/src/build_examples/snippets.rs +++ b/crates/build/re_dev_tools/src/build_examples/snippets.rs @@ -150,14 +150,11 @@ fn collect_snippets_recursively( let backwards_check_opted_out = is_opted_out_backwards_check; if meta.is_dir() { - snippets.extend( - collect_snippets_recursively( - Utf8Path::from_path(&path).unwrap(), - config, - snippet_root_path, - )? - .into_iter(), - ); + snippets.extend(collect_snippets_recursively( + Utf8Path::from_path(&path).unwrap(), + config, + snippet_root_path, + )?); continue; } diff --git a/crates/build/re_dev_tools/src/build_search_index/README.md b/crates/build/re_dev_tools/src/build_search_index/README.md index bb4987a10840..2aaee725d5a4 100644 --- a/crates/build/re_dev_tools/src/build_search_index/README.md +++ b/crates/build/re_dev_tools/src/build_search_index/README.md @@ -5,11 +5,7 @@ Builds a Meilisearch index from our documentation. 
### Requirements - `pixi` -- A nightly Rust compiler, used because `rustdoc` JSON output is unstable - ``` - rustup install nightly - rustup +nightly target add wasm32-unknown-unknown - ``` + ### Usage Start a local `meilisearch` instance: diff --git a/crates/build/re_dev_tools/src/build_search_index/build.rs b/crates/build/re_dev_tools/src/build_search_index/build.rs index 0dd1a8126f37..4e2135388664 100644 --- a/crates/build/re_dev_tools/src/build_search_index/build.rs +++ b/crates/build/re_dev_tools/src/build_search_index/build.rs @@ -21,24 +21,12 @@ pub struct Build { /// release version to use in URLs #[argh(option, long = "release-version")] release_version: Option, - - /// exclude one or more crates - #[argh(option, long = "exclude-crate")] - exclude_crates: Vec, - - /// rust toolchain version, e.g. nightly-2025-02-05 - #[argh(option, long = "rust-toolchain")] - rust_toolchain: Option, } impl Build { pub fn run(self) -> anyhow::Result<()> { let client = meili::connect(&self.meilisearch_url, &self.meilisearch_master_key)?; - let documents = ingest::run( - self.release_version, - &self.exclude_crates, - self.rust_toolchain.as_deref().unwrap_or("nightly"), - )?; + let documents = ingest::run(self.release_version)?; client.index(&self.index_name, &documents)?; Ok(()) } diff --git a/crates/build/re_dev_tools/src/build_search_index/ingest.rs b/crates/build/re_dev_tools/src/build_search_index/ingest.rs index 5c1f5cba38a7..4fa5e0e199c7 100644 --- a/crates/build/re_dev_tools/src/build_search_index/ingest.rs +++ b/crates/build/re_dev_tools/src/build_search_index/ingest.rs @@ -6,9 +6,6 @@ mod docs; /// Examples read from `/examples` mod examples; -/// Rust API reference generated by rustdoc -mod rust; - /// Python API reference generated by mkdocs mod python; @@ -25,15 +22,10 @@ use cargo_metadata::Package; use cargo_metadata::semver::Version; use indicatif::{MultiProgress, ProgressBar}; -pub fn run( - release_version: Option, - exclude_crates: &[String], - 
rust_toolchain: &str, -) -> anyhow::Result> { +pub fn run(release_version: Option) -> anyhow::Result> { let ctx = Context::new(release_version)?; docs::ingest(&ctx)?; examples::ingest(&ctx)?; - rust::ingest(&ctx, exclude_crates, rust_toolchain)?; python::ingest(&ctx)?; cpp::ingest(&ctx)?; Ok(ctx.finish()) @@ -150,7 +142,6 @@ struct DocumentData { enum DocumentKind { Docs, Examples, - Rust, Python, Cpp, } diff --git a/crates/build/re_dev_tools/src/build_search_index/ingest/rust.rs b/crates/build/re_dev_tools/src/build_search_index/ingest/rust.rs deleted file mode 100644 index bbadc165867b..000000000000 --- a/crates/build/re_dev_tools/src/build_search_index/ingest/rust.rs +++ /dev/null @@ -1,579 +0,0 @@ -#![expect(clippy::unwrap_used)] // build tool, so okay here - -use std::collections::HashSet; -use std::fmt::Display; -use std::fs::File; -use std::io::BufReader; - -use anyhow::Context as _; -use cargo_metadata::semver::Version; -use crossbeam::channel::Sender; -use indicatif::ProgressBar; -use rayon::prelude::{IntoParallelIterator as _, ParallelIterator as _}; -use re_quota_channel::send_crossbeam; -use rustdoc_types::{Crate, Id as ItemId, Impl, Item, ItemEnum, Type, Use}; - -use super::{Context, DocumentData, DocumentKind}; -use crate::build_search_index::util::ProgressBarExt as _; - -/// Ingest rust documentation for all published crates in the current workspace. -/// -/// It collects the following top-level `pub` items: -/// - `mod` -/// - `fn` -/// - `struct` -/// - `enum` -/// - `trait` -/// - `const` -/// - `type` -/// - `#[macro_export] macro_rules!` -/// -/// In `impl` blocks, it collects: -/// - associated `const` -/// - associated `type` -/// - associated `fn` -/// -/// It will also walk through any `pub mod`, and correctly resolve `pub use mod::item` where `mod` is not `pub`. 
-pub fn ingest( - ctx: &Context, - exclude_crates: &[String], - rust_toolchain: &str, -) -> anyhow::Result<()> { - let progress = ctx.progress_bar("rustdoc"); - - let mut crates = Vec::new(); - - for pkg in ctx.metadata.workspace_packages() { - progress.set(pkg.name.to_string(), ctx.is_tty()); - - if exclude_crates.contains(&pkg.name) { - continue; - } - - let publish = match pkg.publish.as_deref() { - Some([]) => false, // explicitly set to `false` - Some(_) | None => true, // omitted, set to `true`, or set to specific registry - }; - if !publish { - continue; - } - - let is_library = pkg - .manifest_path - .parent() - .unwrap() - .join("src/lib.rs") - .try_exists()?; - if !is_library { - continue; - } - - let path = rustdoc_json::Builder::default() - .toolchain(rust_toolchain) - .all_features(true) - .quiet(true) - .manifest_path(&pkg.manifest_path) - .build()?; - - let file = File::open(&path) - .with_context(|| format!("reading {}", path.display())) - .unwrap(); - let reader = BufReader::new(file); - let krate: Crate = serde_json::from_reader(reader).unwrap(); - crates.push(krate); - } - - let (tx, rx) = crossbeam::channel::bounded(1024); - let version = ctx.release_version(); - - ctx.finish_progress_bar(progress); - - crates - .into_iter() - .map(|krate| { - ( - ctx.progress_bar(format!("rustdoc ({})", krate.name())), - krate, - ) - }) - .collect::>() - .into_par_iter() - .for_each(|(progress, krate)| { - let mut visitor = Visitor::new(progress, version, &tx, &krate); - visitor.visit_root(); - visitor.progress.finish_and_clear(); - }); - - drop(tx); - for data in rx { - ctx.push(data); - } - - Ok(()) -} - -struct Visitor<'a> { - progress: ProgressBar, - visited: HashSet, - documents: &'a Sender, - module_path: Vec, - krate: &'a Crate, - base_url: String, -} - -impl<'a> Visitor<'a> { - fn new( - progress: ProgressBar, - version: &Version, - documents: &'a Sender, - krate: &'a Crate, - ) -> Self { - let crate_name = krate.name(); - - Self { - progress, - 
visited: HashSet::new(), - documents, - krate, - module_path: vec![crate_name], - base_url: base_url(version, krate), - } - } - - fn push(&mut self, pub_in_priv: bool, id: &ItemId, kind: ItemKind) { - let path = self.resolve_path(pub_in_priv, id); - self.push_with_path(id, kind, &path); - } - - fn push_with_path(&mut self, id: &ItemId, kind: ItemKind, path: &[String]) { - // don't push the same document twice - if self.visited.contains(id) { - return; - } - self.visited.insert(*id); - - let mut module_path = &path[..path.len() - 1]; - let name = path.last().unwrap(); - let item_path = match &kind { - ItemKind::Module => { - format!("{name}/index.html") - } - ItemKind::Struct - | ItemKind::Enum - | ItemKind::Trait - | ItemKind::Function - | ItemKind::Type - | ItemKind::Constant - | ItemKind::Macro => { - format!("{kind}.{name}.html") - } - ItemKind::Inherent(parent, _) => { - let parent_name = module_path.last().unwrap(); - module_path = &module_path[..module_path.len() - 1]; - format!("{parent}.{parent_name}.html#{kind}.{name}") - } - }; - - send_crossbeam( - self.documents, - document( - path.join("::"), - format!("{}/{}/{}", self.base_url, module_path.join("/"), item_path), - self.krate.index[id].docs.clone().unwrap_or_default(), - ), - ) - .ok(); - } - - fn visit_root(&mut self) { - let root_module_item = &self.krate.index[&self.krate.root]; - - let ItemEnum::Module(root_module) = &root_module_item.inner else { - unreachable!() - }; - - let name = root_module_item.name.as_ref().unwrap().clone(); - let url = format!("{}/{name}/index.html", self.base_url); - send_crossbeam( - self.documents, - document( - name.clone(), - url, - root_module_item.docs.clone().unwrap_or_default(), - ), - ) - .ok(); - - for item_id in &root_module.items { - self.visit_item(false, item_id); - } - } - - fn visit_item(&mut self, pub_in_priv: bool, id: &ItemId) { - let Some(item) = self.krate.index.get(id) else { - panic!("{id:?} not found"); - }; - - if item.crate_id != 
self.krate.index[&self.krate.root].crate_id { - // skip items from external crates - return; - } - - use ItemEnum as I; - match &item.inner { - I::Module(inner) => { - self.push(pub_in_priv, id, ItemKind::Module); - let name = item.name.as_ref().unwrap().clone(); - - self.module_path.push(name); - for item_id in &inner.items { - self.visit_item(pub_in_priv, item_id); - } - self.module_path.pop(); - } - I::Use(import) => self.visit_import(import), - I::Impl(impl_) => { - // we only care about inherent impls of the form: - // impl Thing {} - let Some(type_id) = impl_.inherent_impl_type_id() else { - return; - }; - let type_ = &self.krate.index[type_id]; - let type_kind = type_.kind().unwrap(); - let parent_kind = match type_kind { - ItemKind::Struct => ParentItemKind::Struct, - ItemKind::Enum => ParentItemKind::Enum, - ItemKind::Trait => ParentItemKind::Trait, - _ => return, - }; - self.visit_inherent_impl(pub_in_priv, id, impl_, parent_kind); - } - I::Struct(struct_) => { - self.push(pub_in_priv, id, ItemKind::Struct); - for impl_id in &struct_.impls { - let ItemEnum::Impl(impl_) = &self.krate.index[impl_id].inner else { - panic!("invalid item {impl_id:?} expected `impl`, got {item:#?}"); - }; - if !impl_.is_inherent() { - continue; - } - self.visit_inherent_impl(pub_in_priv, id, impl_, ParentItemKind::Struct); - } - } - I::Enum(enum_) => { - self.push(pub_in_priv, id, ItemKind::Enum); - for impl_id in &enum_.impls { - let ItemEnum::Impl(impl_) = &self.krate.index[impl_id].inner else { - panic!("invalid item {impl_id:?} expected `impl`, got {item:#?}"); - }; - if !impl_.is_inherent() { - continue; - } - self.visit_inherent_impl(pub_in_priv, id, impl_, ParentItemKind::Enum); - } - } - I::Trait(trait_) => { - self.push(pub_in_priv, id, ItemKind::Trait); - for item_id in &trait_.items { - self.visit_assoc_item(pub_in_priv, id, item_id, ParentItemKind::Trait); - } - } - I::Function(_) => self.push(pub_in_priv, id, ItemKind::Function), - I::TypeAlias(_) => 
self.push(pub_in_priv, id, ItemKind::Type), - I::Constant { .. } => self.push(pub_in_priv, id, ItemKind::Constant), - I::Macro(_) => self.push(pub_in_priv, id, ItemKind::Macro), - - I::AssocConst { .. } - | I::AssocType { .. } - | I::Variant(_) - | I::StructField(_) - | I::Union(_) - | I::ExternCrate { .. } - | I::TraitAlias(_) - | I::Static(_) - | I::ExternType - | I::ProcMacro(_) - | I::Primitive(_) => {} - } - } - - fn visit_import(&mut self, import: &Use) { - let Some(id) = import.id.as_ref() else { - return; - }; - - if !self.krate.index.contains_key(id) { - // this is an external crate - return; - } - - // NOTE: this currently relies on the following bug: - // https://github.com/rust-lang/rust/issues/110007 - let is_pub = self.krate.paths.contains_key(id); - if is_pub { - // it already has a path consisting of `pub` modules - // so it will be included even if we don't re-export it - return; - } - - let item = &self.krate.index[id]; - if import.is_glob { - let ItemEnum::Module(module) = &item.inner else { - unreachable!() - }; - for item_id in &module.items { - self.visit_item(true, item_id); - } - } else { - self.visit_item(true, id); - } - } - - fn visit_inherent_impl( - &mut self, - pub_in_priv: bool, - type_id: &ItemId, - impl_: &Impl, - parent_kind: ParentItemKind, - ) { - assert!(impl_.is_inherent()); - - for item_id in &impl_.items { - self.visit_assoc_item(pub_in_priv, type_id, item_id, parent_kind); - } - } - - fn visit_assoc_item( - &mut self, - pub_in_priv: bool, - type_id: &ItemId, - id: &ItemId, - parent_kind: ParentItemKind, - ) { - let item = &self.krate.index[id]; - let kind = match &item.inner { - ItemEnum::Function(_) => ItemKind::Inherent(parent_kind, InherentItemKind::Method), - ItemEnum::AssocConst { .. } => { - ItemKind::Inherent(parent_kind, InherentItemKind::Constant) - } - ItemEnum::AssocType { .. 
} => ItemKind::Inherent(parent_kind, InherentItemKind::Type), - _ => unreachable!("invalid associated item {item:#?}"), - }; - - let name = item.name.as_ref().unwrap().clone(); - let path = self.resolve_path(pub_in_priv, type_id).with_item(name); - self.push_with_path(id, kind, &path); - } - - fn resolve_path(&self, pub_in_priv: bool, id: &ItemId) -> Vec { - if pub_in_priv { - let name = self.krate.index[id].name.as_ref().unwrap().clone(); - self.module_path.with_item(name) - } else { - let Some(summary) = self.krate.paths.get(id) else { - panic!( - "expected item {id:?} to have a rustdoc-generated path (module_path={:?})", - self.module_path - ); - }; - summary.path.clone() - } - } -} - -fn base_url(version: &Version, krate: &Crate) -> String { - format!( - "https://docs.rs/{krate_name}/{version}", - krate_name = krate.name() - ) - // format!("https://docs.rs/{}/latest", krate.name()) -} - -fn document(path: String, url: String, docs: String) -> DocumentData { - DocumentData { - kind: DocumentKind::Rust, - title: path, - hidden_tags: vec!["rust".into()], - tags: vec![], - content: docs, - url, - } -} - -#[derive(Debug, Clone, Copy)] -enum ItemKind { - /// `mod m` - Module, - - /// `struct S {}` - Struct, - - /// `enum E {}` - Enum, - - /// `trait I {}` - Trait, - - /// `fn f() {}` - Function, - - /// `type T = ()` - Type, - - /// `const V: T = ()` - Constant, - - /// `macro_rules! 
m {}` - Macro, - - /// Inherent impl item - /// - /// These are also referred to as "associated items" - Inherent(ParentItemKind, InherentItemKind), -} - -impl Display for ItemKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let s = match self { - Self::Module => "module", - Self::Struct => "struct", - Self::Enum => "enum", - Self::Trait => "trait", - Self::Function => "fn", - Self::Type => "type", - Self::Constant => "constant", - Self::Macro => "macro", - Self::Inherent(ParentItemKind::Trait, InherentItemKind::Method) => "tymethod", - Self::Inherent(_, InherentItemKind::Method) => "method", - Self::Inherent(_, InherentItemKind::Constant) => "associatedconstant", - Self::Inherent(_, InherentItemKind::Type) => "associatedtype", - }; - f.write_str(s) - } -} - -/// `ItemKind` for items in inherent impls -/// -/// These are also referred to as associated items -#[derive(Debug, Clone, Copy)] -enum InherentItemKind { - /// A `fn` in an inherent `impl` block: - /// - /// ```rust,ignore - /// struct T; - /// - /// impl T { - /// fn f() {} //<- - /// } - /// ``` - Method, - - /// A `const` in an inherent `impl` block: - /// - /// ```rust,ignore - /// struct T; - /// - /// impl T { - /// const V: () = (); //<- - /// } - /// ``` - Constant, - - /// A `type` in an inherent `impl` block: - /// - /// ```rust,ignore - /// struct T; - /// - /// impl T { - /// type U = (); //<- - /// } - /// ``` - Type, -} - -/// `ItemKind` for types which may have inherent impls -#[derive(Debug, Clone, Copy)] -enum ParentItemKind { - Struct, - Enum, - Trait, -} - -impl Display for ParentItemKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let s = match self { - Self::Struct => "struct", - Self::Enum => "enum", - Self::Trait => "trait", - }; - f.write_str(s) - } -} - -trait CrateExt { - fn name(&self) -> String; -} - -impl CrateExt for Crate { - fn name(&self) -> String { - self.index[&self.root].name.as_ref().unwrap().clone() - } -} - 
-trait ItemKindExt { - fn kind(&self) -> Option; -} - -impl ItemKindExt for Item { - fn kind(&self) -> Option { - match &self.inner { - ItemEnum::Module(_) => Some(ItemKind::Module), - ItemEnum::Struct(_) => Some(ItemKind::Struct), - ItemEnum::Enum(_) => Some(ItemKind::Enum), - ItemEnum::Function(_) => Some(ItemKind::Function), - ItemEnum::Trait(_) => Some(ItemKind::Trait), - ItemEnum::TypeAlias(_) => Some(ItemKind::Type), - ItemEnum::Constant { .. } => Some(ItemKind::Constant), - ItemEnum::Macro(_) => Some(ItemKind::Macro), - _ => None, - } - } -} - -trait ImplExt { - fn is_inherent(&self) -> bool; - fn inherent_impl_type_id(&self) -> Option<&ItemId>; -} - -impl ImplExt for Impl { - fn is_inherent(&self) -> bool { - self.trait_.is_none() - && self.blanket_impl.is_none() - && matches!(self.for_, Type::ResolvedPath(_)) - } - - fn inherent_impl_type_id(&self) -> Option<&ItemId> { - if self.trait_.is_some() || self.blanket_impl.is_some() { - // not an inherent impl - return None; - } - - match &self.for_ { - Type::ResolvedPath(path) => Some(&path.id), - _ => None, - } - } -} - -trait WithItemExt { - fn with_item(&self, v: T) -> Self; -} - -impl WithItemExt for Vec { - fn with_item(&self, v: T) -> Self { - let mut out = Self::with_capacity(self.len() + 1); - out.extend_from_slice(self); - out.push(v); - out - } -} diff --git a/crates/build/re_dev_tools/src/build_search_index/meili.rs b/crates/build/re_dev_tools/src/build_search_index/meili.rs index b5d36007aa7d..fb754dfb77aa 100644 --- a/crates/build/re_dev_tools/src/build_search_index/meili.rs +++ b/crates/build/re_dev_tools/src/build_search_index/meili.rs @@ -24,7 +24,7 @@ impl SearchClient { let this = Self { url: url.into(), master_key: master_key.into(), - agent: ureq::agent(), + agent: ureq::Agent::new_with_defaults(), }; this.check_master_key()?; @@ -60,10 +60,8 @@ impl SearchClient { [("q", q), ("limit", limit.as_str())], )?; - let result: QueryResult = self - .request_with_url(Method::Get, &url) - .call()? 
- .into_json()?; + let result: QueryResult = + self.get(url.as_str()).call()?.into_body().read_json()?; Ok(result.hits) } @@ -71,14 +69,14 @@ impl SearchClient { fn index_exists(&self, index: &str) -> anyhow::Result { match self.get(&format!("/indexes/{index}")).call() { Ok(_) => Ok(true), - Err(ureq::Error::Status(404, _)) => Ok(false), - Err(err) => Err(anyhow::anyhow!(err)), + Err(ureq::Error::StatusCode(404)) => Ok(false), + Err(err) => Err(err.into()), } } fn create_index(&self, index: &str) -> anyhow::Result<()> { self.post("/indexes") - .send_json(ureq::json!({ "uid": index, "primaryKey": Document::PRIMARY_KEY }))?; + .send_json(serde_json::json!({ "uid": index, "primaryKey": Document::PRIMARY_KEY }))?; Ok(()) } @@ -89,7 +87,8 @@ impl SearchClient { let task: Task = self .post(&format!("/indexes/{index}/documents")) .send_json(documents)? - .into_json()?; + .into_body() + .read_json()?; self.wait_for_task(task)?; Ok(()) @@ -99,7 +98,8 @@ impl SearchClient { let task: Task = self .delete(&format!("/indexes/{index}")) .call()? 
- .into_json()?; + .into_body() + .read_json()?; self.wait_for_task(task).context("while waiting for task")?; Ok(()) } @@ -112,7 +112,7 @@ impl SearchClient { } std::thread::sleep(std::time::Duration::from_millis(1)); - task = self.get(&task_url).call()?.into_json()?; + task = self.get(&task_url).call()?.into_body().read_json()?; } Ok(()) @@ -124,37 +124,25 @@ impl SearchClient { Ok(()) } - /// GET `{self.url}{path}` - fn get(&self, path: &str) -> ureq::Request { - self.request(Method::Get, path) - } - - /// POST `{self.url}{path}` - fn post(&self, path: &str) -> ureq::Request { - self.request(Method::Post, path) - } - - /// DELETE `{self.url}{path}` - fn delete(&self, path: &str) -> ureq::Request { - self.request(Method::Delete, path) + fn get(&self, path: &str) -> ureq::RequestBuilder { + let url = format!("{}{path}", self.url); + self.agent + .get(&url) + .header("Authorization", &format!("Bearer {}", self.master_key)) } - fn request(&self, method: Method, path: &str) -> ureq::Request { - let Self { - url, master_key, .. - } = self; - + fn post(&self, path: &str) -> ureq::RequestBuilder { + let url = format!("{}{path}", self.url); self.agent - .request(method.as_str(), &format!("{url}{path}")) - .set("Authorization", &format!("Bearer {master_key}")) + .post(&url) + .header("Authorization", &format!("Bearer {}", self.master_key)) } - fn request_with_url(&self, method: Method, url: &Url) -> ureq::Request { - let Self { master_key, .. 
} = self; - + fn delete(&self, path: &str) -> ureq::RequestBuilder { + let url = format!("{}{path}", self.url); self.agent - .request_url(method.as_str(), url) - .set("Authorization", &format!("Bearer {master_key}")) + .delete(&url) + .header("Authorization", &format!("Bearer {}", self.master_key)) } } @@ -202,20 +190,3 @@ impl Task { } } } - -#[derive(Clone, Copy)] -enum Method { - Get, - Post, - Delete, -} - -impl Method { - fn as_str(&self) -> &'static str { - match self { - Self::Get => "GET", - Self::Post => "POST", - Self::Delete => "DELETE", - } - } -} diff --git a/crates/build/re_dev_tools/src/build_search_index/repl.rs b/crates/build/re_dev_tools/src/build_search_index/repl.rs index b44e735913c3..2ba460597b01 100644 --- a/crates/build/re_dev_tools/src/build_search_index/repl.rs +++ b/crates/build/re_dev_tools/src/build_search_index/repl.rs @@ -28,10 +28,6 @@ pub struct Repl { /// release version to use in URLs #[argh(option, long = "release-version")] release_version: Option, - - /// exclude one or more crates - #[argh(option, long = "exclude-crate")] - exclude_crates: Vec, } impl Repl { @@ -39,11 +35,7 @@ impl Repl { let client = meili::connect(&self.meilisearch_url, &self.meilisearch_master_key)?; if self.ingest { - let documents = ingest::run( - self.release_version.clone(), - &self.exclude_crates, - "nightly", - )?; + let documents = ingest::run(self.release_version.clone())?; client.index(&self.index_name, &documents)?; } @@ -68,11 +60,7 @@ impl Repl { match line { "quit" | "q" | "" => return Ok(ControlFlow::Break(())), "reindex" => { - let documents = ingest::run( - self.release_version.clone(), - &self.exclude_crates, - "nightly", - )?; + let documents = ingest::run(self.release_version.clone())?; search.index(&self.index_name, &documents)?; } _ => { diff --git a/crates/build/re_dev_tools/src/build_web_viewer/lib.rs b/crates/build/re_dev_tools/src/build_web_viewer/lib.rs index 6797d071adc9..7c4314efdff7 100644 --- 
a/crates/build/re_dev_tools/src/build_web_viewer/lib.rs +++ b/crates/build/re_dev_tools/src/build_web_viewer/lib.rs @@ -94,8 +94,8 @@ pub fn build( // in order to support recursive cargo builds (calling `cargo` from within a `build.rs`). let target_wasm_dir = Utf8PathBuf::from(format!("{}_wasm", target_directory())); - // Repository root - let root_dir = target_wasm_dir.parent().unwrap(); + // Workspace root + let root_dir = workspace_root(); // Where we will place the final .wasm and .js artifacts. assert!( @@ -149,7 +149,7 @@ pub fn build( eprintln!("{root_dir}> {cmd:?}"); let status = cmd - .current_dir(root_dir) + .current_dir(&root_dir) .status() .context("Failed to build Wasm")?; diff --git a/crates/build/re_types_builder/src/codegen/cpp/mod.rs b/crates/build/re_types_builder/src/codegen/cpp/mod.rs index f8b0d09f5b75..de519b20ebab 100644 --- a/crates/build/re_types_builder/src/codegen/cpp/mod.rs +++ b/crates/build/re_types_builder/src/codegen/cpp/mod.rs @@ -4,6 +4,7 @@ mod includes; mod method; use std::collections::HashSet; +use std::fmt::Write as _; use std::str::FromStr as _; use camino::{Utf8Path, Utf8PathBuf}; @@ -75,9 +76,9 @@ fn quote_hide_from_docs() -> TokenStream { fn string_from_token_stream(token_stream: &TokenStream, source_path: Option<&Utf8Path>) -> String { let mut code = String::new(); - code.push_str(&format!("// {}\n", autogen_warning!())); + writeln!(code, "// {}", autogen_warning!()).ok(); if let Some(source_path) = source_path { - code.push_str(&format!("// Based on {:?}.\n", format_path(source_path))); + writeln!(code, "// Based on {:?}.", format_path(source_path)).ok(); } code.push('\n'); @@ -142,9 +143,9 @@ impl crate::CodeGenerator for CppCodeGenerator { object_kind != ObjectKind::View }) .flat_map(|object_kind| { - scopes - .par_iter() - .flat_map(|scope| self.generate_folder(reporter, objects, scope, *object_kind)) + scopes.par_iter().flat_map(|scope| { + self.generate_folder(reporter, objects, scope.as_ref(), *object_kind) + }) }) 
.collect() } @@ -161,7 +162,7 @@ impl CppCodeGenerator { &self, reporter: &Reporter, objects: &Objects, - scope: &Option, + scope: Option<&String>, object_kind: ObjectKind, ) -> GeneratedFiles { let folder_name = if let Some(scope) = scope { @@ -177,7 +178,7 @@ impl CppCodeGenerator { // Generate folder contents: let objects_of_kind = objects .objects_of_kind(object_kind) - .filter(|obj| &obj.scope() == scope) + .filter(|obj| obj.scope().as_ref() == scope) .collect_vec(); for &obj in &objects_of_kind { diff --git a/crates/build/re_types_builder/src/codegen/docs/datatype_docs.rs b/crates/build/re_types_builder/src/codegen/docs/datatype_docs.rs index e9584a678fb7..1ee40cf0e6ef 100644 --- a/crates/build/re_types_builder/src/codegen/docs/datatype_docs.rs +++ b/crates/build/re_types_builder/src/codegen/docs/datatype_docs.rs @@ -1,23 +1,25 @@ //! Document a datatype as human-readable markdown. +use std::fmt::Write as _; + use crate::codegen::StringExt as _; use crate::data_type::{AtomicDataType, DataType, UnionMode}; fn atomic_datatype_docs(page: &mut String, datatype: &AtomicDataType) { match datatype { - AtomicDataType::Null => page.push_str("null"), - AtomicDataType::Boolean => page.push_str("boolean"), - AtomicDataType::Int8 => page.push_str("int8"), - AtomicDataType::Int16 => page.push_str("int16"), - AtomicDataType::Int32 => page.push_str("int32"), - AtomicDataType::Int64 => page.push_str("int64"), - AtomicDataType::UInt8 => page.push_str("uint8"), - AtomicDataType::UInt16 => page.push_str("uint16"), - AtomicDataType::UInt32 => page.push_str("uint32"), - AtomicDataType::UInt64 => page.push_str("uint64"), - AtomicDataType::Float16 => page.push_str("float16"), - AtomicDataType::Float32 => page.push_str("float32"), - AtomicDataType::Float64 => page.push_str("float64"), + AtomicDataType::Null => page.push_str("Null"), + AtomicDataType::Boolean => page.push_str("Boolean"), + AtomicDataType::Int8 => page.push_str("Int8"), + AtomicDataType::Int16 => 
page.push_str("Int16"), + AtomicDataType::Int32 => page.push_str("Int32"), + AtomicDataType::Int64 => page.push_str("Int64"), + AtomicDataType::UInt8 => page.push_str("UInt8"), + AtomicDataType::UInt16 => page.push_str("UInt16"), + AtomicDataType::UInt32 => page.push_str("UInt32"), + AtomicDataType::UInt64 => page.push_str("UInt64"), + AtomicDataType::Float16 => page.push_str("Float16"), + AtomicDataType::Float32 => page.push_str("Float32"), + AtomicDataType::Float64 => page.push_str("Float64"), } } @@ -30,45 +32,53 @@ fn datatype_docs_impl(page: &mut String, indent: usize, datatype: &DataType) { DataType::Atomic(atomic) => { atomic_datatype_docs(page, atomic); } - DataType::Utf8 => page.push_str("utf8"), - DataType::Binary => page.push_str("binary"), + DataType::Utf8 => page.push_str("Utf8"), + DataType::Binary => page.push_str("Binary"), DataType::List(inner) => { - page.push_str("List<"); + page.push_str("List("); + if !inner.is_nullable() { + // This follows the notation set by arrow-rs. + // If we change this, we should probably change + // arrow-rs and datafusion to match. 
+ page.push_str("non-null "); + } datatype_docs_impl(page, indent + 1, inner.data_type()); - page.push('>'); + page.push(')'); } DataType::FixedSizeList(inner, length) => { - page.push_str(&format!("FixedSizeList<{length}, ")); + write!(page, "FixedSizeList({length} x ").ok(); + if !inner.is_nullable() { + page.push_str("non-null "); + } datatype_docs_impl(page, indent + 1, inner.data_type()); - page.push('>'); + page.push(')'); } DataType::Struct(fields) => { - page.push_str("Struct {\n"); + page.push_str("Struct(\n"); for field in fields { - page.push_indented(indent + 1, field.name(), 0); - page.push_str(": "); - if field.is_nullable() { - page.push_str("nullable "); + page.push_indented(indent + 1, format!("{:?}: ", field.name()), 0); + if !field.is_nullable() { + page.push_str("non-null "); } datatype_docs_impl(page, indent + 1, field.data_type()); page.push('\n'); } - page.push_indented(indent, "}", 0); + page.push_indented(indent, ")", 0); } DataType::Union(union_fields, union_mode) => { match union_mode { - UnionMode::Sparse => page.push_str("SparseUnion {\n"), - UnionMode::Dense => page.push_str("DenseUnion {\n"), + UnionMode::Sparse => page.push_str("Union(Sparse,\n"), + UnionMode::Dense => page.push_str("Union(Dense,\n"), } for (index, field) in union_fields.iter().enumerate() { - page.push_indented(indent + 1, format!("{index} = {:?}: ", field.name()), 0); - if field.is_nullable() { - page.push_str("nullable "); + page.push_indented(indent + 1, format!("{index}: ({:?}: ", field.name()), 0); + if !field.is_nullable() { + page.push_str("non-null "); } datatype_docs_impl(page, indent + 1, field.data_type()); - page.push('\n'); + page.push_str(")\n"); } - page.push_indented(indent, "}", 0); + page.push_indented(indent, ")", 0); } DataType::Object { datatype, .. 
} => { datatype_docs_impl(page, indent, datatype); diff --git a/crates/build/re_types_builder/src/codegen/docs/website.rs b/crates/build/re_types_builder/src/codegen/docs/website.rs index fbb91eef208f..3a04c6d5ddf9 100644 --- a/crates/build/re_types_builder/src/codegen/docs/website.rs +++ b/crates/build/re_types_builder/src/codegen/docs/website.rs @@ -91,7 +91,7 @@ impl CodeGenerator for DocsCodeGenerator { 1, r"Archetypes are bundles of components for which the Rerun viewer has first-class built-in support. See [Entities and Components](../../concepts/logging-and-ingestion/entity-component.md) and -[Visualizers and Overrides](../../concepts/visualization/visualizers-and-overrides.md) for more information. +[Visualizers and Overrides](../../concepts/visualization/customize-views.md) for more information. This page lists all built-in archetypes.", &archetypes, @@ -401,20 +401,20 @@ fn write_fields(reporter: &Reporter, objects: &Objects, o: &mut String, object: Type::Unit => unreachable!("Should be handled elsewhere"), // We use explicit, arrow-like names: - Type::UInt8 => atomic("uint8"), - Type::UInt16 => atomic("uint16"), - Type::UInt32 => atomic("uint32"), - Type::UInt64 => atomic("uint64"), - Type::Int8 => atomic("int8"), - Type::Int16 => atomic("int16"), - Type::Int32 => atomic("int32"), - Type::Int64 => atomic("int64"), - Type::Bool => atomic("boolean"), - Type::Float16 => atomic("float16"), - Type::Float32 => atomic("float32"), - Type::Float64 => atomic("float64"), - Type::Binary => atomic("binary"), - Type::String => atomic("utf8"), + Type::UInt8 => atomic("UInt8"), + Type::UInt16 => atomic("UInt16"), + Type::UInt32 => atomic("UInt32"), + Type::UInt64 => atomic("UInt64"), + Type::Int8 => atomic("Int8"), + Type::Int16 => atomic("Int16"), + Type::Int32 => atomic("Int32"), + Type::Int64 => atomic("Int64"), + Type::Bool => atomic("Boolean"), + Type::Float16 => atomic("Float16"), + Type::Float32 => atomic("Float32"), + Type::Float64 => atomic("Float64"), + 
Type::Binary => atomic("Binary"), + Type::String => atomic("Utf8"), Type::Array { elem_type, length } => { format!( @@ -460,12 +460,14 @@ fn write_fields(reporter: &Reporter, objects: &Objects, o: &mut String, object: if let Some(enum_or_union_variant_value) = field.enum_or_union_variant_value { if let Some(enum_integer_type) = object.enum_integer_type() { - field_string.push_str(&format!( + write!( + field_string, " = {}", enum_integer_type.format_value(enum_or_union_variant_value) - )); + ) + .ok(); } else { - field_string.push_str(&format!(" = {enum_or_union_variant_value}")); + write!(field_string, " = {enum_or_union_variant_value}").ok(); } } field_string.push('\n'); @@ -475,8 +477,11 @@ fn write_fields(reporter: &Reporter, objects: &Objects, o: &mut String, object: if field.typ == Type::Unit { field_string.push_str("`null`"); } else { - if field.is_nullable { - field_string.push_str("nullable "); + if !field.is_nullable { + // This follows the notation set by arrow-rs. + // If we change this, we should probably change + // arrow-rs and datafusion to match. 
+ field_string.push_str("non-null "); } field_string.push_str(&type_info(objects, &field.typ)); } @@ -607,12 +612,14 @@ fn write_archetype_fields( explanation, } in view_types { - page.push_str(&format!( + write!( + page, "* [{view_name}](../views/{}.md)", re_case::to_snake_case(view_name) - )); + ) + .ok(); if let Some(explanation) = explanation { - page.push_str(&format!(" ({explanation})")); + write!(page, " ({explanation})").ok(); } putln!(page); } @@ -659,14 +666,16 @@ fn write_visualized_archetypes( } else { for (fqname, explanation) in archetype_fqnames { let object = &objects[&fqname]; - page.push_str(&format!( + write!( + page, "* [`{}`](../{}/{}.md)", object.name, object.kind.plural_snake_case(), object.snake_case_name() - )); + ) + .ok(); if let Some(explanation) = explanation { - page.push_str(&format!(" ({explanation})")); + write!(page, " ({explanation})").ok(); } putln!(page); } diff --git a/crates/build/re_types_builder/src/codegen/python/mod.rs b/crates/build/re_types_builder/src/codegen/python/mod.rs index db1913650022..cb75b80360b8 100644 --- a/crates/build/re_types_builder/src/codegen/python/mod.rs +++ b/crates/build/re_types_builder/src/codegen/python/mod.rs @@ -3,6 +3,7 @@ mod views; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::fmt::Write as _; use std::iter; use std::ops::Deref; @@ -566,7 +567,7 @@ impl PythonCodeGenerator { from __future__ import annotations from collections.abc import Iterable, Mapping, Set, Sequence, Dict - from typing import Any, Optional, Union, TYPE_CHECKING, SupportsFloat, Literal, Tuple + from typing import Any, ClassVar, Optional, Union, TYPE_CHECKING, SupportsFloat, Literal, Tuple from typing_extensions import deprecated # type: ignore[misc, unused-ignore] from attrs import define, field @@ -636,11 +637,11 @@ impl PythonCodeGenerator { let import_clauses: HashSet<_> = obj .fields .iter() - .filter_map(|field| quote_import_clauses_from_field(&obj.scope(), field)) + .filter_map(|field| 
quote_import_clauses_from_field(obj.scope().as_ref(), field)) .chain(obj.fields.iter().filter_map(|field| { let fqname = field.typ.fqname()?; objects[fqname].delegate_datatype(objects).map(|delegate| { - quote_import_clauses_from_fqname(&obj.scope(), &delegate.fqname) + quote_import_clauses_from_fqname(obj.scope().as_ref(), &delegate.fqname) }) })) .collect(); @@ -716,7 +717,7 @@ fn write_init_file( let path = kind_path.join("__init__.py"); let mut code = String::new(); - let manifest = quote_manifest(mods.iter().flat_map(|(_, names)| names.iter())); + let manifest = quote_manifest(mods.values().flat_map(|names| names.iter())); code.push_indented(0, format!("# {}", autogen_warning!()), 2); code.push_unindented( " @@ -866,6 +867,10 @@ fn code_for_struct( code.push_indented(1, "_BATCH_TYPE = None", 1); } + if *kind == ObjectKind::Archetype { + code.push_indented(1, format!(r#"NAME: ClassVar[str] = "{}""#, obj.fqname), 2); + } + if ext_class.has_init { code.push_indented( 1, @@ -904,6 +909,7 @@ fn code_for_struct( if obj.kind == ObjectKind::Archetype { code.push_indented(1, quote_clear_methods(obj), 2); code.push_indented(1, quote_partial_update_methods(reporter, obj, objects), 2); + code.push_indented(1, quote_descriptor_methods(obj, objects), 2); if obj.scope().is_none() { code.push_indented(1, quote_columnar_methods(reporter, obj, objects), 2); } @@ -1093,7 +1099,7 @@ fn code_for_enum( superclasses.push("Enum".to_owned()); superclasses.join(",") }; - code.push_str(&format!("class {enum_name}({superclasses}):\n")); + writeln!(code, "class {enum_name}({superclasses}):").ok(); code.push_indented(1, quote_obj_docs(reporter, objects, obj), 0); for variant in &obj.fields { @@ -1437,7 +1443,7 @@ fn quote_examples(examples: Vec>, lines: &mut Vec) { lines.push(format!("### `{name}`:")); } lines.push("```python".into()); - lines.extend(example.lines.into_iter()); + lines.extend(example.lines); lines.push("```".into()); if let Some(image) = &image { lines.extend( @@ -1796,7 
+1802,7 @@ fn quote_union_aliases_from_object<'a>( } fn quote_import_clauses_from_field( - obj_scope: &Option, + obj_scope: Option<&String>, field: &ObjectField, ) -> Option { let fqname = match &field.typ { @@ -1818,7 +1824,7 @@ fn quote_import_clauses_from_field( fqname.map(|fqname| quote_import_clauses_from_fqname(obj_scope, fqname)) } -fn quote_import_clauses_from_fqname(obj_scope: &Option, fqname: &str) -> String { +fn quote_import_clauses_from_fqname(obj_scope: Option<&String>, fqname: &str) -> String { // NOTE: The distinction between `from .` vs. `from rerun.datatypes` has been shown to fix some // nasty lazy circular dependencies in weird edge cases… // In any case it will be normalized by `ruff` if it turns out to be unnecessary. @@ -2298,21 +2304,19 @@ fn quote_arrow_serialization( "##, )); } else if let Some(np_dtype) = np_dtype_from_type(&obj.fields[0].typ) { - if !obj.is_attr_set(ATTR_PYTHON_ALIASES) { - if !obj.is_testing() { - reporter.warn( - &obj.virtpath, - &obj.fqname, - format!("Expected this to have {ATTR_PYTHON_ALIASES} set"), - ); - } - } else { + if obj.is_attr_set(ATTR_PYTHON_ALIASES) { return Ok(unindent(&format!( r##" array = np.asarray(data, dtype={np_dtype}).flatten() return pa.array(array, type=data_type) "## ))); + } else if !obj.is_testing() { + reporter.warn( + &obj.virtpath, + &obj.fqname, + format!("Expected this to have {ATTR_PYTHON_ALIASES} set"), + ); } } } @@ -2413,7 +2417,7 @@ fn quote_arrow_serialization( if isinstance(data, ({name}, int, str)): data = [data] -pa_data = [{name}.auto(v).value if v is not None else None for v in data] # type: ignore[redundant-expr] +pa_data = [{name}.auto(v).value if v is not None else None for v in data] # type: ignore[redundant-expr] # ty: ignore[not-iterable] return pa.array(pa_data, type=data_type) "## @@ -2872,6 +2876,32 @@ fn quote_component_field_mapping(obj: &Object) -> String { .join(",\n") } +fn quote_descriptor_methods(obj: &Object, objects: &Objects) -> String { + let 
archetype_short_name = &obj.name; + + obj.fields + .iter() + .map(|field| { + let field_name = field.snake_case_name(); + let (typ_unwrapped, _) = quote_field_type_from_field(objects, field, true); + let batch_type = format!("{typ_unwrapped}Batch"); + + unindent(&format!( + r#" + @staticmethod + def descriptor_{field_name}() -> ComponentDescriptor: + return ComponentDescriptor( + "{archetype_short_name}:{field_name}", + archetype={archetype_short_name}.NAME, + component_type={batch_type}._COMPONENT_TYPE, + ) + "# + )) + }) + .collect_vec() + .join("\n") +} + fn quote_partial_update_methods(reporter: &Reporter, obj: &Object, objects: &Objects) -> String { let name = &obj.name; @@ -3026,17 +3056,21 @@ fn quote_columnar_methods(reporter: &Reporter, obj: &Object, objects: &Objects) if pa.types.is_primitive(arrow_array.type) or pa.types.is_fixed_size_list(arrow_array.type): param = kwargs[batch.component_descriptor().component] # type: ignore[index] shape = np.shape(param) # type: ignore[arg-type] - elem_flat_len = int(np.prod(shape[1:])) if len(shape) > 1 else 1 # type: ignore[redundant-expr,misc] + num_rows = shape[0] if len(shape) >= 1 else 1 # type: ignore[redundant-expr,misc] - if pa.types.is_fixed_size_list(arrow_array.type) and arrow_array.type.list_size == elem_flat_len: - # If the product of the last dimensions of the shape are equal to the size of the fixed size list array, - # we have `num_rows` single element batches (each element is a fixed sized list). - # (This should have been already validated by conversion to the arrow_array) - batch_length = 1 + if pa.types.is_fixed_size_list(arrow_array.type): + elem_flat_len = int(np.prod(shape[1:])) if len(shape) > 1 else 1 # type: ignore[redundant-expr,misc] + if arrow_array.type.list_size == elem_flat_len: + # The product of the last dimensions of the shape are equal to the size of the fixed size list array, + # so we have `num_rows` single element batches (each element is a fixed sized list). 
+ batch_length = 1 + else: + batch_length = shape[1] if len(shape) > 1 else 1 # type: ignore[redundant-expr,misc] else: - batch_length = shape[1] if len(shape) > 1 else 1 # type: ignore[redundant-expr,misc] + # For primitive types, derive batch_length from the actual arrow array length + # since the input shape can be misleading (e.g. colors [R,G,B] -> single uint32). + batch_length = len(arrow_array) // num_rows if num_rows > 0 else 1 - num_rows = shape[0] if len(shape) >= 1 else 1 # type: ignore[redundant-expr,misc] sizes = batch_length * np.ones(num_rows) else: # For non-primitive types, default to partitioning each element separately. diff --git a/crates/build/re_types_builder/src/codegen/python/views.rs b/crates/build/re_types_builder/src/codegen/python/views.rs index 38b64a77d94c..f5132504050c 100644 --- a/crates/build/re_types_builder/src/codegen/python/views.rs +++ b/crates/build/re_types_builder/src/codegen/python/views.rs @@ -1,3 +1,5 @@ +use std::fmt::Write as _; + use super::ExtensionClass; use crate::codegen::Target; use crate::codegen::common::StringExt as _; @@ -86,9 +88,9 @@ fn init_method(reporter: &Reporter, objects: &Objects, obj: &Object) -> String { }); let parameter_name = &property.name; - code.push_str(&format!( - "{parameter_name}: blueprint_archetypes.{property_type_name} | {additional_type_annotations} None = None,\n" - )); + writeln!(code, + "{parameter_name}: blueprint_archetypes.{property_type_name} | {additional_type_annotations} None = None," + ).ok(); } code.push_indented(1, ") -> None:", 1); diff --git a/crates/build/re_types_builder/src/codegen/rust/api.rs b/crates/build/re_types_builder/src/codegen/rust/api.rs index 3f4a161ac999..727550bd36c8 100644 --- a/crates/build/re_types_builder/src/codegen/rust/api.rs +++ b/crates/build/re_types_builder/src/codegen/rust/api.rs @@ -1,4 +1,5 @@ use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fmt::Write as _; use std::str::FromStr as _; use anyhow::Context as _; @@ -19,7 +20,7 
@@ use crate::codegen::rust::deserializer::{ use crate::codegen::rust::serializer::quote_arrow_serializer; use crate::codegen::rust::util::{is_tuple_struct_from_obj, quote_doc_line}; use crate::codegen::{Target, autogen_warning}; -use crate::objects::{EnumIntegerType, ObjectClass}; +use crate::objects::ObjectClass; use crate::{ ATTR_DEFAULT, ATTR_RERUN_COMPONENT_OPTIONAL, ATTR_RERUN_COMPONENT_RECOMMENDED, ATTR_RERUN_COMPONENT_REQUIRED, ATTR_RERUN_VIEW_IDENTIFIER, ATTR_RERUN_VISUALIZER, @@ -147,9 +148,9 @@ fn generate_object_file( target_file: &Utf8Path, ) -> String { let mut code = String::new(); - code.push_str(&format!("// {}\n", autogen_warning!())); + writeln!(code, "// {}", autogen_warning!()).ok(); if let Some(source_path) = obj.relative_filepath() { - code.push_str(&format!("// Based on {:?}.\n\n", format_path(source_path))); + writeln!(code, "// Based on {:?}.\n", format_path(source_path)).ok(); } code.push_str("#![allow(unused_braces)]\n"); @@ -204,18 +205,18 @@ fn generate_mod_file( let mut code = String::new(); - code.push_str(&format!("// {}\n\n", autogen_warning!())); + writeln!(code, "// {}\n", autogen_warning!()).ok(); for obj in objects { let module_name = obj.snake_case_name(); - code.push_str(&format!("mod {module_name};\n")); + writeln!(code, "mod {module_name};").ok(); // Detect if someone manually created an extension file, and automatically // import it if so. let mut ext_path = dirpath.join(format!("{module_name}_ext")); ext_path.set_extension("rs"); if ext_path.exists() { - code.push_str(&format!("mod {module_name}_ext;\n")); + writeln!(code, "mod {module_name}_ext;").ok(); } } @@ -226,7 +227,7 @@ fn generate_mod_file( let module_name = obj.snake_case_name(); let type_name = &obj.name; - code.push_str(&format!("pub use self::{module_name}::{type_name};\n")); + writeln!(code, "pub use self::{module_name}::{type_name};").ok(); } // And then deprecated. 
if objects.iter().any(|obj| obj.is_deprecated()) { @@ -240,7 +241,7 @@ fn generate_mod_file( code.push_str("#[expect(deprecated)]\n"); } - code.push_str(&format!("pub use self::{module_name}::{type_name};\n")); + writeln!(code, "pub use self::{module_name}::{type_name};").ok(); } files_to_write.insert(path, code); @@ -604,12 +605,46 @@ fn quote_enum( quote!(Self::#quoted_name => #docstring_md) }); - let repr_type = match obj.enum_integer_type() { - Some(EnumIntegerType::U8) => quote!(u8), - Some(EnumIntegerType::U16) => quote!(u16), - Some(EnumIntegerType::U32) => quote!(u32), - Some(EnumIntegerType::U64) => quote!(u64), - None => unreachable!("enums must have an integer type"), + let enum_int_type = obj + .enum_integer_type() + .expect("enums must have an integer type"); + let repr_type = format_ident!("{}", enum_int_type.type_str()); + + // Check if enum variants are sequentially numbered starting at 1 + // (we assign enum values starting at 1, 0 is reserved). + // If so, we can optimize try_from by indexing into the variants() array. + let is_sequential_from_one = fields.iter().enumerate().all(|(i, field)| { + field + .enum_or_union_variant_value + .is_some_and(|v| v == (i as u64) + 1) + }); + + let quoted_try_from_body = if is_sequential_from_one { + quote! { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } + } else { + let try_from_match_arms = fields.iter().map(|field| { + let variant_name = format_ident!("{}", field.name); + let enum_value = field + .enum_or_union_variant_value + .expect("enum variants must have values"); + let value_literal = proc_macro2::Literal::from_str( + &obj.enum_integer_type() + .expect("enums must have an integer type") + .format_value(enum_value), + ) + .unwrap(); + quote!(#value_literal => Some(Self::#variant_name)) + }); + quote! { + match value { + #(#try_from_match_arms,)* + _ => None, + } + } }; let tokens = quote! 
{ @@ -634,6 +669,7 @@ fn quote_enum( } impl ::re_types_core::reflection::Enum for #name { + type Repr = #repr_type; #[inline] fn variants() -> &'static [Self] { @@ -646,6 +682,11 @@ fn quote_enum( #(#docstring_md_match_arms,)* } } + + #[inline] + fn try_from_integer(value: #repr_type) -> Option { + #quoted_try_from_body + } } impl ::re_byte_size::SizeBytes for #name { diff --git a/crates/build/re_types_builder/src/codegen/rust/arrow.rs b/crates/build/re_types_builder/src/codegen/rust/arrow.rs index 28e6dce50327..0df7e6b603c8 100644 --- a/crates/build/re_types_builder/src/codegen/rust/arrow.rs +++ b/crates/build/re_types_builder/src/codegen/rust/arrow.rs @@ -63,10 +63,10 @@ impl quote::ToTokens for ArrowDataTypeTokenizer<'_> { })) }); quote!(DataType::Union( - UnionFields::new( + UnionFields::try_new( vec![ #(#types,)* ], vec![ #(#fields,)* ], - ), + ).expect("UnionFields::try_new should be infallible"), #mode, )) } diff --git a/crates/build/re_types_builder/src/codegen/rust/blueprint_validation.rs b/crates/build/re_types_builder/src/codegen/rust/blueprint_validation.rs index 7ad107060ae5..0bb71039ddcf 100644 --- a/crates/build/re_types_builder/src/codegen/rust/blueprint_validation.rs +++ b/crates/build/re_types_builder/src/codegen/rust/blueprint_validation.rs @@ -1,4 +1,5 @@ use std::collections::BTreeMap; +use std::fmt::Write as _; use camino::Utf8PathBuf; use proc_macro2::TokenStream; @@ -16,7 +17,7 @@ pub(crate) fn generate_blueprint_validation( ) { let blueprint_scope = Some("blueprint".to_owned()); let mut code = String::new(); - code.push_str(&format!("// {}\n\n", autogen_warning!())); + write!(code, "// {}\n\n", autogen_warning!()).ok(); code.push_str("#![allow(clippy::empty_line_after_doc_comments)]\n\n"); code.push_str("use re_entity_db::EntityDb;\n"); @@ -29,9 +30,11 @@ pub(crate) fn generate_blueprint_validation( if crate_name == "re_viewer" { crate_name = "crate".to_owned(); } - code.push_str(&format!( - "pub use 
{crate_name}::blueprint::components::{type_name};\n" - )); + writeln!( + code, + "pub use {crate_name}::blueprint::components::{type_name};" + ) + .ok(); } } diff --git a/crates/build/re_types_builder/src/codegen/rust/deserializer.rs b/crates/build/re_types_builder/src/codegen/rust/deserializer.rs index a4f4504e4e2b..e047e0a73037 100644 --- a/crates/build/re_types_builder/src/codegen/rust/deserializer.rs +++ b/crates/build/re_types_builder/src/codegen/rust/deserializer.rs @@ -87,29 +87,17 @@ pub fn quote_arrow_deserializer( InnerRepr::NativeIterable, ); - let quoted_branches = obj.fields.iter().map(|obj_field| { - let quoted_obj_field_type = format_ident!("{}", obj_field.name); - - // We should never hit this unwrap or it means the enum-processing at - // the fbs layer is totally broken. - let enum_value = obj_field.enum_or_union_variant_value.unwrap(); - let quoted_enum_value = proc_macro2::Literal::u64_unsuffixed(enum_value); - - quote! { - Some(#quoted_enum_value) => Ok(Some(Self::#quoted_obj_field_type)) - } - }); - - // TODO(jleibs): We should be able to do this with try_from instead. let quoted_remapping = quote! 
{ .map(|typ| { match typ { - // The actual enum variants - #(#quoted_branches,)* + Some(val) => { + ::try_from_integer(val).map(Some).ok_or_else(|| { + DeserializationError::missing_union_arm( + #quoted_self_datatype, "", val as _, + ) + }) + }, None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - #quoted_self_datatype, "", invalid as _, - )), } }) }; diff --git a/crates/build/re_types_builder/src/codegen/rust/reflection.rs b/crates/build/re_types_builder/src/codegen/rust/reflection.rs index d118aaa44818..734f5fa58c0f 100644 --- a/crates/build/re_types_builder/src/codegen/rust/reflection.rs +++ b/crates/build/re_types_builder/src/codegen/rust/reflection.rs @@ -1,4 +1,5 @@ use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::fmt::Write as _; use camino::Utf8PathBuf; use itertools::Itertools as _; @@ -8,7 +9,7 @@ use quote::{format_ident, quote}; use super::util::{append_tokens, doc_as_lines}; use crate::codegen::{Target, autogen_warning}; use crate::{ - ATTR_RERUN_COMPONENT_REQUIRED, ATTR_RERUN_COMPONENT_UI_EDITABLE, ATTR_RUST_DERIVE, + ATTR_RERUN_COMPONENT_NO_UI_EDIT, ATTR_RERUN_COMPONENT_REQUIRED, ATTR_RUST_DERIVE, ATTR_RUST_DERIVE_ONLY, ObjectKind, Objects, Reporter, }; @@ -41,7 +42,7 @@ pub fn generate_reflection( code.push_str("#![allow(unused_imports)]\n"); code.push('\n'); for namespace in imports { - code.push_str(&format!("use {namespace};\n")); + writeln!(code, "use {namespace};").ok(); } let quoted_reflection = quote! { @@ -218,24 +219,7 @@ fn generate_archetype_reflection(reporter: &Reporter, objects: &Objects) -> Toke ) .join("\n"); let required = field.attrs.has(ATTR_RERUN_COMPONENT_REQUIRED); - let ui_editable = match field - .try_get_attr::(ATTR_RERUN_COMPONENT_UI_EDITABLE) - .as_deref() - { - Some("true") => true, - Some("false") => false, - Some(value) => { - reporter.error( - &field.virtpath, - &field.fqname, - format!( - "Invalid value for {ATTR_RERUN_COMPONENT_UI_EDITABLE}: {value:?}. 
Expected \"true\" or \"false\"." - ), - ); - !required - } - None => !required, - }; + let ui_editable = !field.attrs.has(ATTR_RERUN_COMPONENT_NO_UI_EDIT); let mut flag_tokens: Vec = Vec::new(); if required { diff --git a/crates/build/re_types_builder/src/codegen/rust/serializer.rs b/crates/build/re_types_builder/src/codegen/rust/serializer.rs index 16e64bd2a765..59526a5c18ec 100644 --- a/crates/build/re_types_builder/src/codegen/rust/serializer.rs +++ b/crates/build/re_types_builder/src/codegen/rust/serializer.rs @@ -271,7 +271,7 @@ pub fn quote_arrow_serializer( re_log::debug_assert_eq!(fields.len(), children.len()); as_array_ref(UnionArray::try_new( - UnionFields::new(field_type_ids, fields), + UnionFields::try_new(field_type_ids, fields)?, ScalarBuffer::from(type_ids), None, children, @@ -437,7 +437,7 @@ pub fn quote_arrow_serializer( re_log::debug_assert_eq!(fields.len(), children.len()); as_array_ref(UnionArray::try_new( - UnionFields::new(field_type_ids, fields), + UnionFields::try_new(field_type_ids, fields)?, ScalarBuffer::from(type_ids), Some(offsets), children, diff --git a/crates/build/re_types_builder/src/codegen/rust/util.rs b/crates/build/re_types_builder/src/codegen/rust/util.rs index da75d12be0df..07c2147eb9f8 100644 --- a/crates/build/re_types_builder/src/codegen/rust/util.rs +++ b/crates/build/re_types_builder/src/codegen/rust/util.rs @@ -307,12 +307,12 @@ pub fn doc_as_lines( lines.push(docline_summary); } - let examples = if !fqname.starts_with("rerun.blueprint.views") { + let examples = if fqname.starts_with("rerun.blueprint.views") { + Vec::new() + } else { collect_snippets_for_api_docs(docs, "rs", true) .map_err(|err| reporter.error(virtpath, fqname, err)) .unwrap_or_default() - } else { - Vec::new() }; if !examples.is_empty() { @@ -350,7 +350,7 @@ pub fn doc_as_lines( } lines.push("```ignore".into()); - lines.extend(example.lines.into_iter()); + lines.extend(example.lines); lines.push("```".into()); if let Some(image) = &image { diff --git 
a/crates/build/re_types_builder/src/lib.rs b/crates/build/re_types_builder/src/lib.rs index 73f452ec323a..ee3c05109d51 100644 --- a/crates/build/re_types_builder/src/lib.rs +++ b/crates/build/re_types_builder/src/lib.rs @@ -178,7 +178,7 @@ pub const ATTR_ARROW_SPARSE_UNION: &str = "attr.arrow.sparse_union"; pub const ATTR_RERUN_COMPONENT_OPTIONAL: &str = "attr.rerun.component_optional"; pub const ATTR_RERUN_COMPONENT_RECOMMENDED: &str = "attr.rerun.component_recommended"; pub const ATTR_RERUN_COMPONENT_REQUIRED: &str = "attr.rerun.component_required"; -pub const ATTR_RERUN_COMPONENT_UI_EDITABLE: &str = "attr.rerun.component_ui_editable"; +pub const ATTR_RERUN_COMPONENT_NO_UI_EDIT: &str = "attr.rerun.component_no_ui_edit"; pub const ATTR_RERUN_OVERRIDE_TYPE: &str = "attr.rerun.override_type"; pub const ATTR_RERUN_SCOPE: &str = "attr.rerun.scope"; pub const ATTR_RERUN_VIEW_IDENTIFIER: &str = "attr.rerun.view_identifier"; diff --git a/crates/build/re_types_builder/src/objects.rs b/crates/build/re_types_builder/src/objects.rs index 982133424d46..1b8077633291 100644 --- a/crates/build/re_types_builder/src/objects.rs +++ b/crates/build/re_types_builder/src/objects.rs @@ -669,8 +669,8 @@ impl Object { class.is_enum() || val .union_type() - .filter(|utype| utype.base_type() != FbsBaseType::None) - .is_some() + .as_ref() + .is_some_and(|utype| utype.base_type() != FbsBaseType::None) }) .map(|val| { ObjectField::from_raw_enum_value(reporter, include_dir_path, enums, objs, enm, &val) @@ -889,6 +889,16 @@ impl EnumIntegerType { Self::U64 => format!("0x{value:0X}"), } } + + /// Returns the suffix used for the repr type, e.g. `"u8"`, `"u16"`, etc. + pub fn type_str(self) -> &'static str { + match self { + Self::U8 => "u8", + Self::U16 => "u16", + Self::U32 => "u32", + Self::U64 => "u64", + } + } } /// Is this a struct, enum, or union? 
diff --git a/crates/store/re_arrow_combinators/README.md b/crates/store/re_arrow_combinators/README.md deleted file mode 100644 index 62036dde379b..000000000000 --- a/crates/store/re_arrow_combinators/README.md +++ /dev/null @@ -1,14 +0,0 @@ -# re_arrow_combinators - -Part of the [`rerun`](https://github.com/rerun-io/rerun) family of crates. - -[![Latest version](https://img.shields.io/crates/v/re_arrow_combinators.svg)](https://crates.io/crates/re_arrow_combinators) -[![Documentation](https://docs.rs/re_arrow_combinators/badge.svg)](https://docs.rs/re_arrow_combinators) -![MIT](https://img.shields.io/badge/license-MIT-blue.svg) -![Apache](https://img.shields.io/badge/license-Apache-blue.svg) - -Type-safe, composable transformations for Arrow arrays. - -Provides building blocks for constructing complex data transformations through composition. -These transformations are designed to be used as primitives for user-defined functions (UDFs) -in query engines like DataFusion, as well as in SDK features like lenses. diff --git a/crates/store/re_arrow_combinators/src/lib.rs b/crates/store/re_arrow_combinators/src/lib.rs deleted file mode 100644 index ac78f1d86d1a..000000000000 --- a/crates/store/re_arrow_combinators/src/lib.rs +++ /dev/null @@ -1,22 +0,0 @@ -//! Type-safe, composable transformations for Arrow arrays. -//! -//! This crate provides composable transformations for Arrow arrays. -//! Transformations are composable operations that convert one array type to another, -//! preserving structural properties like row counts and null handling. -//! -//! These transformations serve as building blocks for user-defined functions (UDFs) -//! in query engines like `DataFusion`, as well as SDK features like lenses. 
- -mod error; -mod index; -mod transform; - -pub mod cast; -pub mod map; -pub mod reshape; -mod selector; - -pub use crate::cast::{DowncastRef, ListToFixedSizeList, PrimitiveCast}; -pub use crate::error::Error; -pub use crate::selector::{Error as SelectorError, Selector, extract_nested_fields}; -pub use crate::transform::{Compose, Transform}; diff --git a/crates/store/re_arrow_combinators/src/selector/mod.rs b/crates/store/re_arrow_combinators/src/selector/mod.rs deleted file mode 100644 index 6ddae844b9a7..000000000000 --- a/crates/store/re_arrow_combinators/src/selector/mod.rs +++ /dev/null @@ -1,184 +0,0 @@ -//! Selector API for parsing and executing [`jq`](https://github.com/jqlang/jq/)-like queries on Arrow arrays. -//! -//! This module provides a high-level path-based API, but in contrast to `jq` its semantics are **columnar**, -//! following Apache Arrow's data model rather than a row-oriented object model. -//! -//! # Syntax -//! -//! The selector syntax is a subset of `jq`: -//! -//! | Syntax | Meaning | Example | -//! |-------------|--------------------------------------------------|----------------| -//! | `.field` | Access a named field in a struct | `.location` | -//! | `[]` | Iterate over every element of a list | `.poses[]` | -//! | `[N]` | Index into a list by position | `.[0]` | -//! | `?` | Optional: suppress errors if a field is missing | `.field?` | -//! | `\|` | Pipe the output of one expression to another | `.foo \| .bar` | -//! -//! Segments can be chained without an explicit pipe: `.poses[].x` is equivalent to `.poses[] | .x`. -//! -//! # Differences from `jq` -//! -//! * **Columnar, not row-oriented** — operations apply to entire Arrow columns rather than individual JSON values. -//! * **No filters, arithmetic, or built-in functions** — only path navigation and iteration are supported. -//! * **No quoted field names or string interpolation** — field names must be bare identifiers -//! (alphanumeric, `-`, `_`). 
- -mod lexer; -mod parser; -mod runtime; - -use arrow::{ - array::{Array as _, ListArray}, - datatypes::{DataType, Field}, -}; -use vec1::Vec1; - -use parser::{Expr, Segment, SegmentKind}; - -/// A parsed selector expression that can be executed against Arrow arrays. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Selector(Expr); - -impl std::fmt::Display for Selector { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} - -impl Selector { - /// Execute this selector against each row of a [`ListArray`]. - /// - /// Performs implicit iteration over the inner list array, and reconstructs the array at the end. - /// - /// `[.[].poses[].x]` is the actual query, we only require writing the `.poses[].x` portion. - /// - /// Returns `None` if the expression was suppressed by an optional segment (e.g. `.field?`). - pub fn execute_per_row(&self, source: &ListArray) -> Result, Error> { - runtime::execute_per_row(&self.0, source).map_err(Into::into) - } -} - -impl std::str::FromStr for Selector { - type Err = Error; - - fn from_str(query: &str) -> Result { - // Lex the query string, collecting tokens and checking for lex errors - let lexer = lexer::Lexer::new(query); - let tokens = lexer.scan_tokens()?; - - let parser = parser::Parser::new(tokens.into_iter()); - let expr = parser.parse()?; - - Ok(Self(expr)) - } -} - -impl crate::Transform for Selector { - type Source = ListArray; - type Target = ListArray; - - fn transform(&self, source: &Self::Source) -> Result { - let result = self.execute_per_row(source).map_err(crate::Error::from)?; - Ok(result.unwrap_or_else(|| null_list_like(source))) - } -} - -impl crate::Transform for &Selector { - type Source = ListArray; - type Target = ListArray; - - fn transform(&self, source: &Self::Source) -> Result { - let result = self.execute_per_row(source).map_err(crate::Error::from)?; - Ok(result.unwrap_or_else(|| null_list_like(source))) - } -} - -/// Creates an all-null 
[`ListArray`] with the same type and length as `source`. -fn null_list_like(source: &ListArray) -> ListArray { - ListArray::new_null( - Field::new_list_field(source.value_type(), true).into(), - source.len(), - ) -} - -/// Errors that can occur during selector parsing or execution. -#[derive(Debug, thiserror::Error, Clone)] -pub enum Error { - /// Error during lexing. - #[error(transparent)] - Lex(#[from] lexer::Error), - - /// Error during parsing. - #[error(transparent)] - Parse(#[from] parser::Error), - - /// Error during runtime execution. - #[error(transparent)] - Runtime(#[from] crate::Error), -} - -/// Extract nested fields from a struct array that match a predicate. -/// -/// Returns `None` if no fields match the predicate, or if `datatype` is not a `DataType::Struct`. -pub fn extract_nested_fields

( - datatype: &DataType, - predicate: P, -) -> Option> -where - P: Fn(&DataType) -> bool, -{ - let DataType::Struct(fields) = datatype else { - return None; - }; - - let mut result = Vec::new(); - let mut queue = std::collections::VecDeque::new(); - - // Initialize queue with root fields - queue.push_back((Vec::new(), fields)); - - // Breadth-first traversal - while let Some((path, fields)) = queue.pop_front() { - for field in fields { - let mut field_path = path.clone(); - field_path.push(Segment { - kind: SegmentKind::Field(field.name().clone()), - optional: false, - }); - - match field.data_type() { - DataType::Struct(nested_fields) => { - // Queue nested struct for later processing - queue.push_back((field_path, nested_fields)); - } - DataType::List(inner) => { - // Add the Each segment to unwrap the list - field_path.push(Segment { - kind: SegmentKind::Each, - optional: false, - }); - - match inner.data_type() { - DataType::Struct(nested_fields) => { - // Queue nested struct within list for later processing - queue.push_back((field_path, nested_fields)); - } - dt if predicate(dt) => { - // Direct match on list inner type - result.push((Selector(Expr::Path(field_path)), dt.clone())); - } - _ => {} - } - } - dt if predicate(dt) => { - // Direct match on field type - result.push((Selector(Expr::Path(field_path)), dt.clone())); - } - _ => {} - } - } - } - - Vec1::try_from_vec(result).ok() -} diff --git a/crates/store/re_arrow_combinators/src/selector/parser.rs b/crates/store/re_arrow_combinators/src/selector/parser.rs deleted file mode 100644 index 44abe66378d8..000000000000 --- a/crates/store/re_arrow_combinators/src/selector/parser.rs +++ /dev/null @@ -1,444 +0,0 @@ -//! Turns a list of [`Token`]s into an executable [`Expr`]. -//! -//! The [`Parser`] should roughly follow the structure from: -//! -//! -//! # Grammar -//! -//! Simplified jq-like grammar with implicit piping: -//! -//! ```text -//! Expr → Term ( ( '|' | ε ) Term )* -//! Term → FIELD '?'? -//! 
| DOT -//! | '[' INTEGER ']' '?'? -//! | '[' ']' -//! ``` -//! -//! `UPPERCASE` symbols denote terminals, and `ε` denotes end of input. - -// NOTE: Please keep the grammar above up-to-date. - -use super::lexer::{Token, TokenType}; - -pub struct Parser -where - I: Iterator, -{ - tokens: std::iter::Peekable, -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum SegmentKind { - Field(String), - Index(u64), - Each, -} - -impl std::fmt::Display for SegmentKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Field(name) => write!(f, ".{name}"), - Self::Index(n) => write!(f, "[{n}]"), - Self::Each => write!(f, "[]"), - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Segment { - pub kind: SegmentKind, - pub optional: bool, -} - -impl std::fmt::Display for Segment { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.kind)?; - if self.optional { - write!(f, "?")?; - } - Ok(()) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum Expr { - Identity, - Path(Vec), - Pipe(Box, Box), -} - -impl std::fmt::Display for Expr { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Identity => write!(f, "."), - Self::Path(segments) => { - for segment in segments { - write!(f, "{segment}")?; - } - Ok(()) - } - Self::Pipe(left, right) => write!(f, "{left} | {right}"), - } - } -} - -// TODO(RR-3438): Add error location reporting. 
-#[derive(Debug, PartialEq, Eq, thiserror::Error, Clone)] -pub enum Error { - #[error("expected `{expected}` but found `{found}`")] - ExpectedSymbol { - expected: TokenType, - found: TokenType, - }, - - #[error("unexpected symbol `{symbol}`")] - UnexpectedSymbol { symbol: TokenType }, - - #[error("unexpected end of input")] - UnexpectedEof, -} - -type Result = std::result::Result; - -impl Parser -where - I: Iterator, -{ - /// Create a parser from any iterator of tokens - pub fn new(tokens: I) -> Self { - Self { - tokens: tokens.peekable(), - } - } - - pub fn parse(mut self) -> Result { - let expr = self.expr()?; - - if let Some(token) = self.tokens.peek() { - Err(Error::UnexpectedSymbol { - symbol: token.typ.clone(), - }) - } else { - Ok(expr) - } - } - - fn expr(&mut self) -> Result { - let mut left = self.path()?; - - while let Some(token) = self.tokens.peek() { - if token.typ == TokenType::Pipe { - self.tokens.next(); // Consume explicit pipe - let right = self.path()?; - left = Expr::Pipe(Box::new(left), Box::new(right)); - } else { - break; - } - } - - Ok(left) - } - - fn path(&mut self) -> Result { - let mut segments = Vec::new(); - - // Check if it starts with identity (.) 
- if let Some(token) = self.tokens.peek() { - if token.typ == TokenType::Dot { - self.tokens.next(); - // If only `.`, return Identity - if !self.is_segment_start() { - return Ok(Expr::Identity); - } - } - } else { - return Err(Error::UnexpectedEof); - } - - // Parse segments - while self.is_segment_start() { - segments.push(self.segment()?); - } - - if segments.is_empty() { - Ok(Expr::Identity) - } else { - Ok(Expr::Path(segments)) - } - } - - fn is_segment_start(&mut self) -> bool { - matches!( - self.tokens.peek().map(|t| &t.typ), - Some(TokenType::Field(_) | TokenType::LBracket) - ) - } - - fn peek_optional(&mut self) -> bool { - if let Some(token) = self.tokens.peek() - && token.typ == TokenType::QuestionMark - { - self.tokens.next(); - return true; - } - false - } - - fn segment(&mut self) -> Result { - match self.tokens.peek() { - Some(token) => match &token.typ { - TokenType::Field(s) => { - let result = s.clone(); - self.tokens.next(); - let optional = self.peek_optional(); - Ok(Segment { - kind: SegmentKind::Field(result), - optional, - }) - } - TokenType::LBracket => { - self.tokens.next(); // Consume `[` - - match self.tokens.peek() { - Some(token) => match &token.typ { - TokenType::RBracket => { - self.tokens.next(); // Consume `]` - Ok(Segment { - kind: SegmentKind::Each, - optional: false, - }) - } - TokenType::Integer(n) => { - let index = *n; - self.tokens.next(); - self.consume(TokenType::RBracket)?; - let optional = self.peek_optional(); - Ok(Segment { - kind: SegmentKind::Index(index), - optional, - }) - } - unexpected => Err(Error::UnexpectedSymbol { - symbol: unexpected.clone(), - }), - }, - None => Err(Error::UnexpectedEof), - } - } - unexpected => Err(Error::UnexpectedSymbol { - symbol: unexpected.clone(), - }), - }, - None => Err(Error::UnexpectedEof), - } - } - - /// Consume the current token if it matches the expected type, otherwise return an error. 
- fn consume(&mut self, expected: TokenType) -> Result { - let token = self.tokens.next().ok_or(Error::UnexpectedEof)?; - if token.typ == expected { - Ok(token) - } else { - Err(Error::ExpectedSymbol { - expected, - found: token.typ.clone(), - }) - } - } -} - -#[cfg(test)] -mod test { - use super::*; - - use super::super::lexer::Lexer; - - fn parse(input: &str) -> Result { - let tokens = Lexer::new(input).scan_tokens().unwrap(); - Parser::new(tokens.into_iter()).parse() - } - - fn field(name: &str) -> Segment { - Segment { - kind: SegmentKind::Field(name.into()), - optional: false, - } - } - - fn field_opt(name: &str) -> Segment { - Segment { - kind: SegmentKind::Field(name.into()), - optional: true, - } - } - - fn index(n: u64) -> Segment { - Segment { - kind: SegmentKind::Index(n), - optional: false, - } - } - - fn index_opt(n: u64) -> Segment { - Segment { - kind: SegmentKind::Index(n), - optional: true, - } - } - - fn each() -> Segment { - Segment { - kind: SegmentKind::Each, - optional: false, - } - } - - fn path(segments: Vec) -> Expr { - Expr::Path(segments) - } - - fn pipe(left: Expr, right: Expr) -> Expr { - Expr::Pipe(Box::new(left), Box::new(right)) - } - - #[test] - fn basic() { - assert_eq!( - parse(".a.b.c"), - Ok(path(vec![field("a"), field("b"), field("c")])) - ); - } - - #[test] - fn explicit_pipe() { - assert_eq!( - parse(".foo | .bar"), - Ok(pipe(path(vec![field("foo")]), path(vec![field("bar")]))) - ); - } - - #[test] - fn identity() { - assert_eq!(parse("."), Ok(Expr::Identity)); - } - - #[test] - fn identity_pipe() { - assert_eq!( - parse(". 
| .foo"), - Ok(pipe(Expr::Identity, path(vec![field("foo")]))) - ); - } - - #[test] - fn unexpected_eof() { - assert_eq!(parse(".foo |"), Err(Error::UnexpectedEof)); - } - - #[test] - fn empty_input() { - assert_eq!(parse(""), Err(Error::UnexpectedEof)); - } - - #[test] - fn array_index() { - assert_eq!(parse(".[0]"), Ok(path(vec![index(0)]))); - assert_eq!(parse(".[42]"), Ok(path(vec![index(42)]))); - } - - #[test] - fn array_index_with_pipe() { - assert_eq!( - parse(".foo | .[0]"), - Ok(pipe(path(vec![field("foo")]), path(vec![index(0)]))) - ); - } - - #[test] - fn array_index_implicit_pipe() { - assert_eq!(parse(".foo[0]"), Ok(path(vec![field("foo"), index(0)]))); - assert_eq!( - parse(".foo[0][1]"), - Ok(path(vec![field("foo"), index(0), index(1)])) - ); - } - - #[test] - fn array_each() { - assert_eq!(parse(".[]"), Ok(path(vec![each()]))); - assert_eq!(parse(".foo[]"), Ok(path(vec![field("foo"), each()]))); - assert_eq!( - parse(".foo[] | .bar"), - Ok(pipe( - path(vec![field("foo"), each()]), - path(vec![field("bar")]) - )) - ); - } - - #[test] - fn array_each_implicit_pipe() { - assert_eq!( - parse(".foo[].bar"), - Ok(path(vec![field("foo"), each(), field("bar")])) - ); - assert_eq!( - parse(".foo[][0]"), - Ok(path(vec![field("foo"), each(), index(0)])) - ); - } - - #[test] - fn array_index_errors() { - assert_eq!(parse(".[0"), Err(Error::UnexpectedEof)); - } - - #[test] - fn test_display_chain_vs_pipe() { - let chain = parse(".location.x").unwrap(); - assert_eq!(chain.to_string(), ".location.x"); - - let piped = parse(".foo | .bar").unwrap(); - assert_eq!(piped.to_string(), ".foo | .bar"); - - let identity = parse(".").unwrap(); - assert_eq!(identity.to_string(), "."); - - let complex = parse(".a.b[] | .c[0]").unwrap(); - assert_eq!(complex.to_string(), ".a.b[] | .c[0]"); - } - - #[test] - fn optional_field() { - assert_eq!(parse(".foo?"), Ok(path(vec![field_opt("foo")]))); - assert_eq!( - parse(".foo?.bar"), - Ok(path(vec![field_opt("foo"), field("bar")])) 
- ); - } - - #[test] - fn optional_index() { - assert_eq!(parse(".[0]?"), Ok(path(vec![index_opt(0)]))); - } - - #[test] - fn optional_each_not_supported() { - // `?` after `[]` should be a parse error (unexpected symbol) - assert!(parse(".[]?").is_err()); - } - - #[test] - fn test_display_optional() { - let expr = parse(".foo?").unwrap(); - assert_eq!(expr.to_string(), ".foo?"); - - let expr = parse(".foo?.bar").unwrap(); - assert_eq!(expr.to_string(), ".foo?.bar"); - - // Note: leading `.` is consumed by the path parser, not stored in segments. - let expr = parse(".[0]?").unwrap(); - assert_eq!(expr.to_string(), "[0]?"); - } -} diff --git a/crates/store/re_arrow_combinators/src/selector/runtime.rs b/crates/store/re_arrow_combinators/src/selector/runtime.rs deleted file mode 100644 index baac1ff2d4e2..000000000000 --- a/crates/store/re_arrow_combinators/src/selector/runtime.rs +++ /dev/null @@ -1,100 +0,0 @@ -//! Runtime execution of [`Expr`] against Arrow [`ListArray`]s. -//! -//! This module implements execution of expressions against Arrow [`ListArray`]s. - -use arrow::array::{Array as _, ListArray}; - -use crate::{ - Transform as _, - index::GetIndexList, - map::MapList, - reshape::{Flatten, GetField}, -}; - -use super::parser::{Expr, Segment, SegmentKind}; - -/// Executes the given expression against the source array. -/// -/// Returns `None` if the expression was suppressed by an optional segment (e.g. `.field?`). -/// The caller decides how to handle the absent result. -pub fn execute_per_row(expr: &Expr, source: &ListArray) -> Result, crate::Error> { - // TODO(grtlr): It would be much cleaner if `MapList` (or equivalent would be called on this level). 
- let result = expr.execute(source)?; - - if let Some(ref result) = result { - re_log::debug_assert_eq!( - result.len(), - source.len(), - "selectors should never change row count" - ); - } - - Ok(result) -} - -impl SegmentKind { - fn execute(&self, source: &ListArray) -> Result { - match self { - Self::Field(field_name) => { - MapList::new(GetField::new(field_name.clone())).transform(source) - } - Self::Index(index) => MapList::new(GetIndexList::new(*index)).transform(source), - Self::Each => { - // In Arrow's columnar context, [] flattens one level of list nesting - // while preserving row count, rather than exploding to create new rows. - // This reinterprets jq's streaming iteration as structural unwrapping. - if source - .values() - .as_any() - .downcast_ref::() - .is_some() - { - // Flatten nested lists: List> -> List - Flatten::new().transform(source) - } else { - Err(crate::Error::TypeMismatch { - expected: "ListArray".into(), - actual: source.value_type(), - context: "Each ([]) operator requires nested lists".into(), - }) - } - } - } - } -} - -impl Segment { - fn execute(&self, source: &ListArray) -> Result, crate::Error> { - match self.kind.execute(source) { - Ok(result) => Ok(Some(result)), - // TODO(RR-3435): FixedSizeListArray errors must be suppressed via optional, but ListArray should not need it. - Err(err) if self.optional => { - re_log::trace!("Optional segment `{self}` suppressed error: {err}"); - Ok(None) - } - Err(err) => Err(err), - } - } -} - -impl Expr { - fn execute(&self, source: &ListArray) -> Result, crate::Error> { - match self { - Self::Identity => Ok(Some(source.clone())), - Self::Path(segments) => { - let mut result = source.clone(); - for segment in segments { - match segment.execute(&result)? { - Some(next) => result = next, - None => return Ok(None), - } - } - Ok(Some(result)) - } - Self::Pipe(left, right) => match left.as_ref().execute(source)? 
{ - Some(intermediate) => right.as_ref().execute(&intermediate), - None => Ok(None), - }, - } - } -} diff --git a/crates/store/re_arrow_combinators/src/transform.rs b/crates/store/re_arrow_combinators/src/transform.rs deleted file mode 100644 index a27812557b73..000000000000 --- a/crates/store/re_arrow_combinators/src/transform.rs +++ /dev/null @@ -1,54 +0,0 @@ -use arrow::array::Array; - -use crate::Error; - -/// A transformation that converts one Arrow array type to another. -/// -/// Transformations are read-only operations that may fail (e.g., missing field, type mismatch). -/// They can be composed using the `then` method to create complex transformation pipelines. -pub trait Transform { - /// The source array type. - type Source: Array; - - /// The target array type. - type Target: Array; - - /// Apply the transformation to the source array. - fn transform(&self, source: &Self::Source) -> Result; - - /// Chain this transformation with another transformation. - fn then(self, next: T2) -> Compose - where - Self: Sized, - T2: Transform, - { - Compose { - first: self, - second: next, - } - } -} - -/// Composes two transformations into a single transformation. -/// -/// This is the result of calling `.then()` on a transformation. 
-#[derive(Clone)] -pub struct Compose { - first: T1, - second: T2, -} - -impl Transform for Compose -where - T1: Transform, - T2: Transform, - M: Array, -{ - type Source = T1::Source; - type Target = T2::Target; - - fn transform(&self, source: &Self::Source) -> Result { - let mid = self.first.transform(source)?; - self.second.transform(&mid) - } -} diff --git a/crates/store/re_arrow_combinators/tests/test_explode.rs b/crates/store/re_arrow_combinators/tests/test_explode.rs deleted file mode 100644 index 851bb0565fb7..000000000000 --- a/crates/store/re_arrow_combinators/tests/test_explode.rs +++ /dev/null @@ -1,216 +0,0 @@ -mod util; - -use std::sync::Arc; - -use arrow::array::{Array as _, Int32Array, ListArray}; -use arrow::buffer::OffsetBuffer; -use arrow::datatypes::{DataType, Field, Int32Type}; -use re_arrow_combinators::Transform as _; -use re_arrow_combinators::reshape::Explode; -use util::DisplayRB; - -#[test] -fn test_explode_primitives() { - let input = ListArray::from_iter_primitive::(vec![ - Some(vec![Some(1), Some(2), Some(3)]), - Some(vec![Some(4), Some(5)]), - Some(vec![Some(6)]), - ]); - - insta::assert_snapshot!(format!("{}", DisplayRB(input.clone())), @" - ┌───────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable i32] │ - ╞═══════════════════════════════════╡ - │ [1, 2, 3] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [4, 5] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [6] │ - └───────────────────────────────────┘ - "); - - let explode = Explode; - let result = explode.transform(&input).unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" - ┌───────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable i32] │ - ╞═══════════════════════════════════╡ - │ [1] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [2] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [3] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [4] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [5] │ - 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [6] │ - └───────────────────────────────────┘ - "); -} - -#[test] -fn test_explode_with_nulls_and_empty() { - let input = ListArray::from_iter_primitive::(vec![ - Some(vec![Some(1), Some(2)]), - None, - Some(vec![]), - Some(vec![Some(3)]), - ]); - - insta::assert_snapshot!(format!("{}", DisplayRB(input.clone())), @" - ┌───────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable i32] │ - ╞═══════════════════════════════════╡ - │ [1, 2] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [3] │ - └───────────────────────────────────┘ - "); - - let explode = Explode; - let result = explode.transform(&input).unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" - ┌───────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable i32] │ - ╞═══════════════════════════════════╡ - │ [1] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [2] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [3] │ - └───────────────────────────────────┘ - "); -} - -#[test] -fn test_explode_nested_lists() { - let inner_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6]); - let inner_offsets = OffsetBuffer::new(vec![0, 2, 3, 6].into()); - let inner_field = Arc::new(Field::new_list_field(DataType::Int32, true)); - let inner_list = ListArray::new(inner_field, inner_offsets, Arc::new(inner_values), None); - - let outer_offsets = OffsetBuffer::new(vec![0, 2, 3].into()); - let outer_field = Arc::new(Field::new_list_field(inner_list.data_type().clone(), true)); - let input = ListArray::new( - outer_field, - outer_offsets, - Arc::new(inner_list.clone()), - None, - ); - - insta::assert_snapshot!(format!("{}", DisplayRB(input.clone())), @" - ┌──────────────────────────────────────────────────┐ - │ col │ - │ 
--- │ - │ type: nullable List[nullable List[nullable i32]] │ - ╞══════════════════════════════════════════════════╡ - │ [[1, 2], [3]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[4, 5, 6]] │ - └──────────────────────────────────────────────────┘ - "); - - let explode = Explode; - let result = explode.transform(&input).unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" - ┌──────────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable List[nullable i32]] │ - ╞══════════════════════════════════════════════════╡ - │ [[1, 2]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[3]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[4, 5, 6]] │ - └──────────────────────────────────────────────────┘ - "); -} - -#[test] -fn test_explode_empty_input() { - // Test exploding an empty list - let input = ListArray::from_iter_primitive::(Vec::< - Option>>, - >::new()); - - let explode = Explode; - let result = explode.transform(&input).unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" - ┌───────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable i32] │ - ╞═══════════════════════════════════╡ - └───────────────────────────────────┘ - "); -} - -#[test] -fn test_explode_with_skips_in_offset_buffer() { - let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); - let offsets = OffsetBuffer::new(vec![0, 2, 7, 10].into()); - let validity = arrow::buffer::NullBuffer::from(vec![true, false, true]); - let field = Arc::new(Field::new_list_field(DataType::Int32, true)); - - let input = ListArray::new(field, offsets, Arc::new(values), Some(validity)); - - insta::assert_snapshot!(format!("{}", DisplayRB(input.clone())), @" - ┌───────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable i32] │ - ╞═══════════════════════════════════╡ - │ [0, 1] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ 
- ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [7, 8, 9] │ - └───────────────────────────────────┘ - "); - - let explode = Explode; - let result = explode.transform(&input).unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" - ┌───────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable i32] │ - ╞═══════════════════════════════════╡ - │ [0] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [1] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [7] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [8] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [9] │ - └───────────────────────────────────┘ - "); -} diff --git a/crates/store/re_arrow_combinators/tests/test_selector.rs b/crates/store/re_arrow_combinators/tests/test_selector.rs deleted file mode 100644 index b7de5f385011..000000000000 --- a/crates/store/re_arrow_combinators/tests/test_selector.rs +++ /dev/null @@ -1,435 +0,0 @@ -mod util; - -use std::sync::Arc; - -use arrow::{ - array::{Array as _, FixedSizeListArray, Int32Array, ListArray}, - buffer::OffsetBuffer, - datatypes::{DataType, Field, Fields}, -}; -use re_arrow_combinators::{Selector, SelectorError as Error}; -use util::DisplayRB; - -use crate::util::fixtures; - -#[test] -fn execute_nested_struct() -> Result<(), Error> { - let array = fixtures::nested_struct_column(); - - let result = ".location.x" - .parse::()? - .execute_per_row(&array)? 
- .unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" - ┌───────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable f64] │ - ╞═══════════════════════════════════╡ - │ [1.0] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [3.0, 5.0] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null, 7.0] │ - └───────────────────────────────────┘ - "); - - Ok(()) -} - -#[test] -fn execute_identity() -> Result<(), Error> { - let array = fixtures::nested_list_struct_column(); - - let result = ".".parse::()?.execute_per_row(&array)?.unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" - ┌───────────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Struct[1]] │ - ╞═══════════════════════════════════════════════════╡ - │ [{poses: [{x: 1.0, y: 2.0}, {x: 3.0, y: 4.0}]}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{poses: [{x: 5.0, y: 6.0}]}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{poses: []}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{poses: [{x: 7.0, y: null}, {x: 9.0, y: 10.0}]}] │ - └───────────────────────────────────────────────────┘ - "); - Ok(()) -} - -#[test] -fn execute_simple_field() -> Result<(), Error> { - let array = fixtures::nested_list_struct_column(); - - let result = ".poses" - .parse::()? - .execute_per_row(&array)? 
- .unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r" - ┌───────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable List[Struct[2]]] │ - ╞═══════════════════════════════════════════════╡ - │ [[{x: 1.0, y: 2.0}, {x: 3.0, y: 4.0}]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[{x: 5.0, y: 6.0}]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[{x: 7.0, y: null}, {x: 9.0, y: 10.0}]] │ - └───────────────────────────────────────────────┘ - "); - Ok(()) -} - -#[test] -fn execute_index() -> Result<(), Error> { - let array = fixtures::nested_list_struct_column(); - - let result = ".poses[0]" - .parse::()? - .execute_per_row(&array)? - .unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" - ┌─────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Struct[2]] │ - ╞═════════════════════════════════════════╡ - │ [{x: 1.0, y: 2.0}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{x: 5.0, y: 6.0}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{x: 7.0, y: null}] │ - └─────────────────────────────────────────┘ - "); - Ok(()) -} - -#[test] -fn execute_index_chained() -> Result<(), Error> { - let array = fixtures::nested_list_struct_column(); - - let result = ".poses[0].x" - .parse::()? - .execute_per_row(&array)? 
- .unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" - ┌───────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable f64] │ - ╞═══════════════════════════════════╡ - │ [1.0] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [5.0] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [7.0] │ - └───────────────────────────────────┘ - "); - Ok(()) -} - -#[test] -fn execute_index_to_extract_second_element() -> Result<(), Error> { - let array = fixtures::nested_list_struct_column(); - - let result = ".poses[1]" - .parse::()? - .execute_per_row(&array)? - .unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" - ┌─────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Struct[2]] │ - ╞═════════════════════════════════════════╡ - │ [{x: 3.0, y: 4.0}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{x: 9.0, y: 10.0}] │ - └─────────────────────────────────────────┘ - "); - Ok(()) -} - -#[test] -fn execute_array_each() -> Result<(), Error> { - let array = fixtures::nested_list_struct_column(); - - let result = ".poses[]" - .parse::()? - .execute_per_row(&array)? 
- .unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" - ┌─────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Struct[2]] │ - ╞═════════════════════════════════════════╡ - │ [{x: 1.0, y: 2.0}, {x: 3.0, y: 4.0}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{x: 5.0, y: 6.0}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{x: 7.0, y: null}, {x: 9.0, y: 10.0}] │ - └─────────────────────────────────────────┘ - "); - Ok(()) -} - -#[test] -fn execute_parse_error() { - let result = ".poses[".parse::(); - - assert!(matches!(result, Err(Error::Parse(_)))); -} - -#[test] -fn execute_missing_field() { - let array = fixtures::nested_list_struct_column(); - - let result = ".nonexistent" - .parse::() - .unwrap() - .execute_per_row(&array); - - assert!(matches!( - result, - Err(Error::Runtime( - re_arrow_combinators::Error::FieldNotFound { .. } - )) - )); -} - -#[test] -fn execute_index_out_of_bounds() -> Result<(), Error> { - let array = fixtures::nested_list_struct_column(); - - let result = ".poses[10]" - .parse::()? - .execute_per_row(&array)? - .unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" - ┌─────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Struct[2]] │ - ╞═════════════════════════════════════════╡ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - └─────────────────────────────────────────┘ - "); - Ok(()) -} - -// TODO(RR-3435): Implement indexing into `FixedSizeListArray`. 
-#[test] -fn execute_index_on_fixed_size_list() -> Result<(), Error> { - let values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]); - let fixed_field = Arc::new(Field::new("item", DataType::Int32, true)); - let fixed_list = FixedSizeListArray::new(fixed_field, 3, Arc::new(values), None); - - let offsets = OffsetBuffer::new(vec![0, 2, 3].into()); - let list_field = Arc::new(Field::new_list_field(fixed_list.data_type().clone(), true)); - let array = ListArray::new(list_field, offsets, Arc::new(fixed_list), None); - - insta::assert_snapshot!(format!("{}", DisplayRB(array.clone())), @" - ┌──────────────────────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable FixedSizeList[nullable i32; 3]] │ - ╞══════════════════════════════════════════════════════════════╡ - │ [[1, 2, 3], [4, 5, 6]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[7, 8, 9]] │ - └──────────────────────────────────────────────────────────────┘ - "); - - let result = ".[0][1]".parse::()?.execute_per_row(&array); - - assert!(matches!(result, Err(Error::Runtime(..)))); - - Ok(()) -} - -#[test] -fn execute_optional_field() -> Result<(), Error> { - let array = fixtures::nested_struct_column(); - - // Without `?`, accessing a field that doesn't exist errors. - let err = ".location.z".parse::()?.execute_per_row(&array); - assert!(matches!( - err, - Err(Error::Runtime( - re_arrow_combinators::Error::FieldNotFound { .. } - )) - )); - - // With `?`, the missing field is suppressed and we get `None` instead. - let result = ".location.z?" - .parse::()? 
- .execute_per_row(&array)?; - - assert!(result.is_none(), "optional segment should return None"); - - Ok(()) -} - -fn formatted(pair: impl IntoIterator) -> String { - pair.into_iter() - .map(|(sel, dt)| format!("{sel} ({dt})")) - .collect::>() - .join("\n") -} - -#[test] -fn extract_scalar_fields_from_nested_struct() { - // Schema: - // ┌─ a (struct) - // │ ├─ b: Float64 - // │ └─ c: Int32 - // └─ d: Int32 - - let bc_fields = Fields::from(vec![ - Field::new("b", DataType::Float64, true), - Field::new("c", DataType::Int32, true), - ]); - - let root_fields = Fields::from(vec![ - Field::new("a", DataType::Struct(bc_fields), true), - Field::new("d", DataType::Int32, true), - ]); - - let datatype = DataType::Struct(root_fields); - - let result = re_arrow_combinators::extract_nested_fields(&datatype, |dt| { - matches!(dt, DataType::Float64 | DataType::Int32) - }) - .expect("Should find nested fields"); - - insta::assert_snapshot!(formatted(result), @" - .d (Int32) - .a.b (Float64) - .a.c (Int32) - "); -} - -#[test] -fn extract_scalar_fields_from_nested_list_struct() { - // Schema: - // ┌─ a (struct) - // │ ├─ b: [Float64] - // │ └─ c: [Int32] - // └─ d: [Float64] - - let b_list = DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))); - let c_list = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); - let bc_fields = Fields::from(vec![ - Field::new("b", b_list, true), - Field::new("c", c_list, true), - ]); - - let d_list = DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))); - let root_fields = Fields::from(vec![ - Field::new("a", DataType::Struct(bc_fields), true), - Field::new("d", d_list, true), - ]); - - let datatype = DataType::Struct(root_fields); - - let result = re_arrow_combinators::extract_nested_fields(&datatype, |dt| { - matches!(dt, DataType::Float64 | DataType::Int32) - }) - .expect("Should find nested fields"); - - insta::assert_snapshot!(formatted(result), @" - .d[] (Float64) - .a.b[] (Float64) 
- .a.c[] (Int32) - "); -} - -#[test] -fn extract_nested_fields_fixtures() { - let array = fixtures::nested_struct_column(); - let result = re_arrow_combinators::extract_nested_fields(&array.value_type(), |dt| { - matches!(dt, DataType::Float64) - }) - .expect("Should find nested fields"); - - insta::assert_snapshot!(formatted(result), @" - .location.x (Float64) - .location.y (Float64) - "); - - let array = fixtures::nested_list_struct_column(); - let result = re_arrow_combinators::extract_nested_fields(&array.value_type(), |dt| { - matches!(dt, DataType::Float64) - }) - .expect("Should find nested fields"); - - insta::assert_snapshot!(formatted(result), @" - .poses[].x (Float64) - .poses[].y (Float64) - "); -} diff --git a/crates/store/re_arrow_combinators/tests/test_string_transforms.rs b/crates/store/re_arrow_combinators/tests/test_string_transforms.rs deleted file mode 100644 index 161e38ad8fc8..000000000000 --- a/crates/store/re_arrow_combinators/tests/test_string_transforms.rs +++ /dev/null @@ -1,179 +0,0 @@ -mod util; - -use re_arrow_combinators::Transform as _; -use re_arrow_combinators::map::{MapList, StringPrefix, StringSuffix}; -use re_arrow_combinators::reshape::GetField; - -use crate::util::{DisplayRB, fixtures::nested_string_struct_column}; - -/// Tests that `StringPrefix` and `StringSuffix` work correctly when the `StringArray` -/// is extracted from a nested struct where string arrays share a common values buffer. 
-#[test] -fn test_string_transforms_from_nested_struct() { - let list_array = nested_string_struct_column(); - - let names_list = MapList::new(GetField::new("data")) - .then(MapList::new(GetField::new("names"))) - .transform(&list_array) - .expect("failed to extract names"); - insta::assert_snapshot!(DisplayRB(names_list.clone()), @r" - ┌────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Utf8] │ - ╞════════════════════════════════════╡ - │ [alice] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null, dave] │ - └────────────────────────────────────┘ - "); - - let colors_list = MapList::new(GetField::new("data")) - .then(MapList::new(GetField::new("colors"))) - .transform(&list_array) - .expect("failed to extract colors"); - insta::assert_snapshot!(DisplayRB(colors_list.clone()), @r" - ┌────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Utf8] │ - ╞════════════════════════════════════╡ - │ [red] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null, yellow] │ - └────────────────────────────────────┘ - "); - - // Test prefix on names array using MapList. - let prefix_names = MapList::new(StringPrefix::new("user:")) - .transform(&names_list) - .expect("prefix transformation failed"); - insta::assert_snapshot!(DisplayRB(prefix_names.clone()), @r" - ┌────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Utf8] │ - ╞════════════════════════════════════╡ - │ [user:alice] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null, user:dave] │ - └────────────────────────────────────┘ - "); - - // Test suffix on colors array using MapList. 
- let suffix_colors = MapList::new(StringSuffix::new("_color")) - .transform(&colors_list) - .expect("suffix transformation failed"); - insta::assert_snapshot!(DisplayRB(suffix_colors.clone()), @r" - ┌────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Utf8] │ - ╞════════════════════════════════════╡ - │ [red_color] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null, yellow_color] │ - └────────────────────────────────────┘ - "); - - // Test chaining on names array using MapList and Compose (via .then()). - let chained_names = MapList::new(StringPrefix::new("<").then(StringSuffix::new(">"))) - .transform(&names_list) - .expect("chained transformation failed"); - insta::assert_snapshot!(DisplayRB(chained_names.clone()), @r" - ┌────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Utf8] │ - ╞════════════════════════════════════╡ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null, ] │ - └────────────────────────────────────┘ - "); - - // Verify original nested list structure is unaffected by the transformations. 
- insta::assert_snapshot!(DisplayRB(list_array.clone()), @r" - ┌───────────────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Struct[1]] │ - ╞═══════════════════════════════════════════════════════╡ - │ [{data: {names: alice, colors: red}}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{data: null}, {data: {names: dave, colors: yellow}}] │ - └───────────────────────────────────────────────────────┘ - "); -} - -/// Tests that `StringPrefix` and `StringSuffix` preserve empty strings as-is when configured to do so. -#[test] -fn test_string_transforms_preserve_empty_strings() { - use arrow::array::StringArray; - - let input = StringArray::from(vec![Some("hello"), Some(""), None, Some("world")]); - - let prefixed = StringPrefix::new("prefix_") - .with_prefix_empty_string(false) - .transform(&input) - .unwrap(); - insta::assert_snapshot!(DisplayRB(prefixed), @r" - ┌─────────────────────┐ - │ col │ - │ --- │ - │ type: nullable Utf8 │ - ╞═════════════════════╡ - │ prefix_hello │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ prefix_world │ - └─────────────────────┘ - "); - - let suffixed = StringSuffix::new("_suffix") - .with_suffix_empty_string(false) - .transform(&input) - .unwrap(); - insta::assert_snapshot!(DisplayRB(suffixed), @r" - ┌─────────────────────┐ - │ col │ - │ --- │ - │ type: nullable Utf8 │ - ╞═════════════════════╡ - │ hello_suffix │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ world_suffix │ - └─────────────────────┘ - "); -} diff --git a/crates/store/re_arrow_combinators/tests/test_transform.rs b/crates/store/re_arrow_combinators/tests/test_transform.rs deleted file mode 100644 index 860e43307cef..000000000000 --- 
a/crates/store/re_arrow_combinators/tests/test_transform.rs +++ /dev/null @@ -1,446 +0,0 @@ -mod util; - -use std::str::FromStr as _; - -use arrow::array::{Float32Array, Float64Array, Int32Builder, ListArray, ListBuilder, UInt8Array}; -use re_arrow_combinators::Selector; -use re_arrow_combinators::Transform as _; -use re_arrow_combinators::cast::{ListToFixedSizeList, PrimitiveCast}; -use re_arrow_combinators::map::{MapFixedSizeList, MapList, MapPrimitive, ReplaceNull}; -use re_arrow_combinators::reshape::{RowMajorToColumnMajor, StructToFixedList}; -use util::DisplayRB; - -use crate::util::fixtures; - -#[test] -fn simple() { - let array = fixtures::nested_list_struct_column(); - println!("{}", DisplayRB(array.clone())); - - let pipeline = Selector::from_str(".poses[]") - .unwrap() - .then(MapList::new(StructToFixedList::new(["x", "y"]))); - - let result: ListArray = pipeline.transform(&array).unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result.clone())), @r" - ┌──────────────────────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable FixedSizeList[nullable f64; 2]] │ - ╞══════════════════════════════════════════════════════════════╡ - │ [[1.0, 2.0], [3.0, 4.0]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[5.0, 6.0]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[7.0, null], [9.0, 10.0]] │ - └──────────────────────────────────────────────────────────────┘ - "); -} - -#[test] -fn add_one_to_leaves() { - let array = fixtures::nested_list_struct_column(); - println!("{}", DisplayRB(array.clone())); - - let pipeline = Selector::from_str(".poses[]") - .unwrap() - .then(MapList::new(StructToFixedList::new(["x", "y"]))) - 
.then(MapList::new(MapFixedSizeList::new(MapPrimitive::< - arrow::datatypes::Float64Type, - _, - >::new(|x| { - x + 1.0 - })))); - - let result = pipeline.transform(&array).unwrap(); - - insta::assert_snapshot!( - format!("{}", DisplayRB(result.clone())) - , @r" - ┌──────────────────────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable FixedSizeList[nullable f64; 2]] │ - ╞══════════════════════════════════════════════════════════════╡ - │ [[2.0, 3.0], [4.0, 5.0]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[6.0, 7.0]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[8.0, null], [10.0, 11.0]] │ - └──────────────────────────────────────────────────────────────┘ - " - ); -} - -#[test] -fn convert_to_f32() { - let array = fixtures::nested_list_struct_column(); - println!("{}", DisplayRB(array.clone())); - - let pipeline = Selector::from_str(".poses[]") - .unwrap() - .then(MapList::new(StructToFixedList::new(["x", "y"]))) - .then(MapList::new(MapFixedSizeList::new(PrimitiveCast::< - Float64Array, - Float32Array, - >::new()))); - - let result = pipeline.transform(&array).unwrap(); - - insta::assert_snapshot!(DisplayRB(result.clone()), @r" - ┌──────────────────────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable FixedSizeList[nullable f32; 2]] │ - ╞══════════════════════════════════════════════════════════════╡ - │ [[1.0, 2.0], [3.0, 4.0]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[5.0, 6.0]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[7.0, null], [9.0, 10.0]] │ - └──────────────────────────────────────────────────────────────┘ - "); -} - -#[test] -fn replace_nulls() { - let array = fixtures::nested_list_struct_column(); - println!("{}", DisplayRB(array.clone())); - - let pipeline = Selector::from_str(".poses[]") - .unwrap() - .then(MapList::new(StructToFixedList::new(["x", "y"]))) - .then(MapList::new(MapFixedSizeList::new(ReplaceNull::< - arrow::datatypes::Float64Type, - >::new(1337.0)))); - - let result = pipeline.transform(&array).unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result.clone())), @r" - ┌─────────────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable FixedSizeList[f64; 2]] │ - ╞═════════════════════════════════════════════════════╡ - │ [[1.0, 2.0], [3.0, 4.0]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[5.0, 6.0]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[7.0, 1337.0], [9.0, 10.0]] │ - └─────────────────────────────────────────────────────┘ - "); -} - -#[test] -fn test_flatten_single_element() { - let array = fixtures::nested_list_struct_column(); - println!("{}", DisplayRB(array.clone())); - - let pipeline = Selector::from_str(".poses[]").unwrap(); - - let result = pipeline.transform(&array).unwrap(); - - insta::assert_snapshot!( - format!("{}", DisplayRB(result.clone())), @" - ┌─────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Struct[2]] │ - ╞═════════════════════════════════════════╡ - │ [{x: 1.0, y: 2.0}, {x: 3.0, y: 4.0}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{x: 
5.0, y: 6.0}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{x: 7.0, y: null}, {x: 9.0, y: 10.0}] │ - └─────────────────────────────────────────┘ - " - ); -} - -#[test] -fn test_flatten_multiple_elements() { - let inner_builder = ListBuilder::new(arrow::array::Int32Builder::new()); - let mut outer_builder = ListBuilder::new(inner_builder); - - // Row 0: [[1, 2], [3, 4]] -> should flatten to [1, 2, 3, 4] - outer_builder.values().values().append_value(1); - outer_builder.values().values().append_value(2); - outer_builder.values().append(true); - outer_builder.values().values().append_value(3); - outer_builder.values().values().append_value(4); - outer_builder.values().append(true); - outer_builder.append(true); - - // Row 1: [[5, null], [6, 7, 8]] -> should flatten to [5, null, 6, 7, 8] - outer_builder.values().values().append_value(5); - outer_builder.values().values().append_null(); - outer_builder.values().append(true); - outer_builder.values().values().append_value(6); - outer_builder.values().values().append_value(7); - outer_builder.values().values().append_value(8); - outer_builder.values().append(true); - outer_builder.append(true); - - // Row 2: [[]] -> should flatten to [] - outer_builder.values().append(true); - outer_builder.append(true); - - // Row 3: [[], [9]] -> should flatten to [9] - outer_builder.values().append(true); - outer_builder.values().values().append_value(9); - outer_builder.values().append(true); - outer_builder.append(true); - - // Row 4: null -> should remain null - outer_builder.append(false); - - // Row 5: [[10, 11]] -> should flatten to [10, 11] - outer_builder.values().values().append_value(10); - outer_builder.values().values().append_value(11); - outer_builder.values().append(true); - outer_builder.append(true); - - // Row 6: [[32], [33, 34], [], null] -> 
should flatten to [32, 33, 34] - outer_builder.values().values().append_value(32); - outer_builder.values().append(true); - outer_builder.values().values().append_value(33); - outer_builder.values().values().append_value(34); - outer_builder.values().append(true); - outer_builder.values().append(true); - outer_builder.values().append(false); - outer_builder.append(true); - - let list_of_lists = outer_builder.finish(); - - println!("{}", DisplayRB(list_of_lists.clone())); - - let result = Selector::from_str(".[]") - .unwrap() - .transform(&list_of_lists) - .unwrap(); - - insta::assert_snapshot!( - format!("{}", DisplayRB(result.clone())), @" - ┌───────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable i32] │ - ╞═══════════════════════════════════╡ - │ [1, 2, 3, 4] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [5, null, 6, 7, 8] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [9] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [10, 11] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [32, 33, 34] │ - └───────────────────────────────────┘ - " - ); -} - -#[test] -fn test_row_major_to_col_major() { - let inner_builder = Int32Builder::new(); - let mut outer_builder = ListBuilder::new(inner_builder); - - // First list represents a 4x3 matrix in row-major order with some null elements. 
- // Row 0 - outer_builder.values().append_value(1); - outer_builder.values().append_null(); - outer_builder.values().append_value(3); - // Row 1 - outer_builder.values().append_value(4); - outer_builder.values().append_value(5); - outer_builder.values().append_value(6); - // Row 2 - outer_builder.values().append_value(7); - outer_builder.values().append_value(8); - outer_builder.values().append_null(); - // Row 3 - outer_builder.values().append_value(10); - outer_builder.values().append_value(11); - outer_builder.values().append_value(12); - outer_builder.append(true); - - // Second list is invalid / null. - for _ in 0..12 { - // Add dummy values for Arrow's fixed-size requirements. - // See: https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeListArray.html#representation - outer_builder.values().append_value(0); - } - outer_builder.append(false); - - // Third list represents a 4x3 matrix in row-major order without null elements. - // Row 0 - outer_builder.values().append_value(13); - outer_builder.values().append_value(14); - outer_builder.values().append_value(15); - // Row 1 - outer_builder.values().append_value(16); - outer_builder.values().append_value(17); - outer_builder.values().append_value(18); - // Row 2 - outer_builder.values().append_value(19); - outer_builder.values().append_value(20); - outer_builder.values().append_value(21); - // Row 3 - outer_builder.values().append_value(22); - outer_builder.values().append_value(23); - outer_builder.values().append_value(24); - outer_builder.append(true); - - let input_array = outer_builder.finish(); - - // Cast to `FixedSizeListArray` and convert to column-major order. 
- let fixed_size_list_array = ListToFixedSizeList::new(12) - .transform(&input_array) - .unwrap(); - let result = RowMajorToColumnMajor::new(4, 3) - .transform(&fixed_size_list_array) - .unwrap(); - - insta::assert_snapshot!( - format!("{}", DisplayRB(result.clone())), @" - ┌──────────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable FixedSizeList[nullable i32; 12] │ - ╞══════════════════════════════════════════════════╡ - │ [1, 4, 7, 10, null, 5, 8, 11, 3, 6, null, 12] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [13, 16, 19, 22, 14, 17, 20, 23, 15, 18, 21, 24] │ - └──────────────────────────────────────────────────┘ - " - ); -} - -#[test] -fn test_map_list_nullability() { - let array = fixtures::nested_list_struct_column(); - println!("{}", DisplayRB(array.clone())); - - let pipeline = Selector::from_str(".poses[]") - .unwrap() - .then(MapList::new(StructToFixedList::new(["x", "y"]))); - - let result: ListArray = pipeline.transform(&array).unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result.clone())), @r" - ┌──────────────────────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable FixedSizeList[nullable f64; 2]] │ - ╞══════════════════════════════════════════════════════════════╡ - │ [[1.0, 2.0], [3.0, 4.0]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[5.0, 6.0]] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [[7.0, null], [9.0, 10.0]] │ - └──────────────────────────────────────────────────────────────┘ - "); -} - -#[test] -fn test_map_list_outer_nullability() { - let array = fixtures::list_not_nullable(); - 
println!("{}", DisplayRB(array.clone())); - - let pipeline = MapList::new(PrimitiveCast::::new()); - - let result: ListArray = pipeline.transform(&array).unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result.clone())), @" - ┌──────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[f32] │ - ╞══════════════════════════╡ - │ [1.0, 2.0] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [3.0, 4.0, 5.0] │ - └──────────────────────────┘ - "); - - let array = fixtures::list_with_nulls(); - println!("{}", DisplayRB(array.clone())); - - let result: ListArray = pipeline.transform(&array).unwrap(); - insta::assert_snapshot!(format!("{}", DisplayRB(result.clone())), @r" - ┌───────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable f32] │ - ╞═══════════════════════════════════╡ - │ [1.0, 2.0] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - └───────────────────────────────────┘ - "); -} - -#[test] -fn test_map_list_outer_nullability_identity() { - let array = fixtures::list_not_nullable(); - println!("{}", DisplayRB(array.clone())); - - let pipeline = MapList::new(MapPrimitive::::new(|x| x)); - - let result: ListArray = pipeline.transform(&array).unwrap(); - - insta::assert_snapshot!(format!("{}", DisplayRB(result.clone())), @r" - ┌─────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[u8] │ - ╞═════════════════════════╡ - │ [1, 2] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [3, 4, 5] │ - └─────────────────────────┘ - "); -} diff --git a/crates/store/re_chunk/examples/chunk_latest_at.rs b/crates/store/re_chunk/examples/chunk_latest_at.rs index b335002cd95b..699373012aa7 100644 --- a/crates/store/re_chunk/examples/chunk_latest_at.rs +++ b/crates/store/re_chunk/examples/chunk_latest_at.rs @@ -11,11 +11,14 @@ fn main() -> anyhow::Result<()> { let query = LatestAtQuery::new(TimelineName::new("frame"), 4); // Find all relevant data for a query: - let chunk = chunk.latest_at(&query, MyPoints::descriptor_points().component); 
- eprintln!("{:?} @ {query:?}:\n{chunk}", MyPoints::descriptor_points()); + let Some(unit) = chunk.latest_at(&query, MyPoints::descriptor_points().component) else { + eprintln!("No data found for {query:?}"); + return Ok(()); + }; + eprintln!("{:?} @ {query:?}:\n{unit}", MyPoints::descriptor_points()); // And then slice it as appropriate: - let chunk = chunk + let chunk = unit .timeline_sliced(TimelineName::log_time()) .component_sliced(MyPoints::descriptor_points().component); eprintln!("Sliced down to specific timeline and component:\n{chunk}"); diff --git a/crates/store/re_chunk/src/chunk.rs b/crates/store/re_chunk/src/chunk.rs index 983095ec7629..1c86ca58608e 100644 --- a/crates/store/re_chunk/src/chunk.rs +++ b/crates/store/re_chunk/src/chunk.rs @@ -9,7 +9,7 @@ use arrow::array::{ use arrow::buffer::{NullBuffer as ArrowNullBuffer, ScalarBuffer as ArrowScalarBuffer}; use itertools::{Either, Itertools as _, izip}; use nohash_hasher::IntMap; -use re_arrow_util::{ArrowArrayDowncastRef as _, DisplayDataType, widen_binary_arrays}; +use re_arrow_util::{ArrowArrayDowncastRef as _, widen_binary_arrays}; use re_byte_size::SizeBytes as _; use re_log::debug_assert; use re_log_types::{ @@ -215,6 +215,9 @@ impl FromIterator for ChunkComponents { /// /// This is the in-memory representation of a chunk, optimized for efficient manipulation of the /// data within. For transport, see [`re_sorbet::ChunkBatch`] instead. +/// +/// Each [`Chunk`] has a globally unique [`ChunkId`]. +/// Each time a new [`Chunk`] is created or modified, it should be assigned a new [`ChunkId`]. pub struct Chunk { pub(crate) id: ChunkId, @@ -247,6 +250,22 @@ pub struct Chunk { pub(crate) components: ChunkComponents, } +/// Generates sequential [`RowId`]s derived from a [`ChunkId`]. 
+/// +/// A core assumption of the data model is that the value of a cell +/// defined by (`RowId` x column descriptor) is not allowed to change, +/// which is why we have to generate new `RowId`s before modifying +/// components or indexes. +fn auto_row_ids(id: ChunkId, count: usize) -> FixedSizeBinaryArray { + let row_ids: Vec = + std::iter::successors(Some(RowId::from_tuid((*id).next())), |row_id| { + Some(row_id.next()) + }) + .take(count) + .collect(); + RowId::arrow_from_slice(&row_ids) +} + impl std::fmt::Debug for Chunk { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Self { @@ -279,10 +298,11 @@ impl std::fmt::Debug for Chunk { } impl PartialEq for Chunk { + /// NOTE: the [`ChunkId`] is _not_ compared, only the data. #[inline] fn eq(&self, other: &Self) -> bool { let Self { - id, + id: _, entity_path, heap_size_bytes: _, is_sorted, @@ -291,8 +311,7 @@ impl PartialEq for Chunk { components, } = self; - *id == other.id - && *entity_path == other.entity_path + *entity_path == other.entity_path && *is_sorted == other.is_sorted && *row_ids == other.row_ids && *timelines == other.timelines @@ -303,9 +322,7 @@ impl PartialEq for Chunk { impl Chunk { /// Returns a version of us with a new [`ChunkId`]. /// - /// Reminder: - /// * The returned [`Chunk`] will re-use the exact same [`RowId`]s as `self`. - /// * Duplicated [`RowId`]s in the `ChunkStore` is undefined behavior. + /// The returned [`Chunk`] will re-use the exact same [`RowId`]s as `self`. #[must_use] #[inline] pub fn with_id(mut self, id: ChunkId) -> Self { @@ -406,16 +423,65 @@ impl Chunk { && components.0 == other.components.0 } - /// Clones the chunk and renames a component. + /// Creates a new chunk with a mapped component column. /// - /// Note: archetype information and component type information is lost. + /// The returned chunk always gets a new unique [`ChunkId`] and new auto-generated [`RowId`]s, + /// since the component data may have been modified by the closure. 
+ /// + /// When `target` is `None` the new column will carry over the [`ComponentDescriptor`] from + /// the `source` column. pub fn with_mapped_component( + &self, + source: ComponentIdentifier, + target: Option, + f: impl FnOnce(ArrowListArray) -> Result, + ) -> Result { + let mut new_components = self.components.clone(); + if let Some(old_entry) = new_components.remove(&source) { + new_components.insert(SerializedComponentColumn { + descriptor: target.unwrap_or(old_entry.descriptor), + list_array: f(old_entry.list_array)?, + }); + } + + let id = ChunkId::new(); + let row_ids = auto_row_ids(id, self.num_rows()); + + Ok(Self { + id, + entity_path: self.entity_path.clone(), + heap_size_bytes: AtomicU64::new(0), + is_sorted: true, + row_ids, + timelines: self.timelines.clone(), + components: new_components, + }) + } + + /// Creates a new chunk with a mapped component column, keeping the original [`RowId`]s. + /// + /// The returned chunk gets a new unique [`ChunkId`] but retains the original [`RowId`]s. + /// + /// In most cases, prefer [`Self::with_mapped_component`] instead, which generates new + /// [`RowId`]s to maintain the data model invariants. + /// + /// # Warning + /// + /// This is **only safe** when the closure does **not** modify the actual component data + /// (e.g. it introduces a new component column). If an existing column is modified, the + /// invariant that the value of a cell defined by (`RowId` x column descriptor) is immutable + /// will be violated, and the caller has to handle this appropriately. + /// + /// Note: archetype information and component type information is lost on the mapped component. + // + // TODO(grtlr): This should be revised when implementing caching of mapped chunks. 
+ pub fn with_shadowed_component( &self, source: ComponentIdentifier, target: ComponentIdentifier, f: impl FnOnce(ArrowListArray) -> Result, ) -> Result { - let mut new_chunk = self.clone(); + let mut new_chunk = self.clone_with_new_id(); if let Some(old_entry) = new_chunk.components.remove(&source) { new_chunk.components.insert(SerializedComponentColumn { descriptor: ComponentDescriptor { @@ -434,8 +500,15 @@ impl Chunk { impl Clone for Chunk { #[inline] fn clone(&self) -> Self { + self.clone_with_same_id() + } +} + +impl Chunk { + #[inline] + pub fn clone_with_id(&self, id: ChunkId) -> Self { Self { - id: self.id, + id, entity_path: self.entity_path.clone(), heap_size_bytes: AtomicU64::new(self.heap_size_bytes.load(Ordering::Relaxed)), is_sorted: self.is_sorted, @@ -444,6 +517,16 @@ impl Clone for Chunk { components: self.components.clone(), } } + + #[inline] + pub fn clone_with_same_id(&self) -> Self { + self.clone_with_id(self.id) + } + + #[inline] + pub fn clone_with_new_id(&self) -> Self { + self.clone_with_id(ChunkId::new()) + } } impl Chunk { @@ -469,9 +552,8 @@ impl Chunk { .collect_vec(); let new_chunk = Self { - id, row_ids: RowId::arrow_from_slice(&row_ids), - ..self.clone() + ..self.clone_with_id(id) }; // Need to reset the cache size here as `row_ids`'s capacity could have @@ -490,12 +572,18 @@ impl Chunk { } /// Clones the chunk into a new chunk where all [`RowId`]s are [`RowId::ZERO`]. + /// + /// The returned chunk always gets a new unique [`ChunkId`]. pub fn zeroed(self) -> Self { let row_ids = vec![RowId::ZERO; self.row_ids.len()]; let row_ids = RowId::arrow_from_slice(&row_ids); - let new_chunk = Self { row_ids, ..self }; + let new_chunk = Self { + id: ChunkId::new(), + row_ids, + ..self + }; // Need to reset the cache size here as `row_ids`'s capacity could have // changed here. 
@@ -787,7 +875,7 @@ pub enum TimeColumnError { ContainsNulls, #[error("Unsupported data type : {0}")] - UnsupportedDataType(DisplayDataType), + UnsupportedDataType(arrow::datatypes::DataType), } impl Chunk { @@ -852,32 +940,93 @@ impl Chunk { /// This will fail if the passed in data is malformed in any way -- see [`Self::sanity_check`] /// for details. /// - /// The data is assumed to be sorted in `RowId`-order. Sequential `RowId`s will be generated for each - /// row in the chunk. + /// The input order will NOT be respected. + /// The rows will be stably reordered so that the time columns are non-decreasing + /// (lexicographically across timelines, in deterministic timeline-name order). + /// Sequential `RowId`s are then assigned to the reordered rows. + /// This makes it less likely that we end up with "out-of-order" chunks + /// (chunks where some time columns are not sorted with respect to `RowId`). pub fn from_auto_row_ids( id: ChunkId, entity_path: EntityPath, timelines: IntMap, components: ChunkComponents, ) -> ChunkResult { + re_tracing::profile_function!(); + let count = components .list_arrays() .next() .map_or(0, |list_array| list_array.len()); - let row_ids = std::iter::from_fn({ - let tuid: re_tuid::Tuid = *id; - let mut row_id = RowId::from_tuid(tuid.next()); - move || { - let yielded = row_id; - row_id = row_id.next(); - Some(yielded) + // Compute a stable permutation that lexicographically sorts the rows by their + // time-column values. We pick a deterministic timeline order (sorted by name) so the + // result does not depend on `IntMap` iteration order. 
+ let timeline_order: Vec = timelines.keys().copied().sorted().collect(); + + let mut swaps: Vec = (0..count).collect(); + swaps.sort_by(|&a, &b| { + for name in &timeline_order { + let times = timelines[name].times_raw(); + let ord = times[a].cmp(&times[b]); + if ord != std::cmp::Ordering::Equal { + return ord; + } } - }) - .take(count) - .collect_vec(); + std::cmp::Ordering::Equal + }); - Self::from_native_row_ids(id, entity_path, Some(true), &row_ids, timelines, components) + // Build the chunk with placeholder sequential row_ids, then permute everything via + // `shuffle_with`, then reassign sequential row_ids so the chunk is RowId-sorted again. + let placeholder_row_ids = auto_row_ids(id, count); + let mut chunk = Self::new( + id, + entity_path, + Some(true), + placeholder_row_ids, + timelines, + components, + )?; + + let already_sorted = swaps + .iter() + .copied() + .enumerate() + .all(|(to, from)| to == from); + if !already_sorted { + chunk.shuffle_with(&swaps); + chunk.row_ids = auto_row_ids(chunk.id, count); + chunk.is_sorted = true; + + #[cfg(debug_assertions)] + #[expect(clippy::unwrap_used)] // dev only + chunk.sanity_check().unwrap(); + } + + Ok(chunk) + } + + /// Creates a new [`Chunk`] from columnar data. + /// + /// Pass an empty iterator for `timelines` to create static data. + /// + /// The input order will NOT be respected. + /// The rows will be stably reordered so that the time columns are non-decreasing + /// (lexicographically across timelines, in deterministic timeline-name order). + /// Sequential `RowId`s are then assigned to the reordered rows. + /// This makes it less likely that we end up with "out-of-order" chunks + /// (chunks where some time columns are not sorted with respect to `RowId`). 
+ pub fn from_columns( + entity_path: impl Into, + timelines: impl IntoIterator, + components: impl IntoIterator, + ) -> ChunkResult { + let timelines: IntMap = timelines + .into_iter() + .map(|tc| (*tc.timeline().name(), tc)) + .collect(); + let components: ChunkComponents = components.into_iter().collect(); + Self::from_auto_row_ids(ChunkId::new(), entity_path.into(), timelines, components) } /// Simple helper for [`Self::new`] for static data. @@ -924,6 +1073,7 @@ impl Chunk { &mut self, component_column: SerializedComponentColumn, ) -> ChunkResult<()> { + self.id = ChunkId::new(); self.components.insert(component_column); self.reset_cached_heap_size_bytes(); self.sanity_check() @@ -936,6 +1086,8 @@ impl Chunk { /// This will fail if the end result is malformed in any way -- see [`Self::sanity_check`]. #[inline] pub fn add_timeline(&mut self, chunk_timeline: TimeColumn) -> ChunkResult<()> { + self.id = ChunkId::new(); + self.timelines .insert(*chunk_timeline.timeline.name(), chunk_timeline); self.reset_cached_heap_size_bytes(); @@ -1164,7 +1316,7 @@ impl TimeColumn { Ok((times.values().clone(), times.nulls().cloned())) } else { Err(TimeColumnError::UnsupportedDataType( - array.data_type().clone().into(), + array.data_type().clone(), )) } } @@ -1451,6 +1603,53 @@ impl TimeColumn { }) .collect() } + + /// Find the earliest time strictly after `after` in this time column. + pub fn find_next_time(&self, after: TimeInt) -> Option { + if self.is_sorted() { + let times = self.times_raw(); + let idx = times.partition_point(|&t| t <= after.as_i64()); + (idx < times.len()).then(|| TimeInt::new_temporal(times[idx])) + } else { + self.times_raw() + .iter() + .filter(|&&t| t > after.as_i64()) + .min() + .map(|&t| TimeInt::new_temporal(t)) + } + } + + /// Find the latest time strictly before `before` in this time column. 
+ pub fn find_prev_time(&self, before: TimeInt) -> Option { + if self.is_sorted() { + let times = self.times_raw(); + let idx = times.partition_point(|&t| t < before.as_i64()); + (idx > 0).then(|| TimeInt::new_temporal(times[idx - 1])) + } else { + self.times_raw() + .iter() + .filter(|&&t| t < before.as_i64()) + .max() + .map(|&t| TimeInt::new_temporal(t)) + } + } + + /// Returns a new [`TimeColumn`] with all time values offset by `offset_ns` nanoseconds. + /// + /// Uses saturating arithmetic. + pub fn offset_by_nanos(&self, offset_ns: i64) -> Self { + let new_times: Vec = self + .times + .iter() + .map(|&t| NonMinI64::saturating_from_i64(t.saturating_add(offset_ns)).get()) + .collect(); + + Self::new( + Some(self.is_sorted), + self.timeline, + ArrowScalarBuffer::from(new_times), + ) + } } impl Chunk { diff --git a/crates/store/re_chunk/src/latest_at.rs b/crates/store/re_chunk/src/latest_at.rs index fff0323b2de8..1199ea180150 100644 --- a/crates/store/re_chunk/src/latest_at.rs +++ b/crates/store/re_chunk/src/latest_at.rs @@ -3,7 +3,7 @@ use re_byte_size::SizeBytes; use re_log_types::{TimeInt, TimelineName}; use re_types_core::ComponentIdentifier; -use crate::{Chunk, RowId}; +use crate::{Chunk, RowId, UnitChunkShared}; // --- @@ -67,28 +67,30 @@ impl LatestAtQuery { impl Chunk { /// Runs a [`LatestAtQuery`] filter on a [`Chunk`]. /// - /// This behaves as a row-based filter: the result is a new [`Chunk`] that is vertically + /// This behaves as a row-based filter: the result is a [`UnitChunkShared`] that is vertically /// sliced to only contain the row relevant for the specified `query`. /// - /// The resulting [`Chunk`] is guaranteed to contain all the same columns has the queried + /// The resulting chunk is guaranteed to contain all the same columns as the queried /// chunk: there is no horizontal slicing going on. /// - /// An empty [`Chunk`] (i.e. 0 rows, but N columns) is returned if the `query` yields nothing. 
+ /// Returns `None` if the `query` yields nothing. /// /// Because the resulting chunk doesn't discard any column information, you can find extra relevant /// information by inspecting the data, for examples timestamps on other timelines. /// See [`Self::timeline_sliced`] and [`Self::component_sliced`] if you do want to filter this /// extra data. - pub fn latest_at(&self, query: &LatestAtQuery, component: ComponentIdentifier) -> Self { + pub fn latest_at( + &self, + query: &LatestAtQuery, + component: ComponentIdentifier, + ) -> Option { if self.is_empty() { - return self.clone(); + return None; } re_tracing::profile_function!(format!("{query:?}")); - let Some(component_list_array) = self.components.get_array(component) else { - return self.emptied(); - }; + let component_list_array = self.components.get_array(component)?; let mut index = None; @@ -126,9 +128,7 @@ impl Chunk { } } } else { - let Some(time_column) = self.timelines.get(&query.timeline()) else { - return self.emptied(); - }; + let time_column = self.timelines.get(&query.timeline())?; let is_sorted_by_time = time_column.is_sorted(); let times = time_column.times_raw(); @@ -174,6 +174,6 @@ impl Chunk { } } - index.map_or_else(|| self.emptied(), |i| self.row_sliced_shallow(i, 1)) + index.map(|i| self.row_sliced_unit_shallow(i)) } } diff --git a/crates/store/re_chunk/src/lib.rs b/crates/store/re_chunk/src/lib.rs index 6a6270513b7d..dd912dd0e016 100644 --- a/crates/store/re_chunk/src/lib.rs +++ b/crates/store/re_chunk/src/lib.rs @@ -6,7 +6,6 @@ mod builder; mod chunk; -mod helpers; mod iter; mod latest_at; mod merge; @@ -15,6 +14,7 @@ mod shuffle; mod slice; mod split; mod transport; +mod unit_chunk; #[cfg(not(target_arch = "wasm32"))] mod batcher; @@ -37,13 +37,13 @@ pub use self::builder::{ChunkBuilder, TimeColumnBuilder}; pub use self::chunk::{ Chunk, ChunkComponents, ChunkError, ChunkResult, TimeColumn, TimeColumnError, }; -pub use self::helpers::{ChunkShared, UnitChunkShared}; pub use self::iter::{ 
ChunkComponentIter, ChunkComponentIterItem, ChunkComponentSlicer, ChunkIndicesIter, }; pub use self::latest_at::LatestAtQuery; pub use self::range::{RangeQuery, RangeQueryOptions}; pub use self::split::ChunkSplitConfig; +pub use self::unit_chunk::{ChunkShared, UnitChunkShared}; pub mod external { #[cfg(not(target_arch = "wasm32"))] diff --git a/crates/store/re_chunk/src/merge.rs b/crates/store/re_chunk/src/merge.rs index 9f2fd7901513..5a64d24c60ed 100644 --- a/crates/store/re_chunk/src/merge.rs +++ b/crates/store/re_chunk/src/merge.rs @@ -11,6 +11,40 @@ use crate::{Chunk, ChunkError, ChunkId, ChunkResult, TimeColumn}; // --- impl Chunk { + /// Concatenates the chunk with the smaller minimum `RowId` first, then sorts the result if needed. + pub fn concat_and_sort(left: &Self, right: &Self) -> ChunkResult { + re_tracing::profile_function!(); + + let left_rowid_min = left.row_id_range().map(|(min, _)| min); + let right_rowid_min = right.row_id_range().map(|(min, _)| min); + let mut compacted = if left_rowid_min < right_rowid_min { + left.concatenated(right)? + } else { + right.concatenated(left)? + }; + + compacted.sort_if_unsorted(); + + // Sanity check that timelines haven't become unsorted. + // If they have, we have an unsorted timeline, which is good to know about. + + for (name, column) in compacted.timelines() { + if !column.is_sorted() { + let left_was_sorted = left.timelines().get(name).is_none_or(|c| c.is_sorted()); + let right_was_sorted = right.timelines().get(name).is_none_or(|c| c.is_sorted()); + + if left_was_sorted && right_was_sorted { + let entity_path = compacted.entity_path(); + re_log::debug_warn_once!( + "Timeline '{name}' became unsorted after concatenation for entity '{entity_path}'. This may cause performance issues." + ); + } + } + } + + Ok(compacted) + } + /// Concatenates two `Chunk`s into a new one. 
/// /// The order of the arguments matter: `self`'s contents will precede `rhs`' contents in the diff --git a/crates/store/re_chunk/src/range.rs b/crates/store/re_chunk/src/range.rs index 2268179b1d9f..341dc776f8c1 100644 --- a/crates/store/re_chunk/src/range.rs +++ b/crates/store/re_chunk/src/range.rs @@ -204,6 +204,9 @@ impl Chunk { /// extra data. // // TODO(RR-3865): Use arrow's `ListView` to avoid cloning data when the chunk requires sorting. + // + /// The returned [`Chunk`] always gets a new [`crate::ChunkId`], unless the input chunk + /// is empty (in which case it's cloned as-is with the original ID). pub fn range(&self, query: &RangeQuery, component: ComponentIdentifier) -> Self { if self.is_empty() { return self.clone(); @@ -220,25 +223,29 @@ impl Chunk { // Pre-slice the data if the caller allowed us: this will make further slicing // (e.g. the range query itself) much cheaper to compute. use std::borrow::Cow; - let chunk = if !keep_extra_timelines { - Cow::Owned(self.timeline_sliced(*query.timeline())) - } else { + let chunk = if keep_extra_timelines { Cow::Borrowed(self) - }; - let chunk = if !keep_extra_components { - Cow::Owned(chunk.component_sliced(component)) } else { + Cow::Owned(self.timeline_sliced(*query.timeline())) + }; + let chunk = if keep_extra_components { chunk + } else { + Cow::Owned(chunk.component_sliced(component)) }; if chunk.is_static() { // NOTE: A given component for a given entity can only have one static entry associated // with it, and this entry overrides everything else, which means it is functionally // equivalent to just running a latest-at query. 
- chunk.latest_at( + if let Some(unit) = chunk.latest_at( &crate::LatestAtQuery::new(*query.timeline(), TimeInt::MAX), component, - ) + ) { + std::sync::Arc::unwrap_or_clone(unit.into_chunk()) + } else { + chunk.emptied() + } } else { let Some(is_sorted_by_time) = chunk .timelines diff --git a/crates/store/re_chunk/src/shuffle.rs b/crates/store/re_chunk/src/shuffle.rs index 34f92c2e8184..4c75bfe1aa7a 100644 --- a/crates/store/re_chunk/src/shuffle.rs +++ b/crates/store/re_chunk/src/shuffle.rs @@ -3,7 +3,7 @@ use arrow::buffer::{OffsetBuffer as ArrowOffsets, ScalarBuffer as ArrowScalarBuf use itertools::Itertools as _; use re_log_types::TimelineName; -use crate::{Chunk, TimeColumn}; +use crate::{Chunk, ChunkId, TimeColumn}; // --- @@ -67,6 +67,8 @@ impl Chunk { /// Sort the chunk, if needed. /// /// The underlying arrow data will be copied and shuffled in memory in order to make it contiguous. + /// + /// If the chunk changes, it is given a new unique [`ChunkId`]. #[inline] pub fn sort_if_unsorted(&mut self) { if self.is_sorted() { @@ -75,6 +77,8 @@ impl Chunk { re_tracing::profile_function!(); + self.id = ChunkId::new(); + #[cfg(not(target_arch = "wasm32"))] let now = std::time::Instant::now(); @@ -107,19 +111,20 @@ impl Chunk { /// /// This is a no-op if the underlying timeline is already sorted appropriately (happy path). /// - /// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`]. + /// If the chunk is already sorted, the original [`crate::ChunkId`] is preserved. + /// Otherwise, the returned chunk gets a new unique [`crate::ChunkId`]. 
#[must_use] pub fn sorted_by_timeline_if_unsorted(&self, timeline: &TimelineName) -> Self { - let mut chunk = self.clone(); - - let Some(time_column) = chunk.timelines.get(timeline) else { - return chunk; + let Some(time_column) = self.timelines.get(timeline) else { + return self.clone_with_same_id(); }; if time_column.is_sorted() { - return chunk; + return self.clone_with_same_id(); } + let mut chunk = self.clone_with_new_id(); + re_tracing::profile_function!(); #[cfg(not(target_arch = "wasm32"))] @@ -155,6 +160,7 @@ impl Chunk { /// /// The underlying arrow data will be copied and shuffled in memory in order to make it contiguous. #[inline] + #[cfg(debug_assertions)] // only for tests pub fn shuffle_random(&mut self, seed: u64) { re_tracing::profile_function!(); @@ -194,6 +200,8 @@ impl Chunk { pub(crate) fn shuffle_with(&mut self, swaps: &[usize]) { re_tracing::profile_function!(); + self.id = ChunkId::new(); + // Row IDs { re_tracing::profile_scope!("row ids"); @@ -306,6 +314,7 @@ impl TimeColumn { mod tests { use re_log_types::example_components::{MyColor, MyPoint, MyPoints}; use re_log_types::{EntityPath, Timeline}; + use re_types_core::ComponentBatch as _; use super::*; use crate::{ChunkId, RowId}; @@ -599,4 +608,74 @@ mod tests { Ok(()) } + + /// `Chunk::from_auto_row_ids` should reorder its inputs so that the time columns become + /// sorted as well as possible, to avoid "out-of-order" chunks where some time columns are not sorted with respect to `RowId`. + #[test] + fn from_auto_row_ids_sorts_lexicographically() -> anyhow::Result<()> { + let entity_path: EntityPath = "a/b/c".into(); + + // Two timelines named such that "alpha" sorts before "beta". 
+ // Construct deliberately unsorted inputs: + // row | alpha | beta | color + // ----|-------|------|------ + // 0 | 2 | 5 | 100 + // 1 | 1 | 9 | 200 + // 2 | 1 | 7 | 300 + // 3 | 2 | 3 | 400 + // 4 | 1 | 9 | 500 (duplicate of row 1's key — tests stability) + // + // After lex-sort by (alpha, beta) the expected row order is: + // 2 (1,7,300), 1 (1,9,200), 4 (1,9,500), 3 (2,3,400), 0 (2,5,100) + let alpha = TimeColumn::new_sequence("alpha", [2_i64, 1, 1, 2, 1]); + let beta = TimeColumn::new_sequence("beta", [5_i64, 9, 7, 3, 9]); + + let colors = vec![ + MyColor(100), + MyColor(200), + MyColor(300), + MyColor(400), + MyColor(500), + ]; + let colors_array = colors.to_arrow_list_array()?; + + // `Chunk::from_columns` uses `Chunk::from_auto_row_ids`. + let chunk = Chunk::from_columns( + entity_path, + [alpha, beta], + [(MyPoints::descriptor_colors(), colors_array)], + )?; + + eprintln!("{chunk}"); + + assert!(chunk.is_sorted()); + assert!(chunk.is_sorted_uncached()); + + let alpha = chunk.timelines().get(&"alpha".into()).unwrap(); + let beta = chunk.timelines().get(&"beta".into()).unwrap(); + + // The primary timeline (alphabetically first) must be globally sorted; the secondary + // one is only sorted within each primary-key group. + assert!(alpha.is_sorted()); + assert!(!beta.is_sorted()); + + assert_eq!(alpha.times_raw().to_vec(), vec![1, 1, 1, 2, 2]); + assert_eq!(beta.times_raw().to_vec(), vec![7, 9, 9, 3, 5]); + + // Verify the components were permuted in lockstep with the time columns, + // and that ties (rows 1 and 4) preserved their original order (stable sort). + let got_colors: Vec = chunk + .iter_slices::(MyPoints::descriptor_colors().component) + .flat_map(<[u32]>::to_vec) + .collect(); + assert_eq!(got_colors, vec![300, 200, 500, 400, 100]); + + // RowIds must be sequential ascending. 
+ let row_ids: Vec<_> = chunk.row_ids().collect(); + for w in row_ids.windows(2) { + assert!(w[0] < w[1], "row_ids must be strictly ascending"); + } + + Ok(()) + } } diff --git a/crates/store/re_chunk/src/slice.rs b/crates/store/re_chunk/src/slice.rs index 9b041399181d..ea5da5d1d917 100644 --- a/crates/store/re_chunk/src/slice.rs +++ b/crates/store/re_chunk/src/slice.rs @@ -7,7 +7,7 @@ use nohash_hasher::IntSet; use re_log_types::TimelineName; use re_types_core::{ComponentIdentifier, SerializedComponentColumn}; -use crate::{Chunk, RowId, TimeColumn, UnitChunkShared}; +use crate::{Chunk, ChunkId, RowId, TimeColumn, UnitChunkShared}; // --- @@ -18,8 +18,6 @@ impl Chunk { /// Returns the cell corresponding to the specified [`RowId`] for a given [`re_types_core::ComponentIdentifier`]. /// /// This is `O(log(n))` if `self.is_sorted()`, and `O(n)` otherwise. - /// - /// Reminder: duplicated `RowId`s results in undefined behavior. pub fn cell(&self, row_id: RowId, component: ComponentIdentifier) -> Option { let list_array = self.components.get_array(component)?; @@ -41,7 +39,7 @@ impl Chunk { /// run out of bounds. /// This can result in an empty [`Chunk`] being returned if the slice is completely OOB. /// - /// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`]. + /// The returned chunk always gets a new unique [`crate::ChunkId`]. /// /// ## When to use shallow vs. deep slicing? /// @@ -65,8 +63,11 @@ impl Chunk { /// /// See also [`Self::row_sliced_shallow`]. pub fn row_sliced_unit_shallow(&self, index: usize) -> UnitChunkShared { + let original_chunk_id = self.id(); #[expect(clippy::unwrap_used)] // cannot fail: we always have exactly one row - self.row_sliced_shallow(index, 1).into_unit().unwrap() + self.row_sliced_shallow(index, 1) + .into_unit_with_original_chunk_id(original_chunk_id) + .unwrap() } /// Deep-slices the [`Chunk`] vertically. @@ -77,7 +78,7 @@ impl Chunk { /// run out of bounds. 
/// This can result in an empty [`Chunk`] being returned if the slice is completely OOB. /// - /// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`]. + /// The returned chunk always gets a new unique [`crate::ChunkId`]. /// /// ## When to use shallow vs. deep slicing? /// @@ -102,7 +103,7 @@ impl Chunk { re_tracing::profile_function!(if deep { "deep" } else { "shallow" }); let Self { - id, + id: _, entity_path, heap_size_bytes: _, is_sorted, @@ -128,7 +129,7 @@ impl Chunk { let is_sorted = *is_sorted || (len < 2); let mut chunk = Self { - id: *id, + id: ChunkId::new(), entity_path: entity_path.clone(), heap_size_bytes: Default::default(), is_sorted, @@ -177,10 +178,6 @@ impl Chunk { // The original chunk is unsorted, but the new sliced one actually ends up being sorted. chunk.is_sorted = is_sorted || chunk.is_sorted_uncached(); - #[cfg(debug_assertions)] - #[expect(clippy::unwrap_used)] // debug-only - chunk.sanity_check().unwrap(); - chunk } @@ -192,12 +189,12 @@ impl Chunk { /// If `timeline` is not found within the [`Chunk`], the end result will be the same as the /// current chunk but without any timeline column. /// - /// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`]. + /// The returned chunk always gets a new unique [`crate::ChunkId`]. #[must_use] #[inline] pub fn timeline_sliced(&self, timeline: TimelineName) -> Self { let Self { - id, + id: _, entity_path, heap_size_bytes: _, is_sorted, @@ -207,7 +204,7 @@ impl Chunk { } = self; let chunk = Self { - id: *id, + id: ChunkId::new(), entity_path: entity_path.clone(), heap_size_bytes: Default::default(), is_sorted: *is_sorted, @@ -227,7 +224,7 @@ impl Chunk { chunk } - /// Slices the [`Chunk`] horizontally by keeping only the selected `component`. + /// Slices the [`Chunk`] by keeping only the selected `component` column. /// /// The result is a new [`Chunk`] with the same rows and (at-most) one component column. 
/// All non-component columns will be kept as-is. @@ -235,12 +232,42 @@ impl Chunk { /// If `component` is not found within the [`Chunk`], the end result will be the same as the /// current chunk but without any component column. /// - /// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`]. + /// The returned chunk always gets a new unique [`crate::ChunkId`]. #[must_use] #[inline] pub fn component_sliced(&self, component: ComponentIdentifier) -> Self { + self.components_sliced(&[component]) + } + + /// Slices the [`Chunk`] by removing the specified `component` column. + /// + /// The result is a new [`Chunk`] with the same rows and all component columns + /// except the one matching `component`. All non-component columns are kept as-is. + /// + /// If `component` is not found within the [`Chunk`], the chunk is returned unchanged + /// (preserving the original [`crate::ChunkId`]). + /// Otherwise, the returned chunk gets a new unique [`crate::ChunkId`]. + #[must_use] + #[inline] + pub fn component_dropped(&self, component: ComponentIdentifier) -> Self { + self.components_dropped(&[component]) + } + + /// Slices the [`Chunk`] by keeping only the listed component columns. + /// + /// The result is a new [`Chunk`] with the same rows but only the component columns + /// whose [`ComponentIdentifier`] appears in `components_to_keep`. + /// All non-component columns (entity path, timelines, row IDs) are preserved. + /// + /// If none of the listed components exist in the [`Chunk`], the end result will be the same as the + /// current chunk but without any component column. + /// + /// The returned chunk always gets a new unique [`crate::ChunkId`]. 
+ #[must_use] + #[inline] + pub fn components_sliced(&self, components_to_keep: &[ComponentIdentifier]) -> Self { let Self { - id, + id: _, entity_path, heap_size_bytes: _, is_sorted, @@ -250,21 +277,78 @@ impl Chunk { } = self; let chunk = Self { - id: *id, + id: ChunkId::new(), + entity_path: entity_path.clone(), + heap_size_bytes: Default::default(), + is_sorted: *is_sorted, + row_ids: row_ids.clone(), + timelines: timelines.clone(), + components: components_to_keep + .iter() + .filter_map(|c| { + components.get(*c).map(|column| { + SerializedComponentColumn::new( + column.list_array.clone(), + column.descriptor.clone(), + ) + }) + }) + .collect(), + }; + + #[cfg(debug_assertions)] + #[expect(clippy::unwrap_used)] // debug-only + chunk.sanity_check().unwrap(); + + chunk + } + + /// Slices the [`Chunk`] by removing the listed component columns. + /// + /// The result is a new [`Chunk`] with the same rows and all component columns + /// except those whose [`ComponentIdentifier`] appears in `components_to_drop`. + /// All non-component columns are kept as-is. + /// + /// If none of the listed components exist in the [`Chunk`], the chunk is returned unchanged + /// (preserving the original [`crate::ChunkId`]). + /// Otherwise, the returned chunk gets a new unique [`crate::ChunkId`]. 
+ #[must_use] + #[inline] + pub fn components_dropped(&self, components_to_drop: &[ComponentIdentifier]) -> Self { + if !self + .components + .keys() + .any(|id| components_to_drop.contains(id)) + { + return self.clone_with_same_id(); + } + + let Self { + id: _, + entity_path, + heap_size_bytes: _, + is_sorted, + row_ids, + timelines, + components, + } = self; + + let chunk = Self { + id: ChunkId::new(), entity_path: entity_path.clone(), heap_size_bytes: Default::default(), is_sorted: *is_sorted, row_ids: row_ids.clone(), timelines: timelines.clone(), components: components - .get(component) - .map(|column| { + .iter() + .filter(|(id, _)| !components_to_drop.contains(id)) + .map(|(_id, column)| { SerializedComponentColumn::new( column.list_array.clone(), column.descriptor.clone(), ) }) - .into_iter() .collect(), }; @@ -283,12 +367,12 @@ impl Chunk { /// If none of the selected timelines exist in the [`Chunk`], the end result will be the same as the /// current chunk but without any timeline column. /// - /// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`]. + /// The returned chunk always gets a new unique [`crate::ChunkId`]. #[must_use] #[inline] pub fn timelines_sliced(&self, timelines_to_keep: &IntSet) -> Self { let Self { - id, + id: _, entity_path, heap_size_bytes: _, is_sorted, @@ -298,7 +382,7 @@ impl Chunk { } = self; let chunk = Self { - id: *id, + id: ChunkId::new(), entity_path: entity_path.clone(), heap_size_bytes: Default::default(), is_sorted: *is_sorted, @@ -326,17 +410,17 @@ impl Chunk { /// The result is a new [`Chunk`] where the `component_pov` column is guaranteed to be dense. /// /// If `component_pov` doesn't exist in this [`Chunk`], or if it is already dense, this method - /// is a no-op. + /// is a no-op (preserving the original [`crate::ChunkId`]). /// /// Returns `false` if the operation was a no-op (i.e. the chunk was already dense), true otherwise /// (i.e. the data had to be reallocated). 
/// - /// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`]. + /// If not a no-op, the returned chunk gets a new unique [`crate::ChunkId`]. #[must_use] #[inline] pub fn densified(&self, component_pov: ComponentIdentifier) -> (Self, bool) { let Self { - id, + id: _, entity_path, heap_size_bytes: _, is_sorted, @@ -346,15 +430,15 @@ impl Chunk { } = self; if self.is_empty() { - return (self.clone(), false); + return (self.clone_with_same_id(), false); } let Some(component_list_array) = self.components.get_array(component_pov) else { - return (self.clone(), false); + return (self.clone_with_same_id(), false); }; let Some(validity) = component_list_array.nulls() else { - return (self.clone(), false); + return (self.clone_with_same_id(), false); }; re_tracing::profile_function!(); @@ -364,7 +448,7 @@ impl Chunk { let validity_filter = ArrowBooleanArray::from(mask); let mut chunk = Self { - id: *id, + id: ChunkId::new(), entity_path: entity_path.clone(), heap_size_bytes: Default::default(), is_sorted, @@ -422,12 +506,12 @@ impl Chunk { /// /// The result is a new [`Chunk`] with the same columns but zero rows. /// - /// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`]. + /// The returned chunk always gets a new unique [`crate::ChunkId`]. #[must_use] #[inline] pub fn emptied(&self) -> Self { let Self { - id, + id: _, entity_path, heap_size_bytes: _, is_sorted: _, @@ -439,7 +523,7 @@ impl Chunk { re_tracing::profile_function!(); Self { - id: *id, + id: ChunkId::new(), entity_path: entity_path.clone(), heap_size_bytes: Default::default(), is_sorted: true, @@ -469,12 +553,12 @@ impl Chunk { /// The result is a new [`Chunk`] with the same number of rows and the same index columns, but /// no components. /// - /// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`]. + /// The returned chunk always gets a new unique [`crate::ChunkId`]. 
#[must_use] #[inline] pub fn components_removed(self) -> Self { let Self { - id, + id: _, entity_path, heap_size_bytes: _, is_sorted, @@ -484,7 +568,7 @@ impl Chunk { } = self; Self { - id, + id: ChunkId::new(), entity_path, heap_size_bytes: Default::default(), // (!) lazily recompute is_sorted, @@ -544,7 +628,7 @@ impl Chunk { }; let chunk = Self { - id: self.id, + id: ChunkId::new(), entity_path: self.entity_path.clone(), heap_size_bytes: Default::default(), is_sorted: self.is_sorted, @@ -587,12 +671,13 @@ impl Chunk { /// /// [filter]: arrow::compute::kernels::filter /// - /// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`]. + /// If the chunk is empty, the original [`crate::ChunkId`] is preserved. + /// Otherwise, the returned chunk gets a new unique [`crate::ChunkId`]. #[must_use] #[inline] pub fn filtered(&self, filter: &ArrowBooleanArray) -> Option { let Self { - id, + id: _, entity_path, heap_size_bytes: _, is_sorted, @@ -607,7 +692,7 @@ impl Chunk { } if self.is_empty() { - return Some(self.clone()); + return Some(self.clone_with_same_id()); } let num_filtered = filter.values().iter().filter(|&b| b).count(); @@ -620,7 +705,7 @@ impl Chunk { let is_sorted = *is_sorted || num_filtered < 2; let mut chunk = Self { - id: *id, + id: ChunkId::new(), entity_path: entity_path.clone(), heap_size_bytes: Default::default(), is_sorted, @@ -673,12 +758,12 @@ impl Chunk { /// /// [take]: arrow::compute::kernels::take /// - /// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`]. + /// The returned chunk always gets a new unique [`crate::ChunkId`]. 
#[must_use] #[inline] pub fn taken(&self, indices: &arrow::array::Int32Array) -> Self { let Self { - id, + id: _, entity_path, heap_size_bytes: _, is_sorted, @@ -700,7 +785,7 @@ impl Chunk { let is_sorted = *is_sorted || (indices.len() < 2); let mut chunk = Self { - id: *id, + id: ChunkId::new(), entity_path: entity_path.clone(), heap_size_bytes: Default::default(), is_sorted, @@ -1709,4 +1794,138 @@ mod tests { Ok(()) } + + #[test] + fn components_sliced() -> anyhow::Result<()> { + let points_descr = MyPoints::descriptor_points(); + let colors_descr = MyPoints::descriptor_colors(); + let labels_descr = MyPoints::descriptor_labels(); + + let row_id = RowId::new(); + let timepoint = [(Timeline::new_sequence("frame"), 1)]; + + let chunk = Chunk::builder("my/entity") + .with_sparse_component_batches( + row_id, + timepoint, + [ + (points_descr.clone(), Some(&[MyPoint::new(1.0, 2.0)] as _)), + ( + colors_descr.clone(), + Some(&[MyColor::from_rgb(255, 0, 0)] as _), + ), + (labels_descr.clone(), Some(&[MyLabel("hello".into())] as _)), + ], + ) + .build()?; + assert_eq!(chunk.num_components(), 3); + + // Keep points and labels — colors removed + let sliced = chunk.components_sliced(&[points_descr.component, labels_descr.component]); + assert_eq!(sliced.num_components(), 2); + assert!( + sliced + .components() + .contains_component(points_descr.component) + ); + assert!( + !sliced + .components() + .contains_component(colors_descr.component) + ); + assert!( + sliced + .components() + .contains_component(labels_descr.component) + ); + assert_eq!(sliced.num_rows(), chunk.num_rows()); + assert_ne!(sliced.id(), chunk.id()); + + // Partial: keep [points, nonexistent] — only points survives + let partial = chunk.components_sliced(&[ + points_descr.component, + ComponentIdentifier::from("Nonexistent:foo"), + ]); + assert_eq!(partial.num_components(), 1); + assert!( + partial + .components() + .contains_component(points_descr.component) + ); + + // None present — empty component 
set + let empty = chunk.components_sliced(&[ + ComponentIdentifier::from("Nonexistent:a"), + ComponentIdentifier::from("Nonexistent:b"), + ]); + assert_eq!(empty.num_components(), 0); + assert_eq!(empty.num_rows(), chunk.num_rows()); + + Ok(()) + } + + #[test] + fn components_dropped() -> anyhow::Result<()> { + let points_descr = MyPoints::descriptor_points(); + let colors_descr = MyPoints::descriptor_colors(); + let labels_descr = MyPoints::descriptor_labels(); + + let row_id = RowId::new(); + let timepoint = [(Timeline::new_sequence("frame"), 1)]; + + let chunk = Chunk::builder("my/entity") + .with_sparse_component_batches( + row_id, + timepoint, + [ + (points_descr.clone(), Some(&[MyPoint::new(1.0, 2.0)] as _)), + ( + colors_descr.clone(), + Some(&[MyColor::from_rgb(255, 0, 0)] as _), + ), + (labels_descr.clone(), Some(&[MyLabel("hello".into())] as _)), + ], + ) + .build()?; + assert_eq!(chunk.num_components(), 3); + + // Drop points and colors — labels remain + let dropped = chunk.components_dropped(&[points_descr.component, colors_descr.component]); + assert_eq!(dropped.num_components(), 1); + assert!( + !dropped + .components() + .contains_component(points_descr.component) + ); + assert!( + !dropped + .components() + .contains_component(colors_descr.component) + ); + assert!( + dropped + .components() + .contains_component(labels_descr.component) + ); + assert_eq!(dropped.num_rows(), chunk.num_rows()); + assert_ne!(dropped.id(), chunk.id()); + + // Drop nonexistent — chunk unchanged (same id) + let noop = chunk.components_dropped(&[ + ComponentIdentifier::from("Nonexistent:a"), + ComponentIdentifier::from("Nonexistent:b"), + ]); + assert_eq!(noop.num_components(), 3); + assert_eq!(noop.id(), chunk.id()); + + // Drop all — empty component set + let all_dropped = chunk.components_dropped(&[ + points_descr.component, + colors_descr.component, + labels_descr.component, + ]); + assert_eq!(all_dropped.num_components(), 0); + + Ok(()) + } } diff --git 
a/crates/store/re_chunk/src/split.rs b/crates/store/re_chunk/src/split.rs index 3b7671c0c281..f5647065edb6 100644 --- a/crates/store/re_chunk/src/split.rs +++ b/crates/store/re_chunk/src/split.rs @@ -1,7 +1,6 @@ use std::sync::Arc; use re_byte_size::SizeBytes; -use re_types_core::ChunkId; use crate::Chunk; @@ -79,9 +78,7 @@ impl Chunk { let remaining_rows = chunk.num_rows() - start_idx; let chunk_size = remaining_rows.min(target_rows); - let split_chunk = chunk - .row_sliced_deep(start_idx, chunk_size) - .with_id(ChunkId::new()); + let split_chunk = chunk.row_sliced_deep(start_idx, chunk_size); result.push(Arc::new(split_chunk)); diff --git a/crates/store/re_chunk/src/helpers.rs b/crates/store/re_chunk/src/unit_chunk.rs similarity index 84% rename from crates/store/re_chunk/src/helpers.rs rename to crates/store/re_chunk/src/unit_chunk.rs index aa9eecef2e9d..efe42000f96a 100644 --- a/crates/store/re_chunk/src/helpers.rs +++ b/crates/store/re_chunk/src/unit_chunk.rs @@ -5,7 +5,7 @@ use re_log::debug_assert; use re_log_types::{TimeInt, TimelineName}; use re_types_core::{Component, ComponentIdentifier}; -use crate::{Chunk, ChunkResult, RowId}; +use crate::{Chunk, ChunkId, ChunkResult, RowId}; // --- Helpers --- @@ -169,22 +169,46 @@ impl Chunk { pub type ChunkShared = Arc; /// A [`ChunkShared`] that is guaranteed to always contain a single row's worth of data. -#[derive(Debug, Clone, PartialEq)] -pub struct UnitChunkShared(ChunkShared); +#[derive(Debug, Clone)] +pub struct UnitChunkShared { + chunk: ChunkShared, + + /// The [`ChunkId`] of the original chunk this unit was derived from. + /// + /// If the original chunk was just a single row to begin with, then this is the same as `chunk.id()`. + /// + /// This is the ID that the `ChunkStore` knows about, + /// as opposed to [`Chunk::id`] which is always unique per derived chunk. + original_chunk_id: ChunkId, // TODO(emilk): consider adding this to `Chunk` directly, perhaps as a `Vec`. 
+} + +impl PartialEq for UnitChunkShared { + /// NOTE: the [`Self::original_chunk_id`] is _not_ compared, only the data. + #[inline] + fn eq(&self, other: &Self) -> bool { + self.chunk == other.chunk + } +} impl std::ops::Deref for UnitChunkShared { type Target = Chunk; #[inline] fn deref(&self) -> &Self::Target { - &self.0 + &self.chunk + } +} + +impl std::fmt::Display for UnitChunkShared { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.chunk.fmt(f) } } impl re_byte_size::SizeBytes for UnitChunkShared { #[inline] fn heap_size_bytes(&self) -> u64 { - Chunk::heap_size_bytes(&self.0) + Chunk::heap_size_bytes(&self.chunk) } } @@ -192,13 +216,34 @@ impl Chunk { /// Turns the chunk into a [`UnitChunkShared`], if possible. #[inline] pub fn to_unit(self: &ChunkShared) -> Option { - (self.num_rows() == 1).then(|| UnitChunkShared(Arc::clone(self))) + (self.num_rows() == 1).then(|| UnitChunkShared { + original_chunk_id: self.id(), + chunk: Arc::clone(self), + }) } /// Turns the chunk into a [`UnitChunkShared`], if possible. #[inline] pub fn into_unit(self) -> Option { - (self.num_rows() == 1).then(|| UnitChunkShared(Arc::new(self))) + (self.num_rows() == 1).then(|| UnitChunkShared { + original_chunk_id: self.id(), + chunk: Arc::new(self), + }) + } + + /// Turns the chunk into a [`UnitChunkShared`], if possible. + /// + /// `original_chunk_id` is the [`ChunkId`] of the chunk this was derived from, + /// i.e. the id that the `ChunkStore` knows about. + #[inline] + pub fn into_unit_with_original_chunk_id( + self, + original_chunk_id: ChunkId, + ) -> Option { + (self.num_rows() == 1).then(|| UnitChunkShared { + original_chunk_id, + chunk: Arc::new(self), + }) } } @@ -206,7 +251,16 @@ impl UnitChunkShared { /// Turns the unit chunk back into a standard [`Chunk`]. #[inline] pub fn into_chunk(self) -> ChunkShared { - self.0 + self.chunk + } + + /// The [`ChunkId`] of the original chunk this unit was derived from. 
+ /// + /// This is the ID that the `ChunkStore` knows about, + /// as opposed to [`Chunk::id`] which is always unique per derived chunk. + #[inline] + pub fn original_chunk_id(&self) -> ChunkId { + self.original_chunk_id } } diff --git a/crates/store/re_chunk/tests/latest_at.rs b/crates/store/re_chunk/tests/latest_at.rs index 75ce63dfbaab..d643eab16d4b 100644 --- a/crates/store/re_chunk/tests/latest_at.rs +++ b/crates/store/re_chunk/tests/latest_at.rs @@ -542,20 +542,24 @@ fn query_and_compare( eprintln!("Query: {component_desc} @ {query:?}"); eprintln!("Data:\n{chunk}"); - eprintln!("Expected:\n{expected}"); - eprintln!("Results:\n{results}"); - - assert_eq!( - *expected, - results, - "{}", - similar_asserts::SimpleDiff::from_str( - &format!("{results}"), - &format!("{expected}"), - // &format!("{results:#?}"), - // &format!("{expected:#?}"), - "got", - "expected", - ), - ); + + if expected.is_empty() { + assert!(results.is_none(), "Expected no results, but got some"); + } else { + let results = results.expect("Expected latest_at to return a result"); + eprintln!("Expected:\n{expected}"); + eprintln!("Results:\n{results}"); + + assert_eq!( + expected, + &*results, + "{}", + similar_asserts::SimpleDiff::from_str( + &format!("{results}"), + &format!("{expected}"), + "got", + "expected", + ), + ); + } } diff --git a/crates/store/re_chunk/tests/snapshots/formatting__format_chunk.snap b/crates/store/re_chunk/tests/snapshots/formatting__format_chunk.snap index 97c0a8a71014..439508f85827 100644 --- a/crates/store/re_chunk/tests/snapshots/formatting__format_chunk.snap +++ b/crates/store/re_chunk/tests/snapshots/formatting__format_chunk.snap @@ -2,22 +2,22 @@ source: crates/store/re_chunk/tests/formatting.rs expression: "format!(\"{:240}\", chunk)" --- -┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: 
/this/that │ -│ * id: chunk_0000000000661EFDf2e3b19f7c045f15 │ +┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /this/that │ +│ * id: chunk_0000000000661EFDf2e3b19f7c045f15 │ │ * version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌──────────────────────────────────────────────┬──────────────────────┬───────────────────────────────┬───────────────────────────────────┬────────────────────────────────────┐ │ -│ │ RowId ┆ frame_nr ┆ log_time ┆ my_index ┆ example.MyPoints:colors │ │ -│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u64] ┆ type: nullable List[nullable u32] │ │ -│ │ ARROW:extension:metadata: ┆ index_name: frame_nr ┆ index_name: log_time ┆ component: my_index ┆ archetype: example.MyPoints │ │ -│ │ {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ component_type: example.MyIndex ┆ component: example.MyPoints:colors │ │ -│ │ ARROW:extension:name: TUID ┆ kind: index ┆ kind: index ┆ kind: data ┆ component_type: example.MyColor │ │ -│ │ is_sorted: true ┆ ┆ ┆ ┆ kind: data │ │ -│ │ kind: control ┆ ┆ ┆ ┆ │ │ -│ ╞══════════════════════════════════════════════╪══════════════════════╪═══════════════════════════════╪═══════════════════════════════════╪════════════════════════════════════╡ │ -│ │ row_0000000067816A6Bb4b8c1254d40007b ┆ 1 ┆ 2025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │ -│ └──────────────────────────────────────────────┴──────────────────────┴───────────────────────────────┴───────────────────────────────────┴────────────────────────────────────┘ │ 
-└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌──────────────────────────────────────────────┬──────────────────────┬───────────────────────────────┬─────────────────────────────────┬────────────────────────────────────┐ │ +│ │ RowId ┆ frame_nr ┆ log_time ┆ my_index ┆ example.MyPoints:colors │ │ +│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt64) ┆ type: List(UInt32) │ │ +│ │ ARROW:extension:metadata: ┆ index_name: frame_nr ┆ index_name: log_time ┆ component: my_index ┆ archetype: example.MyPoints │ │ +│ │ {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ component_type: example.MyIndex ┆ component: example.MyPoints:colors │ │ +│ │ ARROW:extension:name: TUID ┆ kind: index ┆ kind: index ┆ kind: data ┆ component_type: example.MyColor │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ kind: data │ │ +│ │ kind: control ┆ ┆ ┆ ┆ │ │ +│ ╞══════════════════════════════════════════════╪══════════════════════╪═══════════════════════════════╪═════════════════════════════════╪════════════════════════════════════╡ │ +│ │ row_0000000067816A6Bb4b8c1254d40007b ┆ 1 ┆ 2025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │ +│ └──────────────────────────────────────────────┴──────────────────────┴───────────────────────────────┴─────────────────────────────────┴────────────────────────────────────┘ │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_chunk/tests/snapshots/formatting__format_chunk_redacted.snap 
b/crates/store/re_chunk/tests/snapshots/formatting__format_chunk_redacted.snap index d1f307415dcf..24ac433c5def 100644 --- a/crates/store/re_chunk/tests/snapshots/formatting__format_chunk_redacted.snap +++ b/crates/store/re_chunk/tests/snapshots/formatting__format_chunk_redacted.snap @@ -2,22 +2,22 @@ source: crates/store/re_chunk/tests/formatting.rs expression: "format!(\"{:-240}\", chunk)" --- -┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /this/that │ -│ * id: [**REDACTED**] │ -│ * version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌──────────────────────────────────────────────┬──────────────────────┬───────────────────────────────┬───────────────────────────────────┬────────────────────────────────────┐ │ -│ │ RowId ┆ frame_nr ┆ log_time ┆ my_index ┆ example.MyPoints:colors │ │ -│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u64] ┆ type: nullable List[nullable u32] │ │ -│ │ ARROW:extension:metadata: ┆ index_name: frame_nr ┆ index_name: log_time ┆ component: my_index ┆ archetype: example.MyPoints │ │ -│ │ {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ component_type: example.MyIndex ┆ component: example.MyPoints:colors │ │ -│ │ ARROW:extension:name: TUID ┆ kind: index ┆ kind: index ┆ kind: data ┆ component_type: example.MyColor │ │ -│ │ is_sorted: true ┆ ┆ ┆ ┆ kind: data │ │ -│ │ kind: control ┆ ┆ ┆ ┆ │ │ -│ ╞══════════════════════════════════════════════╪══════════════════════╪═══════════════════════════════╪═══════════════════════════════════╪════════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ 1 ┆ 
2025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │ -│ └──────────────────────────────────────────────┴──────────────────────┴───────────────────────────────┴───────────────────────────────────┴────────────────────────────────────┘ │ -└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /this/that │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌──────────────────────────────────────────────┬──────────────────────┬───────────────────────────────┬─────────────────────────────────┬────────────────────────────────────┐ │ +│ │ RowId ┆ frame_nr ┆ log_time ┆ my_index ┆ example.MyPoints:colors │ │ +│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt64) ┆ type: List(UInt32) │ │ +│ │ ARROW:extension:metadata: ┆ index_name: frame_nr ┆ index_name: log_time ┆ component: my_index ┆ archetype: example.MyPoints │ │ +│ │ {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ component_type: example.MyIndex ┆ component: example.MyPoints:colors │ │ +│ │ ARROW:extension:name: TUID ┆ kind: index ┆ kind: index ┆ kind: data ┆ component_type: example.MyColor │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ kind: data │ │ +│ │ kind: control ┆ ┆ ┆ ┆ │ │ +│ ╞══════════════════════════════════════════════╪══════════════════════╪═══════════════════════════════╪═════════════════════════════════╪════════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 1 ┆ 2025-01-10T18:43:42.123456789 ┆ [0, 
1, 2] ┆ [0, 1, 2] │ │ +│ └──────────────────────────────────────────────┴──────────────────────┴───────────────────────────────┴─────────────────────────────────┴────────────────────────────────────┘ │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_chunk/tests/with_mapped_component.rs b/crates/store/re_chunk/tests/with_mapped_component.rs new file mode 100644 index 000000000000..16390e438796 --- /dev/null +++ b/crates/store/re_chunk/tests/with_mapped_component.rs @@ -0,0 +1,31 @@ +use re_chunk::{Chunk, RowId, Timeline}; +use re_log_types::example_components::{MyLabel, MyPoints}; + +#[test] +fn with_mapped_component_creates_new_ids() -> anyhow::Result<()> { + let row_id = RowId::new(); + let timepoint = [(Timeline::new_sequence("frame"), 1)]; + let labels = &[MyLabel("hello".into())]; + + let chunk = Chunk::builder("my/entity") + .with_component_batches( + row_id, + timepoint, + [(MyPoints::descriptor_labels(), labels as _)], + ) + .build()?; + + let mapped = + chunk.with_mapped_component(MyPoints::descriptor_labels().component, None, |arr| { + Ok::<_, std::convert::Infallible>(arr) + })?; + + assert_ne!(mapped.id(), chunk.id()); + + let old_row_ids: Vec<_> = chunk.row_ids().collect(); + let new_row_ids: Vec<_> = mapped.row_ids().collect(); + assert_eq!(old_row_ids.len(), new_row_ids.len()); + assert_ne!(old_row_ids, new_row_ids); + + Ok(()) +} diff --git a/crates/store/re_chunk_store/Cargo.toml b/crates/store/re_chunk_store/Cargo.toml index b2025d7e27f2..5c00dc6df88c 100644 --- a/crates/store/re_chunk_store/Cargo.toml +++ b/crates/store/re_chunk_store/Cargo.toml @@ -50,16 +50,17 @@ itertools.workspace = true nohash-hasher.workspace = true parking_lot = { workspace = true, features = ["arc_lock"] } saturating_cast.workspace = true -tap.workspace = true thiserror.workspace = true web-time.workspace = true 
[dev-dependencies] re_format.workspace = true +re_log_encoding = { workspace = true, features = ["decoder", "encoder"] } re_sdk_types = { workspace = true, features = ["testing"] } anyhow.workspace = true +tempfile.workspace = true criterion.workspace = true insta = { workspace = true, features = ["filters"] } mimalloc.workspace = true diff --git a/crates/store/re_chunk_store/src/compact.rs b/crates/store/re_chunk_store/src/compact.rs new file mode 100644 index 000000000000..761a91ee76ba --- /dev/null +++ b/crates/store/re_chunk_store/src/compact.rs @@ -0,0 +1,324 @@ +use std::sync::Arc; + +use re_sdk_types::components::VideoCodec; + +use crate::{ChunkStore, ChunkStoreConfig, ChunkStoreError}; + +/// Callback to detect whether a video sample is the start of a GoP (keyframe). +pub type IsStartOfGop = Arc anyhow::Result + Send + Sync>; + +/// Options for [`ChunkStore::compacted`]. +#[derive(Clone)] +pub struct CompactionOptions { + /// Controls chunk size thresholds for both merging and splitting. + pub config: ChunkStoreConfig, + + /// Maximum number of extra compaction passes to run. + /// + /// Compaction is iterative: each pass merges small neighboring chunks. + /// Stops early if the chunk count converges. + /// Defaults to 50 if `None`. + pub num_extra_passes: Option, + + /// If set, video stream chunks will be rebatched so that each chunk + /// aligns to GoP (Group of Pictures) boundaries. + /// + /// The callback should return `true` if the given sample data is a keyframe + /// for the given codec. Use `re_video::is_start_of_gop` wrapped in a closure. + /// + /// If `None`, no video rebatching is performed. + /// + /// **Note:** GoP rebatching never splits a GoP across chunks, so if a single + /// GoP is larger than [`ChunkStoreConfig::chunk_max_bytes`], it becomes one + /// oversized chunk regardless of the ceiling. Streams with long keyframe + /// intervals (e.g. 
10+ seconds between I-frames) can therefore produce chunks + /// that are many megabytes in size. + pub is_start_of_gop: Option, + + /// If set, chunks are split so no two archetype groups sharing a chunk differ + /// in byte size by more than this factor. Values should be `>= 1`; at `1.0`, + /// every archetype is forced into its own chunk. + /// + /// This keeps "thick" columns (images, videos, blobs) out of the same chunk as + /// "thin" columns (scalars, transforms, text). Components belonging to the same + /// archetype always stay together. + /// + /// `None` disables the split. + pub split_size_ratio: Option, +} + +impl ChunkStore { + /// Return a new, compacted version of this store. + /// + /// Compaction merges small neighboring chunks that share the same entity path, timelines, and + /// datatypes, up to the thresholds in the config. Large chunks may be split. + /// + /// If `is_start_of_gop` is provided, video stream chunks are rebatched to align + /// with GoP boundaries after compaction. + /// + /// If `split_size_ratio` is provided, chunks are split on entry so no two + /// archetype groups sharing a chunk differ in byte size by more than that factor. + /// + /// The returned store has compaction **disabled** ([`ChunkStoreConfig::ALL_DISABLED`]). + // TODO(RR-4328): we should improve this by exploiting the chunk index, hopefully making it + // memory-bounded + pub fn compacted(&self, options: &CompactionOptions) -> Result { + re_tracing::profile_function!(); + + // Initial pass: re-insert all chunks into a compaction-enabled store. 
+ let mut store = Self::new(self.id().clone(), options.config.clone()); + for chunk in self.iter_physical_chunks() { + store.insert_chunk(chunk)?; + } + + store.finalize_compaction(options) + } + + /// Finalize a compaction-enabled store: run up to + /// [`CompactionOptions::num_extra_passes`] additional compaction passes + /// (stopping early when the chunk count stops decreasing), optionally rebatch + /// video chunks along GoP boundaries, then disable compaction on the returned + /// store ([`ChunkStoreConfig::ALL_DISABLED`] config). + /// + /// Consumes `self`. Assumes `self` was built with a compaction-enabled + /// config (otherwise each pass is a no-op). + pub fn finalize_compaction( + mut self, + options: &CompactionOptions, + ) -> Result { + re_tracing::profile_function!(); + + if self.config.chunk_max_bytes == 0 + && self.config.chunk_max_rows == 0 + && self.config.chunk_max_rows_if_unsorted == 0 + { + re_log::debug_warn!( + "Finalizing compaction on a store that does not have compaction enabled. \ + Extra compaction passes will have no effects." + ); + } + + let CompactionOptions { + config, + num_extra_passes, + is_start_of_gop, + split_size_ratio, + } = options; + + let num_extra_passes = num_extra_passes.unwrap_or(50); + + // If `split_size_ratio` is set, re-insert each chunk split into one piece per + // size tier. Compaction's merge-candidate search is per-component, so a thin + // chunk can only merge with another thin chunk — it won't pull a thick + // sibling back in through the shared small component. 
+ if let Some(&ratio) = split_size_ratio.as_ref() { + let chunks: Vec<_> = self.iter_physical_chunks().cloned().collect(); + let mut new_store = Self::new(self.id().clone(), config.clone()); + for chunk in &chunks { + if let Some(splits) = crate::split_thick_thin::split_chunk(chunk, ratio) { + for split in splits { + new_store.insert_chunk(&Arc::new(split))?; + } + } else { + new_store.insert_chunk(chunk)?; + } + } + self = new_store; + } + + for pass in 0..num_extra_passes { + let now = web_time::Instant::now(); + let num_before = self.num_physical_chunks(); + let chunks: Vec<_> = self.iter_physical_chunks().cloned().collect(); + let mut new_store = Self::new(self.id().clone(), config.clone()); + for chunk in &chunks { + new_store.insert_chunk(chunk)?; + } + let num_after = new_store.num_physical_chunks(); + self = new_store; + + re_log::info!( + pass, + num_before, + num_after, + time = ?now.elapsed(), + "compaction pass completed", + ); + + if num_after >= num_before { + re_log::info!(pass, "converged, stopping early"); + break; + } + } + + // Rebatch video stream chunks along GoP boundaries. + if let Some(is_start_of_gop) = is_start_of_gop { + let now = web_time::Instant::now(); + + match crate::rebatch_videos::rebatch_video_chunks_to_gops( + &self, + config, + is_start_of_gop.as_ref(), + ) { + Ok(new_store) => { + self = new_store; + re_log::info!(time = ?now.elapsed(), "video GoP rebatching completed"); + } + Err(err) => { + re_log::warn!(%err, "video GoP rebatching failed"); + } + } + } + + // Post-condition: returned store is inert. 
+ self.config = ChunkStoreConfig::ALL_DISABLED; + Ok(self) + } +} + +#[cfg(test)] +mod tests { + use re_chunk::{Chunk, RowId}; + use re_log_types::{EntityPath, StoreId, StoreKind, Timeline, example_components::MyPoint}; + use re_sdk_types::components::Blob; + use re_types_core::{ArchetypeName, ComponentDescriptor}; + + use super::*; + + #[test] + fn finalize_compaction_resets_config_to_all_disabled() { + let store_id = re_log_types::StoreId::random(re_log_types::StoreKind::Recording, "test"); + let store = ChunkStore::new(store_id, ChunkStoreConfig::CHANGELOG_DISABLED); + let options = CompactionOptions { + config: ChunkStoreConfig::CHANGELOG_DISABLED, + num_extra_passes: Some(0), + is_start_of_gop: None, + split_size_ratio: None, + }; + let result = store + .finalize_compaction(&options) + .expect("zero passes should succeed"); + assert_eq!(result.config, ChunkStoreConfig::ALL_DISABLED); + } + + /// Produce a chunk carrying one blob row and one point row under two different archetypes. 
+ fn mixed_chunk(entity: &EntityPath, frame: i64, blob_bytes: usize) -> Arc { + let blob_descriptor = ComponentDescriptor { + archetype: Some(ArchetypeName::from("my.Video")), + component: "Video:blob".into(), + component_type: None, + }; + let point_descriptor = ComponentDescriptor { + archetype: Some(ArchetypeName::from("my.Points")), + component: "Points:pos".into(), + component_type: None, + }; + + let blob = Blob::from(vec![0u8; blob_bytes]); + let point = &[MyPoint::new(frame as f32, frame as f32)]; + + let chunk = Chunk::builder(entity.clone()) + .with_component_batches( + RowId::new(), + [(Timeline::new_sequence("frame"), frame)], + [ + ( + blob_descriptor, + &[blob] as &dyn re_types_core::ComponentBatch, + ), + ( + point_descriptor, + point as &dyn re_types_core::ComponentBatch, + ), + ], + ) + .build() + .unwrap(); + Arc::new(chunk) + } + + #[test] + fn compacted_splits_thick_from_thin() -> anyhow::Result<()> { + re_log::setup_logging(); + + let entity = EntityPath::from("camera"); + let blob_bytes = 128 * 1024; // well above the scalar payload + + let mut store = ChunkStore::new( + StoreId::random(StoreKind::Recording, "test_app"), + ChunkStoreConfig::ALL_DISABLED, + ); + for frame in 0..4 { + store.insert_chunk(&mixed_chunk(&entity, frame, blob_bytes))?; + } + + let options = CompactionOptions { + config: ChunkStoreConfig::DEFAULT, + num_extra_passes: Some(3), + is_start_of_gop: None, + split_size_ratio: Some(10.0), + }; + let compacted = store.compacted(&options)?; + + // After compaction, no output chunk may mix the two archetypes. + for chunk in compacted.iter_physical_chunks() { + let archetypes: std::collections::BTreeSet<_> = chunk + .components() + .values() + .map(|c| c.descriptor.archetype) + .collect(); + assert_eq!( + archetypes.len(), + 1, + "chunk mixes archetypes after thick/thin split: {archetypes:?}", + ); + } + + // And we should still end up with at least one chunk per archetype. 
+ let archetypes_seen: std::collections::BTreeSet<_> = compacted + .iter_physical_chunks() + .flat_map(|c| c.components().values().map(|c| c.descriptor.archetype)) + .collect(); + assert_eq!(archetypes_seen.len(), 2); + + Ok(()) + } + + #[test] + fn compacted_leaves_mixed_chunk_alone_without_ratio() -> anyhow::Result<()> { + re_log::setup_logging(); + + let entity = EntityPath::from("camera"); + let mut store = ChunkStore::new( + StoreId::random(StoreKind::Recording, "test_app"), + ChunkStoreConfig::ALL_DISABLED, + ); + for frame in 0..4 { + store.insert_chunk(&mixed_chunk(&entity, frame, 128 * 1024))?; + } + + let options = CompactionOptions { + config: ChunkStoreConfig::DEFAULT, + num_extra_passes: Some(3), + is_start_of_gop: None, + split_size_ratio: None, + }; + let compacted = store.compacted(&options)?; + + // Without the ratio, thick and thin stay together. + let any_mixed = compacted.iter_physical_chunks().any(|chunk| { + let archetypes: std::collections::BTreeSet<_> = chunk + .components() + .values() + .map(|c| c.descriptor.archetype) + .collect(); + archetypes.len() > 1 + }); + assert!( + any_mixed, + "expected at least one chunk to still mix archetypes" + ); + + Ok(()) + } +} diff --git a/crates/store/re_chunk_store/src/dataframe.rs b/crates/store/re_chunk_store/src/dataframe.rs index 2e74dee65ab6..ac49ceda175d 100644 --- a/crates/store/re_chunk_store/src/dataframe.rs +++ b/crates/store/re_chunk_store/src/dataframe.rs @@ -3,16 +3,13 @@ use std::collections::{BTreeMap, BTreeSet}; use std::ops::{Deref, DerefMut}; -use arrow::array::ListArray as ArrowListArray; -use arrow::datatypes::{DataType as ArrowDatatype, Field as ArrowField}; -use itertools::Itertools as _; +use arrow::datatypes::DataType as ArrowDatatype; use re_chunk::{ComponentIdentifier, LatestAtQuery, RangeQuery, TimelineName}; use re_log_types::{AbsoluteTimeRange, EntityPath, TimeInt, Timeline}; use re_sorbet::{ ChunkColumnDescriptors, ColumnSelector, ComponentColumnDescriptor, 
ComponentColumnSelector, IndexColumnDescriptor, TimeColumnSelector, }; -use tap::Tap as _; use crate::{ChunkStore, ColumnMetadata}; @@ -308,76 +305,6 @@ impl QueryExpression { // --- impl ChunkStore { - /// Returns the full schema of the store. - /// - /// This will include a column descriptor for every timeline and every component on every - /// entity that has been written to the store so far. - /// - /// The order of the columns is guaranteed to be in a specific order: - /// * first, the time columns in lexical order (`frame_nr`, `log_time`, ...); - /// * second, the component columns in lexical order (`Color`, `Radius, ...`). - pub fn schema(&self) -> ChunkColumnDescriptors { - re_tracing::profile_function!(); - - let indices = self - .timelines() - .values() - .map(|timeline| IndexColumnDescriptor::from(*timeline)) - .collect(); - - let components = self - .per_column_metadata - .iter() - .flat_map(|(entity_path, per_identifier)| { - per_identifier - .values() - .map(move |(descr, _, datatype)| (entity_path, descr, datatype)) - }) - .filter_map(|(entity_path, component_descr, datatype)| { - let metadata = - self.lookup_column_metadata(entity_path, component_descr.component)?; - - Some(((entity_path, component_descr), (metadata, datatype))) - }) - .map(|((entity_path, component_descr), (metadata, datatype))| { - let ColumnMetadata { - is_static, - is_tombstone, - is_semantically_empty, - } = metadata; - - if let Some(c) = component_descr.component_type { - c.sanity_check(); - } - - ComponentColumnDescriptor { - // NOTE: The data is always a at least a list, whether it's latest-at or range. - // It might be wrapped further in e.g. a dict, but at the very least - // it's a list. 
- store_datatype: ArrowListArray::DATA_TYPE_CONSTRUCTOR( - ArrowField::new("item", datatype.clone(), true).into(), - ), - - entity_path: entity_path.clone(), - archetype: component_descr.archetype, - component: component_descr.component, - component_type: component_descr.component_type, - is_static, - is_tombstone, - is_semantically_empty, - } - }) - .collect_vec() - .tap_mut(|components| components.sort()); - - ChunkColumnDescriptors { - row_id: self.row_id_descriptor(), - indices, - components, - } - .tap(|schema| schema.sanity_check()) - } - #[expect(clippy::unused_self)] pub fn row_id_descriptor(&self) -> re_sorbet::RowIdColumnDescriptor { re_sorbet::RowIdColumnDescriptor::from_sorted(false) @@ -385,7 +312,7 @@ impl ChunkStore { /// Given a [`TimeColumnSelector`], returns the corresponding [`IndexColumnDescriptor`]. pub fn resolve_time_selector(&self, selector: &TimeColumnSelector) -> IndexColumnDescriptor { - let timelines = self.timelines(); + let timelines = self.schema.timelines(); let timeline = timelines .get(&selector.timeline) @@ -421,21 +348,22 @@ impl ChunkStore { is_semantically_empty: false, }; - let per_identifier = self.per_column_metadata.get(&selector.entity_path)?; + let per_identifier = self + .schema + .per_column_metadata_for_entity(&selector.entity_path)?; // We perform a scan over all component descriptors in the queried entity path. 
- let (component_descr, _, datatype) = - per_identifier.get(&selector.component.as_str().into())?; + let entry = per_identifier.get(&selector.component.as_str().into())?; - result.store_datatype = datatype.clone(); - result.archetype = component_descr.archetype; - result.component_type = component_descr.component_type; + result.store_datatype = entry.datatype.clone(); + result.archetype = entry.descriptor.archetype; + result.component_type = entry.descriptor.component_type; if let Some(ColumnMetadata { is_static, is_tombstone, is_semantically_empty, - }) = self.lookup_column_metadata(&selector.entity_path, component_descr.component) + }) = self.lookup_column_metadata(&selector.entity_path, entry.descriptor.component) { result.is_static = is_static; result.is_tombstone = is_tombstone; @@ -455,7 +383,9 @@ impl ChunkStore { let filter = Self::create_component_filter_from_query(query); - self.schema().filter_components(filter) + self.schema + .chunk_column_descriptors() + .filter_components(filter) } pub fn create_component_filter_from_query( diff --git a/crates/store/re_chunk_store/src/drop_time_range.rs b/crates/store/re_chunk_store/src/drop_time_range.rs index 7a4af57ae761..8b2e5e6d4423 100644 --- a/crates/store/re_chunk_store/src/drop_time_range.rs +++ b/crates/store/re_chunk_store/src/drop_time_range.rs @@ -1,10 +1,8 @@ -use std::sync::Arc; - -use re_chunk::{ChunkId, TimelineName}; +use re_chunk::TimelineName; use re_log::debug_assert; use re_log_types::AbsoluteTimeRange; -use crate::{ChunkStore, ChunkStoreEvent}; +use crate::{ChunkDeletionReason, ChunkStore, ChunkStoreDiff, ChunkStoreEvent}; impl ChunkStore { /// Drop all events that are in the given range on the given timeline. 
@@ -20,9 +18,10 @@ impl ChunkStore { &mut self, timeline: &TimelineName, drop_range: AbsoluteTimeRange, + reason: ChunkDeletionReason, ) -> Vec { let deep_removal = false; - self.drop_time_range(timeline, drop_range, deep_removal) + self.drop_time_range(timeline, drop_range, deep_removal, reason) } /// Drop all events that are in the given range on the given timeline. @@ -39,9 +38,10 @@ impl ChunkStore { &mut self, timeline: &TimelineName, drop_range: AbsoluteTimeRange, + reason: ChunkDeletionReason, ) -> Vec { let deep_removal = true; - self.drop_time_range(timeline, drop_range, deep_removal) + self.drop_time_range(timeline, drop_range, deep_removal, reason) } fn drop_time_range( @@ -49,6 +49,7 @@ impl ChunkStore { timeline: &TimelineName, drop_range: AbsoluteTimeRange, deep_removal: bool, + reason: ChunkDeletionReason, ) -> Vec { re_tracing::profile_function!(); @@ -73,13 +74,13 @@ impl ChunkStore { // The whole chunk should be dropped! chunks_to_drop.push(chunk.clone()); } else if drop_range.intersects(chunk_range) { - let chunk = chunk.sorted_by_timeline_if_unsorted(timeline); + let sorted = chunk.sorted_by_timeline_if_unsorted(timeline); - let num_rows = chunk.num_rows(); + let num_rows = sorted.num_rows(); // Get the sorted times: #[expect(clippy::unwrap_used)] // We already know the chunk has the timeline - let time_column = chunk.timelines().get(timeline).unwrap(); + let time_column = sorted.timelines().get(timeline).unwrap(); let times = time_column.times_raw(); let drop_range_min = drop_range.min().as_i64(); @@ -104,17 +105,13 @@ impl ChunkStore { } if min_idx < max_idx { - chunks_to_drop.push(Arc::new(chunk.clone())); + // Drop the original chunk (not the sorted copy) so the store can find it by ID. 
+ chunks_to_drop.push(chunk.clone()); if 0 < min_idx { - new_chunks - .push(chunk.row_sliced_shallow(0, min_idx).with_id(ChunkId::new())); + new_chunks.push(sorted.row_sliced_shallow(0, min_idx)); } if max_idx < num_rows { - new_chunks.push( - chunk - .row_sliced_shallow(max_idx, num_rows - max_idx) - .with_id(ChunkId::new()), - ); + new_chunks.push(sorted.row_sliced_shallow(max_idx, num_rows - max_idx)); } } } @@ -123,27 +120,19 @@ impl ChunkStore { // ------------------ // Apply the changes: - let generation = self.generation(); - let mut events: Vec = vec![]; + let mut deletion_diffs: Vec = vec![]; for chunk in chunks_to_drop { let dels = if deep_removal { - self.remove_chunks_deep(vec![chunk], None) + self.remove_chunks_deep(vec![chunk], None, reason) } else { - self.remove_chunks_shallow(vec![chunk], None) + self.remove_chunks_shallow(vec![chunk], None, reason) }; - - for del in dels { - events.push(ChunkStoreEvent { - store_id: self.id.clone(), - store_generation: generation.clone(), - event_id: self - .event_id - .fetch_add(1, std::sync::atomic::Ordering::Relaxed), - diff: del.into(), - }); - } + deletion_diffs.extend(dels.into_iter().map(ChunkStoreDiff::from)); } + + let mut events = self.finalize_events(deletion_diffs); + for mut chunk in new_chunks { chunk.sort_if_unsorted(); #[expect(clippy::unwrap_used)] // The chunk came from the store, so it should be fine diff --git a/crates/store/re_chunk_store/src/entity_tree.rs b/crates/store/re_chunk_store/src/entity_tree.rs new file mode 100644 index 000000000000..1cf7f18ebc10 --- /dev/null +++ b/crates/store/re_chunk_store/src/entity_tree.rs @@ -0,0 +1,183 @@ +use std::collections::BTreeMap; + +use re_log_types::{EntityPath, EntityPathPart}; + +// ---------------------------------------------------------------------------- + +/// A recursive tree structure that maintains the entity hierarchy. +/// +/// The tree contains a list of subtrees, and so on recursively. 
+#[derive(Debug, Clone)] +pub struct EntityTree { + /// Full path prefix to the root of this (sub)tree. + pub path: EntityPath, + + /// Direct descendants of this (sub)tree. + pub children: BTreeMap, +} + +impl Default for EntityTree { + fn default() -> Self { + Self::root() + } +} + +impl EntityTree { + pub fn root() -> Self { + Self::new(EntityPath::root()) + } + + pub fn new(path: EntityPath) -> Self { + Self { + path, + children: Default::default(), + } + } + + /// Has no child entities. + pub fn is_leaf(&self) -> bool { + self.children.is_empty() + } + + pub fn on_new_entity(&mut self, entity_path: &EntityPath) { + re_tracing::profile_function!(); + + // Book-keeping for each level in the hierarchy: + let mut tree = self; + for (i, part) in entity_path.iter().enumerate() { + tree = tree + .children + .entry(part.clone()) + .or_insert_with(|| Self::new(entity_path.as_slice()[..=i].into())); + } + } + + pub fn subtree(&self, path: &EntityPath) -> Option<&Self> { + fn subtree_recursive<'tree>( + this: &'tree EntityTree, + path: &[EntityPathPart], + ) -> Option<&'tree EntityTree> { + match path { + [] => Some(this), + [first, rest @ ..] => { + let child = this.children.get(first)?; + subtree_recursive(child, rest) + } + } + } + + subtree_recursive(self, path.as_slice()) + } + + /// Invokes visitor for `self` and all children recursively. + pub fn visit_children_recursively(&self, mut visitor: impl FnMut(&EntityPath)) { + fn visit(this: &EntityTree, visitor: &mut impl FnMut(&EntityPath)) { + visitor(&this.path); + for child in this.children.values() { + visit(child, visitor); + } + } + + visit(self, &mut visitor); + } + + /// Removes leaf entities that have no children and for which `entity_has_data` returns false. + /// + /// This is called after store deletions to keep the tree in sync with the actual data. 
+ pub fn prune_empty_entities(&mut self, entity_has_data: &impl Fn(&EntityPath) -> bool) { + self.children.retain(|_, child| { + child.prune_empty_entities(entity_has_data); + let has_children = !child.children.is_empty(); + let has_data = entity_has_data(&child.path); + has_children || has_data + }); + } + + /// Invokes the `predicate` for `self` and all children recursively, + /// returning the _first_ entity for which the `predicate` returns `true`. + /// + /// Note that this function has early return semantics, meaning if multiple + /// entities would return `true`, only the first is returned. + /// The entities are yielded in order of their entity paths. + pub fn find_first_child_recursive( + &self, + mut predicate: impl FnMut(&EntityPath) -> bool, + ) -> Option<&Self> { + fn visit<'a>( + this: &'a EntityTree, + predicate: &mut impl FnMut(&EntityPath) -> bool, + ) -> Option<&'a EntityTree> { + if predicate(&this.path) { + return Some(this); + } + + for child in this.children.values() { + if let Some(subtree) = visit(child, predicate) { + // Early return + return Some(subtree); + } + } + + None + } + + visit(self, &mut predicate) + } +} + +impl re_byte_size::SizeBytes for EntityTree { + fn heap_size_bytes(&self) -> u64 { + let Self { path, children } = self; + path.heap_size_bytes() + children.heap_size_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn prune_removes_empty_leaves() { + let mut tree = EntityTree::root(); + let parent: EntityPath = "parent".into(); + let child: EntityPath = "parent/child".into(); + let grandchild: EntityPath = "parent/child/grandchild".into(); + + tree.on_new_entity(&grandchild); + + assert!(tree.subtree(&parent).is_some()); + assert!(tree.subtree(&child).is_some()); + assert!(tree.subtree(&grandchild).is_some()); + + // Only grandchild has data + tree.prune_empty_entities(&|path| *path == grandchild); + assert!(tree.subtree(&parent).is_some()); + assert!(tree.subtree(&child).is_some()); + 
assert!(tree.subtree(&grandchild).is_some()); + + // No entity has data, all should be pruned + tree.prune_empty_entities(&|_| false); + assert!(tree.subtree(&parent).is_none()); + assert!(tree.subtree(&child).is_none()); + assert!(tree.subtree(&grandchild).is_none()); + assert!(tree.children.is_empty()); + } + + #[test] + fn prune_keeps_parents_with_children() { + let mut tree = EntityTree::root(); + let parent: EntityPath = "parent".into(); + let child_a: EntityPath = "parent/a".into(); + let child_b: EntityPath = "parent/b".into(); + + tree.on_new_entity(&child_a); + tree.on_new_entity(&child_b); + + // Only child_b has data -- parent and child_a have no data + // but parent should stay because child_b is still there + tree.prune_empty_entities(&|path| *path == child_b); + assert!(tree.subtree(&parent).is_some()); + assert!(tree.subtree(&child_a).is_none()); + assert!(tree.subtree(&child_b).is_some()); + } +} diff --git a/crates/store/re_chunk_store/src/events.rs b/crates/store/re_chunk_store/src/events.rs index e59922633443..44d1cefdeb81 100644 --- a/crates/store/re_chunk_store/src/events.rs +++ b/crates/store/re_chunk_store/src/events.rs @@ -14,7 +14,7 @@ use crate::{ChunkId, ChunkStore, ChunkStoreSubscriber, RowId}; /// Per-component information for chunks. /// /// Created from either a physical chunk or virtual manifest metadata. -#[derive(Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct ChunkComponentMeta { pub descriptor: re_sdk_types::ComponentDescriptor, @@ -28,20 +28,55 @@ pub struct ChunkComponentMeta { /// For virtual this means `row_count > 0`. pub has_data: bool, - /// Whether this component only has static data. - pub is_static_only: bool, + /// Whether this component has ever been written as static data. + /// + /// Once a component is static, it stays static. This flag is monotonic + /// and never transitions back to `false`. + pub is_static: bool, } /// Chunk meta originating from either a virtual or physical chunk. 
/// /// Useful for chunk store subscribers that do the same logic /// for physical and virtual additions. -#[derive(Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct ChunkMeta { pub entity_path: re_chunk::EntityPath, pub components: Vec, } +impl ChunkMeta { + /// Build a [`ChunkMeta`] from an existing physical [`Chunk`]. + pub fn from_chunk(chunk: &Chunk) -> Self { + let components: Vec = chunk + .components() + .values() + .map(|column| ChunkComponentMeta { + descriptor: column.descriptor.clone(), + inner_arrow_datatype: Some(column.list_array.value_type()), + has_data: !column.list_array.values().is_empty(), + is_static: chunk.is_static(), + }) + .collect(); + + Self { + entity_path: chunk.entity_path().clone(), + components, + } + } + + /// Build [`ChunkMeta`]s from an [`RrdManifest`], one per entity path. + pub fn from_manifest(manifest: &RrdManifest) -> Vec { + re_tracing::profile_function!(); + // Reuse the same logic as ChunkStoreDiffVirtualAddition::chunk_metas. + ChunkStoreDiffVirtualAddition { + rrd_manifest: Arc::new(manifest.clone()), + } + .chunk_metas() + .collect() + } +} + /// The atomic unit of change in the Rerun [`ChunkStore`]. /// /// A [`ChunkStoreEvent`] describes the changes caused by the addition or deletion of a @@ -106,6 +141,22 @@ pub enum ChunkStoreDiff { /// When a physical chunk has been evicted. Deletion(ChunkStoreDiffDeletion), + + /// Newly discovered entity/component columns in the schema. + /// + /// Also emitted when a component's `is_static` flag transitions from `false` to `true`. + /// Note: `has_data` does not influence the emission of `SchemaAddition` events. + SchemaAddition(ChunkStoreDiffSchemaAddition), +} + +/// Describes newly added columns to the store schema. +/// +/// This event is emitted when previously unseen entity/component pairs are +/// discovered, either from a physical chunk addition or from an RRD manifest. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ChunkStoreDiffSchemaAddition { + /// Newly discovered entity/component pairs, grouped by entity. + pub new_columns: Vec, } impl From for ChunkStoreDiff { @@ -126,6 +177,12 @@ impl From for ChunkStoreDiff { } } +impl From for ChunkStoreDiff { + fn from(value: ChunkStoreDiffSchemaAddition) -> Self { + Self::SchemaAddition(value) + } +} + impl ChunkStoreDiff { pub fn addition( chunk_before_processing: Arc, @@ -143,8 +200,8 @@ impl ChunkStoreDiff { Self::VirtualAddition(ChunkStoreDiffVirtualAddition { rrd_manifest }) } - pub fn deletion(chunk: Arc) -> Self { - Self::Deletion(ChunkStoreDiffDeletion { chunk }) + pub fn deletion(chunk: Arc, reason: ChunkDeletionReason) -> Self { + Self::Deletion(ChunkStoreDiffDeletion { chunk, reason }) } pub fn is_addition(&self) -> bool { @@ -159,6 +216,10 @@ impl ChunkStoreDiff { matches!(self, Self::Deletion(_)) } + pub fn is_schema_addition(&self) -> bool { + matches!(self, Self::SchemaAddition(_)) + } + pub fn into_addition(self) -> Option { match self { Self::Addition(addition) => Some(addition), @@ -206,7 +267,7 @@ impl ChunkStoreDiff { pub fn delta(&self) -> i64 { match self { Self::Addition(_) => 1, - Self::VirtualAddition(_) => 0, + Self::VirtualAddition(_) | Self::SchemaAddition(_) => 0, Self::Deletion(_) => -1, } } @@ -224,7 +285,7 @@ impl ChunkStoreDiff { pub fn delta_chunk(&self) -> Option<&Arc> { match self { Self::Addition(addition) => Some(addition.delta_chunk()), - Self::VirtualAddition(_) => None, + Self::VirtualAddition(_) | Self::SchemaAddition(_) => None, Self::Deletion(deletion) => Some(&deletion.chunk), } } @@ -344,7 +405,7 @@ impl ChunkStoreDiffAddition { descriptor: column.descriptor.clone(), inner_arrow_datatype: Some(column.list_array.value_type()), has_data: !column.list_array.values().is_empty(), - is_static_only: delta_chunk.is_static(), + is_static: delta_chunk.is_static(), }) .collect(); @@ -403,16 +464,16 @@ impl ChunkStoreDiffVirtualAddition { 
inner_arrow_datatype: Some(inner_arrow_datatype), // These fields are filled in later in this function has_data: false, - is_static_only: false, + is_static: false, }, ) }) .collect(); - /// Helper to track what's know about a component from the manifest's static/temporal maps. + /// Helper to track what's known about a component from the manifest's static/temporal maps. #[derive(Default)] struct VirtualComponentInfo { - has_temporal: bool, + is_static: bool, has_rows: bool, } @@ -430,7 +491,7 @@ impl ChunkStoreDiffVirtualAddition { entry.insert( component, VirtualComponentInfo { - has_temporal: false, + is_static: true, has_rows: true, }, ); @@ -448,7 +509,6 @@ impl ChunkStoreDiffVirtualAddition { let has_rows = per_chunk.values().any(|e| e.num_rows > 0); let existing = entry.entry(component).or_default(); - existing.has_temporal = true; existing.has_rows |= has_rows; } } @@ -462,17 +522,17 @@ impl ChunkStoreDiffVirtualAddition { .into_iter() .map(|(component, info)| { let has_data = info.has_rows; - let is_static_only = !info.has_temporal; + let is_static = info.is_static; if let Some(meta) = component_schema_info.get(&component) { ChunkComponentMeta { has_data, - is_static_only, + is_static, ..meta.clone() } } else { ChunkComponentMeta { has_data, - is_static_only, + is_static, descriptor: re_sdk_types::ComponentDescriptor::partial(component), inner_arrow_datatype: None, } @@ -483,6 +543,32 @@ impl ChunkStoreDiffVirtualAddition { } } +/// Why a chunk was removed from the store. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ChunkDeletionReason { + /// Garbage collection due to memory pressure. + GarbageCollection, + + /// A virtual chunk was replaced by its physical data. + VirtualToPhysicalReplacement, + + /// Old split chunks cleaned up when their root chunk is re-inserted. + /// + /// When a root chunk is inserted, the store may split it into smaller physical chunks. + /// If the same root chunk is later re-downloaded (e.g. 
after GC eviction), + /// the old splits are stale duplicates and must be removed before the new insertion. + DanglingSplitCleanup, + + /// A chunk was replaced by a compacted version. + Compaction, + + /// A static chunk was overwritten by a newer value. + Overwrite, + + /// Explicitly dropped by user action (e.g. undo/redo stack operations). + ExplicitDrop, +} + /// An atomic deletion event. /// /// Reminder: ⚠ Do not confuse _a deletion_ and _a clear_ ⚠. @@ -501,21 +587,25 @@ pub struct ChunkStoreDiffDeletion { // downstream subscribers get a chance to inspect the data in the chunk before it gets permanently // deallocated. pub chunk: Arc, + + /// Why this chunk was removed. + pub reason: ChunkDeletionReason, } impl std::fmt::Debug for ChunkStoreDiffDeletion { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let Self { chunk } = self; + let Self { chunk, reason } = self; f.debug_tuple("ChunkStoreDiffDeletion") .field(&chunk.id()) + .field(reason) .finish() } } impl PartialEq for ChunkStoreDiffDeletion { fn eq(&self, other: &Self) -> bool { - let Self { chunk } = self; - chunk.id() == other.chunk.id() + let Self { chunk, reason } = self; + chunk.id() == other.chunk.id() && *reason == other.reason } } @@ -530,7 +620,7 @@ impl ChunkStoreDiffDeletion { #[cfg(test)] mod tests { - use std::collections::BTreeMap; + use std::collections::{BTreeMap, BTreeSet}; use re_chunk::{RowId, TimelineName}; use re_log_types::example_components::{MyColor, MyIndex, MyPoint, MyPoints}; @@ -580,7 +670,9 @@ mod tests { for event in events { let delta = event.delta(); - let delta_chunk = event.delta_chunk().unwrap(); + let Some(delta_chunk) = event.delta_chunk() else { + continue; + }; let delta_rows = delta * delta_chunk.num_rows() as i64; for row_id in delta_chunk.row_ids() { @@ -613,6 +705,18 @@ mod tests { } } + /// Helper to extract the set of new component descriptors from a [`ChunkStoreDiff::SchemaAddition`]. 
+ fn schema_addition_descriptors(event: &ChunkStoreEvent) -> BTreeSet { + match &event.diff { + ChunkStoreDiff::SchemaAddition(sa) => sa + .new_columns + .iter() + .flat_map(|m| m.components.iter().map(|c| c.descriptor.clone())) + .collect(), + other => panic!("expected SchemaAddition, got {other:?}"), + } + } + #[test] fn store_events() -> anyhow::Result<()> { let mut store = ChunkStore::new( @@ -641,7 +745,18 @@ mod tests { ) .build()?; - view.on_events(&store.insert_chunk(&Arc::new(chunk1))?); + let events = store.insert_chunk(&Arc::new(chunk1))?; + + // chunk1 introduces entity_a with MyIndex — expect Addition + SchemaAddition. + assert_eq!(events.len(), 2); + assert!(events[0].is_addition()); + assert!(events[1].is_schema_addition()); + assert_eq!( + schema_addition_descriptors(&events[1]), + BTreeSet::from([MyIndex::partial_descriptor()]), + ); + + view.on_events(&events); similar_asserts::assert_eq!( GlobalCounts::new( @@ -693,7 +808,18 @@ mod tests { .build()? }; - view.on_events(&store.insert_chunk(&Arc::new(chunk2))?); + let events = store.insert_chunk(&Arc::new(chunk2))?; + + // chunk2 introduces entity_b with Points + Colors — expect Addition + SchemaAddition. + assert_eq!(events.len(), 2); + assert!(events[0].is_addition()); + assert!(events[1].is_schema_addition()); + assert_eq!( + schema_addition_descriptors(&events[1]), + BTreeSet::from([MyPoints::descriptor_points(), MyPoints::descriptor_colors(),]), + ); + + view.on_events(&events); similar_asserts::assert_eq!( GlobalCounts::new( @@ -745,7 +871,21 @@ mod tests { .build()? }; - view.on_events(&store.insert_chunk(&Arc::new(chunk3))?); + let events = store.insert_chunk(&Arc::new(chunk3))?; + + // chunk3 adds MyIndex to entity_b (new!) and re-uses Colors (not new, but transitions to static). + // Colors already existed on entity_b, but this is a static chunk so Colors gets an + // is_static transition (false → true). MyIndex is new on entity_b. 
+ assert_eq!(events.len(), 2); + assert!(events[0].is_addition()); + assert!(events[1].is_schema_addition()); + assert_eq!( + schema_addition_descriptors(&events[1]), + BTreeSet::from([MyIndex::partial_descriptor(), MyPoints::descriptor_colors(),]), + "MyIndex is new on entity_b; Colors gets is_static transition" + ); + + view.on_events(&events); similar_asserts::assert_eq!( GlobalCounts::new( @@ -779,6 +919,16 @@ mod tests { ); let events = store.gc(&GarbageCollectionOptions::gc_everything()).0; + + // GC should only produce Deletion events, never SchemaAddition. + for event in &events { + assert!( + event.is_deletion(), + "GC should only produce deletions, got: {:?}", + event.diff + ); + } + view.on_events(&events); similar_asserts::assert_eq!( diff --git a/crates/store/re_chunk_store/src/gc.rs b/crates/store/re_chunk_store/src/gc.rs index f096573d8e63..b6ec2373c485 100644 --- a/crates/store/re_chunk_store/src/gc.rs +++ b/crates/store/re_chunk_store/src/gc.rs @@ -12,8 +12,8 @@ use re_chunk::{Chunk, ChunkId, TimelineName}; use re_log_types::{AbsoluteTimeRange, TimeInt}; use crate::{ - ChunkStore, ChunkStoreChunkStats, ChunkStoreDiff, ChunkStoreDiffDeletion, ChunkStoreEvent, - ChunkStoreStats, + ChunkDeletionReason, ChunkStore, ChunkStoreChunkStats, ChunkStoreDiff, ChunkStoreDiffDeletion, + ChunkStoreEvent, ChunkStoreStats, }; // Used all over in docstrings. 
@@ -236,25 +236,9 @@ impl ChunkStore { "GC done" ); - let events: Vec<_> = diffs - .into_iter() - .map(|diff| ChunkStoreEvent { - store_id: self.id.clone(), - store_generation: self.generation(), - event_id: self - .event_id - .fetch_add(1, std::sync::atomic::Ordering::Relaxed), - diff, - }) - .collect(); - if cfg!(debug_assertions) { - let any_event_other_than_deletion = events.iter().any(|e| !e.is_deletion()); - assert!(!any_event_other_than_deletion); - } + re_log::debug_assert!(diffs.iter().all(|d| d.is_deletion())); - if self.config.enable_changelog { - Self::on_events(&events); - } + let events = self.finalize_events(diffs); (events, stats_before - stats_after) } @@ -276,10 +260,10 @@ impl ChunkStore { self.temporal_chunk_ids_per_entity_per_component .values() .flat_map(|temporal_chunk_ids_per_timeline| { - temporal_chunk_ids_per_timeline.iter().flat_map( - |(_timeline, temporal_chunk_ids_per_component)| { - temporal_chunk_ids_per_component.iter().flat_map( - |(_, temporal_chunk_ids_per_time)| { + temporal_chunk_ids_per_timeline.values().flat_map( + |temporal_chunk_ids_per_component| { + temporal_chunk_ids_per_component.values().flat_map( + |temporal_chunk_ids_per_time| { itertools::chain!( temporal_chunk_ids_per_time .per_start_time @@ -362,12 +346,18 @@ impl ChunkStore { }; let now = Instant::now(); - let dels1 = - self.remove_chunks_shallow(chunks_to_be_shallow_removed, Some(sweep_time_budget)); + let dels1 = self.remove_chunks_shallow( + chunks_to_be_shallow_removed, + Some(sweep_time_budget), + ChunkDeletionReason::GarbageCollection, + ); let remaining_budget = sweep_time_budget.saturating_sub(now.elapsed()); - let dels2 = - self.remove_chunks_deep(chunks_to_be_deeply_removed, Some(remaining_budget)); + let dels2 = self.remove_chunks_deep( + chunks_to_be_deeply_removed, + Some(remaining_budget), + ChunkDeletionReason::GarbageCollection, + ); dels1.into_iter().chain(dels2).map(Into::into).collect() } @@ -450,7 +440,8 @@ impl ChunkStore { let 
chunks_in_priority_order = self .physical_chunk_ids_per_min_row_id - .values() + .iter() + .map(|(_, chunk_id)| chunk_id) .filter(move |chunk_id| !protected_chunk_ids.contains(chunk_id)) .filter_map(|chunk_id| self.physical_chunks_per_chunk_id.get(chunk_id).cloned()) // physical only .filter(|chunk| !chunk.is_static()) // cannot gc static data @@ -502,6 +493,7 @@ impl ChunkStore { &mut self, chunks_to_be_removed: Vec>, time_budget: Option, + reason: ChunkDeletionReason, ) -> Vec { re_tracing::profile_function!(); @@ -510,13 +502,12 @@ impl ChunkStore { // The deep diff is always a superset of the shallow one (because you can remove physical // chunks while keeping virtual ones, but not vice-versa). let deletions_shallow = - self.remove_chunks_shallow(chunks_to_be_removed.clone(), time_budget); + self.remove_chunks_shallow(chunks_to_be_removed.clone(), time_budget, reason); let Self { id: _, config: _, - time_type_registry: _, // purely additive - per_column_metadata: _, // purely additive + schema: _, // purely additive physical_chunks_per_chunk_id: _, // handled by shallow impl physical_chunk_ids_per_min_row_id: _, // handled by shallow impl chunks_lineage, // purely additive @@ -633,7 +624,7 @@ impl ChunkStore { } if was_removed { - deletions.push(ChunkStoreDiffDeletion { chunk }); + deletions.push(ChunkStoreDiffDeletion { chunk, reason }); } } @@ -673,14 +664,14 @@ impl ChunkStore { &mut self, chunks_to_be_removed: Vec>, time_budget: Option, + reason: ChunkDeletionReason, ) -> Vec { re_tracing::profile_function!(); let Self { id: _, config: _, - time_type_registry: _, // purely additive - per_column_metadata: _, // purely additive + schema: _, // purely additive physical_chunks_per_chunk_id, physical_chunk_ids_per_min_row_id, chunks_lineage: _, // virtual @@ -704,7 +695,7 @@ impl ChunkStore { let mut deletions = Vec::with_capacity(chunks_to_be_removed.len()); for chunk in chunks_to_be_removed { if let Some(row_id_min) = chunk.row_id_range().map(|(min, _)| min) { 
- physical_chunk_ids_per_min_row_id.remove(&row_id_min); + physical_chunk_ids_per_min_row_id.remove(&(row_id_min, chunk.id())); } let Some(chunk) = physical_chunks_per_chunk_id.remove(&chunk.id()) else { continue; @@ -718,7 +709,7 @@ impl ChunkStore { *temporal_physical_chunks_stats -= ChunkStoreChunkStats::from_chunk(&chunk); - deletions.push(ChunkStoreDiffDeletion { chunk }); + deletions.push(ChunkStoreDiffDeletion { chunk, reason }); // Only check time budget once we have removed at least one chunk. if time_budget <= start_time.elapsed() { diff --git a/crates/store/re_chunk_store/src/lazy_rrd_store.rs b/crates/store/re_chunk_store/src/lazy_rrd_store.rs new file mode 100644 index 000000000000..c1c83283de20 --- /dev/null +++ b/crates/store/re_chunk_store/src/lazy_rrd_store.rs @@ -0,0 +1,532 @@ +use std::fs::File; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use ahash::{HashMap, HashMapExt as _}; +use nohash_hasher::{IntMap, IntSet}; +use parking_lot::Mutex; + +use re_chunk::{Chunk, ChunkId}; +use re_log_encoding::{CodecResult, RawRrdManifest, RrdManifest}; +use re_log_types::{AbsoluteTimeRange, EntityPath, StoreId, Timeline}; + +use crate::{ + ChunkStore, ChunkStoreConfig, ChunkStoreHandle, ChunkStoreResult, ChunkTrackingMode, + EntityTree, ExtractPropertiesError, LatestAtQuery, QueryResults, RangeQuery, StoreSchema, +}; + +/// A [`ChunkStore`] backed by an RRD file, with index loaded but chunks loaded on demand. +/// +/// Constructed from a single store's [`RrdManifest`]. Store selection (which manifest to extract +/// from the `RrdFooter`) is the caller's responsibility. +/// +/// On construction, the `ChunkStore`'s virtual index is populated via `insert_rrd_manifest()`. +/// No physical chunk data is loaded until [`Self::load_chunks`] or [`Self::load_all_chunks`] +/// is called. +/// +/// Holds the RRD file open for the lifetime of the store, so that lazy chunk reads succeed +/// even if the file is deleted from the filesystem after construction. 
+//TODO(RR-4341): this abstraction is very primitive. We need a more general `ChunkProvider`-style +// abstraction to cover for the many larger-than-RAM use cases. +pub struct LazyRrdStore { + store: ChunkStoreHandle, + file: Mutex, + rrd_path: PathBuf, + raw_manifest: Arc, + manifest: Arc, + + /// Precomputed map from `ChunkId` to manifest row index. + chunk_id_to_index: HashMap, + + /// Precomputed per-chunk timeline ranges. + timeline_ranges: HashMap>, +} + +impl LazyRrdStore { + /// Create a new lazy store from a manifest and an open file handle. + /// Populates the virtual index (no data loaded). + /// + /// The caller is responsible for reading the `RrdFooter` from the file and selecting + /// the appropriate manifest (e.g. filtering by `StoreKind::Recording`). This keeps + /// store-selection policy out of `re_chunk_store`. The manifest **must** come from + /// the same file — byte offsets in the manifest are meaningless otherwise. + /// + /// `rrd_path` is kept for diagnostic messages only; all I/O goes through `file`. + pub fn try_new( + file: File, + rrd_path: PathBuf, + raw_manifest: Arc, + ) -> CodecResult { + let manifest = Arc::new(RrdManifest::try_new(&raw_manifest)?); + + // IMPORTANT: `ALL_DISABLED` here is load-bearing, since the `ChunkStore` is essentially + // acting as a cache for the underlying RRD. Any compaction, etc. would lead to unexpected + // consequences. 
+ let mut store = + ChunkStore::new(manifest.store_id().clone(), ChunkStoreConfig::ALL_DISABLED); + + #[expect(clippy::let_underscore_must_use)] + let _ = store.insert_rrd_manifest(Arc::clone(&manifest)); + + let chunk_id_to_index: HashMap = manifest + .col_chunk_ids() + .iter() + .enumerate() + .map(|(i, &id)| (id, i)) + .collect(); + + let timeline_ranges = Self::build_timeline_ranges(&manifest); + + Ok(Self { + store: ChunkStoreHandle::new(store), + file: Mutex::new(file), + rrd_path, + raw_manifest, + manifest, + chunk_id_to_index, + timeline_ranges, + }) + } + + fn build_timeline_ranges( + manifest: &RrdManifest, + ) -> HashMap> { + let mut result: HashMap> = HashMap::new(); + for per_entity in manifest.temporal_map().values() { + for (timeline, per_component) in per_entity { + for per_chunk in per_component.values() { + for (&chunk_id, entry) in per_chunk { + let e = result.entry(chunk_id).or_default(); + e.entry(*timeline) + .and_modify(|existing| { + *existing = existing.union(entry.time_range); + }) + .or_insert(entry.time_range); + } + } + } + } + result + } + + /// Load specific chunks from disk into the store. + /// + /// Chunks that are already physically loaded are skipped. + /// Returns an error if any chunk ID is not in the manifest. + /// All I/O happens without holding any store lock. + pub fn load_chunks(&self, chunk_ids: &[ChunkId]) -> ChunkStoreResult>> { + // 1. Filter out chunks that are already physical. + let to_load: Vec = { + let guard = self.store.read(); + chunk_ids + .iter() + .filter(|id| guard.physical_chunk(id).is_none()) + .copied() + .collect() + }; + + if to_load.is_empty() { + return Ok(Vec::new()); + } + + // 2. Read from disk — NO store lock held. + // Returns `CodecError::ChunkNotInManifest` if any ID is unknown. + let loaded = { + let mut file = self.file.lock(); + re_log_encoding::read_chunks(&mut file, &self.manifest, &to_load)? + }; + + // 3. Insert into store. 
+ let mut store = self.store.write(); + for chunk in &loaded { + // insert_chunk on an already-present ChunkId is a no-op. + store.insert_chunk(chunk)?; + } + + Ok(loaded) + } + + /// Load all chunks from the RRD file into the store. + pub fn load_all_chunks(&self) -> ChunkStoreResult<()> { + self.load_chunks(self.manifest.col_chunk_ids())?; + Ok(()) + } + + /// The store's schema, populated from the manifest (available without loading chunks). + #[inline] + pub fn schema(&self) -> StoreSchema { + self.store.read().schema().clone() + } + + /// The entity tree, populated from the manifest (available without loading chunks). + pub fn entity_tree(&self) -> EntityTree { + self.store.read().entity_tree().clone() + } + + /// The number of chunks described by the manifest (physical + virtual). + pub fn num_chunks(&self) -> usize { + self.manifest.num_chunks() + } + + /// The number of chunks currently loaded in memory. + pub fn num_physical_chunks(&self) -> usize { + self.store.read().num_physical_chunks() + } + + /// Whether a specific chunk is currently loaded in memory. + pub fn has_physical_chunk(&self, chunk_id: &ChunkId) -> bool { + self.store.read().physical_chunk(chunk_id).is_some() + } + + /// Load all chunks, then return a compacted copy of the store. + pub fn compacted(&self, options: &crate::CompactionOptions) -> ChunkStoreResult { + self.load_all_chunks()?; + self.store.read().compacted(options) + } + + /// Load all chunks and return them. + pub fn collect_physical_chunks(&self) -> ChunkStoreResult>> { + self.load_all_chunks()?; + Ok(self.store.read().iter_physical_chunks().cloned().collect()) + } + + /// Path to the source RRD file. + pub fn rrd_path(&self) -> &Path { + &self.rrd_path + } + + /// The parsed manifest for this store. + pub fn manifest(&self) -> &Arc { + &self.manifest + } + + /// The raw manifest as-parsed from the RRD footer, before validation/extraction. 
+ /// + /// Kept around so the server can synthesize `GetRrdManifest` responses without materializing + /// chunks: the footer already contains everything a client needs to pick which chunks to fetch. + pub fn raw_manifest(&self) -> &Arc { + &self.raw_manifest + } + + /// Look up the manifest row index for a given chunk ID. + pub fn chunk_row_index(&self, chunk_id: &ChunkId) -> Option { + self.chunk_id_to_index.get(chunk_id).copied() + } + + /// Per-chunk timeline ranges. + pub fn timeline_ranges(&self) -> &HashMap> { + &self.timeline_ranges + } + + /// The store ID (from the manifest, no store lock needed). + pub fn store_id(&self) -> &StoreId { + self.manifest.store_id() + } + + /// All entity paths known to this store (populated from the virtual index). + pub fn all_entities(&self) -> IntSet { + self.store.read().all_entities() + } + + /// Get a physical chunk by ID if it's already loaded. Returns `None` for + /// virtual-only chunks — use [`Self::load_chunks`] to materialize them first. + pub fn physical_chunk(&self, id: &ChunkId) -> Option> { + self.store.read().physical_chunk(id).cloned() + } + + /// Extract properties, automatically loading the required property chunks + /// on demand if they are still virtual. + //TODO(RR-4458): currently takes one disk round-trip per property entity with virtual + // chunks because `ChunkStore::extract_properties` short-circuits on the first missing + // entity. Once it reports the full union of missing chunks, this will converge in a + // single retry. + pub fn extract_properties(&self) -> Result { + self.with_autoload(|store| store.extract_properties()) + } + + /// Run an operation against the inner [`ChunkStore`], auto-loading any chunks the + /// operation reports as missing and retrying until it succeeds or returns a different + /// error. 
+ /// + /// The closure receives `&ChunkStore` rather than `&self`, which structurally prevents + /// the read guard from escaping a single iteration — [`Self::load_chunks`] needs the + /// write lock, and holding a read guard across that call would deadlock. + /// + /// A generous fixed attempt cap guards against a bug downstream (e.g. `load_chunks` + /// silently no-ops while `MissingData` keeps being reported): exceeding it surfaces + /// as an `Internal` error instead of spinning forever. In practice this loop converges + /// in a handful of iterations; the cap is a paranoia valve, not a tight bound. + fn with_autoload(&self, mut op: F) -> Result + where + F: FnMut(&ChunkStore) -> Result, + { + const MAX_AUTOLOAD_ATTEMPTS: usize = 1024; + for _ in 0..MAX_AUTOLOAD_ATTEMPTS { + // IMPORTANT: bind to a local first so the read-guard temporary from + // `self.store.read()` is dropped at this statement's semicolon. Matching on + // `op(&self.store.read())` directly would extend the scrutinee's temporaries + // through the arms and `self.load_chunks` (write lock) would deadlock. + let result = op(&self.store.read()); + match result { + Err(ExtractPropertiesError::MissingData(missing_ids)) => { + self.load_chunks(&missing_ids) + .map_err(|err| ExtractPropertiesError::Internal(err.to_string()))?; + } + other => return other, + } + } + Err(ExtractPropertiesError::Internal(format!( + "autoload did not converge after {MAX_AUTOLOAD_ATTEMPTS} attempts" + ))) + } + + /// Run a latest-at query against the virtual index. + /// + /// Returns [`QueryResults`] with physical chunks in `chunks` and + /// not-yet-loaded chunk IDs in `missing_virtual`. 
+ pub fn latest_at_relevant_chunks_for_all_components( + &self, + report_mode: ChunkTrackingMode, + query: &LatestAtQuery, + entity_path: &EntityPath, + include_static: bool, + ) -> QueryResults { + self.store + .read() + .latest_at_relevant_chunks_for_all_components( + report_mode, + query, + entity_path, + include_static, + ) + } + + /// Run a range query against the virtual index. + /// + /// Returns [`QueryResults`] with physical chunks in `chunks` and + /// not-yet-loaded chunk IDs in `missing_virtual`. + pub fn range_relevant_chunks_for_all_components( + &self, + report_mode: ChunkTrackingMode, + query: &RangeQuery, + entity_path: &EntityPath, + include_static: bool, + ) -> QueryResults { + self.store.read().range_relevant_chunks_for_all_components( + report_mode, + query, + entity_path, + include_static, + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use re_chunk::{RowId, TimePoint, Timeline}; + use re_log_encoding::EncodingOptions; + use re_log_types::{ + EntityPath, LogMsg, SetStoreInfo, StoreId, StoreInfo, StoreKind, StoreSource, + example_components::{MyPoint, MyPoints}, + }; + + /// Helper: create test chunks and encode to RRD file. + /// Returns `(path, open file handle, store_id, chunks)`. 
+ fn create_test_rrd( + dir: &Path, + num_entities: usize, + num_frames: usize, + ) -> (PathBuf, File, StoreId, Vec>) { + let path = dir.join("test.rrd"); + let store_id = StoreId::random(StoreKind::Recording, "test"); + let store_info = StoreInfo::new(store_id.clone(), StoreSource::Unknown); + let timeline = Timeline::new_sequence("frame"); + + let mut chunks = Vec::new(); + for entity_idx in 0..num_entities { + for frame_idx in 0..num_frames { + let entity_path = EntityPath::from(format!("/entity_{entity_idx}")); + let row_id = RowId::new(); + let points = MyPoint::from_iter(frame_idx as u32..frame_idx as u32 + 1); + let chunk = Chunk::builder(entity_path) + .with_sparse_component_batches( + row_id, + #[expect(clippy::cast_possible_wrap)] + TimePoint::default().with(timeline, frame_idx as i64), + [(MyPoints::descriptor_points(), Some(&points as _))], + ) + .build() + .unwrap(); + chunks.push(Arc::new(chunk)); + } + } + + // Encode to file. + let set_store_info = LogMsg::SetStoreInfo(SetStoreInfo { + row_id: *RowId::ZERO, + info: store_info, + }); + let mut file = std::fs::File::create(&path).unwrap(); + let mut encoder = re_log_encoding::Encoder::new_eager( + re_log_encoding::CrateVersion::LOCAL, + EncodingOptions::PROTOBUF_COMPRESSED, + &mut file, + ) + .unwrap(); + encoder.append(&set_store_info).unwrap(); + for chunk in &chunks { + let arrow_msg = chunk.to_arrow_msg().unwrap(); + let msg = LogMsg::ArrowMsg(store_id.clone(), arrow_msg); + encoder.append(&msg).unwrap(); + } + encoder.finish().unwrap(); + + // Re-open for reading. 
+ let file = File::open(&path).unwrap(); + (path, file, store_id, chunks) + } + + fn read_raw_manifest(file: &mut File, store_id: &StoreId) -> Arc { + let footer = re_log_encoding::read_rrd_footer(file).unwrap().unwrap(); + Arc::new(footer.manifests[store_id].clone()) + } + + #[test] + fn test_lazy_store_no_physical_chunks() { + let dir = tempfile::tempdir().unwrap(); + let (path, mut file, store_id, chunks) = create_test_rrd(dir.path(), 2, 3); + let raw = read_raw_manifest(&mut file, &store_id); + + let lazy = LazyRrdStore::try_new(file, path, raw).unwrap(); + + assert_eq!(lazy.num_physical_chunks(), 0); + assert_eq!( + lazy.manifest().col_chunk_ids().len(), + chunks.len(), + "All chunk IDs should be in manifest" + ); + } + + #[test] + fn test_lazy_store_entities_visible() { + let dir = tempfile::tempdir().unwrap(); + let (path, mut file, store_id, _) = create_test_rrd(dir.path(), 3, 2); + let raw = read_raw_manifest(&mut file, &store_id); + + let lazy = LazyRrdStore::try_new(file, path, raw).unwrap(); + let entity_tree = lazy.entity_tree(); + + let mut entities = Vec::new(); + entity_tree.visit_children_recursively(|path| { + if !path.is_root() { + entities.push(path.clone()); + } + }); + // 3 entities + intermediate paths + assert!(entities.len() >= 3, "Should have at least 3 leaf entities"); + } + + #[test] + fn test_lazy_store_load_all() { + let dir = tempfile::tempdir().unwrap(); + let (path, mut file, store_id, chunks) = create_test_rrd(dir.path(), 2, 3); + let raw = read_raw_manifest(&mut file, &store_id); + + let lazy = LazyRrdStore::try_new(file, path, raw).unwrap(); + let loaded = lazy.collect_physical_chunks().unwrap(); + assert_eq!(loaded.len(), chunks.len()); + } + + #[test] + fn test_lazy_store_load_single_chunk() { + let dir = tempfile::tempdir().unwrap(); + let (path, mut file, store_id, chunks) = create_test_rrd(dir.path(), 2, 3); + let raw = read_raw_manifest(&mut file, &store_id); + + let lazy = LazyRrdStore::try_new(file, path, raw).unwrap(); + 
let first_chunk_id = lazy.manifest().col_chunk_ids()[0]; + let loaded = lazy.load_chunks(&[first_chunk_id]).unwrap(); + + assert_eq!(loaded.len(), 1); + assert_eq!(lazy.num_physical_chunks(), 1); + assert!(lazy.has_physical_chunk(&first_chunk_id)); + + // Other chunks are still virtual. + let total_chunks = chunks.len(); + assert!(total_chunks > 1); + } + + #[test] + fn test_lazy_store_load_idempotent() { + let dir = tempfile::tempdir().unwrap(); + let (path, mut file, store_id, _) = create_test_rrd(dir.path(), 1, 3); + let raw = read_raw_manifest(&mut file, &store_id); + + let lazy = LazyRrdStore::try_new(file, path, raw).unwrap(); + lazy.load_all_chunks().unwrap(); + + let count_before = lazy.num_physical_chunks(); + + // Loading again should be a no-op. + let loaded = lazy.load_chunks(lazy.manifest().col_chunk_ids()).unwrap(); + assert!(loaded.is_empty(), "Already-loaded chunks should be skipped"); + + let count_after = lazy.num_physical_chunks(); + assert_eq!(count_before, count_after); + } + + #[test] + fn test_lazy_store_schema() { + let dir = tempfile::tempdir().unwrap(); + let (path, mut file, store_id, _) = create_test_rrd(dir.path(), 2, 3); + let raw = read_raw_manifest(&mut file, &store_id); + + let lazy = LazyRrdStore::try_new(file, path, raw).unwrap(); + let schema = lazy.schema(); + + // Schema should be non-empty even without physical chunks. + let columns = schema.chunk_column_descriptors(); + assert!( + !columns.components.is_empty() || !columns.indices.is_empty(), + "Schema should be populated from manifest" + ); + } + + #[test] + fn test_lazy_vs_eager_equivalence() { + let dir = tempfile::tempdir().unwrap(); + let (path, mut file, store_id, _) = create_test_rrd(dir.path(), 2, 3); + let raw = read_raw_manifest(&mut file, &store_id); + + // Lazy path: create lazy store, load all chunks. + let lazy = LazyRrdStore::try_new(file, path.clone(), raw).unwrap(); + lazy.load_all_chunks().unwrap(); + + // Eager path: load the same file fully. 
+ let eager_stores = + ChunkStore::from_rrd_filepath(&ChunkStoreConfig::ALL_DISABLED, &path).unwrap(); + let eager_store = eager_stores.into_values().next().unwrap(); + + let collect_entities = |tree: &crate::EntityTree| { + let mut entities = Vec::new(); + tree.visit_children_recursively(|path| { + if !path.is_root() { + entities.push(path.clone()); + } + }); + entities.sort(); + entities + }; + let lazy_entities = collect_entities(&lazy.entity_tree()); + let eager_entities = collect_entities(eager_store.entity_tree()); + + assert_eq!(lazy_entities, eager_entities, "Same entities"); + assert_eq!( + lazy.num_physical_chunks(), + eager_store.num_physical_chunks(), + "Same number of physical chunks" + ); + } +} diff --git a/crates/store/re_chunk_store/src/lib.rs b/crates/store/re_chunk_store/src/lib.rs index 499791e8d18b..487cba093837 100644 --- a/crates/store/re_chunk_store/src/lib.rs +++ b/crates/store/re_chunk_store/src/lib.rs @@ -14,16 +14,24 @@ #![doc = document_features::document_features!()] //! 
+mod compact; mod dataframe; + mod drop_time_range; +pub mod entity_tree; mod events; mod gc; +#[cfg(not(target_arch = "wasm32"))] +mod lazy_rrd_store; mod lineage; mod missing_chunk_reporter; mod properties; mod query; +mod rebatch_videos; +mod split_thick_thin; mod stats; mod store; +mod store_schema; mod subscribers; mod writes; @@ -38,13 +46,16 @@ pub use { re_sorbet::{ColumnDescriptor, ComponentColumnDescriptor, IndexColumnDescriptor}, }; +pub use self::compact::{CompactionOptions, IsStartOfGop}; pub use self::dataframe::{ Index, IndexRange, IndexValue, QueryExpression, SparseFillStrategy, StaticColumnSelection, ViewContentsSelector, }; +pub use self::entity_tree::EntityTree; pub use self::events::{ - ChunkComponentMeta, ChunkMeta, ChunkStoreDiff, ChunkStoreDiffAddition, ChunkStoreDiffDeletion, - ChunkStoreDiffVirtualAddition, ChunkStoreEvent, + ChunkComponentMeta, ChunkDeletionReason, ChunkMeta, ChunkStoreDiff, ChunkStoreDiffAddition, + ChunkStoreDiffDeletion, ChunkStoreDiffSchemaAddition, ChunkStoreDiffVirtualAddition, + ChunkStoreEvent, }; pub use self::gc::{GarbageCollectionOptions, GarbageCollectionTarget}; pub use self::lineage::{ChunkDirectLineage, ChunkDirectLineageReport}; @@ -56,10 +67,14 @@ pub use self::store::{ ChunkStore, ChunkStoreConfig, ChunkStoreGeneration, ChunkStoreHandle, ChunkStoreHandleWeak, ColumnMetadata, QueriedChunkIdTracker, }; +pub use self::store_schema::StoreSchema; pub use self::subscribers::{ ChunkStoreSubscriber, ChunkStoreSubscriberHandle, PerStoreChunkSubscriber, }; +#[cfg(not(target_arch = "wasm32"))] +pub use self::lazy_rrd_store::LazyRrdStore; + pub(crate) use self::store::ColumnMetadataState; pub mod external { @@ -104,6 +119,7 @@ pub enum ChunkTrackingMode { /// Panic when a chunk is missing. /// - /// Only use this in tests! + /// Only use this in tests, or contexts where there really can't be + /// any virtual chunks, and you rather panic than have silent bugs. 
PanicOnMissing, } diff --git a/crates/store/re_chunk_store/src/lineage.rs b/crates/store/re_chunk_store/src/lineage.rs index a4c1c70be7bc..e49d59c793dd 100644 --- a/crates/store/re_chunk_store/src/lineage.rs +++ b/crates/store/re_chunk_store/src/lineage.rs @@ -4,7 +4,6 @@ use std::sync::Arc; use itertools::Itertools as _; use re_chunk::{Chunk, ChunkId}; -use re_log_encoding::RrdManifest; use crate::ChunkStore; @@ -19,7 +18,7 @@ use crate::ChunkStore; /// This makes it usable in virtual contexts where lineage information alone should never force the /// underlying data to remain in local memory, such as the store's virtual indexes. /// Use [`ChunkDirectLineage::to_report`] to generate a [`ChunkDirectLineageReport`] instead. -#[derive(Clone, PartialEq)] +#[derive(Clone, PartialEq, Eq)] pub enum ChunkDirectLineage { /// This chunk resulted from the splitting of that other chunk. It must have siblings, somewhere. /// @@ -58,7 +57,7 @@ pub enum ChunkDirectLineage { /// /// Even if it gets garbage collected, it can be re-fetched as needed (as long as the backing /// Redap server is still available). - ReferencedFrom(Arc), + RootFromManifest { is_static: bool }, /// This chunk's data was originally logged from volatile memory. /// @@ -73,10 +72,7 @@ impl re_byte_size::SizeBytes for ChunkDirectLineage { chunk_id.heap_size_bytes() + chunk_ids.heap_size_bytes() } Self::CompactedFrom(btree_set) => btree_set.heap_size_bytes(), - Self::ReferencedFrom(_rrd_manifest) => { - 0 // calculating the size of each RrdManifest over and over again is too slow. It is also amortized, so doesn't matter much. - } - Self::Volatile => 0, + Self::RootFromManifest { .. 
} | Self::Volatile => 0, } } } @@ -94,9 +90,8 @@ impl std::fmt::Debug for ChunkDirectLineage { chunk_ids.iter().join(", ") )), - Self::ReferencedFrom(rrd_manifest) => { - // We don't compute the sha256 here, because it is too expensive - write!(f, "origin:{rrd_manifest:?}") + Self::RootFromManifest { is_static } => { + write!(f, "origin:(static: {is_static})") } Self::Volatile => f.write_str("origin: (cannot be re-fetched)"), @@ -122,9 +117,9 @@ impl From<&ChunkDirectLineageReport> for ChunkDirectLineage { Self::CompactedFrom(chunks.keys().copied().collect()) } - ChunkDirectLineageReport::ReferencedFrom(rrd_manifest) => { - Self::ReferencedFrom(rrd_manifest.clone()) - } + ChunkDirectLineageReport::RootFromManifest { is_static } => Self::RootFromManifest { + is_static: *is_static, + }, ChunkDirectLineageReport::Volatile => Self::Volatile, } @@ -165,9 +160,11 @@ impl ChunkDirectLineage { Some(ChunkDirectLineageReport::CompactedFrom(chunks)) } - Self::ReferencedFrom(rrd_manifest) => Some(ChunkDirectLineageReport::ReferencedFrom( - rrd_manifest.clone(), - )), + Self::RootFromManifest { is_static } => { + Some(ChunkDirectLineageReport::RootFromManifest { + is_static: *is_static, + }) + } Self::Volatile => Some(ChunkDirectLineageReport::Volatile), } @@ -223,7 +220,7 @@ pub enum ChunkDirectLineageReport { /// /// Even if it gets garbage collected, it can be re-fetched as needed (as long as the backing /// Redap server is still available). - ReferencedFrom(Arc), + RootFromManifest { is_static: bool }, /// This chunk's data was originally logged from volatile memory. 
/// @@ -243,8 +240,8 @@ impl std::fmt::Debug for ChunkDirectLineageReport { .debug_map() .entries(map.iter().map(|(k, v)| (k, v.id()))) .finish(), - Self::ReferencedFrom(_manifest) => { - f.debug_tuple("ReferencedFrom").finish_non_exhaustive() + Self::RootFromManifest { is_static } => { + write!(f, "RootFromManifest(static: {is_static})") } Self::Volatile => write!(f, "Volatile"), } @@ -263,15 +260,12 @@ impl ChunkStore { } // OTOH, if it has been offloaded, now we need to track down its roots and determine - // from an RRD manifest whether it is static or not, if possible. - for (_, rrd_manifest) in store.find_root_rrd_manifests(chunk_id) { - for (id, is_static) in itertools::izip!( - rrd_manifest.col_chunk_ids(), - rrd_manifest.col_chunk_is_static(), - ) { - if chunk_id == id { - return if is_static { "yes" } else { "no" }; - } + // whether it is static or not from the lineage info. + for root_id in store.find_root_manifest_chunks(chunk_id) { + if let Some(ChunkDirectLineage::RootFromManifest { is_static }) = + store.chunks_lineage.get(&root_id) + { + return if *is_static { "yes" } else { "no" }; } } @@ -349,7 +343,7 @@ impl ChunkStore { }; matches!( lineage, - ChunkDirectLineage::ReferencedFrom(_) | ChunkDirectLineage::Volatile + ChunkDirectLineage::RootFromManifest { .. } | ChunkDirectLineage::Volatile ) } @@ -359,7 +353,7 @@ impl ChunkStore { /// possible (and even common) for a chunk to have more than one root. /// /// The resulting root chunks might or might not be volatile. - /// If you only care about chunks that are still available for download, see [`Self::find_root_rrd_manifests`]. + /// If you only care about chunks that are still available for download, see [`Self::find_root_manifest_chunks`]. 
pub fn find_root_chunks(&self, chunk_id: &ChunkId) -> Vec { let mut roots = Vec::new(); self.collect_root_ids(chunk_id, &mut roots); @@ -380,7 +374,7 @@ impl ChunkStore { } } - Some(ChunkDirectLineage::ReferencedFrom(_) | ChunkDirectLineage::Volatile) => { + Some(ChunkDirectLineage::RootFromManifest { .. } | ChunkDirectLineage::Volatile) => { roots.push(*chunk_id); } @@ -395,34 +389,30 @@ impl ChunkStore { /// one RRD manifest. /// /// The resulting root chunks are guaranteed to be backed by an RRD manifest (non-volatile). - /// If you want to find all root chunks regardless of their origin, refer to [`Self::find_root_rrd_manifests`] + /// If you want to find all root chunks regardless of their origin, refer to [`Self::find_root_chunks`] /// instead. - pub fn find_root_rrd_manifests(&self, chunk_id: &ChunkId) -> Vec<(ChunkId, Arc)> { + pub fn find_root_manifest_chunks(&self, chunk_id: &ChunkId) -> Vec { let mut roots = Vec::new(); - self.collect_root_rrd_manifests(chunk_id, &mut roots); + self.collect_root_manifest_chunks(chunk_id, &mut roots); roots } - /// See [`Self::find_root_rrd_manifests`]. - pub fn collect_root_rrd_manifests( - &self, - chunk_id: &ChunkId, - roots: &mut Vec<(ChunkId, Arc)>, - ) { + /// See [`Self::find_root_manifest_chunks`]. + fn collect_root_manifest_chunks(&self, chunk_id: &ChunkId, roots: &mut Vec) { let lineage = self.chunks_lineage.get(chunk_id); match lineage { Some(ChunkDirectLineage::SplitFrom(chunk_id, _sibling_ids)) => { - self.collect_root_rrd_manifests(chunk_id, roots); + self.collect_root_manifest_chunks(chunk_id, roots); } Some(ChunkDirectLineage::CompactedFrom(chunk_ids)) => { for chunk_id in chunk_ids { - self.collect_root_rrd_manifests(chunk_id, roots); + self.collect_root_manifest_chunks(chunk_id, roots); } } - Some(ChunkDirectLineage::ReferencedFrom(rrd_manifest)) => { - roots.push((*chunk_id, rrd_manifest.clone())); + Some(ChunkDirectLineage::RootFromManifest { .. 
}) => { + roots.push(*chunk_id); } _ => {} @@ -541,6 +531,7 @@ impl ChunkStore { #[expect(clippy::bool_assert_comparison)] // I like it that way, sue me mod tests { use re_chunk::{Chunk, EntityPath, RowId, Timeline}; + use re_log_encoding::RrdManifest; use re_log_types::StoreId; use re_log_types::example_components::{MyPoint, MyPoints}; use re_log_types::external::re_tuid::Tuid; @@ -593,8 +584,7 @@ mod tests { for chunk in &chunks { let events = store.insert_chunk(chunk).unwrap(); - for event in events { - let diff = event.to_addition().unwrap(); + for diff in events.iter().filter_map(|event| event.to_addition()) { if let ChunkDirectLineageReport::SplitFrom(src, _siblings) = &diff.direct_lineage { assert_eq!( diff.chunk_before_processing.id(), @@ -616,7 +606,7 @@ mod tests { "all these chunks' respective roots should come from the starting set" ); assert!( - store.find_root_rrd_manifests(&chunk.id()).is_empty(), + store.find_root_manifest_chunks(&chunk.id()).is_empty(), "none of these chunks should have a root RRD manifest" ); } @@ -669,7 +659,7 @@ mod tests { ); // Load it virtually. - store.insert_rrd_manifest(rrd_manifest.clone()).unwrap(); + let _ignored_events = store.insert_rrd_manifest(rrd_manifest.clone()); // Load it physically. 
for chunk in &chunks { @@ -684,8 +674,11 @@ mod tests { assert_eq!(chunk.id(), diff.chunk.id(), "ghost index"); } - for event in events.into_iter().skip(1) { - let diff = event.to_addition().unwrap(); + for diff in events + .iter() + .filter_map(|event| event.to_addition()) + .skip(1) + { if let ChunkDirectLineageReport::SplitFrom(src, _siblings) = &diff.direct_lineage { assert_eq!( diff.chunk_before_processing.id(), @@ -710,12 +703,11 @@ mod tests { "all these chunks' respective roots should come from the starting set" ); - for (root_chunk_id, root_manifest) in store.find_root_rrd_manifests(&chunk.id()) { + for root_chunk_id in store.find_root_manifest_chunks(&chunk.id()) { assert!( chunks.iter().any(|c| c.id() == root_chunk_id), "all these chunks' respective roots should come from the starting manifest", ); - assert_eq!(rrd_manifest, root_manifest); } } } @@ -756,12 +748,16 @@ mod tests { // We will end up with 4 split chunks. let events = store.insert_chunk(&chunk).unwrap(); - assert_eq!(4, events.len()); - for event in &events { + assert_eq!(5, events.len()); + assert!( + events[4].is_schema_addition(), + "the first write should emit a schema addition for newly seen columns" + ); + for event in &events[..4] { assert_eq!(true, event.is_addition()); // Check that splits are always flattened, very important! - let siblings = events + let siblings = events[..4] .iter() .filter(|e| e.delta_chunk().unwrap().id() != event.delta_chunk().unwrap().id()) .map(|e| e.delta_chunk().unwrap().clone()) @@ -856,10 +852,14 @@ mod tests { // We will end up with 2 split chunks, both below the num_rows threshold. 
let events = store.insert_chunk(&chunk1).unwrap(); - assert_eq!(2, events.len()); - for event in events { + assert_eq!(3, events.len()); + for event in &events[..2] { assert_eq!(true, event.is_addition()); } + assert!( + events[2].is_schema_addition(), + "the first write should emit a schema addition for newly seen columns" + ); assert_eq!(2, store.num_physical_chunks()); for chunk in store.iter_physical_chunks() { @@ -878,7 +878,8 @@ mod tests { { let chunk_ids = store .physical_chunk_ids_per_min_row_id - .values() + .iter() + .map(|(_, id)| id) .collect_vec(); assert_eq!(true, store.descends_from_a_split(chunk_ids[0])); @@ -909,7 +910,8 @@ mod tests { { let chunk_ids = store .physical_chunk_ids_per_min_row_id - .values() + .iter() + .map(|(_, id)| id) .collect_vec(); assert_eq!(true, store.descends_from_a_split(chunk_ids[0])); @@ -959,15 +961,16 @@ mod tests { let chunk2 = build_chunk(9); let events = store.insert_chunk(&chunk1).unwrap(); - assert_eq!(1, events.len()); - for event in events { - assert_eq!(true, event.is_addition()); - } + assert_eq!(2, events.len()); + assert_eq!(true, events[0].is_addition()); + assert!( + events[1].is_schema_addition(), + "the first write should emit a schema addition for newly seen columns" + ); + let events = store.insert_chunk(&chunk2).unwrap(); assert_eq!(1, events.len()); - for event in events { - assert_eq!(true, event.is_addition()); - } + assert_eq!(true, events[0].is_addition()); // The chunks should just not get compacted since the result would be beyond the num_rows // threshold, and therefore will never be split either since there will never be a chunk @@ -1022,11 +1025,21 @@ mod tests { let chunks = (0..10).map(|_| build_chunk(1)).collect_vec(); let mut prev_chunk: Option> = None; + let mut is_first_insert = true; for chunk in chunks { let mut events = store.insert_chunk(&chunk).unwrap(); - assert_eq!(1, events.len()); + if is_first_insert { + assert_eq!(2, events.len()); + assert!( + 
events[1].is_schema_addition(), + "the first write should emit a schema addition for newly seen columns" + ); + is_first_insert = false; + } else { + assert_eq!(1, events.len()); + } - let event = events.pop().unwrap(); + let event = events.remove(0); let event = event.to_addition().unwrap(); assert_eq!(chunk.id(), event.chunk_before_processing.id()); @@ -1043,8 +1056,8 @@ mod tests { store.descends_from_a_split(&event.chunk_after_processing.id()) ); + let lineage: ChunkDirectLineage = event.direct_lineage.clone().into(); if let Some(prev_chunk) = prev_chunk.take() { - let lineage: ChunkDirectLineage = event.direct_lineage.clone().into(); let expected = ChunkDirectLineage::CompactedFrom( [chunk.id(), prev_chunk.id()].into_iter().collect(), ); @@ -1054,7 +1067,6 @@ mod tests { store.descends_from_a_compaction(&event.chunk_after_processing.id()) ); } else { - let lineage: ChunkDirectLineage = event.direct_lineage.clone().into(); let expected = ChunkDirectLineage::Volatile; assert_eq!(expected, lineage); assert_eq!( diff --git a/crates/store/re_chunk_store/src/properties.rs b/crates/store/re_chunk_store/src/properties.rs index 094dde4cbe70..461bad7e515a 100644 --- a/crates/store/re_chunk_store/src/properties.rs +++ b/crates/store/re_chunk_store/src/properties.rs @@ -41,37 +41,51 @@ impl ChunkStore { let mut fields = vec![]; let mut data = vec![]; - for entity in self + // Sweep all property entities first and collect the union of missing virtual chunks + // across all of them. This way callers that auto-load (e.g. `LazyRrdStore::extract_properties`) + // see the full batch in one shot and converge in a single retry instead of one disk + // round-trip per entity. 
+ let per_entity: Vec<(EntityPath, QueryResults)> = self .all_entities() .into_iter() .filter(EntityPath::is_property) - { - let QueryResults { chunks, missing_virtual } = self - // TODO(zehiko) we should be able to get static chunks without specifying the timeline - .latest_at_relevant_chunks_for_all_components( - ChunkTrackingMode::Report, - &LatestAtQuery::new( - TimelineName::log_tick(), /* timeline is irrelevant, these are static chunks */ - TimeInt::MIN, - ), - &entity, - true, /* yes, we want static chunks */ - ); - - if !missing_virtual.is_empty() { - return Err(ExtractPropertiesError::MissingData(missing_virtual)); - } + .map(|entity| { + let results = self + // TODO(zehiko) we should be able to get static chunks without specifying the timeline + .latest_at_relevant_chunks_for_all_components( + ChunkTrackingMode::Report, + &LatestAtQuery::new( + TimelineName::log_tick(), /* timeline is irrelevant, these are static chunks */ + TimeInt::MIN, + ), + &entity, + true, /* yes, we want static chunks */ + ); + (entity, results) + }) + .collect(); + + let all_missing: Vec<_> = per_entity + .iter() + .flat_map(|(_, qr)| qr.missing_virtual.iter().copied()) + .collect(); + if !all_missing.is_empty() { + return Err(ExtractPropertiesError::MissingData(all_missing)); + } + for (entity, QueryResults { chunks, .. 
}) in per_entity { for chunk in chunks { for component_desc in chunk.component_descriptors() { let component = component_desc.component; // it's possible to have multiple values for the same component, hence we take the latest value - let chunk_comp_latest = chunk.latest_at( + let Some(chunk_comp_latest) = chunk.latest_at( /* same as above, timeline is irrelevant as these are static chunks */ &LatestAtQuery::new(TimelineName::log_tick(), TimeInt::MIN), component, - ); + ) else { + continue; + }; let (_, column) = chunk_comp_latest .components() .iter() diff --git a/crates/store/re_chunk_store/src/query.rs b/crates/store/re_chunk_store/src/query.rs index d95b8e566816..e58b6b768da7 100644 --- a/crates/store/re_chunk_store/src/query.rs +++ b/crates/store/re_chunk_store/src/query.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::BTreeSet; use std::sync::Arc; use itertools::{Either, Itertools as _}; @@ -6,9 +6,11 @@ use nohash_hasher::IntSet; use re_log::debug_assert; use saturating_cast::SaturatingCast as _; -use re_chunk::{Chunk, ChunkId, ComponentIdentifier, LatestAtQuery, RangeQuery, TimelineName}; -use re_log_types::{AbsoluteTimeRange, EntityPath, TimeInt, Timeline}; -use re_types_core::{ComponentDescriptor, ComponentSet, UnorderedComponentSet}; +use re_chunk::{ + Chunk, ChunkId, ComponentIdentifier, LatestAtQuery, RangeQuery, TimeColumn, TimelineName, +}; +use re_log_types::{AbsoluteTimeRange, EntityPath, TimeInt}; +use re_types_core::{ComponentSet, UnorderedComponentSet}; use crate::{ChunkStore, ChunkTrackingMode}; // Used all over in docstrings. @@ -25,15 +27,6 @@ use crate::store::ChunkIdSetPerTime; // Meta queries impl ChunkStore { - /// Retrieve all [`Timeline`]s in the store. - #[inline] - pub fn timelines(&self) -> BTreeMap { - self.time_type_registry - .iter() - .map(|(name, typ)| (*name, Timeline::new(*name, *typ))) - .collect() - } - /// Retrieve all [`EntityPath`]s in the store. 
#[inline] pub fn all_entities(&self) -> IntSet { @@ -169,7 +162,7 @@ impl ChunkStore { } /// Retrieve all the [`ComponentIdentifier`]s that have been written to for a given [`EntityPath`] on - /// the specified [`Timeline`]. + /// the specified [`re_chunk::Timeline`]. /// /// Static components are always included in the results. /// @@ -218,7 +211,7 @@ impl ChunkStore { } /// Retrieve all the [`ComponentIdentifier`]s that have been written to for a given [`EntityPath`] on - /// the specified [`Timeline`]. + /// the specified [`re_chunk::Timeline`]. /// /// Static components are always included in the results. /// @@ -266,97 +259,6 @@ impl ChunkStore { } } - /// Retrieve all the [`ComponentIdentifier`]s that have been written to for a given [`EntityPath`]. - /// - /// Static components are always included in the results. - /// - /// Returns `None` if the entity has never had any data logged to it. - pub fn all_components_for_entity( - &self, - entity_path: &EntityPath, - ) -> Option { - re_tracing::profile_function!(); - - let static_components: Option = self - .static_chunk_ids_per_entity - .get(entity_path) - .map(|static_chunks_per_component| { - static_chunks_per_component.keys().copied().collect() - }); - - let temporal_components: Option = self - .temporal_chunk_ids_per_entity_per_component - .get(entity_path) - .map(|temporal_chunk_ids_per_timeline| { - temporal_chunk_ids_per_timeline - .iter() - .flat_map(|(_, temporal_chunk_ids_per_component)| { - temporal_chunk_ids_per_component.keys().copied() - }) - .collect() - }); - - match (static_components, temporal_components) { - (None, None) => None, - (None, comps @ Some(_)) | (comps @ Some(_), None) => comps, - (Some(static_comps), Some(temporal_comps)) => { - Some(static_comps.into_iter().chain(temporal_comps).collect()) - } - } - } - - /// Retrieve all the [`ComponentIdentifier`]s that have been written to for a given [`EntityPath`]. - /// - /// Static components are always included in the results. 
- /// - /// Returns `None` if the entity has never had any data logged to it. - pub fn all_components_for_entity_sorted( - &self, - entity_path: &EntityPath, - ) -> Option { - re_tracing::profile_function!(); - - let static_components: Option = self - .static_chunk_ids_per_entity - .get(entity_path) - .map(|static_chunks_per_component| { - static_chunks_per_component.keys().copied().collect() - }); - - let temporal_components: Option = self - .temporal_chunk_ids_per_entity_per_component - .get(entity_path) - .map(|temporal_chunk_ids_per_timeline| { - temporal_chunk_ids_per_timeline - .iter() - .flat_map(|(_, temporal_chunk_ids_per_component)| { - temporal_chunk_ids_per_component.keys().copied() - }) - .collect() - }); - - match (static_components, temporal_components) { - (None, None) => None, - (None, comps @ Some(_)) | (comps @ Some(_), None) => comps, - (Some(static_comps), Some(temporal_comps)) => { - Some(static_comps.into_iter().chain(temporal_comps).collect()) - } - } - } - - /// Retrieves the [`ComponentDescriptor`] at a given [`EntityPath`] that has a certain [`ComponentIdentifier`]. - // TODO(andreas): The descriptor for a given identifier should never change within a recording. - pub fn entity_component_descriptor( - &self, - entity_path: &EntityPath, - component: ComponentIdentifier, - ) -> Option { - self.per_column_metadata - .get(entity_path) - .and_then(|per_identifier| per_identifier.get(&component)) - .map(|(component_descr, _, _)| component_descr.clone()) - } - /// Check whether an entity has a static component or a temporal component on the specified timeline. /// /// This does _not_ check if the entity actually currently holds any data for that component. @@ -464,7 +366,20 @@ impl ChunkStore { || self.entity_has_physical_temporal_data_on_timeline(entity_path, timeline) } - /// Check whether an entity has any physical static data or any temporal data on any timeline. + /// Check whether an entity has any indexed data, physical or virtual. 
+ /// + /// Returns true if the entity has any static or temporal chunk IDs, + /// regardless of whether those chunks are currently loaded in memory. + /// + /// An entity path can exist in the schema/entity tree but return `false` here + /// if all of its chunks have been removed by garbage collection or otherwise removed. + #[inline] + pub fn entity_has_data(&self, entity_path: &EntityPath) -> bool { + self.static_chunk_ids_per_entity.contains_key(entity_path) + || self.temporal_chunk_ids_per_entity.contains_key(entity_path) + } + + /// Check whether an entity has any physical data. /// /// This is different from checking if the entity has any component, it also ensures /// that some _data_ currently exists in the store for this entity. @@ -620,6 +535,120 @@ impl ChunkStore { Some(AbsoluteTimeRange::new(*start, *end)) } + fn search_chunk_by_time( + &self, + timeline: &TimelineName, + chunk_id: &ChunkId, + search: impl Fn(&TimeColumn) -> Option, + ) -> Option { + let chunk = self.physical_chunks_per_chunk_id.get(chunk_id)?; + let time_col = chunk.timelines().get(timeline)?; + search(time_col) + } + + /// Returns the next non-static time with data on the given timeline, strictly after `after`. + /// + /// Searches physical chunks across all entities. Returns `None` if there is no later temporal data. + /// + /// This scales linearly with the number of chunks on the timeline. + pub fn next_time_on_timeline( + &self, + timeline: &TimelineName, + after: TimeInt, + ) -> Option { + re_tracing::profile_function!(); + + let mut result: Option = None; + + for per_timeline in self.temporal_chunk_ids_per_entity.values() { + let Some(per_time) = per_timeline.get(timeline) else { + continue; + }; + + // Check chunks whose start time is after our cursor. 
+ for (&start_time, chunk_ids) in per_time + .per_start_time + .range((std::ops::Bound::Excluded(after), std::ops::Bound::Unbounded)) + { + if result.is_some_and(|r| r <= start_time) { + break; + } + for chunk_id in chunk_ids { + result = opt_min( + result, + self.search_chunk_by_time(timeline, chunk_id, |tc| { + tc.find_next_time(after) + }), + ); + } + } + + // Also check chunks that start at or before `after` but may contain times after it. + for (_start_time, chunk_ids) in per_time.per_start_time.range(..=after).rev() { + for chunk_id in chunk_ids { + result = opt_min( + result, + self.search_chunk_by_time(timeline, chunk_id, |tc| { + tc.find_next_time(after) + }), + ); + } + } + } + + result + } + + /// Returns the previous non-static time with data on the given timeline, strictly before `before`. + /// + /// Searches physical chunks across all entities. Returns `None` if there is no earlier temporal data. + /// + /// This scales linearly with the number of chunks on the timeline. + pub fn prev_time_on_timeline( + &self, + timeline: &TimelineName, + before: TimeInt, + ) -> Option { + re_tracing::profile_function!(); + + let mut result: Option = None; + + for per_timeline in self.temporal_chunk_ids_per_entity.values() { + let Some(per_time) = per_timeline.get(timeline) else { + continue; + }; + + // Check chunks whose end time is before our cursor. + for (&end_time, chunk_ids) in per_time.per_end_time.range(..before).rev() { + if result.is_some_and(|r| r >= end_time) { + break; + } + for chunk_id in chunk_ids { + result = opt_max( + result, + self.search_chunk_by_time(timeline, chunk_id, |tc| { + tc.find_prev_time(before) + }), + ); + } + } + + // Also check chunks that end after `before` but may contain times before it. + for (_end_time, chunk_ids) in per_time.per_end_time.range(before..) 
{ + for chunk_id in chunk_ids { + result = opt_max( + result, + self.search_chunk_by_time(timeline, chunk_id, |tc| { + tc.find_prev_time(before) + }), + ); + } + } + } + + result + } + /// Returns the min and max times at which data was logged on a specific timeline, considering /// all entities. /// @@ -640,6 +669,20 @@ impl ChunkStore { } } +fn opt_min(a: Option, b: Option) -> Option { + match (a, b) { + (Some(a), Some(b)) => Some(a.min(b)), + (a, b) => a.or(b), + } +} + +fn opt_max(a: Option, b: Option) -> Option { + match (a, b) { + (Some(a), Some(b)) => Some(a.max(b)), + (a, b) => a.or(b), + } +} + // --- /// The results of a latest-at and/or range relevancy query. @@ -1567,6 +1610,258 @@ mod tests { } } + #[test] + fn next_and_prev_time_on_timeline_single_row_chunks() { + let mut store = ChunkStore::new( + re_log_types::StoreId::random(re_log_types::StoreKind::Recording, "test_app"), + crate::ChunkStoreConfig::ALL_DISABLED, + ); + + let entity_path: EntityPath = "entity".into(); + let timeline = Timeline::new_sequence("frame"); + let tl = timeline.name(); + let point = MyPoint::new(1.0, 1.0); + let mut next_chunk_id = next_chunk_id_generator(0xAA); + + // Insert single-row chunks at times 10, 20, 30. + for t in [10, 20, 30] { + let chunk = create_chunk_with_point( + next_chunk_id(), + entity_path.clone(), + TimePoint::from_iter([(timeline, t)]), + point, + ); + store.insert_chunk(&chunk).unwrap(); + } + + // Empty store on a different timeline. 
+ let other = TimelineName::from("other"); + assert_eq!( + store.next_time_on_timeline(&other, TimeInt::new_temporal(0)), + None + ); + assert_eq!( + store.prev_time_on_timeline(&other, TimeInt::new_temporal(99)), + None + ); + + // next: before all data + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(0)), + Some(TimeInt::new_temporal(10)) + ); + + // next: exactly on a data point returns the following one + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(10)), + Some(TimeInt::new_temporal(20)) + ); + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(20)), + Some(TimeInt::new_temporal(30)) + ); + + // next: between data points + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(15)), + Some(TimeInt::new_temporal(20)) + ); + + // next: at or after last data point + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(30)), + None + ); + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(99)), + None + ); + + // prev: after all data + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(99)), + Some(TimeInt::new_temporal(30)) + ); + + // prev: exactly on a data point returns the preceding one + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(30)), + Some(TimeInt::new_temporal(20)) + ); + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(20)), + Some(TimeInt::new_temporal(10)) + ); + + // prev: between data points + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(25)), + Some(TimeInt::new_temporal(20)) + ); + + // prev: at or before first data point + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(10)), + None + ); + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(0)), + None + ); + } + + #[test] + fn next_and_prev_time_on_timeline_multi_row_chunk() { + let mut store = ChunkStore::new( + 
re_log_types::StoreId::random(re_log_types::StoreKind::Recording, "test_app"), + crate::ChunkStoreConfig::ALL_DISABLED, + ); + + let entity_path: EntityPath = "entity".into(); + let timeline = Timeline::new_sequence("frame"); + let tl = timeline.name(); + let point = MyPoint::new(1.0, 1.0); + let mut next_chunk_id = next_chunk_id_generator(0xBB); + + // One chunk with three rows at times 10, 20, 30. + let chunk = Arc::new( + Chunk::builder_with_id(next_chunk_id(), entity_path.clone()) + .with_component_batch( + RowId::new(), + TimePoint::from_iter([(timeline, 10)]), + ( + MyPoints::descriptor_points(), + &[point] as &dyn re_types_core::ComponentBatch, + ), + ) + .with_component_batch( + RowId::new(), + TimePoint::from_iter([(timeline, 20)]), + ( + MyPoints::descriptor_points(), + &[point] as &dyn re_types_core::ComponentBatch, + ), + ) + .with_component_batch( + RowId::new(), + TimePoint::from_iter([(timeline, 30)]), + ( + MyPoints::descriptor_points(), + &[point] as &dyn re_types_core::ComponentBatch, + ), + ) + .build() + .unwrap(), + ); + store.insert_chunk(&chunk).unwrap(); + + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(0)), + Some(TimeInt::new_temporal(10)) + ); + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(10)), + Some(TimeInt::new_temporal(20)) + ); + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(15)), + Some(TimeInt::new_temporal(20)) + ); + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(30)), + None + ); + + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(99)), + Some(TimeInt::new_temporal(30)) + ); + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(30)), + Some(TimeInt::new_temporal(20)) + ); + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(25)), + Some(TimeInt::new_temporal(20)) + ); + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(10)), + None + ); + } + + #[test] + fn 
next_and_prev_time_on_timeline_multiple_entities() { + let mut store = ChunkStore::new( + re_log_types::StoreId::random(re_log_types::StoreKind::Recording, "test_app"), + crate::ChunkStoreConfig::ALL_DISABLED, + ); + + let timeline = Timeline::new_sequence("frame"); + let tl = timeline.name(); + let point = MyPoint::new(1.0, 1.0); + let mut next_chunk_id = next_chunk_id_generator(0xCC); + + // Entity A has data at 10, 30. + // Entity B has data at 20, 40. + for (entity, times) in [("a", vec![10, 30]), ("b", vec![20, 40])] { + let entity_path: EntityPath = entity.into(); + for t in times { + let chunk = create_chunk_with_point( + next_chunk_id(), + entity_path.clone(), + TimePoint::from_iter([(timeline, t)]), + point, + ); + store.insert_chunk(&chunk).unwrap(); + } + } + + // next should find the global minimum across entities. + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(0)), + Some(TimeInt::new_temporal(10)) + ); + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(10)), + Some(TimeInt::new_temporal(20)) + ); + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(20)), + Some(TimeInt::new_temporal(30)) + ); + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(30)), + Some(TimeInt::new_temporal(40)) + ); + assert_eq!( + store.next_time_on_timeline(tl, TimeInt::new_temporal(40)), + None + ); + + // prev should find the global maximum across entities. 
+ assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(50)), + Some(TimeInt::new_temporal(40)) + ); + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(40)), + Some(TimeInt::new_temporal(30)) + ); + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(30)), + Some(TimeInt::new_temporal(20)) + ); + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(20)), + Some(TimeInt::new_temporal(10)) + ); + assert_eq!( + store.prev_time_on_timeline(tl, TimeInt::new_temporal(10)), + None + ); + } + fn next_chunk_id_generator(prefix: u64) -> impl FnMut() -> re_chunk::ChunkId { let mut chunk_id = re_chunk::ChunkId::from_tuid(Tuid::from_nanos_and_inc(prefix, 0)); move || { diff --git a/crates/store/re_chunk_store/src/rebatch_videos.rs b/crates/store/re_chunk_store/src/rebatch_videos.rs new file mode 100644 index 000000000000..cfc4a212cb7e --- /dev/null +++ b/crates/store/re_chunk_store/src/rebatch_videos.rs @@ -0,0 +1,488 @@ +use std::collections::BTreeMap; +use std::sync::Arc; + +use ahash::{HashMap, HashSet}; +use itertools::izip; +use re_byte_size::SizeBytes as _; +use re_chunk::{Chunk, ChunkId, ChunkShared, EntityPath, Timeline, TimelineName}; +use re_format::format_bytes; +use re_log_types::TimeInt; +use re_sdk_types::archetypes::VideoStream; +use re_sdk_types::components::{VideoCodec, VideoSample}; + +use crate::{ChunkStore, ChunkStoreConfig, ChunkTrackingMode}; + +/// Info about a single video sample (frame) in the index. +#[derive(Clone, Copy)] +struct SampleInfo { + chunk_id: ChunkId, + + /// Row index within the source chunk. + row_index: usize, + + /// Timestamp on an automatically chosen timeline + time: TimeInt, + + /// Is sync/keyframe? + is_start_of_gop: bool, +} + +/// Rebatch video stream chunks along GoP boundaries. +/// +/// Each output chunk contains one or more complete GoPs. Multiple GoPs are packed +/// into the same chunk as long as the total size stays within `chunk_max_bytes`. 
+/// If `chunk_max_bytes` is 0, each GoP gets its own chunk. +/// +/// Non-video chunks are passed through unchanged. +/// +/// This allows for much faster random-reads of video frames, +/// as you always need to load at most one chunk. +/// +/// `is_start_of_gop` can use `re_video::is_start_of_gop`. +/// We do dependency injection here in order to avoid a direct dependency on `re_video`. +pub fn rebatch_video_chunks_to_gops( + store: &ChunkStore, + config: &ChunkStoreConfig, + is_start_of_gop: &dyn Fn(&[u8], VideoCodec) -> anyhow::Result, +) -> anyhow::Result { + re_tracing::profile_function!(); + + let sample_component = VideoStream::descriptor_sample().component; + + // Collect all temporal chunks that contain video samples, grouped by entity. + let mut sample_chunks_per_entity: HashMap> = + Default::default(); + for chunk in store.iter_physical_chunks() { + if !chunk.is_static() && chunk.components().contains_component(sample_component) { + sample_chunks_per_entity + .entry(chunk.entity_path().clone()) + .or_default() + .insert(chunk.id(), chunk.clone()); + } + } + + if sample_chunks_per_entity.is_empty() { + return Ok(store.clone()); // no video streams in the store + } + + let mut replaced_chunk_ids: HashSet = HashSet::default(); + let mut new_chunks: Vec = Vec::new(); + + re_log::info!( + num_video_entities = sample_chunks_per_entity.len(), + "found video entities for GoP realignment" + ); + + for (entity_path, sample_chunks) in &sample_chunks_per_entity { + match rebatch_video_entity(store, config, is_start_of_gop, entity_path, sample_chunks) { + Ok(new_entity_chunks) => { + replaced_chunk_ids.extend(sample_chunks.keys().copied()); + new_chunks.extend(new_entity_chunks); + } + Err(err) => { + re_log::warn!(entity = %entity_path, %err, "failed to rebatch video entity, skipping"); + } + } + } + + if replaced_chunk_ids.is_empty() { + return Ok(store.clone()); + } + + let new_config = ChunkStoreConfig::ALL_DISABLED; // So that we don't undo what we just did! 
+ let mut new_store = ChunkStore::new(store.id(), new_config); + + for chunk in store.iter_physical_chunks() { + if !replaced_chunk_ids.contains(&chunk.id()) { + new_store.insert_chunk(chunk)?; + } + } + + let mut overall_max_chunk_bytes: u64 = 0; + for chunk in new_chunks { + overall_max_chunk_bytes = overall_max_chunk_bytes.max(chunk.heap_size_bytes()); + new_store.insert_chunk(&Arc::new(chunk))?; + } + + /// Warn once per compaction if any rebatched chunk exceeds this size. + /// + /// GoP rebatching never splits a GoP across chunks, so streams with long + /// keyframe intervals can produce chunks much larger than `chunk_max_bytes`. + const LARGE_CHUNK_WARN_THRESHOLD: u64 = 10 * 1024 * 1024; + + if LARGE_CHUNK_WARN_THRESHOLD < overall_max_chunk_bytes { + re_log::warn_once!( + "GoP rebatching produced a video chunk of size {}. \ + Consider re-encoding the source with shorter keyframe intervals, \ + or turn off GoP-batching, or fix this code to allow splitting large GoPs-batches", + format_bytes(overall_max_chunk_bytes as _) + ); + } + + Ok(new_store) +} + +/// Rebatch a single video entity's chunks along GoP boundaries. +/// +/// Returns the new chunks that replace the old ones. +fn rebatch_video_entity( + store: &ChunkStore, + config: &ChunkStoreConfig, + is_start_of_gop: &dyn Fn(&[u8], VideoCodec) -> anyhow::Result, + entity_path: &EntityPath, + sample_chunks: &HashMap, +) -> anyhow::Result> { + re_tracing::profile_function!(); + + for chunk in sample_chunks.values() { + let unsorted_timelines: Vec<_> = chunk + .timelines() + .iter() + .filter(|(_, tc)| !tc.is_sorted()) + .map(|(name, _)| name) + .collect(); + if !unsorted_timelines.is_empty() { + // We could try pick one of the timelines _are_ sorted (w/ relation to RowId), + // but let's be better safe than sorry for now. + anyhow::bail!( + "chunk {} for entity '{entity_path}' has unsorted timelines: {:?} (compared to RowId). 
Video playback on these timelines may already be broken, and rebatching may make things worse", + chunk.id(), + unsorted_timelines + ); + } + } + + let timeline_name = + choose_timeline(sample_chunks).ok_or_else(|| anyhow::anyhow!("no timeline found"))?; + + let codec = extract_codec(store, entity_path, timeline_name) + .ok_or_else(|| anyhow::anyhow!("couldn't resolve video codec"))?; + + let sample_index = build_sample_index(is_start_of_gop, sample_chunks, timeline_name, codec)?; + + anyhow::ensure!(!sample_index.is_empty(), "no video samples found"); + + let gop_groups = split_into_gop_groups(entity_path, &sample_index); + + // Materialize each GoP into its own chunk: + let gop_chunks: Vec = gop_groups + .iter() + .map(|group| chunk_from_gop(group, sample_chunks)) + .collect::>()?; + + log_gop_stats(entity_path, &gop_chunks); + + // Merge consecutive GoP chunks as long as the total stays within chunk_max_bytes. + let merged = merge_chunks(config, gop_chunks)?; + + log_entity_chunk_stats(entity_path, timeline_name, codec, &merged); + + Ok(merged) +} + +/// Pick the best timeline for sorting video samples. 
+fn choose_timeline(sample_chunks: &HashMap) -> Option { + let mut counts: HashMap = Default::default(); + for chunk in sample_chunks.values() { + for tc in chunk.timelines().values() { + *counts.entry(*tc.timeline()).or_default() += chunk.num_rows() as u64; + } + } + + if counts.is_empty() { + return None; + } + + let timelines: Vec<_> = counts.keys().copied().collect(); + let best = Timeline::pick_best_timeline(&timelines, |t| counts.get(t).copied().unwrap_or(0)); + Some(*best.name()) +} + +fn extract_codec( + store: &ChunkStore, + entity_path: &EntityPath, + timeline_name: TimelineName, +) -> Option { + let codec_component = VideoStream::descriptor_codec().component; + + let results = store.latest_at_relevant_chunks( + ChunkTrackingMode::PanicOnMissing, + &crate::LatestAtQuery::new(timeline_name, TimeInt::MAX), + entity_path, + codec_component, + ); + + results + .chunks + .iter() + .flat_map(|chunk| chunk.iter_component::(codec_component)) + .find_map(|codec| codec.as_slice().first().copied()) +} + +/// Build an index of all video samples across all chunks for one entity. +/// +/// Returns a flat list of [`SampleInfo`], sorted by time. +fn build_sample_index( + is_start_of_gop: &dyn Fn(&[u8], VideoCodec) -> anyhow::Result, + sample_chunks: &HashMap, + timeline_name: TimelineName, + codec: VideoCodec, +) -> anyhow::Result> { + re_tracing::profile_function!(); + + let sample_component = VideoStream::descriptor_sample().component; + + let mut sample_index = Vec::new(); + + for chunk in sample_chunks.values() { + if !chunk.timelines().contains_key(&timeline_name) { + anyhow::bail!( + "chunk {} has no values on timeline {timeline_name}", + chunk.id() + ); + } + + let chunk_id = chunk.id(); + + // We need the positional row index to later extract rows via `taken()`. 
+ let row_id_to_index: HashMap<_, _> = chunk + .row_ids() + .enumerate() + .map(|(idx, rid)| (rid, idx)) + .collect(); + + // `iter_component_indices` only yields rows where the component is non-null, + // which is exactly what we want — skip rows without a sample. + for ((time, row_id), sample) in izip!( + chunk.iter_component_indices(timeline_name, sample_component), + chunk.iter_component::(sample_component) + ) { + let Some(sample) = sample.as_slice().first() else { + continue; + }; + + let row_index = row_id_to_index[&row_id]; + + sample_index.push(SampleInfo { + chunk_id, + row_index, + time, + is_start_of_gop: is_start_of_gop(sample.0.inner().as_slice(), codec)?, + }); + } + } + + sample_index.sort_by_key(|sample| (sample.time, sample.chunk_id, sample.row_index)); + Ok(sample_index) +} + +/// Split a sorted sample index into groups, one per GoP. +/// +/// Each group starts at a keyframe sample, except possibly the first +/// group which collects any orphan frames before the first keyframe. +fn split_into_gop_groups<'a>( + entity: &EntityPath, + sample_index: &'a [SampleInfo], +) -> Vec<&'a [SampleInfo]> { + re_tracing::profile_function!(); + + if sample_index.is_empty() { + return Vec::new(); + } + + // Find indices where new GoPs start. + let mut split_points: Vec = sample_index + .iter() + .enumerate() + .filter(|(_, s)| s.is_start_of_gop) + .map(|(i, _)| i) + .collect(); + + // If the first sample isn't a keyframe, include the leading orphan group. + if split_points.first().copied() != Some(0) { + re_log::warn!(?entity, "first sample is not a keyframe"); + split_points.insert(0, 0); + } + + split_points + .windows(2) + .map(|w| &sample_index[w[0]..w[1]]) + .chain(std::iter::once( + &sample_index[*split_points.last().unwrap_or(&0)..], + )) + .filter(|group| !group.is_empty()) + .collect() +} + +/// Materialize a GoP group into a single [`Chunk`] by extracting rows from source chunks. 
+///
+/// Uses `Chunk::taken()` to batch-extract rows from the same source chunk,
+/// then concatenates the per-source-chunk results.
+///
+/// Every sample in `group` contributes its row exactly once.
+///
+/// Returns an error if a row index does not fit in an `i32`, if concatenating the
+/// extracted chunks fails, or if `group` is empty. Panics if a sample refers to a
+/// chunk id that is missing from `chunks_by_id`.
+fn chunk_from_gop(
+    group: &[SampleInfo],
+    chunks_by_id: &HashMap,
+) -> anyhow::Result {
+    re_tracing::profile_function!();
+
+    // Group row indices by source chunk, preserving the order of first appearance.
+    // The key type is inferred from `sample.chunk_id`; row indices are `i32` because
+    // that is what the `Int32Array` built below requires.
+    let mut rows_per_chunk = BTreeMap::<_, Vec<i32>>::new();
+    let mut chunk_order = Vec::new();
+    for sample in group {
+        let row_index = i32::try_from(sample.row_index)
+            .map_err(|_err| anyhow::anyhow!("row index {} exceeds i32::MAX", sample.row_index))?;
+
+        rows_per_chunk
+            .entry(sample.chunk_id)
+            .or_insert_with(|| {
+                chunk_order.push(sample.chunk_id);
+                // Start empty: the `push` below records the first index as well, so
+                // seeding the list with `row_index` here would extract that row twice.
+                Vec::new()
+            })
+            .push(row_index);
+    }
+
+    // Extract the rows of each source chunk in one `taken()` call and fold the
+    // per-source results into a single chunk.
+    let mut result = None;
+    for chunk_id in &chunk_order {
+        let source_chunk = &chunks_by_id[chunk_id];
+        let indices = &rows_per_chunk[chunk_id];
+
+        let indices_array = arrow::array::Int32Array::from(indices.clone());
+        let extracted = source_chunk.taken(&indices_array);
+
+        result = Some(match result {
+            None => extracted,
+            Some(prev) => Chunk::concat_and_sort(&prev, &extracted)?,
+        });
+    }
+
+    // `result` is only `None` when `group` had no samples at all.
+    let mut chunk = result.ok_or_else(|| anyhow::anyhow!("GoP group is empty — this is a bug"))?;
+    chunk.sort_if_unsorted();
+    Ok(chunk)
+}
+
+/// Merge consecutive GoP chunks together as long as the combined size stays within
+/// `chunk_max_bytes`. If `chunk_max_bytes` is 0, no merging is done.
+///
+/// The input chunks must already be sorted by time.
+fn merge_chunks(config: &ChunkStoreConfig, gop_chunks: Vec) -> anyhow::Result> { + re_tracing::profile_function!(); + + let chunk_max_bytes = config.chunk_max_bytes; + + re_log::debug!( + num_gops = gop_chunks.len(), + chunk_max_bytes = %format_bytes(chunk_max_bytes as _), + "merging GoPs into chunks" + ); + + if chunk_max_bytes == 0 || gop_chunks.len() <= 1 { + re_log::debug!("skipping merge (max_bytes=0 or ≤1 GoP)"); + return Ok(gop_chunks); + } + + let mut merged: Vec = Vec::new(); + let mut accumulator: Option = None; + let mut accumulator_bytes: u64 = 0; + + for gop in gop_chunks { + let gop_bytes = gop.heap_size_bytes(); + + if let Some(acc) = accumulator.take() { + if accumulator_bytes + gop_bytes <= chunk_max_bytes { + let combined = Chunk::concat_and_sort(&acc, &gop)?; + accumulator_bytes += gop_bytes; + accumulator = Some(combined); + continue; + } else { + merged.push(acc); + } + } + + accumulator_bytes = gop_bytes; + accumulator = Some(gop); + } + + if let Some(acc) = accumulator { + merged.push(acc); + } + + re_log::debug!( + num_gops_in = merged.iter().map(|c| c.num_rows()).sum::(), + num_chunks_out = merged.len(), + "merge complete" + ); + + Ok(merged) +} + +fn log_gop_stats(entity_path: &EntityPath, gop_chunks: &[Chunk]) { + if gop_chunks.is_empty() { + return; + } + + let num_gops = gop_chunks.len() as u64; + + let gop_frames: Vec = gop_chunks.iter().map(|c| c.num_rows() as u64).collect(); + let gop_bytes: Vec = gop_chunks.iter().map(|c| c.heap_size_bytes()).collect(); + + let min_frames = gop_frames.iter().copied().min().unwrap_or(0); + let max_frames = gop_frames.iter().copied().max().unwrap_or(0); + let avg_frames = gop_frames.iter().sum::() / num_gops; + + let min_bytes = gop_bytes.iter().copied().min().unwrap_or(0); + let max_bytes = gop_bytes.iter().copied().max().unwrap_or(0); + let avg_bytes = gop_bytes.iter().sum::() / num_gops; + + re_log::info!( + entity = %entity_path, + num_gops, + frames_per_gop = 
%format!("{min_frames}/{avg_frames}/{max_frames}"), + bytes_per_gop = %format!( + "{}/{}/{}", + format_bytes(min_bytes as _), + format_bytes(avg_bytes as _), + format_bytes(max_bytes as _), + ), + "GoP stats (min/avg/max)" + ); +} + +fn log_entity_chunk_stats( + entity_path: &EntityPath, + timeline_name: TimelineName, + codec: VideoCodec, + chunks: &[Chunk], +) { + let num_chunks = chunks.len() as u64; + if num_chunks == 0 { + return; + } + + let chunk_frames: Vec = chunks.iter().map(|c| c.num_rows() as u64).collect(); + let chunk_bytes: Vec = chunks.iter().map(|c| c.heap_size_bytes()).collect(); + + let total_frames: u64 = chunk_frames.iter().sum(); + let min_frames = chunk_frames.iter().copied().min().unwrap_or(0); + let max_frames = chunk_frames.iter().copied().max().unwrap_or(0); + let avg_frames = total_frames / num_chunks; + + let total_bytes: u64 = chunk_bytes.iter().sum(); + let min_bytes = chunk_bytes.iter().copied().min().unwrap_or(0); + let max_bytes = chunk_bytes.iter().copied().max().unwrap_or(0); + let avg_bytes = total_bytes / num_chunks; + + re_log::info!( + entity = %entity_path, + timeline = %timeline_name, + codec = ?codec, + num_chunks, + total_frames, + frames_per_chunk = %format!("{min_frames}/{avg_frames}/{max_frames}"), + bytes_per_chunk = %format!( + "{}/{}/{}", + format_bytes(min_bytes as _), + format_bytes(avg_bytes as _), + format_bytes(max_bytes as _), + ), + "rebatched video entity (min/avg/max)" + ); +} diff --git a/crates/store/re_chunk_store/src/snapshots/re_chunk_store__lineage__tests__lineage_bootstrapped.snap b/crates/store/re_chunk_store/src/snapshots/re_chunk_store__lineage__tests__lineage_bootstrapped.snap index 445d2a1495f9..15de2e36ac98 100644 --- a/crates/store/re_chunk_store/src/snapshots/re_chunk_store__lineage__tests__lineage_bootstrapped.snap +++ b/crates/store/re_chunk_store/src/snapshots/re_chunk_store__lineage__tests__lineage_bootstrapped.snap @@ -3,22 +3,22 @@ source: crates/store/re_chunk_store/src/lineage.rs 
expression: generate_redacted_lineage_report(&store) --- chunk_00000000000005390000000000000004 (status:loaded static:no) - origin:RrdManifest { .. } + origin:(static: false) chunk_00000000000005390000000000000005 (status:loaded static:no) - origin:RrdManifest { .. } + origin:(static: false) chunk_00000000000005390000000000000006 (status:loaded static:no) - origin:RrdManifest { .. } + origin:(static: false) chunk_00000000000005390000000000000009 (status:loaded static:no) compacted-from: chunk_00000000000005390000000000000003 (status:offloaded static:no) - origin:RrdManifest { .. } - compacted-from: chunk_00000000000005390000000000000008 (status:offloaded static:unknown) + origin:(static: false) + compacted-from: chunk_00000000000005390000000000000008 (status:offloaded static:no) compacted-from: chunk_00000000000005390000000000000001 (status:offloaded static:no) - origin:RrdManifest { .. } + origin:(static: false) compacted-from: chunk_00000000000005390000000000000002 (status:offloaded static:no) - origin:RrdManifest { .. } + origin:(static: false) chunk_0000000000000539000000000000000a (status:loaded static:no siblings:[chunk_0000000000000539000000000000000b]) split-from: chunk_00000000000005390000000000000007 (status:offloaded static:no) - origin:RrdManifest { .. } + origin:(static: false) chunk_0000000000000539000000000000000b (status:loaded static:no siblings:[chunk_0000000000000539000000000000000a]) split-from: chunk_00000000000005390000000000000007 (status:offloaded static:no) - origin:RrdManifest { .. } + origin:(static: false) diff --git a/crates/store/re_chunk_store/src/split_thick_thin.rs b/crates/store/re_chunk_store/src/split_thick_thin.rs new file mode 100644 index 000000000000..6d48ffbe6c96 --- /dev/null +++ b/crates/store/re_chunk_store/src/split_thick_thin.rs @@ -0,0 +1,266 @@ +//! Split chunks that mix "thick" columns (e.g. images, videos, blobs) and "thin" columns +//! (e.g. scalars, text, transforms) into separate chunks. +//! +//! 
The heuristic groups components by archetype, sorts those groups by byte size, and +//! splits wherever two neighbors differ by more than the given ratio. An archetype is +//! always kept together, components without an archetype are treated as a group of one. + +use ahash::{HashMap, HashMapExt as _}; +use itertools::Itertools as _; + +use re_byte_size::SizeBytes as _; +use re_chunk::Chunk; +use re_types_core::{ArchetypeName, ComponentIdentifier}; + +/// How we group components before deciding where to split. +/// +/// Components that belong to the same archetype always stay together. Components without +/// an archetype can be placed independently. +#[derive(Clone, PartialEq, Eq, Hash)] +enum ComponentGroup { + Archetype(ArchetypeName), + Component(ComponentIdentifier), +} + +/// Split a chunk so that no two groups sharing an output chunk differ in size by more than `ratio`. +/// +/// Groups are sorted by byte size and split at every neighbor pair whose size ratio +/// meets or exceeds the threshold. A chunk with `k` such gaps becomes `k + 1` chunks. +/// +/// Returns `None` if no split is needed. 
+pub(crate) fn split_chunk(chunk: &Chunk, ratio: f64) -> Option> { + struct Group { + bytes: u64, + components: Vec, + } + + if chunk.components().len() < 2 { + return None; + } + + let mut groups: HashMap = HashMap::new(); + for column in chunk.components().values() { + let key = match column.descriptor.archetype { + Some(name) => ComponentGroup::Archetype(name), + None => ComponentGroup::Component(column.descriptor.component), + }; + let group = groups.entry(key).or_insert_with(|| Group { + bytes: 0, + components: Vec::new(), + }); + group.bytes += column.heap_size_bytes(); + group.components.push(column.descriptor.component); + } + + if groups.len() < 2 { + return None; + } + + let sorted: Vec = groups + .into_values() + .sorted_by_key(|g| std::cmp::Reverse(g.bytes)) + .collect(); + + let mut split_points = Vec::new(); + for (i, window) in sorted.windows(2).enumerate() { + let heavier = window[0].bytes as f64; + let lighter = window[1].bytes.max(1) as f64; + if heavier / lighter >= ratio { + split_points.push(i + 1); + } + } + + if split_points.is_empty() { + return None; + } + + let mut splits = Vec::new(); + let mut start = 0; + for end in split_points + .iter() + .copied() + .chain(std::iter::once(sorted.len())) + { + let mut components = Vec::new(); + for group in &sorted[start..end] { + components.extend_from_slice(&group.components); + } + + // This will result in duplicate row ids since row ids are + // preserved with `components_sliced`. Which is fine since + // 1. We're not mutating the data the row contains. + // 2. We're not splitting things in the same archetype. 
+ splits.push(chunk.components_sliced(&components)); + start = end; + } + + re_log::debug!( + entity = %chunk.entity_path(), + num_groups = sorted.len(), + num_splits = splits.len(), + "splitting chunk on thick/thin boundaries" + ); + + Some(splits) +} + +#[cfg(test)] +mod tests { + use super::*; + + use re_chunk::RowId; + use re_log_types::{EntityPath, Timeline, example_components::MyPoint}; + use re_sdk_types::components::Blob; + use re_types_core::{ArchetypeName, ComponentDescriptor}; + + #[test] + fn splits_thick_from_thin() { + re_log::setup_logging(); + + let entity_path = EntityPath::from("mixed"); + let timepoint = [(Timeline::new_sequence("frame"), 1)]; + + let points = &[MyPoint::new(1.0, 1.0)]; + let blob_bytes = 1024 * 128; + let blob = Blob::from(vec![0u8; blob_bytes]); + + let points_descriptor = ComponentDescriptor { + archetype: Some(ArchetypeName::from("my.archetype.Points")), + component: "Points:points".into(), + component_type: None, + }; + let blob_descriptor = ComponentDescriptor { + archetype: Some(ArchetypeName::from("my.archetype.Video")), + component: "Video:blob".into(), + component_type: None, + }; + + let chunk = Chunk::builder(entity_path.clone()) + .with_component_batches( + RowId::new(), + timepoint, + [ + ( + points_descriptor, + points as &dyn re_types_core::ComponentBatch, + ), + ( + blob_descriptor, + &[blob] as &dyn re_types_core::ComponentBatch, + ), + ], + ) + .build() + .unwrap(); + + let splits = split_chunk(&chunk, 10.0).expect("should split"); + assert_eq!(splits.len(), 2); + let sizes: Vec = splits.iter().map(Chunk::heap_size_bytes).collect(); + assert!( + sizes[0] > sizes[1] * 10, + "expected thick split to dwarf thin split, got {sizes:?}" + ); + } + + #[test] + fn splits_three_tiers() { + re_log::setup_logging(); + + let entity_path = EntityPath::from("three_tiers"); + let timepoint = [(Timeline::new_sequence("frame"), 1)]; + + let points = &[MyPoint::new(1.0, 1.0)]; + let medium = Blob::from(vec![0u8; 4 * 1024]); + let 
heavy = Blob::from(vec![0u8; 512 * 1024]); + + let small_descriptor = ComponentDescriptor { + archetype: Some(ArchetypeName::from("my.Points")), + component: "Points:pos".into(), + component_type: None, + }; + let medium_descriptor = ComponentDescriptor { + archetype: Some(ArchetypeName::from("my.Image")), + component: "Image:blob".into(), + component_type: None, + }; + let heavy_descriptor = ComponentDescriptor { + archetype: Some(ArchetypeName::from("my.Video")), + component: "Video:blob".into(), + component_type: None, + }; + + let chunk = Chunk::builder(entity_path) + .with_component_batches( + RowId::new(), + timepoint, + [ + ( + small_descriptor, + points as &dyn re_types_core::ComponentBatch, + ), + ( + medium_descriptor, + &[medium] as &dyn re_types_core::ComponentBatch, + ), + ( + heavy_descriptor, + &[heavy] as &dyn re_types_core::ComponentBatch, + ), + ], + ) + .build() + .unwrap(); + + let splits = split_chunk(&chunk, 10.0).expect("should split"); + assert_eq!( + splits.len(), + 3, + "three clearly-separated tiers produce three chunks" + ); + + let sizes: Vec = splits.iter().map(Chunk::heap_size_bytes).collect(); + assert!( + sizes[0] > sizes[1] && sizes[1] > sizes[2], + "splits come out sorted heaviest-first, got {sizes:?}" + ); + } + + #[test] + fn leaves_uniform_chunk_alone() { + re_log::setup_logging(); + + let entity_path = EntityPath::from("balanced"); + let timepoint = [(Timeline::new_sequence("frame"), 1)]; + + let p1 = &[MyPoint::new(1.0, 1.0)]; + let p2 = &[MyPoint::new(2.0, 2.0)]; + + let chunk = Chunk::builder(entity_path) + .with_component_batches( + RowId::new(), + timepoint, + [ + ( + ComponentDescriptor { + archetype: Some(ArchetypeName::from("my.Points")), + component: "Points:a".into(), + component_type: None, + }, + p1 as &dyn re_types_core::ComponentBatch, + ), + ( + ComponentDescriptor { + archetype: Some(ArchetypeName::from("my.Points")), + component: "Points:b".into(), + component_type: None, + }, + p2 as &dyn 
re_types_core::ComponentBatch, + ), + ], + ) + .build() + .unwrap(); + + assert!(split_chunk(&chunk, 10.0).is_none()); + } +} diff --git a/crates/store/re_chunk_store/src/stats.rs b/crates/store/re_chunk_store/src/stats.rs index 9c1925e779f5..5d5fdf05e01b 100644 --- a/crates/store/re_chunk_store/src/stats.rs +++ b/crates/store/re_chunk_store/src/stats.rs @@ -347,7 +347,8 @@ impl ChunkStore { entity_path: &EntityPath, component: ComponentIdentifier, ) -> u64 { - self.timelines() + self.schema + .timelines() .keys() .map(|timeline| { self.num_physical_temporal_events_for_component_on_timeline( @@ -371,8 +372,7 @@ impl SizeBytes for ChunkStore { temporal_chunk_ids_per_entity_per_component, id, config, - time_type_registry, - per_column_metadata, + schema, physical_chunk_ids_per_min_row_id, chunks_lineage, dangling_splits, @@ -425,12 +425,8 @@ impl SizeBytes for ChunkStore { + id.heap_size_bytes() + config.heap_size_bytes() + { - profile_scope!("time_type_registry"); - time_type_registry.heap_size_bytes() - } - + { - profile_scope!("per_column_metadata"); - per_column_metadata.heap_size_bytes() + profile_scope!("schema"); + schema.heap_size_bytes() } + { profile_scope!("physical_chunk_ids_per_min_row_id"); @@ -484,12 +480,17 @@ impl MemUsageTreeCapture for ChunkStore { } } - let mut node = MemUsageNode::new(); - + let mut entities_node = MemUsageNode::new(); for (entity_path, size) in memory_per_entity { - node.add(entity_path.to_string(), MemUsageTree::Bytes(size)); + entities_node.add(entity_path.to_string(), MemUsageTree::Bytes(size)); } - node.with_total_size_bytes(self.total_size_bytes()) + MemUsageNode::new() + .with_child( + "schema", + MemUsageTree::Bytes(self.schema.total_size_bytes()), + ) + .with_child("entities", entities_node.into_tree()) + .with_total_size_bytes(self.total_size_bytes()) } } diff --git a/crates/store/re_chunk_store/src/store.rs b/crates/store/re_chunk_store/src/store.rs index 000183233085..f66e27836302 100644 --- 
a/crates/store/re_chunk_store/src/store.rs +++ b/crates/store/re_chunk_store/src/store.rs @@ -3,20 +3,32 @@ use std::sync::Arc; use std::sync::atomic::AtomicU64; use ahash::{HashMap, HashSet}; -use arrow::datatypes::DataType as ArrowDataType; use itertools::Itertools as _; use nohash_hasher::IntMap; use parking_lot::RwLock; use re_log::debug_assert; use re_chunk::{Chunk, ChunkId, ComponentIdentifier, RowId, TimelineName}; -use re_log_types::{EntityPath, StoreId, TimeInt, TimeType}; -use re_types_core::{ComponentDescriptor, ComponentType}; +use re_log_types::{EntityPath, StoreId, TimeInt}; use crate::{ChunkDirectLineage, ChunkStoreChunkStats, ChunkStoreError, ChunkStoreResult}; // --- +/// Configuration for the [`ChunkStore`]. +/// +/// The size thresholds (`chunk_max_bytes`, `chunk_max_rows`, `chunk_max_rows_if_unsorted`) +/// serve as target chunk sizes for both **compaction** (merging small chunks together) and +/// **splitting** (breaking large chunks apart on ingestion): +/// +/// * During **compaction**, two chunks are only merged if their combined size stays within +/// these thresholds. +/// * During **splitting**, incoming chunks that exceed these thresholds are recursively split +/// into smaller ones. +/// +/// In other words, these thresholds define the target chunk size window from both directions. +/// +// TODO(emilk): we should be able to turn on/off merging and splitting independently. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ChunkStoreConfig { /// If `true` (the default), the store will emit events when its contents are modified in @@ -28,6 +40,9 @@ pub struct ChunkStoreConfig { /// What is the threshold, in bytes, after which a [`Chunk`] cannot be compacted any further? /// + /// This threshold is used both as a ceiling for compaction (don't merge beyond this) + /// and as a trigger for splitting (split incoming chunks that exceed this). 
+ /// /// This is a multi-dimensional trade-off: /// * Larger chunks lead to less fixed overhead introduced by metadata, indices and such. Good. /// * Larger chunks lead to slower query execution on some unhappy paths. Bad. @@ -46,6 +61,9 @@ pub struct ChunkStoreConfig { /// This specifically applies to time-sorted chunks. /// See also [`ChunkStoreConfig::chunk_max_rows_if_unsorted`]. /// + /// Like `chunk_max_bytes`, this is used both as a ceiling for compaction and as a + /// trigger for splitting. + /// /// This is a multi-dimensional trade-off: /// * Larger chunks lead to less fixed overhead introduced by metadata, indices and such. Good. /// * Larger chunks lead to slower query execution on some unhappy paths. Bad. @@ -64,6 +82,9 @@ pub struct ChunkStoreConfig { /// This specifically applies to _non_ time-sorted chunks. /// See also [`ChunkStoreConfig::chunk_max_rows`]. /// + /// Like `chunk_max_bytes`, this is used both as a ceiling for compaction and as a + /// trigger for splitting. + /// /// This is a multi-dimensional trade-off: /// * Larger chunks lead to less fixed overhead introduced by metadata, indices and such. Good. /// * Larger chunks lead to slower query execution on some unhappy paths. Bad. @@ -341,15 +362,21 @@ pub struct ColumnMetadataState { /// This is purely additive: once false, it will always be false. Even in case of garbage /// collection. pub is_semantically_empty: bool, + + /// Whether this column has ever been written as static data. + /// + /// Starts as `false` and flips to `true` once static data is observed. Never goes back. + pub is_static: bool, } impl re_byte_size::SizeBytes for ColumnMetadataState { fn heap_size_bytes(&self) -> u64 { let Self { is_semantically_empty, + is_static, } = self; - is_semantically_empty.heap_size_bytes() + is_semantically_empty.heap_size_bytes() + is_static.heap_size_bytes() } } @@ -495,18 +522,11 @@ pub struct ChunkStore { /// The configuration of the chunk store (e.g. compaction settings). 
pub(crate) config: ChunkStoreConfig, - /// Keeps track of the _latest_ datatype for each time column. + /// Incrementally maintained store schema. /// - /// This index is purely additive: it is never affected by garbage collection in any way. - /// - /// See also [`Self::time_column_type`]. - pub(crate) time_type_registry: IntMap, - - // TODO(grtlr): Can we slim this map down by getting rid of `ColumnIdentifier`-level here? - pub(crate) per_column_metadata: IntMap< - EntityPath, - IntMap, - >, + /// Contains all column descriptors and per-entity component sets. + /// Purely additive: never affected by garbage collection. + pub(crate) schema: crate::StoreSchema, /// All the *physical* chunks currently loaded in the store, mapped by their respective IDs. /// @@ -520,7 +540,7 @@ pub struct ChunkStore { /// just hints that some data is missing and must first be re-inserted by the caller. pub(crate) physical_chunks_per_chunk_id: BTreeMap>, - /// All *physical* [`ChunkId`]s currently in the store, indexed by the smallest [`RowId`] in + /// All *physical* [`ChunkId`]s currently in the store, ordered by the smallest [`RowId`] in /// each of them. /// /// This is effectively all chunks in global data order. Used for garbage collection. @@ -528,7 +548,9 @@ pub struct ChunkStore { /// During garbage collection, physical chunks are offloaded from memory and become virtual /// chunks instead. At the same time, their IDs are removed from this set, which is how we /// distinguish virtual from physical chunks. - pub(crate) physical_chunk_ids_per_min_row_id: BTreeMap, + /// + /// Keyed on `(min RowId, ChunkId)` so that multiple chunks can share a min `RowId`. + pub(crate) physical_chunk_ids_per_min_row_id: BTreeSet<(RowId, ChunkId)>, /// Keeps track of where each individual chunks, both virtual & physical, came from. 
/// @@ -609,7 +631,7 @@ pub struct ChunkStore { ChunkIdSetPerTimePerComponentPerTimelinePerEntity, /// All *physical & virtual* temporal [`ChunkId`]s for all entities on all timelines, without the - /// [`ComponentType`] index. + /// [`re_types_core::ComponentType`] index. /// /// This index is purely additive: it is never affected by garbage collection in any way. /// This implies that the chunk IDs present in this set might be either physical/loaded or @@ -677,8 +699,7 @@ impl Clone for ChunkStore { Self { id: self.id.clone(), config: self.config.clone(), - time_type_registry: self.time_type_registry.clone(), - per_column_metadata: self.per_column_metadata.clone(), + schema: self.schema.clone(), physical_chunks_per_chunk_id: self.physical_chunks_per_chunk_id.clone(), chunks_lineage: self.chunks_lineage.clone(), dangling_splits: self.dangling_splits.clone(), @@ -705,9 +726,8 @@ impl std::fmt::Display for ChunkStore { let Self { id, config, - time_type_registry: _, - per_column_metadata: _, - physical_chunks_per_chunk_id: chunks_per_chunk_id, + schema: _, + physical_chunks_per_chunk_id, physical_chunk_ids_per_min_row_id: chunk_ids_per_min_row_id, chunks_lineage, dangling_splits: _, @@ -737,8 +757,8 @@ impl std::fmt::Display for ChunkStore { f.write_str(&indent::indent_all_by(4, "}\n"))?; f.write_str(&indent::indent_all_by(4, "physical chunks: [\n"))?; - for chunk_id in chunk_ids_per_min_row_id.values() { - if let Some(chunk) = chunks_per_chunk_id.get(chunk_id) { + for (_, chunk_id) in chunk_ids_per_min_row_id { + if let Some(chunk) = physical_chunks_per_chunk_id.get(chunk_id) { f.write_str(&indent::indent_all_by( 8, format!("{}\n", self.format_lineage(chunk_id)), @@ -758,7 +778,7 @@ impl std::fmt::Display for ChunkStore { f.write_str(&indent::indent_all_by(4, "virtual chunks: [\n"))?; for chunk_id in chunks_lineage.keys().sorted() { - if chunks_per_chunk_id.contains_key(chunk_id) { + if physical_chunks_per_chunk_id.contains_key(chunk_id) { continue; } @@ -788,8 
+808,7 @@ impl ChunkStore { Self { id, config, - time_type_registry: Default::default(), - per_column_metadata: Default::default(), + schema: Default::default(), physical_chunk_ids_per_min_row_id: Default::default(), chunks_lineage: Default::default(), dangling_splits: Default::default(), @@ -840,6 +859,67 @@ impl ChunkStore { &self.config } + /// The hierarchical tree of all entities registered in the store. + #[inline] + pub fn entity_tree(&self) -> &crate::EntityTree { + self.schema.entity_tree() + } + + /// Prunes leaf entities from the entity tree that have no indexed data. + /// + /// Called after store deletions to keep the tree in sync with actual data. + fn prune_entity_tree(&mut self) { + let static_ids = &self.static_chunk_ids_per_entity; + let temporal_ids = &self.temporal_chunk_ids_per_entity; + self.schema.prune_entity_tree(&|path| { + static_ids.contains_key(path) || temporal_ids.contains_key(path) + }); + } + + /// Converts diffs into store events, prunes the entity tree if there are deletions, + /// and notifies subscribers. 
+ pub(crate) fn finalize_events( + &mut self, + diffs: impl IntoIterator, + ) -> Vec { + let mut events: Vec<_> = diffs + .into_iter() + .map(|diff| crate::ChunkStoreEvent { + store_id: self.id.clone(), + store_generation: self.generation(), + event_id: self + .event_id + .fetch_add(1, std::sync::atomic::Ordering::Relaxed), + diff, + }) + .collect(); + + let new_columns = self.schema.on_events(&events); + + if !new_columns.is_empty() { + events.push(crate::ChunkStoreEvent { + store_id: self.id.clone(), + store_generation: self.generation(), + event_id: self + .event_id + .fetch_add(1, std::sync::atomic::Ordering::Relaxed), + diff: crate::ChunkStoreDiff::SchemaAddition(crate::ChunkStoreDiffSchemaAddition { + new_columns, + }), + }); + } + + if events.iter().any(|e| e.is_deletion()) { + self.prune_entity_tree(); + } + + if self.config.enable_changelog { + Self::on_events(&events); + } + + events + } + /// Iterate over all *physical* chunks in the store, in ascending [`ChunkId`] order. #[inline] pub fn iter_physical_chunks(&self) -> impl Iterator> + '_ { @@ -854,13 +934,11 @@ impl ChunkStore { /// Get a *physical* chunk based on its ID and track the chunk as either /// used or missing, to signal that it should be kept or fetched. + /// + /// If the given chunk isn't physical `None` is returned and the ID is reported + /// missing. #[track_caller] - pub fn use_physical_chunk_or_report_missing(&self, id: &ChunkId) -> Option<&Arc> { - debug_assert!( - !self.split_on_ingest.contains(id), - "Asked for a physical chunk, but this chunk was split on ingestion and was never physical: {id}" - ); - + pub fn use_chunk_or_report_missing(&self, id: &ChunkId) -> Option<&Arc> { let chunk = self.physical_chunk(id); if chunk.is_some() { @@ -878,17 +956,20 @@ impl ChunkStore { self.physical_chunks_per_chunk_id.len() } + /// The incrementally maintained store schema. + /// + /// Contains all column descriptors, per-entity component sets, + /// timeline types, and per-column metadata. 
+ #[inline] + pub fn schema(&self) -> &crate::StoreSchema { + &self.schema + } + /// All the currently loaded chunks pub fn physical_chunks(&self) -> impl Iterator> + '_ { self.physical_chunks_per_chunk_id.values() } - /// Lookup the _latest_ [`TimeType`] used by a specific [`TimelineName`]. - #[inline] - pub fn time_column_type(&self, timeline_name: &TimelineName) -> Option { - self.time_type_registry.get(timeline_name).copied() - } - /// Lookup the [`ColumnMetadata`] for a specific [`EntityPath`] and [`re_types_core::Component`]. pub fn lookup_column_metadata( &self, @@ -897,11 +978,10 @@ impl ChunkStore { ) -> Option { let ColumnMetadataState { is_semantically_empty, + is_static: _, } = self - .per_column_metadata - .get(entity_path) - .and_then(|per_identifier| per_identifier.get(&component)) - .map(|(_, metadata_state, _)| metadata_state)?; + .schema + .lookup_column_metadata_state(entity_path, component)?; let is_static = self .static_chunk_ids_per_entity @@ -920,40 +1000,6 @@ impl ChunkStore { }) } - /// Get the [`ComponentType`] and [`ArrowDataType`] for a specific [`EntityPath`] and [`ComponentIdentifier`]. - pub fn lookup_component_type( - &self, - entity_path: &EntityPath, - component: ComponentIdentifier, - ) -> Option<(Option, ArrowDataType)> { - let (component_descr, _, datatype) = self - .per_column_metadata - .get(entity_path) - .and_then(|per_identifier| per_identifier.get(&component))?; - Some((component_descr.component_type, datatype.clone())) - } - - /// Checks whether any column in the store with the given [`ComponentType`] has a datatype - /// that differs from `expected_datatype`. - /// - /// This iterates over all entities, so it should not be called in a hot path. 
- pub fn has_mismatched_datatype_for_component_type( - &self, - component_type: &ComponentType, - expected_datatype: &ArrowDataType, - ) -> Option<&ArrowDataType> { - for per_component in self.per_column_metadata.values() { - for (descr, _, datatype) in per_component.values() { - if descr.component_type.as_ref() == Some(component_type) - && datatype != expected_datatype - { - return Some(datatype); - } - } - } - None - } - /// Returns and iterator over [`ChunkId`]s that were detected as /// used or missing since the last time since method was called. /// @@ -994,16 +1040,6 @@ impl ChunkStore { self.chunks_lineage.contains_key(&chunk_id), "A chunk was reported missing, with no known lineage: {chunk_id}" ); - if self.split_on_ingest.contains(&chunk_id) { - if cfg!(debug_assertions) { - re_log::warn_once!( - "Tried to report a chunk missing that was the source of a split (manual)" - ); - } - re_log::debug_once!( - "Tried to report a chunk missing that was the source of a split: {chunk_id} (manual)" - ); - } self.queried_chunk_id_tracker .write() diff --git a/crates/store/re_chunk_store/src/store_schema.rs b/crates/store/re_chunk_store/src/store_schema.rs new file mode 100644 index 000000000000..7b4d5d40d06e --- /dev/null +++ b/crates/store/re_chunk_store/src/store_schema.rs @@ -0,0 +1,469 @@ +//! Incrementally maintained store schema. +//! +//! Tracks all column descriptors and per-entity component sets. +//! +//! Never affected by garbage collection. 
+ +use std::collections::BTreeMap; + +use arrow::array::ListArray as ArrowListArray; +use arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField}; +use nohash_hasher::IntMap; + +use re_byte_size::SizeBytes; +use re_chunk::ComponentIdentifier; +use re_log_types::{EntityPath, TimeType, Timeline, TimelineName}; +use re_sdk_types::ComponentDescriptor; +use re_sorbet::{ + ChunkColumnDescriptors, ComponentColumnDescriptor, IndexColumnDescriptor, RowIdColumnDescriptor, +}; +use re_types_core::{ArchetypeName, ComponentSet, ComponentType}; + +use crate::ColumnMetadataState; + +/// Per-column metadata for a single component on a single entity. +#[derive(Debug, Clone)] +pub struct ColumnMetadataEntry { + pub descriptor: ComponentDescriptor, + pub metadata_state: ColumnMetadataState, + pub datatype: ArrowDataType, +} + +impl re_byte_size::SizeBytes for ColumnMetadataEntry { + fn heap_size_bytes(&self) -> u64 { + let Self { + descriptor, + metadata_state, + datatype, + } = self; + descriptor.heap_size_bytes() + metadata_state.heap_size_bytes() + datatype.heap_size_bytes() + } +} + +use crate::{ChunkComponentMeta, ChunkMeta, ChunkStoreEvent}; + +// --- + +/// Key for looking up a [`ComponentColumnDescriptor`] in the schema. +/// +/// Matches the fields used by the `Ord` implementation of [`ComponentColumnDescriptor`]. +type SchemaComponentKey = ( + EntityPath, + Option, + ComponentIdentifier, + Option, +); + +fn schema_component_key(descr: &ComponentColumnDescriptor) -> SchemaComponentKey { + ( + descr.entity_path.clone(), + descr.archetype, + descr.component, + descr.component_type, + ) +} + +// --- + +/// Incrementally maintained store schema. +/// +/// Contains [`ChunkColumnDescriptors`], per-entity component sets, and the entity tree. +/// Updated via [`Self::on_events`] when chunks are inserted or RRD manifests are ingested. +/// The schema itself is purely additive, but the entity tree is pruned on deletions. 
+#[derive(Debug, Clone, Default)] +pub struct StoreSchema { + /// The _latest_ [`TimeType`] for each timeline name. + time_type_registry: IntMap, + + /// All component column descriptors ever seen, keyed for fast lookup/update. + components: BTreeMap, + + /// Per-entity set of all components ever seen (sorted). + components_per_entity: IntMap, + + /// Per-entity, per-component [`ColumnMetadataEntry`] (descriptor, metadata state, inner datatype). + // TODO(grtlr): Can we slim this map down by getting rid of `ComponentIdentifier`-level here? + // Ideally, we'd even merge this with the above fields. We are currently storing a lot of + // redundant information. + per_column_metadata: IntMap>, + + /// Hierarchical tree of all entities that have been registered in the store. + /// + /// Entities are pruned on deletions but not during GC. + entity_tree: crate::EntityTree, +} + +impl StoreSchema { + /// The hierarchical tree of all entities registered in the store. + #[inline] + pub fn entity_tree(&self) -> &crate::EntityTree { + &self.entity_tree + } + + /// Retrieve all timelines in the store, keyed by name and typed with the _latest_ [`TimeType`] seen for each. + #[inline] + pub fn timelines(&self) -> BTreeMap { + self.time_type_registry + .iter() + .map(|(name, typ)| (*name, Timeline::new(*name, *typ))) + .collect() + } + + /// Lookup the _latest_ [`TimeType`] used by a specific [`TimelineName`]. + #[inline] + pub fn time_column_type(&self, timeline_name: &TimelineName) -> Option { + self.time_type_registry.get(timeline_name).copied() + } + + /// Returns all [`ComponentIdentifier`]s that have ever been written to the given entity, sorted. + /// + /// Returns `None` if the entity has never had any data logged to it, + /// or if its schema entries were removed via [`Self::drop_entity`]. + #[inline] + pub fn all_components_for_entity(&self, entity_path: &EntityPath) -> Option<&ComponentSet> { + self.components_per_entity.get(entity_path) + } + + /// Retrieves the [`ComponentDescriptor`] at a given [`EntityPath`] that has a certain [`ComponentIdentifier`].
+ pub fn entity_component_descriptor( + &self, + entity_path: &EntityPath, + component: ComponentIdentifier, + ) -> Option { + self.per_column_metadata + .get(entity_path) + .and_then(|per_identifier| per_identifier.get(&component)) + .map(|entry| entry.descriptor.clone()) + } + + /// Get the [`re_types_core::ComponentType`] and [`ArrowDataType`] for a specific [`EntityPath`] and [`ComponentIdentifier`]. + /// + /// Returns `None` if no metadata is recorded for that entity/component pair. + /// The inner `Option` is `None` when the recorded descriptor carries no component type. + pub fn lookup_component_type( + &self, + entity_path: &EntityPath, + component: ComponentIdentifier, + ) -> Option<(Option, ArrowDataType)> { + let entry = self + .per_column_metadata + .get(entity_path) + .and_then(|per_identifier| per_identifier.get(&component))?; + Some((entry.descriptor.component_type, entry.datatype.clone())) + } + + /// Lookup the [`ColumnMetadataState`] for a specific [`EntityPath`] and [`ComponentIdentifier`]. + /// + /// Returns `None` if no metadata is recorded for that entity/component pair. + pub fn lookup_column_metadata_state( + &self, + entity_path: &EntityPath, + component: ComponentIdentifier, + ) -> Option<&ColumnMetadataState> { + self.per_column_metadata + .get(entity_path) + .and_then(|per_identifier| per_identifier.get(&component)) + .map(|entry| &entry.metadata_state) + } + + /// Checks whether any column in the store with the given [`re_types_core::ComponentType`] has a datatype + /// that differs from `expected_datatype`. + /// + /// Returns the first mismatching datatype encountered, or `None` if every column with that + /// component type (possibly none at all) uses `expected_datatype`. + /// + /// This iterates over all entities, so it should not be called in a hot path. + pub fn has_mismatched_datatype_for_component_type( + &self, + component_type: &ComponentType, + expected_datatype: &ArrowDataType, + ) -> Option<&ArrowDataType> { + re_tracing::profile_function!(); + for per_component in self.per_column_metadata.values() { + for entry in per_component.values() { + if entry.descriptor.component_type.as_ref() == Some(component_type) + && entry.datatype != *expected_datatype + { + return Some(&entry.datatype); + } + } + } + None + } + + /// Access the per-column metadata for a given entity.
+ pub fn per_column_metadata_for_entity( + &self, + entity_path: &EntityPath, + ) -> Option<&IntMap> { + self.per_column_metadata.get(entity_path) + } + + /// Returns the full schema of the store. + /// + /// This will include a column descriptor for every timeline and every component on every + /// entity that has been written to the store so far. + /// + /// The columns are guaranteed to be in the following order: + /// * first, the time columns in lexical order (`frame_nr`, `log_time`, ...); + /// * second, the component columns in lexical order (`Color`, `Radius`, ...). + pub fn chunk_column_descriptors(&self) -> ChunkColumnDescriptors { + let mut indices: Vec = self + .time_type_registry + .iter() + .map(|(name, typ)| IndexColumnDescriptor::from(Timeline::new(*name, *typ))) + .collect(); + // `time_type_registry` is a hash map with no meaningful iteration order, so sort explicitly. + indices.sort(); + + ChunkColumnDescriptors { + row_id: RowIdColumnDescriptor::from_sorted(false), + indices, + components: self.components.values().cloned().collect(), + } + } + + /// Update per-entity component set and per-column metadata for a single component. + /// + /// Returns `Some(ChunkComponentMeta)` when a schema event should be emitted, + /// i.e. when the column is genuinely new or `is_static` transitions from `false` to `true`.
+ fn update_column_metadata( + &mut self, + col_descr: &ComponentColumnDescriptor, + ) -> Option { + let ComponentColumnDescriptor { + entity_path, + component, + is_static, + is_semantically_empty, + store_datatype: _, + component_type: _, + archetype: _, + is_tombstone: _, + } = col_descr; + let descriptor = col_descr.component_descriptor(); + let inner_datatype = col_descr.inner_datatype(); + let metadata_state = ColumnMetadataState { + is_semantically_empty: *is_semantically_empty, + is_static: *is_static, + }; + + let key = schema_component_key(col_descr); + self.components + .entry(key) + .and_modify(|existing| { + existing.is_static |= is_static; + existing.is_semantically_empty &= is_semantically_empty; + }) + .or_insert_with(|| col_descr.clone()); + + let is_new = self + .components_per_entity + .entry(entity_path.clone()) + .or_default() + .insert(*component); + + let prev_is_static = self + .per_column_metadata + .get(entity_path) + .and_then(|per_id| per_id.get(component)) + .map(|e| e.metadata_state.is_static); + + let entry = self + .per_column_metadata + .entry(entity_path.clone()) + .or_default() + .entry(*component) + .and_modify(|e| { + if e.datatype != inner_datatype { + // TODO(grtlr): If we encounter two different data types, we should split the chunk.
+ // More information: https://github.com/rerun-io/rerun/pull/10082#discussion_r2140549340 + re_log::warn_once!( + "Datatype of column {} in {entity_path} has changed from {} to {inner_datatype}", + e.descriptor, + e.datatype, + ); + e.datatype = inner_datatype.clone(); + } + // Flags accumulate: a column stays static once any contribution was static, and only + // stays semantically empty while every contribution was semantically empty. + e.metadata_state.is_static |= is_static; + e.metadata_state.is_semantically_empty &= is_semantically_empty; + }) + .or_insert_with(|| ColumnMetadataEntry { + descriptor: descriptor.clone(), + metadata_state, + datatype: inner_datatype.clone(), + }); + + // Only a `false` -> `true` transition of an already known column counts as a change; + // a brand new column is covered by `is_new` instead. + let new_is_static = entry.metadata_state.is_static; + let static_changed = prev_is_static.is_some_and(|prev| !prev && new_is_static); + + if is_new || static_changed { + Some(ChunkComponentMeta { + descriptor: descriptor.clone(), + inner_arrow_datatype: Some(inner_datatype.clone()), + has_data: !entry.metadata_state.is_semantically_empty, + is_static: new_is_static, + }) + } else { + None + } + } + + // --- Updating via events --- + + /// Update the schema from store events. + /// + /// This processes addition events (both physical chunk additions and virtual + /// manifest additions). Deletion events and schema column addition events are + /// ignored since the schema is purely additive and schema events are output, not input. + /// + /// Returns newly discovered entity/component pairs (as well as columns whose `is_static` + /// just became `true`), grouped by entity.
+ pub fn on_events(&mut self, events: &[ChunkStoreEvent]) -> Vec { + re_tracing::profile_function!(); + + let mut all_new: nohash_hasher::IntMap> = + Default::default(); + + for event in events { + match &event.diff { + crate::ChunkStoreDiff::Addition(add) => { + for new_col in self.on_chunk_addition(&add.chunk_after_processing) { + all_new + .entry(add.chunk_after_processing.entity_path().clone()) + .or_default() + .push(new_col); + } + } + crate::ChunkStoreDiff::VirtualAddition(vadd) => { + for (entity_path, new_cols) in self.on_rrd_manifest(&vadd.rrd_manifest) { + all_new.entry(entity_path).or_default().extend(new_cols); + } + } + crate::ChunkStoreDiff::Deletion(_) | crate::ChunkStoreDiff::SchemaAddition(_) => { + // Schema is purely additive — deletions and schema column addition events are ignored. + } + } + } + + all_new + .into_iter() + .map(|(entity_path, components)| ChunkMeta { + entity_path, + components, + }) + .collect() + } + + /// Returns [`ChunkComponentMeta`] for each component column of the chunk that is genuinely new + /// or whose `is_static` flag just became `true`. + fn on_chunk_addition(&mut self, chunk: &re_chunk::Chunk) -> Vec { + let is_static = chunk.is_static(); + + // Update time type registry + for (name, time_column) in chunk.timelines() { + let new_typ = time_column.timeline().typ(); + if let Some(old_typ) = self.time_type_registry.insert(*name, new_typ) + && old_typ != new_typ + { + re_log::warn_once!( + "Timeline '{name}' changed type from {old_typ:?} to {new_typ:?}.
\ + Rerun does not support using different types for the same timeline.", + ); + } + } + + let entity_path = chunk.entity_path(); + self.entity_tree.on_new_entity(entity_path); + + let mut new_columns = Vec::new(); + + // Update component columns and per-entity component sets + for column in chunk.components().values() { + let descriptor = &column.descriptor; + let component = descriptor.component; + + let is_semantically_empty = + re_arrow_util::is_list_array_semantically_empty(&column.list_array); + + // A column is flagged as tombstone when its component is one of the `Clear` archetype's components. + use re_types_core::Archetype as _; + let is_tombstone = re_types_core::archetypes::Clear::all_components() + .iter() + .any(|descr| descr.component == component); + + // The store datatype is a list whose nullable `item` field has the column's value type. + let col_descr = ComponentColumnDescriptor { + store_datatype: ArrowListArray::DATA_TYPE_CONSTRUCTOR( + ArrowField::new("item", column.list_array.value_type().clone(), true).into(), + ), + entity_path: entity_path.clone(), + archetype: descriptor.archetype, + component: descriptor.component, + component_type: descriptor.component_type, + is_static, + is_tombstone, + is_semantically_empty, + }; + + if let Some(meta) = self.update_column_metadata(&col_descr) { + new_columns.push(meta); + } + } + + new_columns + } + + /// Returns newly inserted columns (as well as columns whose `is_static` just became `true`), grouped by entity path.
+ fn on_rrd_manifest( + &mut self, + rrd_manifest: &re_log_encoding::RrdManifest, + ) -> Vec<(EntityPath, Vec<ChunkComponentMeta>)> { + let sorbet_schema = rrd_manifest.recording_schema(); + + // Update time type registry, warning on type changes exactly like `on_chunk_addition` does. + for descr in sorbet_schema.columns.index_columns() { + let name = descr.timeline_name(); + let new_typ = descr.timeline().typ(); + if let Some(old_typ) = self.time_type_registry.insert(name, new_typ) + && old_typ != new_typ + { + re_log::warn_once!( + "Timeline '{name}' changed type from {old_typ:?} to {new_typ:?}. Rerun does not support using different types for the same timeline.", + ); + } + } + + // Update entity tree + for entity in sorbet_schema.all_entities() { + self.entity_tree.on_new_entity(entity); + } + + // Columns for which `update_column_metadata` reported a change, grouped by entity. + let mut new_per_entity: nohash_hasher::IntMap<EntityPath, Vec<ChunkComponentMeta>> = + Default::default(); + + // Update component columns and per-entity component sets + for descr in sorbet_schema.columns.component_columns() { + if let Some(meta) = self.update_column_metadata(descr) { + new_per_entity + .entry(descr.entity_path.clone()) + .or_default() + .push(meta); + } + } + + new_per_entity.into_iter().collect() + } + + /// Remove all component columns and per-column metadata for a given entity path. + /// + /// The timeline registry and the entity tree are left untouched. + /// + /// Called from `ChunkStore::drop_entity_path`. + pub fn drop_entity(&mut self, entity_path: &EntityPath) { + // The entity path is the first element of every `SchemaComponentKey`. + self.components.retain(|key, _| key.0 != *entity_path); + self.components_per_entity.remove(entity_path); + self.per_column_metadata.remove(entity_path); + } + + /// Prunes leaf entities from the entity tree that have no indexed data. + /// + /// Called after store deletions to keep the tree in sync with actual data.
+ pub fn prune_entity_tree(&mut self, entity_has_data: &impl Fn(&EntityPath) -> bool) { + self.entity_tree.prune_empty_entities(entity_has_data); + } +} + +impl SizeBytes for StoreSchema { + fn heap_size_bytes(&self) -> u64 { + let Self { + time_type_registry, + components, + components_per_entity, + per_column_metadata, + entity_tree, + } = self; + + time_type_registry.heap_size_bytes() + + components.heap_size_bytes() + + components_per_entity.heap_size_bytes() + + per_column_metadata.heap_size_bytes() + + entity_tree.heap_size_bytes() + } +} diff --git a/crates/store/re_chunk_store/src/subscribers.rs b/crates/store/re_chunk_store/src/subscribers.rs index 31edb1398428..93ffd7747e47 100644 --- a/crates/store/re_chunk_store/src/subscribers.rs +++ b/crates/store/re_chunk_store/src/subscribers.rs @@ -1,6 +1,7 @@ use ahash::HashMap; use itertools::Itertools as _; use parking_lot::RwLock; +use re_byte_size::{MemUsageNode, MemUsageTree, MemUsageTreeCapture}; use re_log_types::StoreId; use crate::{ChunkStore, ChunkStoreEvent}; @@ -14,9 +15,9 @@ type SharedStoreSubscriber = RwLock>; /// through [`ChunkStoreEvent`]s. /// /// [`ChunkStoreSubscriber`]s can be used to build both secondary indices and trigger systems. -// -// TODO(#4204): StoreSubscriber should require SizeBytes so they can be part of memstats. -pub trait ChunkStoreSubscriber: std::any::Any + Send + Sync { +/// +/// The [`MemUsageTreeCapture`] bound lets the viewer's memory panel show how much memory each subscriber uses. +pub trait ChunkStoreSubscriber: MemUsageTreeCapture + std::any::Any + Send + Sync { /// Arbitrary name for the subscriber. /// /// Does not need to be unique. @@ -66,7 +67,9 @@ pub trait ChunkStoreSubscriber: std::any::Any + Send + Sync { } /// A [`ChunkStoreSubscriber`] that is instantiated for each unique [`StoreId`]. -pub trait PerStoreChunkSubscriber: Send + Sync + Default { +/// +/// The [`MemUsageTreeCapture`] bound lets the viewer's memory panel show memory usage per [`StoreId`]. 
+pub trait PerStoreChunkSubscriber: MemUsageTreeCapture + Send + Sync + Default { /// Arbitrary name for the subscriber. /// /// Does not need to be unique. @@ -251,6 +254,29 @@ impl ChunkStore { subscriber.write().on_events(events); } } + + /// Captures the memory usage of all registered subscribers. + /// + /// Names are disambiguated with a `#idx` suffix when multiple subscribers share the same name. + pub fn capture_all_subscribers_mem_usage_tree() -> MemUsageTree { + re_tracing::profile_function!(); + let subscribers = SUBSCRIBERS.read(); + let mut node = MemUsageNode::new(); + let mut name_counts: HashMap = HashMap::default(); + for subscriber in subscribers.iter() { + let subscriber = subscriber.read(); + let base_name = subscriber.name(); + let count = name_counts.entry(base_name.clone()).or_insert(0); + let name = if *count == 0 { + base_name + } else { + format!("{base_name}#{count}") + }; + *count += 1; + node.add(name, subscriber.capture_mem_usage_tree()); + } + node.into_tree() + } } /// Utility that makes a [`PerStoreChunkSubscriber`] a [`ChunkStoreSubscriber`]. 
@@ -298,6 +324,23 @@ impl ChunkStoreSubscriber } } +impl MemUsageTreeCapture + for PerStoreStoreSubscriberWrapper +{ + fn capture_mem_usage_tree(&self) -> MemUsageTree { + let mut node = MemUsageNode::new(); + for (store_id, subscriber) in &self.subscribers { + let name = format!( + "{}/{}", + store_id.application_id().as_str(), + store_id.recording_id().as_str() + ); + node.add(name, subscriber.capture_mem_usage_tree()); + } + node.into_tree() + } +} + #[cfg(test)] mod tests { use std::sync::Arc; @@ -326,6 +369,12 @@ mod tests { } } + impl MemUsageTreeCapture for AllEvents { + fn capture_mem_usage_tree(&self) -> MemUsageTree { + MemUsageTree::Bytes(0) + } + } + impl ChunkStoreSubscriber for AllEvents { fn name(&self) -> String { "rerun.testing.store_subscribers.AllEvents".into() diff --git a/crates/store/re_chunk_store/src/writes.rs b/crates/store/re_chunk_store/src/writes.rs index 1bd6835d51fd..241996710994 100644 --- a/crates/store/re_chunk_store/src/writes.rs +++ b/crates/store/re_chunk_store/src/writes.rs @@ -12,9 +12,9 @@ use re_log_encoding::{RrdManifest, RrdManifestTemporalMapEntry}; use crate::store::ChunkIdSetPerTime; use crate::{ - ChunkDirectLineage, ChunkDirectLineageReport, ChunkId, ChunkStore, ChunkStoreChunkStats, - ChunkStoreConfig, ChunkStoreDiff, ChunkStoreDiffAddition, ChunkStoreError, ChunkStoreEvent, - ChunkStoreResult, ColumnMetadataState, + ChunkDeletionReason, ChunkDirectLineage, ChunkDirectLineageReport, ChunkId, ChunkStore, + ChunkStoreChunkStats, ChunkStoreConfig, ChunkStoreDiff, ChunkStoreDiffAddition, + ChunkStoreError, ChunkStoreEvent, ChunkStoreResult, }; // --- @@ -24,18 +24,14 @@ impl ChunkStore { /// /// All queries will return partial results until the missing physical data gets loaded in. 
#[must_use = "The chunk store events should be handled"] - pub fn insert_rrd_manifest( - &mut self, - rrd_manifest: Arc, - ) -> ChunkStoreResult { + pub fn insert_rrd_manifest(&mut self, rrd_manifest: Arc) -> Vec { re_tracing::profile_function!(); let Self { id: _, config: _, - time_type_registry, - per_column_metadata, - physical_chunks_per_chunk_id: _, // physical data only + schema: _, // handled below + physical_chunks_per_chunk_id: _, // physical data only physical_chunk_ids_per_min_row_id: _, // physical data only chunks_lineage, dangling_splits: _, // cannot split during virtual insert @@ -52,46 +48,6 @@ impl ChunkStore { event_id: _, } = self; - let sorbet_schema = re_sorbet::SorbetSchema::try_from_raw_arrow_schema(Arc::new( - rrd_manifest.sorbet_schema().clone(), - ))?; - - time_type_registry.extend( - sorbet_schema - .columns - .index_columns() - .map(|descr| (descr.timeline_name(), descr.timeline().typ())), - ); - - for descr in sorbet_schema.columns.component_columns() { - let inner_datatype = descr.inner_datatype(); - let previous = per_column_metadata - .entry(descr.entity_path.clone()) - .or_default() - .insert( - descr.component, - ( - descr.component_descriptor(), - ColumnMetadataState { - is_semantically_empty: descr.is_semantically_empty, - }, - inner_datatype.clone(), - ), - ); - - if let Some(previous) = previous - && previous.2 != inner_datatype - { - re_log::warn_once!( - "Component '{}' on entity '{}' changed type from {} to {}", - descr.component, - descr.entity_path, - re_arrow_util::format_data_type(&previous.2), - re_arrow_util::format_data_type(&inner_datatype) - ); - } - } - let native_static_map = rrd_manifest.static_map(); chunks_lineage.extend( native_static_map @@ -100,11 +56,16 @@ impl ChunkStore { .map(|chunk_id| { ( *chunk_id, - ChunkDirectLineage::ReferencedFrom(rrd_manifest.clone()), + ChunkDirectLineage::RootFromManifest { is_static: true }, ) }), ); - *static_chunk_ids_per_entity = native_static_map.clone(); + for 
(entity_path, per_component) in native_static_map { + static_chunk_ids_per_entity + .entry(entity_path.clone()) + .or_default() + .extend(per_component.iter().map(|(&k, &v)| (k, v))); + } let native_temporal_map = rrd_manifest.temporal_map(); chunks_lineage.extend( @@ -116,7 +77,7 @@ impl ChunkStore { .map(|chunk_id| { ( *chunk_id, - ChunkDirectLineage::ReferencedFrom(rrd_manifest.clone()), + ChunkDirectLineage::RootFromManifest { is_static: false }, ) }), ); @@ -186,11 +147,28 @@ impl ChunkStore { diff: ChunkStoreDiff::virtual_addition(rrd_manifest), }; + let new_columns = self.schema.on_events(std::slice::from_ref(&event)); + + let mut events = vec![event]; + + if !new_columns.is_empty() { + events.push(ChunkStoreEvent { + store_id: self.id.clone(), + store_generation: self.generation(), + event_id: self + .event_id + .fetch_add(1, std::sync::atomic::Ordering::Relaxed), + diff: ChunkStoreDiff::SchemaAddition(crate::ChunkStoreDiffSchemaAddition { + new_columns, + }), + }); + } + if self.config.enable_changelog { - Self::on_events(std::slice::from_ref(&event)); + Self::on_events(&events); } - Ok(event) + events } /// Inserts a [`Chunk`] in the store. 
@@ -208,24 +186,7 @@ impl ChunkStore { } let diffs = self.insert_chunk_impl(chunk, ChunkDirectLineageReport::Volatile)?; - - let events: Vec<_> = diffs - .into_iter() - .map(|diff| ChunkStoreEvent { - store_id: self.id.clone(), - store_generation: self.generation(), - event_id: self - .event_id - .fetch_add(1, std::sync::atomic::Ordering::Relaxed), - diff, - }) - .collect(); - - if self.config.enable_changelog { - Self::on_events(&events); - } - - Ok(events) + Ok(self.finalize_events(diffs)) } fn insert_chunk_impl( @@ -261,7 +222,9 @@ impl ChunkStore { chunk.id() ); } else { - re_log::warn_once!("The same chunk was inserted twice (this has no effect)"); + re_log::warn_once!( + "[DEBUG] The same chunk was inserted twice (this has no effect)" + ); } } else { re_log::debug_once!("The same chunk was inserted twice (this has no effect)"); @@ -275,6 +238,15 @@ impl ChunkStore { return Err(ChunkStoreError::UnsortedChunk); } + for (timeline, time_column) in chunk.timelines() { + if !time_column.is_sorted() { + let entity_path = chunk.entity_path(); + re_log::debug_warn_once!( + "Found chunk for entity '{entity_path}' where timeline '{timeline}' was unsorted (compared to RowId). This may cause performance issues." + ); + } + } + re_tracing::profile_function!(); { @@ -294,7 +266,7 @@ impl ChunkStore { if matches!( self.direct_lineage(&chunk.id()), - Some(&ChunkDirectLineage::ReferencedFrom(_)) + Some(&ChunkDirectLineage::RootFromManifest { .. }) ) { // If we reach here, then a chunk that was previously virtually inserted using `insert_rrd_manifest` // is about to be physically inserted for real. @@ -310,9 +282,13 @@ impl ChunkStore { // The fix is simple: always unconditionally clean up the indexes when a virtual chunk // gets physically inserted. 
all_diffs.extend( - self.remove_chunks_deep(vec![chunk.clone()], None) - .into_iter() - .map(Into::into), + self.remove_chunks_deep( + vec![chunk.clone()], + None, + ChunkDeletionReason::VirtualToPhysicalReplacement, + ) + .into_iter() + .map(Into::into), ); } @@ -331,6 +307,7 @@ impl ChunkStore { }) .collect(), None, + ChunkDeletionReason::DanglingSplitCleanup, ) .into_iter() .map(Into::into), @@ -549,8 +526,8 @@ impl ChunkStore { let chunk_id_removed = self .physical_chunk_ids_per_min_row_id - .remove(&chunk_row_id_min); - debug_assert!(chunk_id_removed.is_some()); + .remove(&(chunk_row_id_min, chunk_id)); + debug_assert!(chunk_id_removed); let chunk_removed = self.physical_chunks_per_chunk_id.remove(&chunk_id); debug_assert!(chunk_removed.is_some()); @@ -558,7 +535,10 @@ impl ChunkStore { if let Some(chunk_removed) = chunk_removed { self.static_chunks_stats -= ChunkStoreChunkStats::from_chunk(&chunk_removed); - diffs.push(ChunkStoreDiff::deletion(chunk_removed)); + diffs.push(ChunkStoreDiff::deletion( + chunk_removed, + ChunkDeletionReason::Overwrite, + )); } } } @@ -575,21 +555,7 @@ impl ChunkStore { let elected_chunk = self.find_and_elect_compaction_candidate(chunk); let chunk_or_compacted = if let Some(elected_chunk) = &elected_chunk { - let chunk_rowid_min = chunk.row_id_range().map(|(min, _)| min); - let elected_rowid_min = elected_chunk.row_id_range().map(|(min, _)| min); - - let mut compacted = if elected_rowid_min < chunk_rowid_min { - re_tracing::profile_scope!("concat"); - elected_chunk.concatenated(chunk)? - } else { - re_tracing::profile_scope!("concat"); - chunk.concatenated(elected_chunk)? 
- }; - - { - re_tracing::profile_scope!("sort"); - compacted.sort_if_unsorted(); - } + let compacted = Chunk::concat_and_sort(elected_chunk, chunk)?; re_log::trace!( "compacted {} ({} rows) and {} ({} rows) together, resulting in {} ({} rows)", @@ -708,9 +674,13 @@ impl ChunkStore { std::iter::once((chunk_before_processing.id(), chunk_before_processing)) .chain( // NOTE: deep removal, we don't want a compacted chunk to linger on! - self.remove_chunks_deep(vec![elected_chunk.clone()], None) - .into_iter() - .map(|diff| (diff.chunk.id(), diff.chunk)), + self.remove_chunks_deep( + vec![elected_chunk.clone()], + None, + ChunkDeletionReason::Compaction, + ) + .into_iter() + .map(|diff| (diff.chunk.id(), diff.chunk)), ) .collect(); @@ -756,66 +726,9 @@ impl ChunkStore { // NOTE: ⚠️Make sure to recompute the Row ID range! The chunk might have been compacted // with another one, which might or might not have modified the range. - if let Some(min_row_id) = chunk_after_processing.row_id_range().map(|(min, _)| min) - && self - .physical_chunk_ids_per_min_row_id - .insert(min_row_id, chunk_after_processing.id()) - .is_some() - { - re_log::warn_once!( - "Detected duplicated RowId in the data, this might lead to undefined behavior" - ); - } - - for (name, columns) in chunk_after_processing.timelines() { - let new_typ = columns.timeline().typ(); - if let Some(old_typ) = self.time_type_registry.insert(*name, new_typ) - && old_typ != new_typ - { - re_log::warn_once!( - "Timeline '{name}' changed type from {old_typ:?} to {new_typ:?}. 
\ - Rerun does not support using different types for the same timeline.", - ); - } - } - - for column in chunk_after_processing.components().values() { - let re_types_core::SerializedComponentColumn { - list_array, - descriptor, - } = column; - - let (descr, column_metadata_state, datatype) = self - .per_column_metadata - .entry(chunk_after_processing.entity_path().clone()) - .or_default() - .entry(descriptor.component) - .or_insert_with(|| { - ( - descriptor.clone(), - ColumnMetadataState { - is_semantically_empty: true, - }, - list_array.value_type().clone(), - ) - }); - { - if *datatype != list_array.value_type() { - // TODO(grtlr): If we encounter two different data types, we should split the chunk. - // More information: https://github.com/rerun-io/rerun/pull/10082#discussion_r2140549340 - re_log::warn!( - "Datatype of column {descr} in {} has changed from {datatype} to {}", - chunk_after_processing.entity_path(), - list_array.value_type() - ); - *datatype = list_array.value_type().clone(); - } - - let is_semantically_empty = - re_arrow_util::is_list_array_semantically_empty(list_array); - - column_metadata_state.is_semantically_empty &= is_semantically_empty; - } + if let Some(min_row_id) = chunk_after_processing.row_id_range().map(|(min, _)| min) { + self.physical_chunk_ids_per_min_row_id + .insert((min_row_id, chunk_after_processing.id())); } Ok(all_diffs) @@ -1005,13 +918,10 @@ impl ChunkStore { self.gc_id += 1; // close enough - let generation = self.generation(); - let Self { - id, + id: _, config: _, - time_type_registry: _, - per_column_metadata, + schema, physical_chunks_per_chunk_id: chunks_per_chunk_id, chunks_lineage: _, // lineage metadata must never be dropped, regardless dangling_splits: _, // this counts as lineage metadata too @@ -1026,10 +936,10 @@ impl ChunkStore { queried_chunk_id_tracker: _, insert_id: _, gc_id: _, - event_id, + event_id: _, } = self; - per_column_metadata.remove(entity_path); + schema.drop_entity(entity_path); let 
dropped_static_chunks = { let dropped_static_chunk_ids: BTreeSet<_> = static_chunk_ids_per_entity @@ -1043,7 +953,7 @@ impl ChunkStore { .get(chunk_id) .and_then(|chunk| chunk.row_id_range().map(|(min, _)| min)) { - chunk_ids_per_min_row_id.remove(&min_row_id); + chunk_ids_per_min_row_id.remove(&(min_row_id, *chunk_id)); } } @@ -1075,7 +985,7 @@ impl ChunkStore { .get(chunk_id) .and_then(|chunk| chunk.row_id_range().map(|(min, _)| min)) { - chunk_ids_per_min_row_id.remove(&min_row_id); + chunk_ids_per_min_row_id.remove(&(min_row_id, *chunk_id)); } } @@ -1096,29 +1006,19 @@ impl ChunkStore { *temporal_physical_chunks_stats -= ChunkStoreChunkStats::from_chunk(chunk); }); - let events: Vec<_> = dropped_static_chunks + let diffs: Vec<_> = dropped_static_chunks .into_iter() .chain(dropped_temporal_chunks) - .map(ChunkStoreDiff::deletion) - .map(|diff| ChunkStoreEvent { - store_id: id.clone(), - store_generation: generation.clone(), - event_id: event_id.fetch_add(1, std::sync::atomic::Ordering::Relaxed), - diff, - }) + .map(|chunk| ChunkStoreDiff::deletion(chunk, ChunkDeletionReason::ExplicitDrop)) .collect(); - if self.config.enable_changelog { - Self::on_events(&events); - } - - events + self.finalize_events(diffs) } } #[cfg(test)] mod tests { - use std::collections::BTreeMap; + use std::collections::BTreeSet; use re_chunk::{TimeInt, TimePoint, Timeline}; use re_log_types::example_components::{MyColor, MyLabel, MyPoint, MyPoints}; @@ -1452,18 +1352,48 @@ mod tests { let chunk4 = Arc::new(chunk4); let events = store.insert_chunk(&chunk1)?; + assert_eq!(events.len(), 2); + assert_eq!(events[0].delta_chunk().unwrap().id(), chunk1.id()); assert!( - events.len() == 1 - && events[0].delta_chunk().unwrap().id() == chunk1.id() - && matches!(events[0].diff, ChunkStoreDiff::Addition(_)), - "the first write should result in the addition of chunk1 and nothing else" + events[0].is_addition(), + "the first write should result in the addition of chunk1" + ); + // chunk1 introduces 3 
new components on this entity: points, colors, labels. + let schema_add = match &events[1].diff { + ChunkStoreDiff::SchemaAddition(sa) => sa, + other => panic!("expected SchemaAddition, got {other:?}"), + }; + assert_eq!(schema_add.new_columns.len(), 1); + assert_eq!(schema_add.new_columns[0].entity_path, entity_path); + let new_descriptors: BTreeSet<_> = schema_add.new_columns[0] + .components + .iter() + .map(|c| c.descriptor.clone()) + .collect(); + assert_eq!( + new_descriptors, + BTreeSet::from([ + MyPoints::descriptor_points(), + MyPoints::descriptor_colors(), + MyPoints::descriptor_labels(), + ]), + "points, colors, labels" ); + // All should be is_static since chunk1 is static. + for comp in &schema_add.new_columns[0].components { + assert!( + comp.is_static, + "{} should be is_static after a static insert", + comp.descriptor + ); + } let events = store.insert_chunk(&chunk2)?; + // chunk2 only has points and colors which already exist — no new schema columns. + assert_eq!(events.len(), 1); + assert_eq!(events[0].delta_chunk().unwrap().id(), chunk2.id()); assert!( - events.len() == 1 - && events[0].delta_chunk().unwrap().id() == chunk2.id() - && matches!(events[0].diff, ChunkStoreDiff::Addition(_)), + events[0].is_addition(), "the second write should result in the addition of chunk2 and nothing else" ); @@ -1481,13 +1411,13 @@ mod tests { } let events = store.insert_chunk(&chunk3)?; + assert_eq!(events.len(), 2); + assert_eq!(events[0].delta_chunk().unwrap().id(), chunk3.id()); + assert!(events[0].is_addition()); + assert_eq!(events[1].delta_chunk().unwrap().id(), chunk1.id()); assert!( - events.len() == 2 - && events[0].delta_chunk().unwrap().id() == chunk3.id() - && matches!(events[0].diff, ChunkStoreDiff::Addition(_)) - && events[1].delta_chunk().unwrap().id() == chunk1.id() - && matches!(events[1].diff, ChunkStoreDiff::Deletion(_)), - "the third write should result in the addition of chunk3 _and_ the deletion of the now fully overwritten chunk1" + 
events[1].is_deletion(), + "the third write should result in the addition of chunk3 and the deletion of chunk1" ); let stats_after = store.stats(); @@ -1525,6 +1455,206 @@ mod tests { Ok(()) } + /// Temporal data first, then static: `is_static` should transition and re-emit a `SchemaAddition`. + #[test] + fn schema_temporal_then_static() -> anyhow::Result<()> { + re_log::setup_logging(); + + let mut store = ChunkStore::new( + re_log_types::StoreId::random(re_log_types::StoreKind::Recording, "test_app"), + Default::default(), + ); + + let entity_path = EntityPath::from("this/that"); + let points = &[MyPoint::new(1.0, 1.0)]; + + // Temporal insert: new component, is_static = false. + let events = store.insert_chunk(&Arc::new( + Chunk::builder(entity_path.clone()) + .with_component_batches( + RowId::new(), + [(Timeline::new_sequence("frame"), 1)], + [(MyPoints::descriptor_points(), points as _)], + ) + .build()?, + ))?; + assert_eq!(events.len(), 2); + assert!(events[0].is_addition()); + let schema_add = match &events[1].diff { + ChunkStoreDiff::SchemaAddition(sa) => sa, + other => panic!("expected SchemaAddition, got {other:?}"), + }; + assert!(!schema_add.new_columns[0].components[0].is_static); + + // Static insert: same component, triggers is_static transition. + let events = store.insert_chunk(&Arc::new( + Chunk::builder(entity_path.clone()) + .with_component_batches( + RowId::new(), + TimePoint::STATIC, + [(MyPoints::descriptor_points(), points as _)], + ) + .build()?, + ))?; + assert_eq!(events.len(), 2); + assert!(events[0].is_addition()); + let schema_add = match &events[1].diff { + ChunkStoreDiff::SchemaAddition(sa) => sa, + other => panic!("expected SchemaAddition for is_static transition, got {other:?}"), + }; + assert!( + schema_add.new_columns[0].components[0].is_static, + "component should now be is_static" + ); + + // Another temporal insert: no further transition. 
+ let events = store.insert_chunk(&Arc::new( + Chunk::builder(entity_path.clone()) + .with_component_batches( + RowId::new(), + [(Timeline::new_sequence("frame"), 2)], + [(MyPoints::descriptor_points(), points as _)], + ) + .build()?, + ))?; + assert!( + !events.iter().any(|e| e.is_schema_addition()), + "no SchemaAddition after transition already happened" + ); + + Ok(()) + } + + /// `insert_rrd_manifest` should emit a `SchemaAddition` with the manifest's columns. + #[test] + fn schema_addition_from_manifest() -> anyhow::Result<()> { + re_log::setup_logging(); + + let store_id = + re_log_types::StoreId::random(re_log_types::StoreKind::Recording, "test_app"); + let mut store = ChunkStore::new(store_id.clone(), Default::default()); + + let entity_path = EntityPath::from("this/that"); + let tl = Timeline::new_sequence("frame"); + let point = MyPoint::new(1.0, 1.0); + + let chunks: Vec> = [10, 20] + .into_iter() + .map(|t| { + Arc::new( + Chunk::builder(entity_path.clone()) + .with_component_batch( + RowId::new(), + TimePoint::from_iter([(tl, t)]), + (MyPoints::descriptor_points(), &[point] as _), + ) + .build() + .unwrap(), + ) + }) + .collect(); + + let rrd_manifest = re_log_encoding::RrdManifest::build_in_memory_from_chunks( + store_id, + chunks.iter().map(|c| &**c), + )?; + + let events = store.insert_rrd_manifest(rrd_manifest); + assert_eq!(events.len(), 2); + assert!(events[0].is_virtual_addition()); + let schema_add = match &events[1].diff { + ChunkStoreDiff::SchemaAddition(sa) => sa, + other => panic!("expected SchemaAddition, got {other:?}"), + }; + assert_eq!(schema_add.new_columns.len(), 1); + assert_eq!(schema_add.new_columns[0].entity_path, entity_path); + assert!(!schema_add.new_columns[0].components.is_empty()); + + // Inserting the same manifest again should NOT emit a second SchemaAddition. 
+ let rrd_manifest2 = re_log_encoding::RrdManifest::build_in_memory_from_chunks( + re_log_types::StoreId::random(re_log_types::StoreKind::Recording, "test_app"), + chunks.iter().map(|c| &**c), + )?; + let events2 = store.insert_rrd_manifest(rrd_manifest2); + assert!( + !events2.iter().any(|e| e.is_schema_addition()), + "re-inserting a manifest with the same columns should not emit SchemaAddition" + ); + + Ok(()) + } + + /// Manifest with temporal data followed by manifest with static data: + /// `is_static` should transition and re-emit a `SchemaAddition`. + #[test] + fn schema_static_transition_from_manifest() -> anyhow::Result<()> { + re_log::setup_logging(); + + let store_id = + re_log_types::StoreId::random(re_log_types::StoreKind::Recording, "test_app"); + let mut store = ChunkStore::new(store_id.clone(), Default::default()); + + let entity_path = EntityPath::from("this/that"); + let tl = Timeline::new_sequence("frame"); + let point = MyPoint::new(1.0, 1.0); + + // First manifest: temporal-only data. + let temporal_chunk = Arc::new( + Chunk::builder(entity_path.clone()) + .with_component_batch( + RowId::new(), + TimePoint::from_iter([(tl, 10)]), + (MyPoints::descriptor_points(), &[point] as _), + ) + .build()?, + ); + let manifest_temporal = re_log_encoding::RrdManifest::build_in_memory_from_chunks( + store_id.clone(), + std::iter::once(&*temporal_chunk), + )?; + + let events = store.insert_rrd_manifest(manifest_temporal); + assert_eq!(events.len(), 2); + assert!(events[0].is_virtual_addition()); + let schema_add = match &events[1].diff { + ChunkStoreDiff::SchemaAddition(sa) => sa, + other => panic!("expected SchemaAddition, got {other:?}"), + }; + assert!( + !schema_add.new_columns[0].components[0].is_static, + "first manifest is temporal-only" + ); + + // Second manifest: same component but with static data. 
+ let static_chunk = Arc::new( + Chunk::builder(entity_path.clone()) + .with_component_batch( + RowId::new(), + TimePoint::STATIC, + (MyPoints::descriptor_points(), &[point] as _), + ) + .build()?, + ); + let manifest_static = re_log_encoding::RrdManifest::build_in_memory_from_chunks( + store_id, + std::iter::once(&*static_chunk), + )?; + + let events = store.insert_rrd_manifest(manifest_static); + assert_eq!(events.len(), 2); + assert!(events[0].is_virtual_addition()); + let schema_add = match &events[1].diff { + ChunkStoreDiff::SchemaAddition(sa) => sa, + other => panic!("expected SchemaAddition for is_static transition, got {other:?}"), + }; + assert!( + schema_add.new_columns[0].components[0].is_static, + "component should now be is_static after static manifest" + ); + + Ok(()) + } + #[test] fn row_id_min_overwrites() -> anyhow::Result<()> { re_log::setup_logging(); @@ -1562,7 +1692,7 @@ mod tests { chunks: impl IntoIterator, ) { assert_eq!( - chunks.into_iter().collect::>(), + chunks.into_iter().collect::>(), store.physical_chunk_ids_per_min_row_id ); } diff --git a/crates/store/re_chunk_store/tests/compact.rs b/crates/store/re_chunk_store/tests/compact.rs new file mode 100644 index 000000000000..faa438631f50 --- /dev/null +++ b/crates/store/re_chunk_store/tests/compact.rs @@ -0,0 +1,80 @@ +//! Tests for `compacted()` and `finalize_compaction`. + +#![cfg(test)] + +use std::sync::Arc; + +use re_chunk::{Chunk, RowId}; +use re_chunk_store::{ChunkStore, ChunkStoreConfig, CompactionOptions}; +use re_log_types::example_components::{MyPoint, MyPoints}; +use re_log_types::{EntityPath, TimePoint, Timeline}; + +/// Builds a store with many single-row chunks sharing entity `/sensor` and +/// timeline `"frame"`. Intentionally fragmented to trigger compaction. 
+fn fragmented_store() -> ChunkStore { + let store_id = re_log_types::StoreId::random(re_log_types::StoreKind::Recording, "test_app"); + let mut store = ChunkStore::new(store_id, ChunkStoreConfig::ALL_DISABLED); + + let entity_path: EntityPath = "/sensor".into(); + let timeline_frame = Timeline::new_sequence("frame"); + + for i in 0..20 { + let timepoint = TimePoint::from_iter([(timeline_frame, i as i64)]); + let point = MyPoint::new(i as f32, i as f32); + let chunk = Chunk::builder(entity_path.clone()) + .with_component_batch( + RowId::new(), + timepoint, + (MyPoints::descriptor_points(), &[point]), + ) + .build() + .expect("build chunk"); + store.insert_chunk(&Arc::new(chunk)).expect("insert chunk"); + } + + store +} + +fn options(num_extra_passes: Option) -> CompactionOptions { + CompactionOptions { + config: ChunkStoreConfig::DEFAULT, + num_extra_passes, + is_start_of_gop: None, + split_size_ratio: None, + } +} + +#[test] +fn compacted_reduces_chunk_count() { + let store = fragmented_store(); + let before = store.num_physical_chunks(); + let compacted = store.compacted(&options(Some(50))).expect("compacted"); + assert!(compacted.num_physical_chunks() < before); +} + +#[test] +fn finalize_compaction_converges() { + let store = fragmented_store() + .compacted(&options(Some(50))) + .expect("initial"); + let before = store.num_physical_chunks(); + let store2 = store + .finalize_compaction(&options(Some(5))) + .expect("idempotent"); + assert_eq!(before, store2.num_physical_chunks()); +} + +#[test] +fn compacted_preserves_row_count() { + let store = fragmented_store(); + let rows_before: u64 = store + .iter_physical_chunks() + .map(|c| c.num_rows() as u64) + .sum(); + let compacted = store.compacted(&options(Some(50))).expect("compacted"); + let rows_after: u64 = compacted + .iter_physical_chunks() + .map(|c| c.num_rows() as u64) + .sum(); + assert_eq!(rows_before, rows_after); +} diff --git a/crates/store/re_chunk_store/tests/correctness.rs 
b/crates/store/re_chunk_store/tests/correctness.rs index ccb304aa2632..02d19a03cec5 100644 --- a/crates/store/re_chunk_store/tests/correctness.rs +++ b/crates/store/re_chunk_store/tests/correctness.rs @@ -33,7 +33,7 @@ fn query_latest_component( .to_iter() .unwrap() .filter_map(|chunk| { - let unit = chunk.latest_at(query, component).into_unit()?; + let unit = chunk.latest_at(query, component)?; unit.index(&query.timeline()).map(|index| (index, unit)) }) .max_by_key(|(index, _unit)| *index)?; diff --git a/crates/store/re_chunk_store/tests/dataframe.rs b/crates/store/re_chunk_store/tests/dataframe.rs index 2320c7af2c30..cae4888f4f7b 100644 --- a/crates/store/re_chunk_store/tests/dataframe.rs +++ b/crates/store/re_chunk_store/tests/dataframe.rs @@ -43,7 +43,7 @@ fn schema() -> anyhow::Result<()> { let chunk1 = Arc::new(chunk1); store.insert_chunk(&chunk1)?; - let ChunkColumnDescriptors { components, .. } = store.schema(); + let ChunkColumnDescriptors { components, .. } = store.schema().chunk_column_descriptors(); assert_eq!( components diff --git a/crates/store/re_chunk_store/tests/drop_time_range.rs b/crates/store/re_chunk_store/tests/drop_time_range.rs index a7da80a4d147..bd0e7a1d149e 100644 --- a/crates/store/re_chunk_store/tests/drop_time_range.rs +++ b/crates/store/re_chunk_store/tests/drop_time_range.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use re_chunk::{Chunk, RowId}; -use re_chunk_store::{ChunkStore, ChunkStoreConfig}; +use re_chunk_store::{ChunkDeletionReason, ChunkStore, ChunkStoreConfig}; use re_log_types::example_components::{MyColor, MyPoints}; use re_log_types::{AbsoluteTimeRange, EntityPath, TimePoint, Timeline}; @@ -111,17 +111,19 @@ fn drop_time_range() -> anyhow::Result<()> { assert_eq!(num_events(&store), 12); + let reason = ChunkDeletionReason::ExplicitDrop; + // Drop nothing: - store.drop_time_range_deep(timeline.name(), AbsoluteTimeRange::new(10, 100)); - store.drop_time_range_deep(timeline.name(), AbsoluteTimeRange::new(-100, -10)); + 
store.drop_time_range_deep(timeline.name(), AbsoluteTimeRange::new(10, 100), reason); + store.drop_time_range_deep(timeline.name(), AbsoluteTimeRange::new(-100, -10), reason); assert_eq!(num_events(&store), 12); // Drop stuff from the middle of the first chunk, and the start of the second: - store.drop_time_range_deep(timeline.name(), AbsoluteTimeRange::new(1, 2)); + store.drop_time_range_deep(timeline.name(), AbsoluteTimeRange::new(1, 2), reason); assert_eq!(num_events(&store), 9); // Drop a bunch in the middle (including all of middle chunk): - store.drop_time_range_deep(timeline.name(), AbsoluteTimeRange::new(2, 5)); + store.drop_time_range_deep(timeline.name(), AbsoluteTimeRange::new(2, 5), reason); assert_eq!(num_events(&store), 3); } diff --git a/crates/store/re_chunk_store/tests/gc.rs b/crates/store/re_chunk_store/tests/gc.rs index d9017dfadfff..d7cd394adb0b 100644 --- a/crates/store/re_chunk_store/tests/gc.rs +++ b/crates/store/re_chunk_store/tests/gc.rs @@ -32,7 +32,7 @@ fn query_latest_array( .chunks .into_iter() .filter_map(|chunk| { - let chunk = chunk.latest_at(query, component).into_unit()?; + let chunk = chunk.latest_at(query, component)?; chunk.index(&query.timeline()).map(|index| (index, chunk)) }) .max_by_key(|(index, _chunk)| *index)?; diff --git a/crates/store/re_chunk_store/tests/reads.rs b/crates/store/re_chunk_store/tests/reads.rs index ac37e32600f1..ca5428e21669 100644 --- a/crates/store/re_chunk_store/tests/reads.rs +++ b/crates/store/re_chunk_store/tests/reads.rs @@ -33,9 +33,7 @@ fn query_latest_array( .to_iter() .unwrap() .filter_map(|chunk| { - let chunk = chunk - .latest_at(query, component_descr.component) - .into_unit()?; + let chunk = chunk.latest_at(query, component_descr.component)?; chunk.index(&query.timeline()).map(|index| (index, chunk)) }) .max_by_key(|(index, _chunk)| *index)?; diff --git a/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap 
b/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap index 620a2254ed89..a677cd05e06b 100644 --- a/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap +++ b/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap @@ -14,25 +14,25 @@ ChunkStore { physical chunks: [ chunk_0000000000661EFDf2e3b19f7c045f15 (status:loaded static:no) origin: (cannot be re-fetched) - ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ - │ METADATA: │ - │ * entity_path: /this/that │ - │ * id: chunk_0000000000661EFDf2e3b19f7c045f15 │ + ┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ + │ METADATA: │ + │ * entity_path: /this/that │ + │ * id: chunk_0000000000661EFDf2e3b19f7c045f15 │ │ * version: [**REDACTED**] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ ┌─────────────────────────────────────────────┬──────────────────────┬───────────────────────────────┬───────────────────────────────────┬────────────────────────────────────┐ │ - │ │ RowId ┆ frame_nr ┆ log_time ┆ my_index ┆ example.MyPoints:colors │ │ - │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ - │ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u64] ┆ type: nullable List[nullable u32] │ │ - │ │ ARROW:extension:metadata: ┆ index_name: frame_nr ┆ index_name: log_time ┆ component: my_index ┆ archetype: example.MyPoints │ │ - │ │ {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ component_type: example.MyIndex ┆ component: example.MyPoints:colors │ │ - │ │ ARROW:extension:name: TUID ┆ kind: index ┆ kind: 
index ┆ kind: data ┆ component_type: example.MyColor │ │ - │ │ is_sorted: true ┆ ┆ ┆ ┆ kind: data │ │ - │ │ kind: control ┆ ┆ ┆ ┆ │ │ - │ ╞═════════════════════════════════════════════╪══════════════════════╪═══════════════════════════════╪═══════════════════════════════════╪════════════════════════════════════╡ │ - │ │ row_0000000067816A6Bb4b8c1254d40007b ┆ 1 ┆ 2025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │ - │ └─────────────────────────────────────────────┴──────────────────────┴───────────────────────────────┴───────────────────────────────────┴────────────────────────────────────┘ │ - └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ ┌─────────────────────────────────────────────┬──────────────────────┬───────────────────────────────┬─────────────────────────────────┬────────────────────────────────────┐ │ + │ │ RowId ┆ frame_nr ┆ log_time ┆ my_index ┆ example.MyPoints:colors │ │ + │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ + │ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt64) ┆ type: List(UInt32) │ │ + │ │ ARROW:extension:metadata: ┆ index_name: frame_nr ┆ index_name: log_time ┆ component: my_index ┆ archetype: example.MyPoints │ │ + │ │ {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ component_type: example.MyIndex ┆ component: example.MyPoints:colors │ │ + │ │ ARROW:extension:name: TUID ┆ kind: index ┆ kind: index ┆ kind: data ┆ component_type: example.MyColor │ │ + │ │ is_sorted: true ┆ ┆ ┆ ┆ kind: data │ │ + │ │ kind: control ┆ ┆ ┆ ┆ │ │ + │ 
╞═════════════════════════════════════════════╪══════════════════════╪═══════════════════════════════╪═════════════════════════════════╪════════════════════════════════════╡ │ + │ │ row_0000000067816A6Bb4b8c1254d40007b ┆ 1 ┆ 2025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │ + │ └─────────────────────────────────────────────┴──────────────────────┴───────────────────────────────┴─────────────────────────────────┴────────────────────────────────────┘ │ + └───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ] virtual chunks: [ ] diff --git a/crates/store/re_data_loader/README.md b/crates/store/re_data_loader/README.md deleted file mode 100644 index bca69af0434b..000000000000 --- a/crates/store/re_data_loader/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# re_data_loader - -Part of the [`rerun`](https://github.com/rerun-io/rerun) family of crates. - -[![Latest version](https://img.shields.io/crates/v/re_data_loader.svg)](https://crates.io/crates/re_data_loader) -[![Documentation](https://docs.rs/re_data_loader/badge.svg)](https://docs.rs/re_data_loader) -![MIT](https://img.shields.io/badge/license-MIT-blue.svg) -![Apache](https://img.shields.io/badge/license-Apache-blue.svg) - -Handles loading of Rerun data from file using data loader plugins diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/camera_calibration.rs b/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/camera_calibration.rs deleted file mode 100644 index e7a25633ada1..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/camera_calibration.rs +++ /dev/null @@ -1,49 +0,0 @@ -use re_lenses::{Lens, LensError, Op}; -use re_log_types::{EntityPathFilter, TimeType}; -use re_sdk_types::archetypes::Pinhole; - -use crate::loader_mcap::lenses::helpers::{ - list_3x3_row_major_to_column_major, width_height_to_resolution, -}; - -use 
super::{FOXGLOVE_TIMESTAMP, IMAGE_PLANE_SUFFIX}; - -/// Creates a lens for [`foxglove.CameraCalibration`] messages. -/// -/// [`foxglove.CameraCalibration`]: https://docs.foxglove.dev/docs/sdk/schemas/camera-calibration -pub fn camera_calibration() -> Result { - Ok(Lens::for_input_column( - EntityPathFilter::all(), - "foxglove.CameraCalibration:message", - ) - .output_columns(|out| { - out.time( - FOXGLOVE_TIMESTAMP, - TimeType::TimestampNs, - [Op::selector(".timestamp"), Op::time_spec_to_nanos()], - ) - .component( - Pinhole::descriptor_child_frame(), - [ - Op::selector(".frame_id"), - Op::string_suffix_nonempty(IMAGE_PLANE_SUFFIX), - ], - ) - .component( - Pinhole::descriptor_resolution(), - [Op::func(width_height_to_resolution)], - ) - .component( - Pinhole::descriptor_image_from_camera(), - [ - Op::selector(".K"), - Op::func(list_3x3_row_major_to_column_major), - ], - ) - .component( - Pinhole::descriptor_parent_frame(), - [Op::selector(".frame_id")], - ) - })? - .build()) -} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/compressed_image.rs b/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/compressed_image.rs deleted file mode 100644 index 17beb0465148..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/compressed_image.rs +++ /dev/null @@ -1,39 +0,0 @@ -use re_lenses::{Lens, LensError, Op}; -use re_log_types::{EntityPathFilter, TimeType}; -use re_sdk_types::archetypes::{CoordinateFrame, EncodedImage}; - -use super::{FOXGLOVE_TIMESTAMP, IMAGE_PLANE_SUFFIX}; - -/// Creates a lens for [`foxglove.CompressedImage`] messages. 
-/// -/// [`foxglove.CompressedImage`]: https://docs.foxglove.dev/docs/sdk/schemas/compressed-image -pub fn compressed_image() -> Result { - Ok( - Lens::for_input_column(EntityPathFilter::all(), "foxglove.CompressedImage:message") - .output_columns(|out| { - out.time( - FOXGLOVE_TIMESTAMP, - TimeType::TimestampNs, - [Op::selector(".timestamp"), Op::time_spec_to_nanos()], - ) - .component( - CoordinateFrame::descriptor_frame(), - [ - Op::selector(".frame_id"), - Op::string_suffix_nonempty(IMAGE_PLANE_SUFFIX), - ], - ) - // The format field can be "jpeg", "png", "webp" or "avif" in the Foxglove schema. - // We prefix with "image/" to get valid MIME types for Rerun. - .component( - EncodedImage::descriptor_media_type(), - [Op::selector(".format"), Op::string_prefix("image/")], - ) - .component( - EncodedImage::descriptor_blob(), - [Op::selector(".data"), Op::binary_to_list_uint8()], - ) - })? - .build(), - ) -} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/compressed_video.rs b/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/compressed_video.rs deleted file mode 100644 index 98d0d607c960..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/compressed_video.rs +++ /dev/null @@ -1,37 +0,0 @@ -use re_lenses::{Lens, LensError, Op}; -use re_log_types::{EntityPathFilter, TimeType}; -use re_sdk_types::archetypes::{CoordinateFrame, VideoStream}; - -use super::{FOXGLOVE_TIMESTAMP, IMAGE_PLANE_SUFFIX}; - -/// Creates a lens for [`foxglove.CompressedVideo`] messages. 
-/// -/// [`foxglove.CompressedVideo`]: https://docs.foxglove.dev/docs/sdk/schemas/compressed-video -pub fn compressed_video() -> Result { - Ok( - Lens::for_input_column(EntityPathFilter::all(), "foxglove.CompressedVideo:message") - .output_columns(|out| { - out.time( - FOXGLOVE_TIMESTAMP, - TimeType::TimestampNs, - [Op::selector(".timestamp"), Op::time_spec_to_nanos()], - ) - .component( - CoordinateFrame::descriptor_frame(), - [ - Op::selector(".frame_id"), - Op::string_suffix_nonempty(IMAGE_PLANE_SUFFIX), - ], - ) - .component( - VideoStream::descriptor_codec(), - [Op::selector(".format"), Op::string_to_video_codec()], - ) - .component( - VideoStream::descriptor_sample(), - [Op::selector(".data"), Op::binary_to_list_uint8()], - ) - })? - .build(), - ) -} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/frame_transform.rs b/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/frame_transform.rs deleted file mode 100644 index 00b0f5a220e8..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/frame_transform.rs +++ /dev/null @@ -1,48 +0,0 @@ -use re_lenses::{Lens, LensError, Op}; -use re_log_types::{EntityPathFilter, TimeType}; -use re_sdk_types::archetypes::Transform3D; - -use crate::loader_mcap::lenses::helpers::{ - list_xyz_struct_to_list_fixed, list_xyzw_struct_to_list_fixed, -}; - -use super::FOXGLOVE_TIMESTAMP; - -/// Creates a lens for [`foxglove.FrameTransform`] messages. 
-/// -/// [`foxglove.FrameTransform`]: https://docs.foxglove.dev/docs/sdk/schemas/frame-transform -pub fn frame_transform() -> Result { - Ok( - Lens::for_input_column(EntityPathFilter::all(), "foxglove.FrameTransform:message") - .output_scatter_columns(|out| { - out.time( - FOXGLOVE_TIMESTAMP, - TimeType::TimestampNs, - [Op::selector(".timestamp"), Op::time_spec_to_nanos()], - ) - .component( - Transform3D::descriptor_parent_frame(), - [Op::selector(".parent_frame_id")], - ) - .component( - Transform3D::descriptor_child_frame(), - [Op::selector(".child_frame_id")], - ) - .component( - Transform3D::descriptor_translation(), - [ - Op::selector(".translation"), - Op::func(list_xyz_struct_to_list_fixed), - ], - ) - .component( - Transform3D::descriptor_quaternion(), - [ - Op::selector(".rotation"), - Op::func(list_xyzw_struct_to_list_fixed), - ], - ) - })? - .build(), - ) -} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/frame_transforms.rs b/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/frame_transforms.rs deleted file mode 100644 index b6e6b5bd0a72..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/frame_transforms.rs +++ /dev/null @@ -1,51 +0,0 @@ -use re_lenses::{Lens, LensError, Op}; -use re_log_types::{EntityPathFilter, TimeType}; -use re_sdk_types::archetypes::Transform3D; - -use crate::loader_mcap::lenses::helpers::{ - list_xyz_struct_to_list_fixed, list_xyzw_struct_to_list_fixed, -}; - -use super::FOXGLOVE_TIMESTAMP; - -/// Creates a lens for [`foxglove.FrameTransforms`] messages. 
-/// -/// [`foxglove.FrameTransforms`]: https://docs.foxglove.dev/docs/sdk/schemas/frame-transforms -pub fn frame_transforms() -> Result { - Ok( - Lens::for_input_column(EntityPathFilter::all(), "foxglove.FrameTransforms:message") - .output_scatter_columns(|out| { - out.time( - FOXGLOVE_TIMESTAMP, - TimeType::TimestampNs, - [ - Op::selector(".transforms[].timestamp"), - Op::time_spec_to_nanos(), - ], - ) - .component( - Transform3D::descriptor_parent_frame(), - [Op::selector(".transforms[].parent_frame_id")], - ) - .component( - Transform3D::descriptor_child_frame(), - [Op::selector(".transforms[].child_frame_id")], - ) - .component( - Transform3D::descriptor_translation(), - [ - Op::selector(".transforms[].translation"), - Op::func(list_xyz_struct_to_list_fixed), - ], - ) - .component( - Transform3D::descriptor_quaternion(), - [ - Op::selector(".transforms[].rotation"), - Op::func(list_xyzw_struct_to_list_fixed), - ], - ) - })? - .build(), - ) -} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/log.rs b/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/log.rs deleted file mode 100644 index f3ca6675473c..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/log.rs +++ /dev/null @@ -1,59 +0,0 @@ -use arrow::array::{ListArray, StringArray}; -use re_arrow_combinators::{Transform, map::MapList}; -use re_lenses::{Lens, LensError, Op, OpError}; -use re_log_types::{EntityPathFilter, TimeType}; -use re_sdk_types::archetypes::TextLog; - -use super::FOXGLOVE_TIMESTAMP; - -/// Creates a lens for converting [`foxglove.Log`] messages to Rerun's [`TextLog`] archetype. 
-/// -/// [`foxglove.Log`]: https://docs.foxglove.dev/docs/sdk/schemas/log -pub fn log() -> Result { - Ok( - Lens::for_input_column(EntityPathFilter::all(), "foxglove.Log:message") - .output_columns(|out| { - out.time( - FOXGLOVE_TIMESTAMP, - TimeType::TimestampNs, - [Op::selector(".timestamp"), Op::time_spec_to_nanos()], - ) - .component(TextLog::descriptor_text(), [Op::selector(".message")]) - .component( - TextLog::descriptor_level(), - [ - Op::selector(".level.name"), - Op::func(foxglove_level_to_rerun), - ], - ) - })? - .build(), - ) -} - -/// Maps Foxglove log level names to Rerun [`re_sdk_types::components::TextLogLevel`] names. -fn foxglove_level_to_rerun(list_array: &ListArray) -> Result { - Ok(MapList::new(FoxgloveToRerunLogLevel).transform(list_array)?) -} - -/// Maps Foxglove log level strings to Rerun [`re_sdk_types::components::TextLogLevel`] strings. -struct FoxgloveToRerunLogLevel; - -impl Transform for FoxgloveToRerunLogLevel { - type Source = StringArray; - type Target = StringArray; - - fn transform(&self, source: &StringArray) -> Result { - Ok(source - .iter() - .map(|level| match level { - Some("WARNING") => Some("WARN"), - Some("FATAL") => Some("CRITICAL"), - // Rerun has no UNKNOWN level. - Some("UNKNOWN") | None => None, - // DEBUG, INFO, ERROR can be passed through as-is. - other => other, - }) - .collect()) - } -} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/packed_element_field.rs b/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/packed_element_field.rs deleted file mode 100644 index 86bc3078d1a9..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/packed_element_field.rs +++ /dev/null @@ -1,320 +0,0 @@ -//! Helper functions for decoding byte arrays of [`PackedElementField`] data, -//! e.g. for extracting positions and colors from [`foxglove.PointCloud`] messages. -//! -//! [`PackedElementField`]: https://docs.foxglove.dev/docs/sdk/schemas/packed-element-field -//! 
[`foxglove.PointCloud`]: https://docs.foxglove.dev/docs/sdk/schemas/point-cloud - -use arrow::array::builder::{FixedSizeListBuilder, Float32Builder, ListBuilder, UInt32Builder}; -use arrow::array::{ - Array as _, BinaryArray, Int32Array, ListArray, StringArray, StructArray, UInt32Array, -}; -use arrow::datatypes::{DataType, Field}; -use re_arrow_combinators::Transform; -use re_arrow_combinators::map::MapList; -use re_arrow_combinators::reshape::Flatten; -use re_lenses::OpError; - -use crate::loader_mcap::lenses::helpers::get_field_as; - -/// Extracts position data from point cloud messages as a `List>`. -pub fn extract_positions(list_array: &ListArray) -> Result { - Ok(MapList::new(ExtractPositions) - .then(Flatten::new()) - .transform(list_array)?) -} - -/// Extracts RGBA color data from point cloud messages as a `List`. -pub fn extract_colors(list_array: &ListArray) -> Result { - Ok(MapList::new(ExtractColors) - .then(Flatten::new()) - .transform(list_array)?) -} - -/// Foxglove [`NumericType`] enum. 
-/// -/// [`NumericType`]: https://docs.foxglove.dev/docs/sdk/schemas/numeric-type -#[derive(Clone, Copy)] -#[repr(i32)] -enum NumericType { - Uint8 = 1, - Int8 = 2, - Uint16 = 3, - Int16 = 4, - Uint32 = 5, - Int32 = 6, - Float32 = 7, - Float64 = 8, -} - -impl TryFrom for NumericType { - type Error = re_arrow_combinators::Error; - - fn try_from(value: i32) -> Result { - match value { - 1 => Ok(Self::Uint8), - 2 => Ok(Self::Int8), - 3 => Ok(Self::Uint16), - 4 => Ok(Self::Int16), - 5 => Ok(Self::Uint32), - 6 => Ok(Self::Int32), - 7 => Ok(Self::Float32), - 8 => Ok(Self::Float64), - _ => Err(re_arrow_combinators::Error::Other(format!( - "unknown NumericType value: {value}" - ))), - } - } -} - -impl NumericType { - fn byte_size(self) -> usize { - match self { - Self::Uint8 | Self::Int8 => 1, - Self::Uint16 | Self::Int16 => 2, - Self::Uint32 | Self::Int32 | Self::Float32 => 4, - Self::Float64 => 8, - } - } - - /// Reads a value from packed data at the given byte offset and converts it to `f32`. - #[expect(clippy::cast_possible_wrap)] - fn read_as_f32(self, data: &[u8], byte_offset: usize) -> f32 { - if byte_offset + self.byte_size() > data.len() { - return 0.0; - } - let bytes = &data[byte_offset..]; - match self { - Self::Uint8 => bytes[0] as f32, - Self::Int8 => (bytes[0] as i8) as f32, - Self::Uint16 => u16::from_le_bytes([bytes[0], bytes[1]]) as f32, - Self::Int16 => i16::from_le_bytes([bytes[0], bytes[1]]) as f32, - Self::Uint32 => u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as f32, - Self::Int32 => i32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as f32, - Self::Float32 => f32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]), - Self::Float64 => f64::from_le_bytes([ - bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], - ]) as f32, - } - } - - /// Reads a numeric value from packed data at the given byte offset and clamps/converts it to `u8`. 
- fn read_as_u8(self, data: &[u8], byte_offset: usize) -> u8 { - if byte_offset + self.byte_size() > data.len() { - return 0; - } - let bytes = &data[byte_offset..]; - match self { - Self::Uint8 => bytes[0], - // intentional reinterpretation of raw byte as signed - #[expect(clippy::cast_possible_wrap)] - Self::Int8 => (bytes[0] as i8).clamp(0, i8::MAX) as u8, - Self::Uint16 => u16::from_le_bytes([bytes[0], bytes[1]]).min(255) as u8, - Self::Int16 => i16::from_le_bytes([bytes[0], bytes[1]]).clamp(0, 255) as u8, - Self::Uint32 => { - u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]).min(255) as u8 - } - Self::Int32 => { - i32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]).clamp(0, 255) as u8 - } - Self::Float32 => (f32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) * 255.0) - .clamp(0.0, 255.0) as u8, - Self::Float64 => (f64::from_le_bytes([ - bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], - ]) * 255.0) - .clamp(0.0, 255.0) as u8, - } - } -} - -/// Byte offset and numeric type of a packed field within a point. -struct FieldDescriptor { - byte_offset: usize, - numeric_type: NumericType, -} - -/// Searches the `fields` struct array for entries matching the given names and returns -/// their byte offsets and numeric types. -fn find_field_descriptors( - fields_struct: &StructArray, - names: &[&str], -) -> Result>, re_arrow_combinators::Error> { - let name_array = fields_struct - .column_by_name("name") - .and_then(|a| a.as_any().downcast_ref::().cloned()); - let offset_array = fields_struct - .column_by_name("offset") - .and_then(|a| a.as_any().downcast_ref::().cloned()); - // Protobuf enums are stored as Struct{name: Utf8, value: Int32}; extract the `value` field. 
- let type_array = fields_struct - .column_by_name("type") - .and_then(|a| a.as_any().downcast_ref::()) - .and_then(|s| s.column_by_name("value")) - .and_then(|a| a.as_any().downcast_ref::().cloned()); - - let (Some(name_array), Some(offset_array), Some(type_array)) = - (name_array, offset_array, type_array) - else { - return Ok(names.iter().map(|_| None).collect()); - }; - - names - .iter() - .map(|target_name| { - for i in 0..name_array.len() { - if !name_array.is_null(i) && name_array.value(i) == *target_name { - return Ok(Some(FieldDescriptor { - byte_offset: offset_array.value(i) as usize, - numeric_type: NumericType::try_from(type_array.value(i))?, - })); - } - } - Ok(None) - }) - .collect() -} - -struct ExtractPositions; - -impl Transform for ExtractPositions { - type Source = StructArray; - type Target = ListArray; - - fn transform(&self, source: &StructArray) -> Result { - re_tracing::profile_function!(); - - let point_stride_array = get_field_as::(source, "point_stride")?; - let fields_array = get_field_as::(source, "fields")?; - let data_array = get_field_as::(source, "data")?; - - let mut builder = ListBuilder::new( - FixedSizeListBuilder::new(Float32Builder::new(), 3).with_field(Field::new( - "item", - DataType::Float32, - false, - )), - ); - - for i in 0..source.len() { - if source.is_null(i) || data_array.is_null(i) || fields_array.is_null(i) { - builder.append_null(); - continue; - } - - let point_stride = point_stride_array.value(i) as usize; - let data = data_array.value(i); - let fields_value = fields_array.value(i); - let fields_struct = fields_value - .as_any() - .downcast_ref::() - .ok_or_else(|| re_arrow_combinators::Error::TypeMismatch { - expected: "StructArray".to_owned(), - actual: fields_value.data_type().clone(), - context: "fields element".to_owned(), - })?; - - let descriptors = find_field_descriptors(fields_struct, &["x", "y", "z"])?; - - if let [Some(x_desc), Some(y_desc), Some(z_desc)] = &descriptors[..] 
- && point_stride > 0 - { - let num_points = data.len() / point_stride; - let points_builder = builder.values(); - for p in 0..num_points { - let base = p * point_stride; - points_builder.values().append_value( - x_desc - .numeric_type - .read_as_f32(data, base + x_desc.byte_offset), - ); - points_builder.values().append_value( - y_desc - .numeric_type - .read_as_f32(data, base + y_desc.byte_offset), - ); - points_builder.values().append_value( - z_desc - .numeric_type - .read_as_f32(data, base + z_desc.byte_offset), - ); - points_builder.append(true); - } - builder.append(true); - } else { - builder.append_null(); - } - } - - Ok(builder.finish()) - } -} - -struct ExtractColors; - -impl Transform for ExtractColors { - type Source = StructArray; - type Target = ListArray; - - fn transform(&self, source: &StructArray) -> Result { - re_tracing::profile_function!(); - - let point_stride_array = get_field_as::(source, "point_stride")?; - let fields_array = get_field_as::(source, "fields")?; - let data_array = get_field_as::(source, "data")?; - - let mut builder = ListBuilder::new(UInt32Builder::new()); - - for i in 0..source.len() { - if source.is_null(i) || data_array.is_null(i) || fields_array.is_null(i) { - builder.append_null(); - continue; - } - - let point_stride = point_stride_array.value(i) as usize; - let data = data_array.value(i); - let fields_value = fields_array.value(i); - let fields_struct = fields_value - .as_any() - .downcast_ref::() - .ok_or_else(|| re_arrow_combinators::Error::TypeMismatch { - expected: "StructArray".to_owned(), - actual: fields_value.data_type().clone(), - context: "fields element".to_owned(), - })?; - - let descriptors = - find_field_descriptors(fields_struct, &["red", "green", "blue", "alpha"])?; - - if let (Some(r_desc), Some(g_desc), Some(b_desc)) = - (&descriptors[0], &descriptors[1], &descriptors[2]) - && point_stride > 0 - { - let alpha_desc = &descriptors[3]; - let num_points = data.len() / point_stride; - for p in 
0..num_points { - let base = p * point_stride; - let r = r_desc - .numeric_type - .read_as_u8(data, base + r_desc.byte_offset); - let g = g_desc - .numeric_type - .read_as_u8(data, base + g_desc.byte_offset); - let b = b_desc - .numeric_type - .read_as_u8(data, base + b_desc.byte_offset); - let a = alpha_desc.as_ref().map_or(255, |d| { - d.numeric_type.read_as_u8(data, base + d.byte_offset) - }); - // Convert to packed RGBA u32 format expected by Rerun. - builder.values().append_value( - ((r as u32) << 24) | ((g as u32) << 16) | ((b as u32) << 8) | (a as u32), - ); - } - builder.append(true); - } else { - builder.append_null(); - } - } - - Ok(builder.finish()) - } -} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/point_cloud.rs b/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/point_cloud.rs deleted file mode 100644 index cfc7c6592543..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/point_cloud.rs +++ /dev/null @@ -1,33 +0,0 @@ -use re_lenses::{Lens, LensError, Op}; -use re_log_types::{EntityPathFilter, TimeType}; -use re_sdk_types::archetypes::{CoordinateFrame, Points3D}; - -use super::FOXGLOVE_TIMESTAMP; -use super::packed_element_field::{extract_colors, extract_positions}; - -/// Creates a lens for [`foxglove.PointCloud`] messages. -/// -/// [`foxglove.PointCloud`]: https://docs.foxglove.dev/docs/sdk/schemas/point-cloud -pub fn point_cloud() -> Result { - Ok( - // TODO(michael): support optional pose field (RR-3766). - Lens::for_input_column(EntityPathFilter::all(), "foxglove.PointCloud:message") - .output_columns(|out| { - out.time( - FOXGLOVE_TIMESTAMP, - TimeType::TimestampNs, - [Op::selector(".timestamp"), Op::time_spec_to_nanos()], - ) - .component( - CoordinateFrame::descriptor_frame(), - [Op::selector(".frame_id")], - ) - .component( - Points3D::descriptor_positions(), - [Op::func(extract_positions)], - ) - .component(Points3D::descriptor_colors(), [Op::func(extract_colors)]) - })? 
- .build(), - ) -} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/pose_in_frame.rs b/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/pose_in_frame.rs deleted file mode 100644 index 3ad5286e7de8..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/pose_in_frame.rs +++ /dev/null @@ -1,44 +0,0 @@ -use re_lenses::{Lens, LensError, Op}; -use re_log_types::{EntityPathFilter, TimeType}; -use re_sdk_types::archetypes::{CoordinateFrame, InstancePoses3D}; - -use crate::loader_mcap::lenses::helpers::{ - list_xyz_struct_to_list_fixed, list_xyzw_struct_to_list_fixed, -}; - -use super::FOXGLOVE_TIMESTAMP; - -/// Creates a lens for [`foxglove.PoseInFrame`] messages. -/// -/// [`foxglove.PoseInFrame`]: https://docs.foxglove.dev/docs/sdk/schemas/pose-in-frame -pub fn pose_in_frame() -> Result { - Ok( - Lens::for_input_column(EntityPathFilter::all(), "foxglove.PoseInFrame:message") - .output_columns(|out| { - out.time( - FOXGLOVE_TIMESTAMP, - TimeType::TimestampNs, - [Op::selector(".timestamp"), Op::time_spec_to_nanos()], - ) - .component( - CoordinateFrame::descriptor_frame(), - [Op::selector(".frame_id")], - ) - .component( - InstancePoses3D::descriptor_translations(), - [ - Op::selector(".pose.position"), - Op::func(list_xyz_struct_to_list_fixed), - ], - ) - .component( - InstancePoses3D::descriptor_quaternions(), - [ - Op::selector(".pose.orientation"), - Op::func(list_xyzw_struct_to_list_fixed), - ], - ) - })? 
- .build(), - ) -} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/poses_in_frame.rs b/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/poses_in_frame.rs deleted file mode 100644 index 57a372e661a8..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/poses_in_frame.rs +++ /dev/null @@ -1,44 +0,0 @@ -use re_lenses::{Lens, LensError, Op}; -use re_log_types::{EntityPathFilter, TimeType}; -use re_sdk_types::archetypes::{CoordinateFrame, InstancePoses3D}; - -use crate::loader_mcap::lenses::helpers::{ - list_xyz_struct_to_list_fixed, list_xyzw_struct_to_list_fixed, -}; - -use super::FOXGLOVE_TIMESTAMP; - -/// Creates a lens for [`foxglove.PosesInFrame`] messages. -/// -/// [`foxglove.PosesInFrame`]: https://docs.foxglove.dev/docs/sdk/schemas/poses-in-frame -pub fn poses_in_frame() -> Result { - Ok( - Lens::for_input_column(EntityPathFilter::all(), "foxglove.PosesInFrame:message") - .output_columns(|out| { - out.time( - FOXGLOVE_TIMESTAMP, - TimeType::TimestampNs, - [Op::selector(".timestamp"), Op::time_spec_to_nanos()], - ) - .component( - CoordinateFrame::descriptor_frame(), - [Op::selector(".frame_id")], - ) - .component( - InstancePoses3D::descriptor_translations(), - [ - Op::selector(".poses[].position"), - Op::func(list_xyz_struct_to_list_fixed), - ], - ) - .component( - InstancePoses3D::descriptor_quaternions(), - [ - Op::selector(".poses[].orientation"), - Op::func(list_xyzw_struct_to_list_fixed), - ], - ) - })? 
- .build(), - ) -} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/raw_image.rs b/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/raw_image.rs deleted file mode 100644 index 2a138ee6c1bc..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/raw_image.rs +++ /dev/null @@ -1,36 +0,0 @@ -use re_lenses::{Lens, LensError, Op}; -use re_log_types::{EntityPathFilter, TimeType}; -use re_sdk_types::archetypes::{CoordinateFrame, Image}; - -use crate::loader_mcap::lenses::image_helpers::{encoding_to_image_format, extract_image_buffer}; - -use super::{FOXGLOVE_TIMESTAMP, IMAGE_PLANE_SUFFIX}; - -/// Creates a lens for [`foxglove.RawImage`] messages. -/// -/// [`foxglove.RawImage`]: https://docs.foxglove.dev/docs/sdk/schemas/raw-image -pub fn raw_image() -> Result { - Ok( - Lens::for_input_column(EntityPathFilter::all(), "foxglove.RawImage:message") - .output_columns(|out| { - out.time( - FOXGLOVE_TIMESTAMP, - TimeType::TimestampNs, - [Op::selector(".timestamp"), Op::time_spec_to_nanos()], - ) - .component( - CoordinateFrame::descriptor_frame(), - [ - Op::selector(".frame_id"), - Op::string_suffix_nonempty(IMAGE_PLANE_SUFFIX), - ], - ) - .component( - Image::descriptor_format(), - [Op::func(encoding_to_image_format)], - ) - .component(Image::descriptor_buffer(), [Op::func(extract_image_buffer)]) - })? - .build(), - ) -} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/helpers.rs b/crates/store/re_data_loader/src/loader_mcap/lenses/helpers.rs deleted file mode 100644 index acf8d9c5da8b..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/helpers.rs +++ /dev/null @@ -1,62 +0,0 @@ -//! Common helper functions for transforming Arrow data in lenses. 
- -use arrow::array::{Array, Float32Array, Float64Array, ListArray, StructArray, UInt32Array}; -use re_arrow_combinators::Transform as _; -use re_arrow_combinators::cast::{ListToFixedSizeList, PrimitiveCast}; -use re_arrow_combinators::map::{MapFixedSizeList, MapList}; -use re_arrow_combinators::reshape::{GetField, RowMajorToColumnMajor, StructToFixedList}; -use re_lenses::OpError; - -/// Converts a list of structs with `x`, `y`, `z` fields to a list of fixed-size lists with 3 f32 values. -pub fn list_xyz_struct_to_list_fixed(list_array: &ListArray) -> Result { - let pipeline = MapList::new(StructToFixedList::new(["x", "y", "z"]).then( - MapFixedSizeList::new(PrimitiveCast::::new()), - )); - Ok(pipeline.transform(list_array)?) -} - -/// Converts a list of structs with `x`, `y`, `z`, `w` fields to a list of fixed-size lists with 4 f32 values (quaternions). -pub fn list_xyzw_struct_to_list_fixed(list_array: &ListArray) -> Result { - let pipeline = MapList::new(StructToFixedList::new(["x", "y", "z", "w"]).then( - MapFixedSizeList::new(PrimitiveCast::::new()), - )); - Ok(pipeline.transform(list_array)?) -} - -/// Converts 3x3 row-major f64 matrices stored in variable-size lists to column-major f32 fixed-size lists. -pub fn list_3x3_row_major_to_column_major(list_array: &ListArray) -> Result { - let pipeline = MapList::new( - ListToFixedSizeList::new(9) - .then(RowMajorToColumnMajor::new(3, 3)) - .then(MapFixedSizeList::new(PrimitiveCast::< - Float64Array, - Float32Array, - >::new())), - ); - Ok(pipeline.transform(list_array)?) -} - -/// Converts u32 width and height fields to a `Resolution` component (fixed-size list with two f32 values). -pub fn width_height_to_resolution(list_array: &ListArray) -> Result { - let pipeline = MapList::new(StructToFixedList::new(["width", "height"]).then( - MapFixedSizeList::new(PrimitiveCast::::new()), - )); - Ok(pipeline.transform(list_array)?) -} - -/// Extracts a struct field by name and downcasts it to the expected array type. 
-pub fn get_field_as( - source: &StructArray, - name: &str, -) -> Result { - let array_ref = GetField::new(name).transform(source)?; - array_ref - .as_any() - .downcast_ref::() - .cloned() - .ok_or_else(|| re_arrow_combinators::Error::TypeMismatch { - expected: std::any::type_name::().to_owned(), - actual: array_ref.data_type().clone(), - context: name.to_owned(), - }) -} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/mod.rs b/crates/store/re_data_loader/src/loader_mcap/lenses/mod.rs deleted file mode 100644 index 08e8c0a532c6..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -mod foxglove; -mod helpers; -mod image_helpers; - -pub use foxglove::foxglove_lenses; - -/// The identifier used to enable/disable Foxglove lenses via [`re_mcap::SelectedLayers`]. -pub const FOXGLOVE_LENSES_IDENTIFIER: &str = "foxglove"; diff --git a/crates/store/re_data_loader/src/loader_mcap/loader.rs b/crates/store/re_data_loader/src/loader_mcap/loader.rs deleted file mode 100644 index a5f6988bd671..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/loader.rs +++ /dev/null @@ -1,310 +0,0 @@ -//! MCAP file loader implementation. - -use std::io::Cursor; -use std::path::Path; -use std::sync::Arc; - -use crossbeam::channel::Sender; -use re_chunk::RowId; -use re_lenses::Lenses; -use re_log_types::{SetStoreInfo, StoreId, StoreInfo}; -use re_mcap::{LayerIdentifier, LayerRegistry, SelectedLayers}; -use re_quota_channel::send_crossbeam; - -use crate::{DataLoader, DataLoaderError, DataLoaderSettings, LoadedData}; - -const MCAP_LOADER_NAME: &str = "McapLoader"; - -/// A [`DataLoader`] for MCAP files. -/// -/// There are many different ways to extract and interpret information from MCAP files. -/// For example, it might be interesting to query for particular fields of messages, -/// or show information directly in the Rerun viewer. 
Because use-cases can vary, the -/// [`McapLoader`] is made up of [`re_mcap::Layer`]s, each representing different views of the -/// underlying data. -/// -/// These layers can be specified in the CLI wen converting an MCAP file -/// to an .rrd. Here are a few examples: -/// - [`re_mcap::layers::McapProtobufLayer`] -/// - [`re_mcap::layers::McapRawLayer`] -/// -/// Optionally, [`Lenses`] can be configured via [`Self::with_lenses`] to transform -/// chunks as they are loaded (e.g., converting raw protobuf data into semantic Rerun components). -pub struct McapLoader { - selected_layers: SelectedLayers, - // TODO(RR-3491): We don't need the fallback logic anymore; use `OutputMode` instead. - raw_fallback_enabled: bool, - lenses: Option>, -} - -impl Default for McapLoader { - fn default() -> Self { - Self::new(SelectedLayers::All) - } -} - -impl McapLoader { - /// Creates a new [`McapLoader`] that extracts the specified `layers`. - pub fn new(selected_layers: SelectedLayers) -> Self { - let lenses = Self::build_lenses(&selected_layers); - Self { - selected_layers, - raw_fallback_enabled: true, - lenses, - } - } - - /// Configures whether the raw layer is used as a fallback for unsupported channels. - pub fn with_raw_fallback(mut self, raw_fallback_enabled: bool) -> Self { - self.raw_fallback_enabled = raw_fallback_enabled; - self - } - - /// Configures lenses to apply to chunks as they are loaded. - pub fn with_lenses(mut self, lenses: Lenses) -> Self { - self.lenses = Some(Arc::new(lenses)); - self - } - - fn build_lenses(selected_layers: &SelectedLayers) -> Option> { - if !selected_layers.contains(&LayerIdentifier::from( - super::lenses::FOXGLOVE_LENSES_IDENTIFIER, - )) { - return None; - } - - match super::lenses::foxglove_lenses() { - Ok(lenses) => Some(Arc::new(lenses)), - Err(err) => { - re_log::error_once!( - "Failed to build Foxglove lenses: {err}. MCAP loader will run without them." 
- ); - None - } - } - } -} - -impl DataLoader for McapLoader { - fn name(&self) -> crate::DataLoaderName { - MCAP_LOADER_NAME.into() - } - - #[cfg(not(target_arch = "wasm32"))] - fn load_from_path( - &self, - settings: &crate::DataLoaderSettings, - path: std::path::PathBuf, - tx: Sender, - ) -> Result<(), DataLoaderError> { - if !path.is_file() || !has_mcap_extension(&path) { - return Err(DataLoaderError::Incompatible(path)); // simply not interested - } - - re_tracing::profile_function!(); - - // NOTE(1): `spawn` is fine, this whole function is native-only. - // NOTE(2): this must spawned on a dedicated thread to avoid a deadlock! - // `load` will spawn a bunch of loaders on the common rayon thread pool and wait for - // their response via channels: we cannot be waiting for these responses on the - // common rayon thread pool. - let settings = settings.clone(); - let selected_layers = self.selected_layers.clone(); - let raw_fallback_enabled = self.raw_fallback_enabled; - let lenses = self.lenses.clone(); - std::thread::Builder::new() - .name(format!("load_mcap({path:?}")) - .spawn(move || { - if let Err(err) = load_mcap_mmap( - &path, - &settings, - &tx, - &selected_layers, - raw_fallback_enabled, - lenses.as_deref(), - ) { - re_log::error!("Failed to load MCAP file: {err}"); - } - }) - .map_err(|err| DataLoaderError::Other(err.into()))?; - - Ok(()) - } - - fn load_from_file_contents( - &self, - settings: &crate::DataLoaderSettings, - filepath: std::path::PathBuf, - contents: std::borrow::Cow<'_, [u8]>, - tx: Sender, - ) -> Result<(), crate::DataLoaderError> { - if !has_mcap_extension(&filepath) { - return Err(DataLoaderError::Incompatible(filepath)); // simply not interested - } - - re_tracing::profile_function!(); - - let contents = contents.into_owned(); - let settings = settings.clone(); - let selected_layers = self.selected_layers.clone(); - let raw_fallback_enabled = self.raw_fallback_enabled; - let lenses = self.lenses.clone(); - - // NOTE: this must be 
spawned on a dedicated thread to avoid a deadlock! - // `load` will spawn a bunch of loaders on the common rayon thread pool and wait for - // their response via channels: we cannot be waiting for these responses on the - // common rayon thread pool. - cfg_if::cfg_if! { - if #[cfg(target_arch = "wasm32")] { - load_mcap( - &contents, - &settings, - &tx, - &selected_layers, - raw_fallback_enabled, - lenses.as_deref(), - )?; - } else { - std::thread::Builder::new() - .name(format!("load_mcap({filepath:?})")) - .spawn(move || { - if let Err(err) = load_mcap( - &contents, - &settings, - &tx, - &selected_layers, - raw_fallback_enabled, - lenses.as_deref(), - ) { - re_log::error!("Failed to load MCAP file: {err}"); - } - }) - .map_err(|err| DataLoaderError::Other(err.into()))?; - } - } - - Ok(()) - } -} - -#[cfg(not(target_arch = "wasm32"))] -fn load_mcap_mmap( - filepath: &std::path::PathBuf, - settings: &DataLoaderSettings, - tx: &Sender, - selected_layers: &SelectedLayers, - raw_fallback_enabled: bool, - lenses: Option<&Lenses>, -) -> Result<(), DataLoaderError> { - use std::fs::File; - let file = File::open(filepath)?; - - // SAFETY: file-backed memory maps are marked unsafe because of potential UB when using the map and the underlying file is modified. - #[expect(unsafe_code)] - let mmap = unsafe { memmap2::Mmap::map(&file)? 
}; - - load_mcap( - &mmap, - settings, - tx, - selected_layers, - raw_fallback_enabled, - lenses, - ) -} - -pub fn load_mcap( - mcap: &[u8], - settings: &DataLoaderSettings, - tx: &Sender, - selected_layers: &SelectedLayers, - raw_fallback_enabled: bool, - lenses: Option<&Lenses>, -) -> Result<(), DataLoaderError> { - re_tracing::profile_function!(); - let store_id = settings.recommended_store_id(); - - if send_crossbeam( - tx, - LoadedData::LogMsg( - MCAP_LOADER_NAME.to_owned(), - re_log_types::LogMsg::SetStoreInfo(store_info(store_id.clone())), - ), - ) - .is_err() - { - re_log::debug_once!( - "Failed to send `SetStoreInfo` because smart channel closed unexpectedly." - ); - // If the other side decided to hang up this is not our problem. - return Ok(()); - } - - let mut send_chunk = |chunk: re_chunk::Chunk| { - // Apply lenses if configured, otherwise forward the chunk directly. - if let Some(lenses) = lenses { - for result in lenses.apply(&chunk) { - match result { - Ok(transformed_chunk) => { - send_chunk_to_channel(tx, &store_id, transformed_chunk); - } - Err(partial_chunk) => { - for error in partial_chunk.errors() { - re_log::error_once!("Lens error: {error}"); - } - if let Some(chunk) = partial_chunk.take() { - send_chunk_to_channel(tx, &store_id, chunk); - } - } - } - } - } else { - send_chunk_to_channel(tx, &store_id, chunk); - } - }; - - let reader = Cursor::new(&mcap); - - let summary = re_mcap::read_summary(reader)? - .ok_or_else(|| anyhow::anyhow!("MCAP file does not contain a summary"))?; - - // TODO(#10862): Add warning for channel that miss semantic information. - LayerRegistry::all_builtin(raw_fallback_enabled) - .select(selected_layers) - .plan(&summary)? 
- .run(mcap, &summary, &mut send_chunk)?; - - Ok(()) -} - -fn send_chunk_to_channel(tx: &Sender, store_id: &StoreId, chunk: re_chunk::Chunk) { - if send_crossbeam( - tx, - LoadedData::Chunk(MCAP_LOADER_NAME.to_owned(), store_id.clone(), chunk), - ) - .is_err() - { - // If the other side decided to hang up this is not our problem. - re_log::debug_once!( - "Failed to send chunk because the smart channel has been closed unexpectedly." - ); - } -} - -fn store_info(store_id: StoreId) -> SetStoreInfo { - SetStoreInfo { - row_id: *RowId::new(), - info: StoreInfo::new( - store_id, - re_log_types::StoreSource::Other(MCAP_LOADER_NAME.to_owned()), - ), - } -} - -/// Checks if a path has the `.mcap` extension. -fn has_mcap_extension(filepath: &Path) -> bool { - filepath - .extension() - .map(|ext| ext.eq_ignore_ascii_case("mcap")) - .unwrap_or(false) -} diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_compressed_video__foxglove_compressed_video.snap b/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_compressed_video__foxglove_compressed_video.snap deleted file mode 100644 index 5b593caed84d..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_compressed_video__foxglove_compressed_video.snap +++ /dev/null @@ -1,38 +0,0 @@ ---- -source: crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_compressed_video.rs -expression: "format!(\"{:-240}\", chunk)" ---- -┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /compressed_video │ -│ * id: [**REDACTED**] │ -│ * version: [**REDACTED**] │ 
-├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌────────────────────────────────┬──────────────────────────────┬────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬────────────────────────────────┬────────────────────────────────┐ │ -│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ CoordinateFrame:frame ┆ VideoStream:codec ┆ VideoStream:sample │ │ -│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable Duration(ns) ┆ type: nullable Duration(ns) ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable ┆ type: nullable List[nullable ┆ type: nullable List[nullable │ │ -│ │ ARROW:extension:metadata: ┆ index_name: message_log_time ┆ index_name: ┆ index_name: timestamp ┆ Utf8] ┆ u32] ┆ List[u8]] │ │ -│ │ {"namespace":"row"} ┆ is_sorted: true ┆ message_publish_time ┆ is_sorted: true ┆ archetype: CoordinateFrame ┆ archetype: VideoStream ┆ archetype: VideoStream │ │ -│ │ ARROW:extension:name: TUID ┆ kind: index ┆ is_sorted: true ┆ kind: index ┆ component: ┆ component: VideoStream:codec ┆ component: VideoStream:sample │ │ -│ │ is_sorted: true ┆ ┆ kind: index ┆ ┆ CoordinateFrame:frame ┆ component_type: VideoCodec ┆ component_type: VideoSample │ │ -│ │ kind: control ┆ ┆ ┆ ┆ component_type: ┆ kind: data ┆ kind: data │ │ -│ │ ┆ ┆ ┆ ┆ TransformFrameId ┆ ┆ │ │ -│ │ ┆ ┆ ┆ ┆ kind: data ┆ ┆ │ │ -│ ╞════════════════════════════════╪══════════════════════════════╪════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪════════════════════════════════╪════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ P0D ┆ P0D ┆ 1970-01-01T00:00:00 ┆ [camera_frame_image_plane] ┆ [1635148593] ┆ [[0, 0, 0, 1, 103, 100, 16, │ │ -│ │ ┆ ┆ ┆ ┆ ┆ ┆ 10, 172, 184, 
143, 66, 0, 0, │ │ -│ │ ┆ ┆ ┆ ┆ ┆ ┆ 3, 0, 2, 0, 0, 3, 0, 121, 8, │ │ -│ │ ┆ ┆ ┆ ┆ ┆ ┆ 0, 0, 0, 1, 1… │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.033333333S ┆ PT0.033333333S ┆ 1970-01-01T00:00:00.033333333 ┆ [camera_frame_image_plane] ┆ [1635148593] ┆ [[0, 0, 0, 1, 103, 100, 16, │ │ -│ │ ┆ ┆ ┆ ┆ ┆ ┆ 10, 172, 184, 143, 66, 0, 0, │ │ -│ │ ┆ ┆ ┆ ┆ ┆ ┆ 3, 0, 2, 0, 0, 3, 0, 121, 8, │ │ -│ │ ┆ ┆ ┆ ┆ ┆ ┆ 0, 0, 0, 1, 1… │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.066666666S ┆ PT0.066666666S ┆ 1970-01-01T00:00:00.066666666 ┆ [camera_frame_image_plane] ┆ [1635148593] ┆ [[0, 0, 0, 1, 103, 100, 16, │ │ -│ │ ┆ ┆ ┆ ┆ ┆ ┆ 10, 172, 184, 143, 66, 0, 0, │ │ -│ │ ┆ ┆ ┆ ┆ ┆ ┆ 3, 0, 2, 0, 0, 3, 0, 121, 8, │ │ -│ │ ┆ ┆ ┆ ┆ ┆ ┆ 0, 0, 0, 1, 1… │ │ -│ └────────────────────────────────┴──────────────────────────────┴────────────────────────────────┴───────────────────────────────┴────────────────────────────────┴────────────────────────────────┴────────────────────────────────┘ │ -└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/mod.rs b/crates/store/re_data_loader/src/loader_mcap/tests/mod.rs deleted file mode 100644 index b0430ddbf839..000000000000 --- a/crates/store/re_data_loader/src/loader_mcap/tests/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -//! Test utilities for MCAP data loader. 
- -mod foxglove; - -pub mod util; diff --git a/crates/store/re_data_loader/tests/snapshots/test_mcap_loader__tests__ros2.snap b/crates/store/re_data_loader/tests/snapshots/test_mcap_loader__tests__ros2.snap deleted file mode 100644 index 0fecad4e9106..000000000000 --- a/crates/store/re_data_loader/tests/snapshots/test_mcap_loader__tests__ros2.snap +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c97095b25f8cc8d9ae4d0a4a705f971db31c99ba5bf48977048e1ce60303e064 -size 370577 diff --git a/crates/store/re_data_source/Cargo.toml b/crates/store/re_data_source/Cargo.toml index e3a9eff7e9b3..4d0915b1762a 100644 --- a/crates/store/re_data_source/Cargo.toml +++ b/crates/store/re_data_source/Cargo.toml @@ -23,8 +23,7 @@ default = [] [dependencies] -re_data_loader.workspace = true -re_error.workspace = true +re_importer.workspace = true re_format.workspace = true re_grpc_client.workspace = true re_log_channel.workspace = true diff --git a/crates/store/re_data_source/src/data_source.rs b/crates/store/re_data_source/src/data_source.rs index 011d2fb3d7ed..c49f95a51f2d 100644 --- a/crates/store/re_data_source/src/data_source.rs +++ b/crates/store/re_data_source/src/data_source.rs @@ -2,7 +2,7 @@ use std::sync::Arc; #[cfg(not(target_arch = "wasm32"))] use anyhow::Context as _; -use re_log_channel::{LogReceiver, LogSource}; +use re_log_channel::{LogReceiver, LogSource, RecordingOpenBehavior}; use re_log_types::RecordingId; use re_redap_client::ConnectionRegistryHandle; @@ -13,6 +13,7 @@ pub type AuthErrorHandler = Arc; /// Somewhere we can get Rerun logging data from. +// TODO(emilk): there is a lot of overlap between this and `ViewerOpenUrl` #[derive(Clone, Debug, PartialEq, Eq)] pub enum LogDataSource { /// A remote file, served over http. @@ -52,14 +53,28 @@ pub enum LogDataSource { RedapDatasetSegment { uri: re_uri::DatasetSegmentUri, - /// Switch to this recording once it has been loaded? 
- select_when_loaded: bool, + open_behavior: RecordingOpenBehavior, }, /// A `rerun+http://` URI pointing to a proxy. RedapProxy(re_uri::ProxyUri), } +/// Options for [`LogDataSource::from_uri`]. +#[derive(Clone, Debug, Default)] +pub struct FromUriOptions { + /// If `true`, keep reading `.rrd` files past EOF, tailing new data as it arrives. + pub follow: bool, + + /// If `true`, accept extensionless HTTP URLs for magic-bytes-based format detection. + /// + /// This should be `true` at external entry points (CLI, explicit user URL input), + /// but `false` when parsing URLs from viewer-internal links, where extensionless + /// URLs (e.g. `https://rerun.io/docs/getting-started/data-in`) should fall through to be opened in + /// the browser. + pub accept_extensionless_http: bool, +} + impl LogDataSource { /// Tries to classify a URI into a [`LogDataSource`]. /// @@ -71,7 +86,7 @@ impl LogDataSource { pub fn from_uri( _file_source: re_log_types::FileSource, url: &str, - follow: bool, + options: &FromUriOptions, ) -> Option { #[cfg(not(target_arch = "wasm32"))] { @@ -88,7 +103,7 @@ impl LogDataSource { let Some(file_extension) = uri.split('.').next_back() else { return false; }; - if !re_data_loader::is_supported_file_extension(file_extension) { + if !re_importer::is_supported_file_extension(file_extension) { return false; } @@ -132,7 +147,7 @@ impl LogDataSource { return Some(Self::FilePath { file_source: _file_source, path, - follow, + follow: options.follow, }); } @@ -140,7 +155,7 @@ impl LogDataSource { return Some(Self::FilePath { file_source: _file_source, path, - follow, + follow: options.follow, }); } } @@ -148,7 +163,7 @@ impl LogDataSource { if let Ok(uri) = url.parse::() { Some(Self::RedapDatasetSegment { uri, - select_when_loaded: true, + open_behavior: RecordingOpenBehavior::OpenAndSelect, }) } else if let Ok(uri) = url.parse::() { Some(Self::RedapProxy(uri)) @@ -178,14 +193,35 @@ impl LogDataSource { // so don't try loading it as a `HttpUrl` if it doesn't 
have a file extension we know. let contains_viewer_query_url_param = url.query_pairs().any(|(key, _)| key == "url"); - if re_data_loader::is_supported_file_extension(extension) { - Some(Self::HttpUrl { url, follow: false }) - } else if extension.is_empty() + if re_importer::is_supported_file_extension(extension) { + Some(Self::HttpUrl { + url, + follow: options.follow, + }) + } else if options.accept_extensionless_http + && extension.is_empty() && was_proper_http_url && !contains_viewer_query_url_param { // No extension — accept the URL and try to detect format after download - Some(Self::HttpUrl { url, follow }) + Some(Self::HttpUrl { + url, + follow: options.follow, + }) + } else if contains_viewer_query_url_param { + // This is a web viewer URL with a `?url=` parameter. + // Extract the URL parameter and try to parse it as a redap URI. + let (_, value) = url.query_pairs().find(|(key, _)| key == "url")?; + if let Ok(uri) = value.parse::() { + Some(Self::RedapDatasetSegment { + uri, + open_behavior: RecordingOpenBehavior::OpenAndSelect, + }) + } else if let Ok(uri) = value.parse::() { + Some(Self::RedapProxy(uri)) + } else { + None + } } else { None // Has an extension but it's not one we support } @@ -202,6 +238,20 @@ impl LogDataSource { self, on_auth_err: AuthErrorHandler, connection_registry: &ConnectionRegistryHandle, + ) -> anyhow::Result { + self.stream_with_options( + on_auth_err, + connection_registry, + re_redap_client::StreamingOptions::default(), + ) + } + + /// Like [`Self::stream`], but with additional options controlling streaming behavior. 
+ pub fn stream_with_options( + self, + on_auth_err: AuthErrorHandler, + connection_registry: &ConnectionRegistryHandle, + streaming_options: re_redap_client::StreamingOptions, ) -> anyhow::Result { re_tracing::profile_function!(); @@ -227,17 +277,17 @@ impl LogDataSource { follow, }); - // This recording will be communicated to all `DataLoader`s, which may or may not + // This recording will be communicated to all `Importer`s, which may or may not // decide to use it depending on whether they want to share a common recording // or not. let shared_recording_id = RecordingId::random(); - let settings = re_data_loader::DataLoaderSettings { + let settings = re_importer::ImporterSettings { opened_store_id: file_source.recommended_store_id().cloned(), force_store_info: file_source.force_store_info(), follow, - ..re_data_loader::DataLoaderSettings::recommended(shared_recording_id) + ..re_importer::ImporterSettings::recommended(shared_recording_id) }; - re_data_loader::load_from_path(&settings, file_source, &path, &tx) + re_importer::import_from_path(&settings, file_source, &path, &tx) .with_context(|| format!("{path:?}"))?; Ok(rx) @@ -251,16 +301,16 @@ impl LogDataSource { follow: false, }); - // This `StoreId` will be communicated to all `DataLoader`s, which may or may not + // This `StoreId` will be communicated to all `Importer`s, which may or may not // decide to use it depending on whether they want to share a common recording // or not. 
let shared_recording_id = RecordingId::random(); - let settings = re_data_loader::DataLoaderSettings { + let settings = re_importer::ImporterSettings { opened_store_id: file_source.recommended_store_id().cloned(), force_store_info: file_source.force_store_info(), - ..re_data_loader::DataLoaderSettings::recommended(shared_recording_id) + ..re_importer::ImporterSettings::recommended(shared_recording_id) }; - re_data_loader::load_from_file_contents( + re_importer::import_from_file_contents( &settings, file_source, &std::path::PathBuf::from(file_contents.name), @@ -280,22 +330,25 @@ impl LogDataSource { Ok(rx) } - Self::RedapDatasetSegment { - uri, - select_when_loaded, - } => { + Self::RedapDatasetSegment { uri, open_behavior } => { let (tx, rx) = re_log_channel::log_channel(re_log_channel::LogSource::RedapGrpcStream { uri: uri.clone(), - select_when_loaded, + open_behavior, }); let connection_registry = connection_registry.clone(); let uri_clone = uri.clone(); + let tx_err = tx.clone(); let stream_segment = async move { let client = connection_registry.client(uri_clone.origin.clone()).await?; - re_redap_client::stream_blueprint_and_segment_from_server(client, tx, uri_clone) - .await + re_redap_client::stream_blueprint_and_segment_from_server( + client, + tx, + uri_clone, + streaming_options, + ) + .await }; spawn_future(async move { @@ -303,7 +356,7 @@ impl LogDataSource { if let Some(err) = err.as_client_credentials_error() { on_auth_err(uri, err); } else { - re_log::warn!("Error while streaming: {}", re_error::format_ref(&err)); + tx_err.quit(Some(Box::new(err))).ok(); } } }); @@ -387,18 +440,32 @@ impl LogDataSource { FileSource::Sdk => "sdk", } } + + /// Convert the data source to a URI string, if possible. + pub fn as_uri(&self) -> Option { + match self { + Self::HttpUrl { url, .. } => Some(url.to_string()), + #[cfg(not(target_arch = "wasm32"))] + Self::FilePath { path, .. } => Some(format!("file://{}", path.display())), + Self::FileContents { .. 
} => None, + #[cfg(not(target_arch = "wasm32"))] + Self::Stdin => Some("-".to_owned()), + Self::RedapDatasetSegment { uri, .. } => Some(uri.to_string()), + Self::RedapProxy(uri) => Some(uri.to_string()), + } + } } /// Analytics data extracted from a [`LogDataSource`]. #[derive(Clone, Debug)] pub struct LogDataSourceAnalytics { - /// The type of data source (e.g., "file", "http", ``redap_grpc``, "stdin"). + /// The type of data source (e.g., "file", "http", `redap_grpc`, "stdin"). pub source_type: &'static str, /// The file extension if applicable (e.g., "rrd", "png", "glb"). pub file_extension: Option, - /// How the file was opened (e.g., "cli", ``file_dialog``, ``drag_and_drop``). + /// How the file was opened (e.g., "cli", `file_dialog`, `drag_and_drop`). /// Only applicable for file-based sources. pub file_source: Option<&'static str>, } @@ -454,14 +521,16 @@ mod tests { "https://example.com/scene.glb", "https://example.com/photo.png", "https://example.com/video.mp4", - // Extensionless URLs — accepted for magic bytes detection after download + // Since the path has an explicit extension, this will be parsed as a DataSource and + // not a `ViewerOpenUrl` (see invalid section below) + "https://example.com/some-file.rrd?url=recording.rrd", + ]; + // Extensionless URLs — only accepted when accept_extensionless_http is true + let extensionless_http = [ "https://example.com/download", "https://example.com/api/file?id=123", "https://storage.example.com/abc123?token=xyz", "https://example.com/files?my.id", - // Since the path has an explicit extension, this will be parsed as a DataSource and - // not a `ViewerOpenUrl` (see invalid section below) - "https://example.com/some-file.rrd?url=recording.rrd", ]; let grpc = [ // segment_id (new) @@ -492,9 +561,14 @@ mod tests { recommended_store_id: None, force_store_info: false, }; + let default_options = FromUriOptions::default(); + let extensionless_options = FromUriOptions { + accept_extensionless_http: true, + 
..Default::default() + }; for uri in file { - let data_source = LogDataSource::from_uri(file_source.clone(), uri, false); + let data_source = LogDataSource::from_uri(file_source.clone(), uri, &default_options); if !matches!(data_source, Some(LogDataSource::FilePath { .. })) { eprintln!( "Expected {uri:?} to be categorized as FilePath. Instead it got parsed as {data_source:?}" @@ -504,7 +578,7 @@ mod tests { } for uri in http { - let data_source = LogDataSource::from_uri(file_source.clone(), uri, false); + let data_source = LogDataSource::from_uri(file_source.clone(), uri, &default_options); if !matches!(data_source, Some(LogDataSource::HttpUrl { .. })) { eprintln!( "Expected {uri:?} to be categorized as HttpUrl. Instead it got parsed as {data_source:?}" @@ -513,8 +587,29 @@ mod tests { } } + // Extensionless URLs are accepted when accept_extensionless_http is true + for uri in extensionless_http { + let data_source = + LogDataSource::from_uri(file_source.clone(), uri, &extensionless_options); + if !matches!(data_source, Some(LogDataSource::HttpUrl { .. })) { + eprintln!( + "Expected {uri:?} to be categorized as HttpUrl (with accept_extensionless_http=true). Instead it got parsed as {data_source:?}" + ); + failed = true; + } + + // …but rejected when accept_extensionless_http is false + let data_source = LogDataSource::from_uri(file_source.clone(), uri, &default_options); + if data_source.is_some() { + eprintln!( + "Expected {uri:?} to be None (with accept_extensionless_http=false). Instead it got parsed as {data_source:?}" + ); + failed = true; + } + } + for uri in grpc { - let data_source = LogDataSource::from_uri(file_source.clone(), uri, false); + let data_source = LogDataSource::from_uri(file_source.clone(), uri, &default_options); if !matches!(data_source, Some(LogDataSource::RedapDatasetSegment { .. })) { eprintln!( "Expected {uri:?} to be categorized as redap dataset segment. 
Instead it got parsed as {data_source:?}" @@ -524,7 +619,7 @@ mod tests { } for uri in proxy { - let data_source = LogDataSource::from_uri(file_source.clone(), uri, false); + let data_source = LogDataSource::from_uri(file_source.clone(), uri, &default_options); if !matches!(data_source, Some(LogDataSource::RedapProxy { .. })) { eprintln!( "Expected {uri:?} to be categorized as MessageProxy. Instead it got parsed as {data_source:?}" @@ -534,7 +629,8 @@ mod tests { } for uri in invalid { - let data_source = LogDataSource::from_uri(file_source.clone(), uri, false); + let data_source = + LogDataSource::from_uri(file_source.clone(), uri, &extensionless_options); if data_source.is_some() { eprintln!("Expected {uri:?} to be None. Instead it got parsed as {data_source:?}"); failed = true; @@ -543,4 +639,25 @@ mod tests { assert!(!failed, "one or more test cases failed"); } + + #[test] + fn test_data_source_from_viewer_url() { + // This is the sort of URL we get when copying a share link from the web viewer: + + let url = "https://customer.cloud.rerun.io/?url=rerun%3A%2F%2Fapi.customer.cloud.rerun.io%3A443%2Fdataset%2F18A23D2FAC59F8572563b312ef21f53b%3Fsegment_id%3Dthe_segment_name"; + + let data_source = LogDataSource::from_uri(FileSource::Cli, url, &FromUriOptions::default()); + assert_eq!( + data_source, + Some(LogDataSource::RedapDatasetSegment { + uri: re_uri::DatasetSegmentUri { + origin: "api.customer.cloud.rerun.io:443".parse().unwrap(), + dataset_id: "18A23D2FAC59F8572563b312ef21f53b".parse().unwrap(), + segment_id: "the_segment_name".to_owned(), + fragment: Default::default(), + }, + open_behavior: RecordingOpenBehavior::OpenAndSelect, + }) + ); + } } diff --git a/crates/store/re_data_source/src/fetch_file_from_http.rs b/crates/store/re_data_source/src/fetch_file_from_http.rs index cc50d1e04ad9..e4091964487d 100644 --- a/crates/store/re_data_source/src/fetch_file_from_http.rs +++ b/crates/store/re_data_source/src/fetch_file_from_http.rs @@ -4,12 +4,12 @@ use 
re_log::ResultExt as _; use re_log_channel::{LogReceiver, LogSource}; use re_log_types::{FileSource, RecordingId}; -/// Fetch a file from an HTTP URL and load it using all available data loaders. +/// Fetch a file from an HTTP URL and load it using all available importers. /// /// Unlike RRD streaming which decodes incrementally, this downloads the entire file -/// first, then passes the bytes through [`re_data_loader::load_from_file_contents`]. +/// first, then passes the bytes through [`re_importer::import_from_file_contents`]. /// -/// This works for all file types supported by the data loaders (MCAP, images, 3D models, etc.). +/// This works for all file types supported by the importers (MCAP, images, 3D models, etc.). pub fn fetch_and_load(url: &url::Url) -> LogReceiver { let url_string = url.to_string(); @@ -58,12 +58,12 @@ pub fn fetch_and_load(url: &url::Url) -> LogReceiver { let bytes: Arc<[u8]> = response.bytes.into(); let shared_recording_id = RecordingId::random(); - let settings = re_data_loader::DataLoaderSettings { + let settings = re_importer::ImporterSettings { force_store_info: true, - ..re_data_loader::DataLoaderSettings::recommended(shared_recording_id) + ..re_importer::ImporterSettings::recommended(shared_recording_id) }; - if let Err(err) = re_data_loader::load_from_file_contents( + if let Err(err) = re_importer::import_from_file_contents( &settings, FileSource::Uri, &std::path::PathBuf::from(&filename), @@ -75,7 +75,7 @@ pub fn fetch_and_load(url: &url::Url) -> LogReceiver { .warn_on_err_once("Failed to send quit marker"); } - // `load_from_file_contents` internally calls `send()` which calls `tx.quit(None)` + // `import_from_file_contents` internally calls `send()` which calls `tx.quit(None)` // when all data has been forwarded, so we don't need to call it here on success. 
} Err(err) => { @@ -98,7 +98,7 @@ fn detect_filename(url_filename: &str, response: &ehttp::Response, bytes: &[u8]) let has_known_extension = std::path::Path::new(url_filename) .extension() .and_then(|e| e.to_str()) - .is_some_and(re_data_loader::is_supported_file_extension); + .is_some_and(re_importer::is_supported_file_extension); if has_known_extension { return url_filename.to_owned(); @@ -106,13 +106,13 @@ fn detect_filename(url_filename: &str, response: &ehttp::Response, bytes: &[u8]) // Try Content-Type header if let Some(content_type) = response.content_type() - && let Some(ext) = re_data_loader::content_type_to_extension(content_type) + && let Some(ext) = re_importer::content_type_to_extension(content_type) { return format!("{url_filename}.{ext}"); } // Try magic bytes - if let Some(ext) = re_data_loader::detect_format_from_bytes(bytes) { + if let Some(ext) = re_importer::detect_format_from_bytes(bytes) { return format!("{url_filename}.{ext}"); } diff --git a/crates/store/re_data_source/src/lib.rs b/crates/store/re_data_source/src/lib.rs index 38554f8ef8a1..819247421f93 100644 --- a/crates/store/re_data_source/src/lib.rs +++ b/crates/store/re_data_source/src/lib.rs @@ -13,7 +13,11 @@ mod stream_rrd_from_http; #[cfg(not(target_arch = "wasm32"))] mod load_stdin; -pub use self::data_source::{AuthErrorHandler, LogDataSource, LogDataSourceAnalytics}; +pub use re_log_channel::RecordingOpenBehavior; + +pub use self::data_source::{ + AuthErrorHandler, FromUriOptions, LogDataSource, LogDataSourceAnalytics, +}; // ---------------------------------------------------------------------------- diff --git a/crates/store/re_dataframe/src/engine.rs b/crates/store/re_dataframe/src/engine.rs index 290721af7f21..57e93bf1b5df 100644 --- a/crates/store/re_dataframe/src/engine.rs +++ b/crates/store/re_dataframe/src/engine.rs @@ -68,7 +68,8 @@ impl QueryEngine { /// * second, the component columns in lexical order (`Color`, `Radius, ...`). 
#[inline] pub fn schema(&self) -> ChunkColumnDescriptors { - self.engine.with(|store, _cache| store.schema()) + self.engine + .with(|store, _cache| store.schema().chunk_column_descriptors()) } /// Returns the filtered schema for the given [`QueryExpression`]. diff --git a/crates/store/re_dataframe/src/query.rs b/crates/store/re_dataframe/src/query.rs index c16b8019540a..49cefad279c8 100644 --- a/crates/store/re_dataframe/src/query.rs +++ b/crates/store/re_dataframe/src/query.rs @@ -365,7 +365,7 @@ impl QueryHandle { } } - #[tracing::instrument(level = "info", skip_all)] + #[tracing::instrument(level = "debug", skip_all)] #[expect(clippy::unused_self)] fn compute_user_selection( &self, @@ -920,7 +920,7 @@ impl QueryHandle { // NOTE: cannot use vec![], it has limitations with non-cloneable options. // vec![None; state.view_chunks.len()]; std::iter::repeat(()) - .map(|_| None) + .map(|()| None) .take(state.view_chunks.len()) .collect(); for (view_column_idx, view_chunks) in state.view_chunks.iter().enumerate() { diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_static.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_static.snap index 7ca7e4d4ce28..1177449bd8b9 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_static.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_static.snap @@ -2,16 +2,16 @@ source: crates/store/re_dataframe/src/query.rs expression: DisplayRB(dataframe) --- -┌──────────────────────┬──────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ -│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ -│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable 
List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ -│ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ -│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ -│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ -│ ┆ ┆ kind: data ┆ is_static: true ┆ entity_path: /this/that │ -│ ┆ ┆ ┆ kind: data ┆ kind: data │ -╞══════════════════════╪══════════════════════════════╪════════════════════════════════════╪════════════════════════════════════╪══════════════════════════════════════╡ -│ null ┆ null ┆ null ┆ [c] ┆ null │ -└──────────────────────┴──────────────────────────────┴────────────────────────────────────┴────────────────────────────────────┴──────────────────────────────────────┘ +┌──────────────────────┬──────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ +│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ +│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ +│ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ +│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ +│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ +│ ┆ ┆ kind: data ┆ is_static: true ┆ 
entity_path: /this/that │ +│ ┆ ┆ ┆ kind: data ┆ kind: data │ +╞══════════════════════╪══════════════════════╪════════════════════════════════════╪════════════════════════════════════╪══════════════════════════════════════╡ +│ null ┆ null ┆ null ┆ [c] ┆ null │ +└──────────────────────┴──────────────────────┴────────────────────────────────────┴────────────────────────────────────┴──────────────────────────────────────┘ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_temporal.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_temporal.snap index 9f5faffb202a..8837ef8e9620 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_temporal.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_temporal.snap @@ -5,8 +5,8 @@ expression: DisplayRB(dataframe) ┌──────────────────────┬───────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ │ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ │ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ │ ┆ ┆ component_type: example.MyColor ┆ component_type: 
example.MyLabel ┆ component: example.MyPoints:points │ │ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones-2.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones-2.snap index 9f5faffb202a..8837ef8e9620 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones-2.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones-2.snap @@ -5,8 +5,8 @@ expression: DisplayRB(dataframe) ┌──────────────────────┬───────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ │ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ │ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ │ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ │ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones.snap 
index 7ca7e4d4ce28..1177449bd8b9 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones.snap @@ -2,16 +2,16 @@ source: crates/store/re_dataframe/src/query.rs expression: DisplayRB(dataframe) --- -┌──────────────────────┬──────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ -│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ -│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ -│ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ -│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ -│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ -│ ┆ ┆ kind: data ┆ is_static: true ┆ entity_path: /this/that │ -│ ┆ ┆ ┆ kind: data ┆ kind: data │ -╞══════════════════════╪══════════════════════════════╪════════════════════════════════════╪════════════════════════════════════╪══════════════════════════════════════╡ -│ null ┆ null ┆ null ┆ [c] ┆ null │ -└──────────────────────┴──────────────────────────────┴────────────────────────────────────┴────────────────────────────────────┴──────────────────────────────────────┘ +┌──────────────────────┬──────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ +│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ 
/this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ +│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ +│ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ +│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ +│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ +│ ┆ ┆ kind: data ┆ is_static: true ┆ entity_path: /this/that │ +│ ┆ ┆ ┆ kind: data ┆ kind: data │ +╞══════════════════════╪══════════════════════╪════════════════════════════════════╪════════════════════════════════════╪══════════════════════════════════════╡ +│ null ┆ null ┆ null ┆ [c] ┆ null │ +└──────────────────────┴──────────────────────┴────────────────────────────────────┴────────────────────────────────────┴──────────────────────────────────────┘ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears-2.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears-2.snap index d72703f6beb6..52b1fe5900fe 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears-2.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears-2.snap @@ -5,8 +5,8 @@ expression: DisplayRB(dataframe) ┌──────────────────────┬───────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ │ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ 
type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ │ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ │ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ │ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears.snap index d72703f6beb6..52b1fe5900fe 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears.snap @@ -5,8 +5,8 @@ expression: DisplayRB(dataframe) ┌──────────────────────┬───────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ │ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ 
index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ │ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ │ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ │ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_range.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_range.snap index cfcab0c87da2..d6b6017fd900 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_range.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_range.snap @@ -5,8 +5,8 @@ expression: DisplayRB(dataframe) ┌──────────────────────┬───────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ │ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ │ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ │ ┆ ┆ 
component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ │ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_values.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_values.snap index dde2c4fbf8d4..24ddf544e065 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_values.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_values.snap @@ -2,18 +2,18 @@ source: crates/store/re_dataframe/src/query.rs expression: DisplayRB(dataframe) --- -┌──────────────────────┬──────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ -│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ -│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ -│ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ -│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ -│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ -│ ┆ ┆ kind: data ┆ is_static: true ┆ entity_path: /this/that │ -│ ┆ ┆ ┆ kind: data ┆ kind: data │ -╞══════════════════════╪══════════════════════════════╪════════════════════════════════════╪════════════════════════════════════╪══════════════════════════════════════╡ -│ 30 ┆ null ┆ [2] ┆ [c] 
┆ [{x: 2.0, y: 2.0}] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 60 ┆ null ┆ null ┆ [c] ┆ [{x: 5.0, y: 5.0}] │ -└──────────────────────┴──────────────────────────────┴────────────────────────────────────┴────────────────────────────────────┴──────────────────────────────────────┘ +┌──────────────────────┬──────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ +│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ +│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ +│ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ +│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ +│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ +│ ┆ ┆ kind: data ┆ is_static: true ┆ entity_path: /this/that │ +│ ┆ ┆ ┆ kind: data ┆ kind: data │ +╞══════════════════════╪══════════════════════╪════════════════════════════════════╪════════════════════════════════════╪══════════════════════════════════════╡ +│ 30 ┆ null ┆ [2] ┆ [c] ┆ [{x: 2.0, y: 2.0}] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 60 ┆ null ┆ null ┆ [c] ┆ [{x: 5.0, y: 5.0}] │ 
+└──────────────────────┴──────────────────────┴────────────────────────────────────┴────────────────────────────────────┴──────────────────────────────────────┘ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-2.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-2.snap index 836cd0232c01..3fb8b0226754 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-2.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-2.snap @@ -2,15 +2,15 @@ source: crates/store/re_dataframe/src/query.rs expression: DisplayRB(dataframe) --- -┌──────────────────────┬──────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ -│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ -│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ -│ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ -│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ -│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ -│ ┆ ┆ kind: data ┆ is_static: true ┆ entity_path: /this/that │ -│ ┆ ┆ ┆ kind: data ┆ kind: data │ -╞══════════════════════╪══════════════════════════════╪════════════════════════════════════╪════════════════════════════════════╪══════════════════════════════════════╡ 
-└──────────────────────┴──────────────────────────────┴────────────────────────────────────┴────────────────────────────────────┴──────────────────────────────────────┘ +┌──────────────────────┬──────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ +│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ +│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ +│ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ +│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ +│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ +│ ┆ ┆ kind: data ┆ is_static: true ┆ entity_path: /this/that │ +│ ┆ ┆ ┆ kind: data ┆ kind: data │ +╞══════════════════════╪══════════════════════╪════════════════════════════════════╪════════════════════════════════════╪══════════════════════════════════════╡ +└──────────────────────┴──────────────────────┴────────────────────────────────────┴────────────────────────────────────┴──────────────────────────────────────┘ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-3.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-3.snap index 9f5faffb202a..8837ef8e9620 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-3.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-3.snap @@ -5,8 +5,8 @@ expression: 
DisplayRB(dataframe) ┌──────────────────────┬───────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ │ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ │ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ │ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ │ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-4.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-4.snap index 49e869655df8..d2c05453a207 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-4.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-4.snap @@ -5,8 +5,8 @@ expression: DisplayRB(dataframe) ┌──────────────────────┬───────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ │ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels 
┆ /this/that:example.MyPoints:points │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ │ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ │ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ │ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null.snap index 836cd0232c01..3fb8b0226754 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null.snap @@ -2,15 +2,15 @@ source: crates/store/re_dataframe/src/query.rs expression: DisplayRB(dataframe) --- -┌──────────────────────┬──────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ -│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ -│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ 
index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ -│ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ -│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ -│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ -│ ┆ ┆ kind: data ┆ is_static: true ┆ entity_path: /this/that │ -│ ┆ ┆ ┆ kind: data ┆ kind: data │ -╞══════════════════════╪══════════════════════════════╪════════════════════════════════════╪════════════════════════════════════╪══════════════════════════════════════╡ -└──────────────────────┴──────────────────────────────┴────────────────────────────────────┴────────────────────────────────────┴──────────────────────────────────────┘ +┌──────────────────────┬──────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ +│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ +│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ +│ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ +│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ +│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ +│ ┆ ┆ kind: data ┆ is_static: true ┆ entity_path: /this/that │ +│ ┆ ┆ ┆ kind: data ┆ kind: data │ 
+╞══════════════════════╪══════════════════════╪════════════════════════════════════╪════════════════════════════════════╪══════════════════════════════════════╡ +└──────────────────────┴──────────────────────┴────────────────────────────────────┴────────────────────────────────────┴──────────────────────────────────────┘ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__query_static_any_values.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__query_static_any_values.snap index c8076eb62738..c1cb5f04b356 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__query_static_any_values.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__query_static_any_values.snap @@ -2,14 +2,14 @@ source: crates/store/re_dataframe/src/query.rs expression: DisplayRB(dataframe) --- -┌───────────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┐ -│ /test:baz ┆ /test:foo ┆ /test:yak │ -│ --- ┆ --- ┆ --- │ -│ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable Utf8] │ -│ component: baz ┆ component: foo ┆ component: yak │ -│ entity_path: /test ┆ entity_path: /test ┆ entity_path: /test │ -│ is_static: true ┆ is_static: true ┆ is_static: true │ -│ kind: data ┆ kind: data ┆ kind: data │ -╞═══════════════════════════════════╪════════════════════════════════════╪════════════════════════════════════╡ -│ [42] ┆ [bar] ┆ [yuk] │ -└───────────────────────────────────┴────────────────────────────────────┴────────────────────────────────────┘ +┌────────────────────┬────────────────────┬────────────────────┐ +│ /test:baz ┆ /test:foo ┆ /test:yak │ +│ --- ┆ --- ┆ --- │ +│ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Utf8) │ +│ component: baz ┆ component: foo ┆ component: yak │ +│ entity_path: /test ┆ entity_path: /test ┆ entity_path: /test │ +│ is_static: true ┆ is_static: true ┆ is_static: 
true │ +│ kind: data ┆ kind: data ┆ kind: data │ +╞════════════════════╪════════════════════╪════════════════════╡ +│ [42] ┆ [bar] ┆ [yuk] │ +└────────────────────┴────────────────────┴────────────────────┘ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-2.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-2.snap index 65e75ad2b108..0c868b0e924b 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-2.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-2.snap @@ -5,7 +5,7 @@ expression: DisplayRB(dataframe) ┌──────────────────────┬──────────────────────┬────────────────────────────────────────┐ │ frame_nr ┆ frame_nr ┆ ATimeColumnThatDoesntExist │ │ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable i64 ┆ type: nullable null │ +│ type: Int64 ┆ type: Int64 ┆ type: Null │ │ index_name: frame_nr ┆ index_name: frame_nr ┆ index_name: ATimeColumnThatDoesntExist │ │ kind: index ┆ kind: index ┆ is_sorted: true │ │ ┆ ┆ kind: index │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-3.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-3.snap index 8746a67a19ca..43c058d3bccc 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-3.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-3.snap @@ -6,12 +6,13 @@ expression: DisplayRB(dataframe) │ /this/that:example.MyPoints: ┆ /this/that:example.MyPoints: ┆ /non_existing_entity:example ┆ /this/that:MyPoints:AFieldTh ┆ /this/that:AFieldThatDoesntE ┆ /this/that:AArchetypeNameTha │ │ points ┆ points ┆ .MyPoints:points ┆ atDoesntExist ┆ xist ┆ tDoesNotExist:positions │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable List[nullable ┆ type: nullable List[nullable ┆ type: nullable null ┆ type: nullable null ┆ type: nullable null ┆ 
type: nullable null │ -│ Struct[2]] ┆ Struct[2]] ┆ component: ┆ component: MyPoints:AFieldTh ┆ component: ┆ component: AArchetypeNameTha │ -│ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ example.MyPoints:points ┆ atDoesntExist ┆ AFieldThatDoesntExist ┆ tDoesNotExist:positions │ -│ component: ┆ component: ┆ entity_path: ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ entity_path: /this/that │ -│ example.MyPoints:points ┆ example.MyPoints:points ┆ /non_existing_entity ┆ kind: data ┆ kind: data ┆ kind: data │ -│ component_type: ┆ component_type: ┆ kind: data ┆ ┆ ┆ │ +│ type: List(Struct("x": ┆ type: List(Struct("x": ┆ type: Null ┆ type: Null ┆ type: Null ┆ type: Null │ +│ non-null Float32, "y": ┆ non-null Float32, "y": ┆ component: ┆ component: MyPoints:AFieldTh ┆ component: ┆ component: AArchetypeNameTha │ +│ non-null Float32)) ┆ non-null Float32)) ┆ example.MyPoints:points ┆ atDoesntExist ┆ AFieldThatDoesntExist ┆ tDoesNotExist:positions │ +│ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ entity_path: ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ entity_path: /this/that │ +│ component: ┆ component: ┆ /non_existing_entity ┆ kind: data ┆ kind: data ┆ kind: data │ +│ example.MyPoints:points ┆ example.MyPoints:points ┆ kind: data ┆ ┆ ┆ │ +│ component_type: ┆ component_type: ┆ ┆ ┆ ┆ │ │ example.MyPoint ┆ example.MyPoint ┆ ┆ ┆ ┆ │ │ entity_path: /this/that ┆ entity_path: /this/that ┆ ┆ ┆ ┆ │ │ kind: data ┆ kind: data ┆ ┆ ┆ ┆ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-4.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-4.snap index 698a29b1ae1c..2b9d6d1810ac 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-4.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-4.snap @@ -5,11 +5,10 @@ expression: DisplayRB(dataframe) 
┌────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┐ │ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ /this/that:exa │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ mple.MyPoints: │ -│ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ labels │ -│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ --- │ -│ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ type: nullable │ -│ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ List[nullable │ -│ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ Utf8] │ +│ type: Int64 ┆ type: Int64 ┆ type: Int64 ┆ type: Int64 ┆ type: Int64 ┆ type: Int64 ┆ type: Int64 ┆ type: Int64 ┆ type: Int64 ┆ type: Int64 ┆ labels │ +│ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ index_name: ┆ --- │ +│ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ frame_nr ┆ type: │ +│ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ kind: index ┆ List(Utf8) │ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ archetype: exa │ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ mple.MyPoints │ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ component: exa │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__sparse_fill_strategy_latestatglobal.snap 
b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__sparse_fill_strategy_latestatglobal.snap index 6df39c78880d..a5dbb874af53 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__sparse_fill_strategy_latestatglobal.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__sparse_fill_strategy_latestatglobal.snap @@ -5,8 +5,8 @@ expression: DisplayRB(dataframe) ┌──────────────────────┬───────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ │ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ │ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ │ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ │ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values-2.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values-2.snap index bb8a14c2567a..d20814a0f3a4 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values-2.snap +++ 
b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values-2.snap @@ -5,8 +5,8 @@ expression: DisplayRB(dataframe) ┌──────────────────────┬───────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ │ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ │ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ │ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ │ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values.snap index 782ec20c6dd4..bfb6f70bcd87 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values.snap @@ -2,28 +2,28 @@ source: crates/store/re_dataframe/src/query.rs expression: DisplayRB(dataframe) --- 
-┌──────────────────────┬──────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ -│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ -│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Struct[2]] │ -│ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ -│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ -│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ -│ ┆ ┆ kind: data ┆ is_static: true ┆ entity_path: /this/that │ -│ ┆ ┆ ┆ kind: data ┆ kind: data │ -╞══════════════════════╪══════════════════════════════╪════════════════════════════════════╪════════════════════════════════════╪══════════════════════════════════════╡ -│ 0 ┆ null ┆ null ┆ [c] ┆ null │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 15 ┆ null ┆ null ┆ [c] ┆ null │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 30 ┆ null ┆ [2] ┆ [c] ┆ [{x: 2.0, y: 2.0}] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 45 ┆ null ┆ null ┆ [c] ┆ null │ 
-├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 60 ┆ null ┆ null ┆ [c] ┆ [{x: 5.0, y: 5.0}] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 75 ┆ null ┆ null ┆ [c] ┆ null │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 90 ┆ null ┆ null ┆ [c] ┆ null │ -└──────────────────────┴──────────────────────────────┴────────────────────────────────────┴────────────────────────────────────┴──────────────────────────────────────┘ +┌──────────────────────┬──────────────────────┬────────────────────────────────────┬────────────────────────────────────┬──────────────────────────────────────┐ +│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels ┆ /this/that:example.MyPoints:points │ +│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) ┆ type: List(Struct("x": non-null │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints ┆ Float32, "y": non-null Float32)) │ +│ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels ┆ archetype: example.MyPoints │ +│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel ┆ component: example.MyPoints:points │ +│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that ┆ component_type: example.MyPoint │ +│ ┆ ┆ kind: data ┆ is_static: true ┆ entity_path: /this/that │ +│ ┆ ┆ ┆ kind: data ┆ kind: data │ +╞══════════════════════╪══════════════════════╪════════════════════════════════════╪════════════════════════════════════╪══════════════════════════════════════╡ +│ 0 ┆ null 
┆ null ┆ [c] ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 15 ┆ null ┆ null ┆ [c] ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 30 ┆ null ┆ [2] ┆ [c] ┆ [{x: 2.0, y: 2.0}] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 45 ┆ null ┆ null ┆ [c] ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 60 ┆ null ┆ null ┆ [c] ┆ [{x: 5.0, y: 5.0}] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 75 ┆ null ┆ null ┆ [c] ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 90 ┆ null ┆ null ┆ [c] ┆ null │ +└──────────────────────┴──────────────────────┴────────────────────────────────────┴────────────────────────────────────┴──────────────────────────────────────┘ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents-2.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents-2.snap index 26c55ccc23c8..d2c4cbecc3c7 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents-2.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents-2.snap @@ -2,22 +2,22 @@ source: crates/store/re_dataframe/src/query.rs expression: DisplayRB(dataframe) --- 
-┌──────────────────────┬──────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┐ -│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels │ -│ --- ┆ --- ┆ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Utf8] │ -│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints │ -│ kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels │ -│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel │ -│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that │ -│ ┆ ┆ kind: data ┆ is_static: true │ -│ ┆ ┆ ┆ kind: data │ -╞══════════════════════╪══════════════════════════════╪════════════════════════════════════╪════════════════════════════════════╡ -│ 30 ┆ null ┆ [2] ┆ [c] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 40 ┆ null ┆ [3] ┆ [c] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 50 ┆ null ┆ [4] ┆ [c] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 70 ┆ null ┆ [6] ┆ [c] │ -└──────────────────────┴──────────────────────────────┴────────────────────────────────────┴────────────────────────────────────┘ +┌──────────────────────┬──────────────────────┬────────────────────────────────────┬────────────────────────────────────┐ +│ frame_nr ┆ log_time ┆ /this/that:example.MyPoints:colors ┆ /this/that:example.MyPoints:labels │ +│ --- ┆ --- ┆ --- ┆ --- │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: List(UInt32) ┆ type: List(Utf8) │ +│ index_name: frame_nr ┆ index_name: log_time ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints │ +│ 
kind: index ┆ kind: index ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:labels │ +│ ┆ ┆ component_type: example.MyColor ┆ component_type: example.MyLabel │ +│ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that │ +│ ┆ ┆ kind: data ┆ is_static: true │ +│ ┆ ┆ ┆ kind: data │ +╞══════════════════════╪══════════════════════╪════════════════════════════════════╪════════════════════════════════════╡ +│ 30 ┆ null ┆ [2] ┆ [c] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 40 ┆ null ┆ [3] ┆ [c] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 50 ┆ null ┆ [4] ┆ [c] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 70 ┆ null ┆ [6] ┆ [c] │ +└──────────────────────┴──────────────────────┴────────────────────────────────────┴────────────────────────────────────┘ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents.snap index e5d49dcd761f..9ce694ec6eb0 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents.snap @@ -2,11 +2,11 @@ source: crates/store/re_dataframe/src/query.rs expression: DisplayRB(dataframe) --- -┌──────────────────────┬──────────────────────────────┐ -│ frame_nr ┆ log_time │ -│ --- ┆ --- │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) │ -│ index_name: frame_nr ┆ index_name: log_time │ -│ kind: index ┆ kind: index │ -╞══════════════════════╪══════════════════════════════╡ -└──────────────────────┴──────────────────────────────┘ +┌──────────────────────┬──────────────────────┐ +│ frame_nr ┆ log_time │ +│ --- ┆ --- │ +│ type: Int64 ┆ type: 
Timestamp(ns) │ +│ index_name: frame_nr ┆ index_name: log_time │ +│ kind: index ┆ kind: index │ +╞══════════════════════╪══════════════════════╡ +└──────────────────────┴──────────────────────┘ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents_and_selection.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents_and_selection.snap index a91287905aa2..43b2d023d095 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents_and_selection.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents_and_selection.snap @@ -2,26 +2,25 @@ source: crates/store/re_dataframe/src/query.rs expression: DisplayRB(dataframe) --- -┌──────────────────────┬──────────────────────────────┬──────────────────────┬──────────────────────────────┬──────────────────────────────┬──────────────────────────────┐ -│ frame_nr ┆ log_time ┆ log_tick ┆ /this/that:example.MyPoints: ┆ /this/that:example.MyPoints: ┆ /this/that:example.MyPoints: │ -│ --- ┆ --- ┆ --- ┆ points ┆ colors ┆ labels │ -│ type: nullable i64 ┆ type: nullable Timestamp(ns) ┆ type: nullable null ┆ --- ┆ --- ┆ --- │ -│ index_name: frame_nr ┆ index_name: log_time ┆ index_name: log_tick ┆ type: nullable null ┆ type: nullable List[nullable ┆ type: nullable List[nullable │ -│ kind: index ┆ kind: index ┆ is_sorted: true ┆ component: ┆ u32] ┆ Utf8] │ -│ ┆ ┆ kind: index ┆ example.MyPoints:points ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints │ -│ ┆ ┆ ┆ entity_path: /this/that ┆ component: ┆ component: │ -│ ┆ ┆ ┆ kind: data ┆ example.MyPoints:colors ┆ example.MyPoints:labels │ -│ ┆ ┆ ┆ ┆ component_type: ┆ component_type: │ -│ ┆ ┆ ┆ ┆ example.MyColor ┆ example.MyLabel │ -│ ┆ ┆ ┆ ┆ entity_path: /this/that ┆ entity_path: /this/that │ -│ ┆ ┆ ┆ ┆ kind: data ┆ is_static: true │ -│ ┆ ┆ ┆ ┆ ┆ kind: data │ 
-╞══════════════════════╪══════════════════════════════╪══════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ -│ 30 ┆ null ┆ null ┆ null ┆ [2] ┆ [c] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 40 ┆ null ┆ null ┆ null ┆ [3] ┆ [c] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 50 ┆ null ┆ null ┆ null ┆ [4] ┆ [c] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 70 ┆ null ┆ null ┆ null ┆ [6] ┆ [c] │ -└──────────────────────┴──────────────────────────────┴──────────────────────┴──────────────────────────────┴──────────────────────────────┴──────────────────────────────┘ +┌──────────────────────┬──────────────────────┬──────────────────────┬──────────────────────────────┬──────────────────────────────┬──────────────────────────────┐ +│ frame_nr ┆ log_time ┆ log_tick ┆ /this/that:example.MyPoints: ┆ /this/that:example.MyPoints: ┆ /this/that:example.MyPoints: │ +│ --- ┆ --- ┆ --- ┆ points ┆ colors ┆ labels │ +│ type: Int64 ┆ type: Timestamp(ns) ┆ type: Null ┆ --- ┆ --- ┆ --- │ +│ index_name: frame_nr ┆ index_name: log_time ┆ index_name: log_tick ┆ type: Null ┆ type: List(UInt32) ┆ type: List(Utf8) │ +│ kind: index ┆ kind: index ┆ is_sorted: true ┆ component: ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints │ +│ ┆ ┆ kind: index ┆ example.MyPoints:points ┆ component: ┆ component: │ +│ ┆ ┆ ┆ entity_path: /this/that ┆ example.MyPoints:colors ┆ example.MyPoints:labels │ +│ ┆ ┆ ┆ kind: data ┆ component_type: ┆ component_type: │ +│ ┆ ┆ ┆ ┆ example.MyColor ┆ example.MyLabel │ +│ ┆ ┆ ┆ ┆ entity_path: /this/that ┆ 
entity_path: /this/that │ +│ ┆ ┆ ┆ ┆ kind: data ┆ is_static: true │ +│ ┆ ┆ ┆ ┆ ┆ kind: data │ +╞══════════════════════╪══════════════════════╪══════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ +│ 30 ┆ null ┆ null ┆ null ┆ [2] ┆ [c] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 40 ┆ null ┆ null ┆ null ┆ [3] ┆ [c] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 50 ┆ null ┆ null ┆ null ┆ [4] ┆ [c] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 70 ┆ null ┆ null ┆ null ┆ [6] ┆ [c] │ +└──────────────────────┴──────────────────────┴──────────────────────┴──────────────────────────────┴──────────────────────────────┴──────────────────────────────┘ diff --git a/crates/store/re_dataframe/src/utils.rs b/crates/store/re_dataframe/src/utils.rs index 21f388f71592..e4d30e0dc5fe 100644 --- a/crates/store/re_dataframe/src/utils.rs +++ b/crates/store/re_dataframe/src/utils.rs @@ -1,35 +1,681 @@ -use arrow::array::{RecordBatch, RecordBatchOptions, new_null_array}; -use arrow::datatypes::{DataType, Schema}; -use arrow::error::ArrowError; use std::sync::Arc; -#[tracing::instrument(level = "info", skip_all)] +use arrow::array::{ + Array as _, ArrayRef, GenericListArray, OffsetSizeTrait, RecordBatch, RecordBatchOptions, + StructArray, new_null_array, +}; +use arrow::datatypes::{DataType, Field, FieldRef, Fields, Schema}; +use arrow::error::ArrowError; + +use re_arrow_util::format_field_datatype; + +/// Align a [`RecordBatch`] to a target [`Schema`], widening nested types where possible. 
+/// +/// # Schema-widening contract (RR-4429) +/// +/// The table below lists only the cases that arrow's `Field::try_merge` (run at registration) +/// would accept. Anything `try_merge` rejects cannot reach here. +/// +/// | Case | Read-side widener | +/// |---------------------------------------------|-------------------| +/// | Struct child added (`{a,b}` → `{a,b,c}`) | adapt (null-pad) | +/// | Nullability widening (non-null → nullable) | adapt (re-wrap) | +/// | `DataType::Null` → typed, at any depth | adapt (typed null-array) | +/// | `List` / `LargeList` inner widened | adapt (recurse) | +/// | Identical types | fast-path pass through | +/// | `Union` | **reject** at registration (aligner gap) | +/// +/// Types in arrow-schema's leaf-equality bucket (`FixedSizeList`, primitives, `Dictionary`, `Map`, +/// decimals, `RunEndEncoded`, etc.) cannot reach the aligner in non-identical shape — `Field::try_merge` +/// rejects any non-identical pair before the aligner runs. So the aligner's only job for those types +/// is to pass them through when identical, which the fast-path below handles. +/// +/// `Union` is the one exception: `try_merge` *does* recursively widen Union children, which the +/// aligner has no branch for. It's rejected at registration by +/// [`re_arrow_util::reject_unsupported_widenings`] so it never reaches any of the above logic.
+#[tracing::instrument(level = "trace", skip_all)] pub fn align_record_batch_to_schema( batch: &RecordBatch, target_schema: &Arc, ) -> Result { let num_rows = batch.num_rows(); - - let mut aligned_columns = Vec::with_capacity(target_schema.fields().len()); + let mut aligned = Vec::with_capacity(target_schema.fields().len()); for field in target_schema.fields() { - if let Some((idx, _)) = batch.schema().column_with_name(field.name()) { - let batch_data_type = batch.column(idx).data_type(); - if batch_data_type == &DataType::Null && field.data_type() != &DataType::Null { - // Chunk store may output a null array of null data type - aligned_columns.push(new_null_array(field.data_type(), num_rows)); - } else { - aligned_columns.push(batch.column(idx).clone()); - } - } else { - // Fill with nulls of the right data type - aligned_columns.push(new_null_array(field.data_type(), num_rows)); - } + let col = match batch.schema().column_with_name(field.name()) { + Some((idx, _)) => widen_array_to_field(batch.column(idx), field, field.name())?, + None => new_null_array(field.data_type(), num_rows), + }; + aligned.push(col); } RecordBatch::try_new_with_options( target_schema.clone(), - aligned_columns, + aligned, &RecordBatchOptions::new().with_row_count(Some(num_rows)), ) } + +/// Widen `array` to match the shape of `target`'s data type. +/// +/// `path` is a dotted breadcrumb used only for error messages +fn widen_array_to_field( + array: &ArrayRef, + target: &FieldRef, + path: &str, +) -> Result { + // A `Null`-typed source column converts to a typed null-array of the target type. + if matches!(array.data_type(), DataType::Null) { + return Ok(new_null_array(target.data_type(), array.len())); + } + + // Identical data types pass through (schema-widening contract row). + // `DataType` equality is structural and recursive (including inner `Field` nullability), + // so this correctly shortcuts only the cases where no widening is needed. 
+ if array.data_type() == target.data_type() { + return Ok(array.clone()); + } + + match target.data_type() { + DataType::Struct(t_fields) => { + let t_fields = t_fields.clone(); + if !matches!(array.data_type(), DataType::Struct(_)) { + return Err(type_differs_error(path, target, array.data_type())); + } + widen_struct(array, &t_fields, path) + } + DataType::List(t_inner) => { + let t_inner = t_inner.clone(); + let DataType::List(_) = array.data_type() else { + return Err(type_differs_error(path, target, array.data_type())); + }; + widen_list_like::(array, &t_inner, path) + } + DataType::LargeList(t_inner) => { + let t_inner = t_inner.clone(); + let DataType::LargeList(_) = array.data_type() else { + return Err(type_differs_error(path, target, array.data_type())); + }; + widen_list_like::(array, &t_inner, path) + } + // `FixedSizeList` and other leaf-equality types are caught by the fast-path above when + // identical; `try_merge` rejects any non-identical shape, so reaching this arm means an + // upstream invariant violated the contract. + _ => Err(type_differs_error(path, target, array.data_type())), + } +} + +fn widen_struct( + array: &ArrayRef, + target_fields: &Fields, + path: &str, +) -> Result { + let struct_array = array + .as_any() + .downcast_ref::() + .ok_or_else(|| { + // defensive + schema_mismatch( + path, + &format!("expected struct array, got {}", array.data_type()), + ) + })?; + let struct_len = struct_array.len(); + + // Assumes target children are a superset of source children (guaranteed by + // `Schema::try_merge`, which only ever widens). + let mut widened_children = Vec::with_capacity(target_fields.len()); + for t_child in target_fields { + let child_path = format!("{path}.{}", t_child.name()); + let child = if let Some(source_col) = struct_array.column_by_name(t_child.name()) { + widen_array_to_field(source_col, t_child, &child_path)? 
+ } else { + new_null_array(t_child.data_type(), struct_len) + }; + widened_children.push(child); + } + + Ok(Arc::new(StructArray::try_new( + target_fields.clone(), + widened_children, + struct_array.nulls().cloned(), + )?) as ArrayRef) +} + +fn widen_list_like( + array: &ArrayRef, + target_inner: &FieldRef, + path: &str, +) -> Result { + let list_array = array + .as_any() + .downcast_ref::>() + .ok_or_else(|| { + schema_mismatch( + path, + &format!("expected list array, got {}", array.data_type()), + ) + })?; + + let item_path = format!("{path}.{}", target_inner.name()); + let widened_values = widen_array_to_field(list_array.values(), target_inner, &item_path)?; + + Ok(Arc::new(GenericListArray::::try_new( + target_inner.clone(), + list_array.offsets().clone(), + widened_values, + list_array.nulls().cloned(), + )?) as ArrayRef) +} + +#[inline] +fn type_differs_error(path: &str, target: &Field, actual: &DataType) -> ArrowError { + schema_mismatch( + path, + &format!( + "type differs (expected {}, got {actual})", + format_field_datatype(target), + ), + ) +} + +#[inline] +fn schema_mismatch(path: &str, detail: &str) -> ArrowError { + ArrowError::SchemaError(format!("rerun schema mismatch at `{path}`: {detail}")) +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use super::*; + use arrow::array::{ArrayRef, Int32Array, Int64Array, ListArray, StringArray, StructArray}; + use arrow::buffer::OffsetBuffer; + use arrow::datatypes::{DataType, Field, Fields, Schema}; + + fn new_schema(fields: Vec) -> Schema { + let meta = HashMap::with_capacity(0); + Schema::new_with_metadata(fields, meta) + } + + /// Wrapper around `RecordBatch::try_new_with_options` so tests match the project's lint + /// policy without each call-site specifying row-count explicitly. 
+ fn rb(schema: Arc, columns: Vec) -> RecordBatch { + let num_rows = columns.first().map_or(0, |c| c.len()); + RecordBatch::try_new_with_options( + schema, + columns, + &RecordBatchOptions::new().with_row_count(Some(num_rows)), + ) + .unwrap() + } + + #[test] + fn align_missing_top_level_column_null_pads() { + let target = Arc::new(new_schema(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Utf8, true), + ])); + let batch = rb( + Arc::new(new_schema(vec![Field::new("a", DataType::Int32, true)])), + vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef], + ); + + let aligned = align_record_batch_to_schema(&batch, &target).unwrap(); + assert_eq!(aligned.num_rows(), 3); + assert_eq!(aligned.num_columns(), 2); + assert_eq!(aligned.column(1).null_count(), 3); + } + + #[test] + fn align_widens_struct_with_missing_child() { + let target_struct = DataType::Struct( + vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Int32, true), + ] + .into(), + ); + let source_struct = StructArray::try_new( + Fields::from(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + ]), + vec![ + Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef, + Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef, + ], + None, + ) + .unwrap(); + + let target = Arc::new(new_schema(vec![Field::new("s", target_struct, true)])); + let batch = rb( + Arc::new(new_schema(vec![Field::new( + "s", + source_struct.data_type().clone(), + true, + )])), + vec![Arc::new(source_struct) as ArrayRef], + ); + + let aligned = align_record_batch_to_schema(&batch, &target).unwrap(); + let widened = aligned + .column(0) + .as_any() + .downcast_ref::() + .expect("struct"); + assert_eq!(widened.num_columns(), 3); + assert_eq!(widened.column_by_name("c").unwrap().null_count(), 2); + } + + #[test] + fn align_widens_list_inner_nullability_non_null_to_nullable() { + let source_inner_non_null = 
Arc::new(Field::new("item", DataType::Int32, false)); + let target_inner_nullable = Arc::new(Field::new("item", DataType::Int32, true)); + + let values = Int32Array::from(vec![1, 2, 3]); + let source_list = ListArray::new( + source_inner_non_null.clone(), + OffsetBuffer::new(vec![0i32, 3].into()), + Arc::new(values), + None, + ); + + let target = Arc::new(new_schema(vec![Field::new( + "col", + DataType::List(target_inner_nullable), + true, + )])); + let batch = rb( + Arc::new(new_schema(vec![Field::new( + "col", + DataType::List(source_inner_non_null), + true, + )])), + vec![Arc::new(source_list) as ArrayRef], + ); + + let aligned = align_record_batch_to_schema(&batch, &target).unwrap(); + assert_eq!(aligned.num_rows(), 1); + } + + #[test] + fn align_primitive_mismatch_errors_compactly() { + let target = Arc::new(new_schema(vec![Field::new("a", DataType::Int64, false)])); + let batch = rb( + Arc::new(new_schema(vec![Field::new("a", DataType::Int32, false)])), + vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef], + ); + + let err = align_record_batch_to_schema(&batch, &target).unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("rerun schema mismatch at `a`"), "msg: {msg}"); + assert!(msg.contains("type differs"), "msg: {msg}"); + assert!(msg.contains("Int64"), "msg: {msg}"); + assert!(msg.contains("Int32"), "msg: {msg}"); + // Compact formatting — no Field-struct Debug spew. 
+ assert!(!msg.contains("Field {"), "msg: {msg}"); + assert!(!msg.contains("dict_id"), "msg: {msg}"); + } + + #[test] + fn align_deeply_nested_path_in_error() { + let target_inner_struct = DataType::Struct( + vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, true), + ] + .into(), + ); + let source_inner_struct = + DataType::Struct(vec![Field::new("a", DataType::Int64, false)].into()); + + let target_outer = DataType::Struct( + vec![Field::new( + "outer_list", + DataType::List(Arc::new(Field::new("item", target_inner_struct, true))), + true, + )] + .into(), + ); + let source_outer = DataType::Struct( + vec![Field::new( + "outer_list", + DataType::List(Arc::new(Field::new( + "item", + source_inner_struct.clone(), + true, + ))), + true, + )] + .into(), + ); + + let inner = StructArray::try_new( + Fields::from(vec![Field::new("a", DataType::Int64, false)]), + vec![Arc::new(Int64Array::from(vec![1])) as ArrayRef], + None, + ) + .unwrap(); + let inner_list = ListArray::new( + Arc::new(Field::new("item", source_inner_struct, true)), + OffsetBuffer::new(vec![0i32, 1].into()), + Arc::new(inner), + None, + ); + let outer = StructArray::try_new( + Fields::from(vec![Field::new( + "outer_list", + DataType::List(Arc::new(Field::new( + "item", + DataType::Struct(vec![Field::new("a", DataType::Int64, false)].into()), + true, + ))), + true, + )]), + vec![Arc::new(inner_list) as ArrayRef], + None, + ) + .unwrap(); + + let target = Arc::new(new_schema(vec![Field::new("top", target_outer, true)])); + let batch = rb( + Arc::new(new_schema(vec![Field::new("top", source_outer, true)])), + vec![Arc::new(outer) as ArrayRef], + ); + + let err = align_record_batch_to_schema(&batch, &target).unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("at `top.outer_list.item.a`"), "msg: {msg}"); + assert!(msg.contains("type differs"), "msg: {msg}"); + } + + #[test] + fn align_null_typed_source_becomes_typed_null_at_any_depth() { + let target = 
Arc::new(new_schema(vec![Field::new("a", DataType::Int32, true)])); + let batch = rb( + Arc::new(new_schema(vec![Field::new("a", DataType::Null, true)])), + vec![new_null_array(&DataType::Null, 2)], + ); + let aligned = align_record_batch_to_schema(&batch, &target).unwrap(); + assert_eq!(aligned.column(0).data_type(), &DataType::Int32); + assert_eq!(aligned.column(0).null_count(), 2); + } + + #[test] + fn align_already_matching_short_circuits() { + let target = Arc::new(new_schema(vec![Field::new("a", DataType::Utf8, false)])); + let batch = rb( + target.clone(), + vec![Arc::new(StringArray::from(vec!["x", "y"])) as ArrayRef], + ); + let aligned = align_record_batch_to_schema(&batch, &target).unwrap(); + assert_eq!(aligned.num_rows(), 2); + } +} + +/// Executable documentation of arrow's `Schema::try_merge` expectations. +/// +/// The aligner above relies on these invariants: it only adapts cases `try_merge` actually +/// emits, and every other shape is assumed unreachable. If arrow-rs ever changes one of these +/// rules (e.g. starts widening FSL inner fields, or allows dictionary key promotion), the +/// corresponding test here fails and points at exactly which aligner assumption needs +/// revisiting before we silently accept inputs the aligner can't handle. 
+#[cfg(test)] +mod try_merge_invariants { + use std::sync::Arc; + + use std::collections::HashMap; + + use arrow::datatypes::{DataType, Field, Schema}; + + fn schema_of(field: Field) -> Schema { + Schema::new_with_metadata(vec![field], HashMap::with_capacity(0)) + } + + fn try_merge_fields(a: Field, b: Field) -> Result { + Schema::try_merge([schema_of(a), schema_of(b)]) + } + + // ---- FixedSizeList: leaf-equality, never widened by `try_merge` --------------------------- + + #[test] + fn try_merge_rejects_fsl_inner_nullability_drift() { + let lhs = Field::new( + "x", + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3), + true, + ); + let rhs = Field::new( + "x", + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3), + true, + ); + assert!(try_merge_fields(lhs, rhs).is_err()); + } + + #[test] + fn try_merge_rejects_fsl_inner_type_drift() { + let lhs = Field::new( + "x", + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3), + true, + ); + let rhs = Field::new( + "x", + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, true)), 3), + true, + ); + assert!(try_merge_fields(lhs, rhs).is_err()); + } + + #[test] + fn try_merge_rejects_fsl_length_drift() { + let lhs = Field::new( + "x", + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3), + true, + ); + let rhs = Field::new( + "x", + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 4), + true, + ); + assert!(try_merge_fields(lhs, rhs).is_err()); + } + + #[test] + fn try_merge_accepts_identical_fsl() { + let dt = DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3); + let lhs = Field::new("x", dt.clone(), true); + let rhs = Field::new("x", dt.clone(), true); + let merged = try_merge_fields(lhs, rhs).expect("identical FSLs must merge"); + assert_eq!(merged.field(0).data_type(), &dt); + } + + // ---- Dictionary: leaf-equality, never 
widened by `try_merge` ------------------------------ + + #[test] + fn try_merge_rejects_dictionary_key_drift() { + let lhs = Field::new( + "x", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + true, + ); + let rhs = Field::new( + "x", + DataType::Dictionary(Box::new(DataType::Int64), Box::new(DataType::Utf8)), + true, + ); + assert!(try_merge_fields(lhs, rhs).is_err()); + } + + #[test] + fn try_merge_rejects_dictionary_value_drift() { + let lhs = Field::new( + "x", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + true, + ); + let rhs = Field::new( + "x", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::LargeUtf8)), + true, + ); + assert!(try_merge_fields(lhs, rhs).is_err()); + } + + // ---- Nullability: widens toward nullable, never narrows ----------------------------------- + + #[test] + fn try_merge_widens_nullability_never_narrows() { + let non_null = Field::new("x", DataType::Int32, false); + let nullable = Field::new("x", DataType::Int32, true); + let merged = try_merge_fields(non_null, nullable).expect("mixed nullability must merge"); + assert!( + merged.field(0).is_nullable(), + "merged field must be nullable (widening direction)" + ); + } + + // ---- List inner nullability widening: the one nested case the aligner actively uses ------- + + #[test] + fn try_merge_widens_list_inner_nullability() { + let lhs = Field::new( + "x", + DataType::List(Arc::new(Field::new("item", DataType::Int32, false))), + true, + ); + let rhs = Field::new( + "x", + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + true, + ); + let merged = try_merge_fields(lhs, rhs).expect("list inner widening must merge"); + let DataType::List(inner) = merged.field(0).data_type() else { + panic!("expected list"); + }; + assert!(inner.is_nullable(), "merged list inner must be nullable"); + } + + // ---- Null → typed: other key case the aligner actively adapts ----------------------------- + + 
#[test] + fn try_merge_widens_null_to_typed() { + let typed = Field::new("x", DataType::Int32, false); + let null = Field::new("x", DataType::Null, true); + let merged = try_merge_fields(typed, null).expect("Null → typed must merge"); + assert_eq!(merged.field(0).data_type(), &DataType::Int32); + assert!( + merged.field(0).is_nullable(), + "Null contributes nullability" + ); + } + + // ---- Leaf-equality composites are opaque to *inner* widening ------------------------------ + // + // These tests are the strong invariant the aligner relies on: when an inner `Field` or + // `DataType` *would* widen if placed at the top level (per the `widens_*` tests above), + // wrapping it in `Map` / `Dictionary` / `FixedSizeList` makes `try_merge` reject the pair + // instead of recursing. This is what lets the aligner skip writing inner-widening logic for + // these composite types. + + use arrow::datatypes::Fields; + + fn map_type_with_value_field(value: Field) -> DataType { + let entries = Field::new( + "entries", + DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Utf8, false), + value, + ])), + false, + ); + DataType::Map(Arc::new(entries), false) + } + + #[test] + fn try_merge_rejects_map_value_nullability_widening() { + // A nullability drift on the value field: at top level this would widen (see + // `try_merge_widens_nullability_never_narrows`). Wrapped in Map, try_merge rejects. + let lhs = Field::new( + "m", + map_type_with_value_field(Field::new("value", DataType::Int32, false)), + true, + ); + let rhs = Field::new( + "m", + map_type_with_value_field(Field::new("value", DataType::Int32, true)), + true, + ); + assert!(try_merge_fields(lhs, rhs).is_err()); + } + + #[test] + fn try_merge_rejects_map_value_struct_child_addition() { + // A struct-child addition on the value field: at top level this would widen (struct + // child-addition is in the aligner's contract). Wrapped in Map, try_merge rejects. 
+ let v_narrow = DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, true)])); + let v_wide = DataType::Struct(Fields::from(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + ])); + let lhs = Field::new( + "m", + map_type_with_value_field(Field::new("value", v_narrow, true)), + true, + ); + let rhs = Field::new( + "m", + map_type_with_value_field(Field::new("value", v_wide, true)), + true, + ); + assert!(try_merge_fields(lhs, rhs).is_err()); + } + + #[test] + fn try_merge_rejects_dictionary_value_struct_child_addition() { + // Same shape as the Map test, for Dictionary: a widenable struct on the values side. + let v_narrow = DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, true)])); + let v_wide = DataType::Struct(Fields::from(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + ])); + let lhs = Field::new( + "d", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(v_narrow)), + true, + ); + let rhs = Field::new( + "d", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(v_wide)), + true, + ); + assert!(try_merge_fields(lhs, rhs).is_err()); + } + + #[test] + fn try_merge_rejects_fsl_inner_struct_child_addition() { + // Same shape as above, for FixedSizeList: a widenable struct as the FSL item type. 
+ let inner_narrow = + DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, true)])); + let inner_wide = DataType::Struct(Fields::from(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + ])); + let lhs = Field::new( + "f", + DataType::FixedSizeList(Arc::new(Field::new("item", inner_narrow, true)), 3), + true, + ); + let rhs = Field::new( + "f", + DataType::FixedSizeList(Arc::new(Field::new("item", inner_wide, true)), 3), + true, + ); + assert!(try_merge_fields(lhs, rhs).is_err()); + } +} diff --git a/crates/store/re_datafusion/Cargo.toml b/crates/store/re_datafusion/Cargo.toml index e46836a8299e..1e0bafdc238c 100644 --- a/crates/store/re_datafusion/Cargo.toml +++ b/crates/store/re_datafusion/Cargo.toml @@ -30,9 +30,12 @@ default = [] [dependencies] # Rerun dependencies: +re_analytics.workspace = true re_arrow_util.workspace = true +re_backoff.workspace = true re_dataframe.workspace = true re_log.workspace = true +re_log_encoding = { workspace = true, features = ["decoder"] } re_redap_client.workspace = true re_log_types.workspace = true re_protos.workspace = true @@ -47,18 +50,24 @@ async-trait.workspace = true datafusion.workspace = true futures.workspace = true futures-util.workspace = true -log.workspace = true +http.workspace = true +opentelemetry.workspace = true +opentelemetry-proto = { workspace = true, features = ["gen-tonic-messages", "trace"] } parking_lot.workspace = true tokio.workspace = true tokio-stream.workspace = true tonic.workspace = true +tonic-prost.workspace = true tracing.workspace = true +web-time.workspace = true [target.'cfg(not(target_arch = "wasm32"))'.dependencies] re_perf_telemetry.workspace = true +reqwest.workspace = true [target.'cfg(target_arch = "wasm32")'.dependencies] futures.workspace = true +tonic-web-wasm-client.workspace = true wasm-bindgen-futures.workspace = true # These are used by Datafusion but the required features are not set for wasm build. 
diff --git a/crates/store/re_datafusion/src/analytics.rs b/crates/store/re_datafusion/src/analytics.rs new file mode 100644 index 000000000000..df1b22dacb2b --- /dev/null +++ b/crates/store/re_datafusion/src/analytics.rs @@ -0,0 +1,848 @@ +//! Per-connection analytics for dataset queries. +//! +//! Each connection to a Rerun Cloud instance gets its own analytics sender +//! that forwards OTLP trace events to that instance's OTEL ingest endpoint. +//! This ensures analytics go to the correct cloud when the viewer is connected +//! to multiple clouds simultaneously. +//! +//! ## One event per query +//! +//! A single user action (dataset query) produces exactly one analytics event, +//! sent when the query completes. The event includes both the scan/planning +//! phase stats and the fetch phase stats (split by gRPC vs direct fetches). +//! +//! ## Trace correlation +//! +//! When the client makes a `query_dataset` call, the server responds with an +//! `x-request-trace-id` header containing the server-side trace ID. The client +//! captures this and, when sending the analytics OTLP export to the server, +//! sets the same `x-request-trace-id` header on the analytics request. This +//! allows the server to correlate the analytics event with the original query +//! trace in Grafana/Tempo. 
+ +use std::ops::Range; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::{Arc, OnceLock}; + +use opentelemetry_proto::tonic::{ + collector::trace::v1::{ExportTraceServiceRequest, ExportTraceServiceResponse}, + common::v1::any_value::Value, + common::v1::{AnyValue, KeyValue}, + resource::v1::Resource, + trace::v1::{ResourceSpans, ScopeSpans, Span, span::Link, span::SpanKind}, +}; +use re_dataframe::QueryExpression; +use re_uri::Origin; +use tokio::sync::OnceCell; +use web_time::{Duration, SystemTime}; + +#[cfg(not(target_arch = "wasm32"))] +type Channel = tonic::transport::Channel; + +#[cfg(target_arch = "wasm32")] +type Channel = tonic_web_wasm_client::Client; + +const EXPORT_PATH: &str = "/opentelemetry.proto.collector.trace.v1.TraceService/Export"; + +/// A per-connection analytics client that sends OTLP traces to a specific +/// Rerun Cloud's OTEL ingest endpoint. +/// +/// Cheap to clone (wraps an `Arc`). +/// +/// The target of these events is `PostHog`, and they are aimed at user analytics. +/// This means a single user action (e.g. a dataset query) should only +/// trigger a single `PostHog` event, sent at the conclusion of the action. +#[derive(Clone)] +pub(crate) struct ConnectionAnalytics { + inner: Arc, +} + +impl std::fmt::Debug for ConnectionAnalytics { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ConnectionAnalytics") + .field("origin", &self.inner.origin) + .finish_non_exhaustive() + } +} + +struct Inner { + origin: Origin, + client: tokio::sync::Mutex>>, + + /// Lazily populated once per connection via [`ConnectionAnalytics::set_server_version`]. + /// `None` if the version RPC failed or has not completed yet. + server_version: OnceCell>, +} + +impl ConnectionAnalytics { + /// Create a new analytics sender for the given origin. + /// + /// The actual gRPC connection is established lazily on first use.
+ pub fn new(origin: Origin) -> Self { + Self { + inner: Arc::new(Inner { + origin, + client: tokio::sync::Mutex::new(None), + server_version: OnceCell::new(), + }), + } + } + + /// Record the server/stack version string (e.g. semver) for this connection. + /// + /// Only the first call has effect. The value is then attached to every + /// subsequent query span on this connection as `server_version`. + pub fn set_server_version(&self, version: Option) { + // `OnceCell::set` returns Err if the cell was already set; silently ignore — + // we only want the first value. + #[expect(clippy::let_underscore_must_use)] + let _ = self.inner.server_version.set(version); + } + + /// Returns the cached server version, if available. + fn server_version(&self) -> Option { + self.inner.server_version.get().and_then(Clone::clone) + } + + /// Begin tracking analytics for a query. + /// + /// Returns a [`PendingQueryAnalytics`] that accumulates stats across phases. + /// The analytics event is sent when the last clone is dropped. + pub fn begin_query( + &self, + query_info: QueryInfo, + scan_start: web_time::Instant, + ) -> PendingQueryAnalytics { + PendingQueryAnalytics { + inner: Arc::new(PendingInner { + connection: self.clone(), + query_info, + fetch_stats: SharedFetchStats::default(), + scan_start, + time_to_first_chunk: OnceLock::new(), + direct_terminal_reason: OnceLock::new(), + error_kind: OnceLock::new(), + }), + } + } + + /// Send an OTLP span in the background. Never blocks the caller. 
+ fn send_span(&self, span: Span, trace_id: Option) { + let this = self.clone(); + + let fut = async move { + if let Err(err) = this.send_span_impl(span, trace_id).await { + re_log::debug_once!( + "Failed to send analytics to Rerun Cloud: {} ({})", + err.code(), + err.message() + ); + } + }; + + #[cfg(target_arch = "wasm32")] + wasm_bindgen_futures::spawn_local(fut); + + #[cfg(not(target_arch = "wasm32"))] + { + if let Ok(handle) = tokio::runtime::Handle::try_current() { + handle.spawn(fut); + } else { + // Prefer spawning on the current tokio runtime if available. + // When called from Python via FFI, the polling thread may not have a + // tokio runtime entered, so fall back to a detached thread. + std::thread::Builder::new() + .name("query-analytics-sender".to_owned()) + .spawn(move || { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build(); + match rt { + Ok(rt) => rt.block_on(fut), + Err(err) => { + re_log::debug_once!("Failed to create analytics runtime: {err}"); + } + } + }) + .ok(); + } + } + } + + async fn send_span_impl( + &self, + span: Span, + trace_id: Option, + ) -> tonic::Result<()> { + let mut guard = self.inner.client.lock().await; + + let grpc = if let Some(grpc) = guard.as_mut() { + grpc + } else { + match re_redap_client::channel(self.inner.origin.clone()).await { + Ok(channel) => guard.get_or_insert(tonic::client::Grpc::new(channel)), + Err(err) => { + return Err(tonic::Status::unavailable(format!( + "failed to connect for analytics: {err}" + ))); + } + } + }; + + let mut resource_attributes = vec![kv_string("service.name", "rerun-viewer")]; + if let Some(analytics) = re_analytics::Analytics::global_get() { + resource_attributes.push(kv_string("analytics_id", &analytics.config().analytics_id)); + } + + let export_request = ExportTraceServiceRequest { + resource_spans: vec![ResourceSpans { + resource: Some(Resource { + attributes: resource_attributes, + dropped_attributes_count: 0, + entity_refs: Vec::new(), + }), + 
scope_spans: vec![ScopeSpans { + scope: None, + spans: vec![span], + schema_url: String::new(), + }], + schema_url: String::new(), + }], + }; + + let mut request = tonic::Request::new(export_request); + if let Some(trace_id) = trace_id + && let Ok(value) = trace_id.to_string().parse() + { + request.metadata_mut().insert("x-request-trace-id", value); + } + + grpc.ready().await.map_err(|err| { + tonic::Status::unavailable(format!("analytics channel not ready: {err}")) + })?; + + let path = http::uri::PathAndQuery::from_static(EXPORT_PATH); + let codec = tonic_prost::ProstCodec::default(); + + let _response: tonic::Response = + grpc.unary(request.map(|m| m), path, codec).await?; + + Ok(()) + } +} + +// ---------------------------------------------------------------------------- + +/// Query shape +#[derive(Clone, Copy, Debug)] +pub(crate) enum QueryType { + /// Query for static (timeless) data — no temporal selector applies. + Static, + + /// Point-in-time query: a `latest_at` selector with no range. + LatestAt, + + /// Time-range query: a range selector with no `latest_at`. + Range, + + /// Combined dataframe query: both `latest_at` and range selectors are set. + Dataframe, + + /// Neither `latest_at` nor range is set — an unbounded scan of all timestamps. + FullScan, +} + +impl QueryType { + /// Classify the query shape into a bounded label for analytics. + pub(crate) fn classify(query_expression: &QueryExpression) -> Self { + if query_expression.is_static() { + Self::Static + } else { + let has_latest_at = query_expression.min_latest_at().is_some(); + let has_range = query_expression.max_range().is_some(); + match (has_latest_at, has_range) { + (true, true) => Self::Dataframe, + (true, false) => Self::LatestAt, + (false, true) => Self::Range, + (false, false) => Self::FullScan, + } + } + } + + /// Stable string label emitted into the analytics span. 
+ const fn as_str(self) -> &'static str { + match self { + Self::Static => "static", + Self::LatestAt => "latest_at", + Self::Range => "range", + Self::Dataframe => "dataframe", + Self::FullScan => "full_scan", + } + } +} + +/// Information about the query planning phase, collected in `scan()`. +#[derive(Clone, Debug)] +pub struct QueryInfo { + /// The dataset being queried. Sent to the server so it can enrich the + /// analytics event with full dataset stats (total chunks, bytes, etc.). + pub dataset_id: String, + + /// Number of unique chunks returned by `query_dataset` (subset of the dataset). + pub query_chunks: usize, + + /// Number of distinct segments involved in the query. + pub query_segments: usize, + + /// Number of distinct layers touched by the query. + pub query_layers: usize, + + /// Number of columns in the query output schema. + pub query_columns: usize, + + /// Number of entity paths in the query request. + pub query_entities: usize, + + /// Total size of all queried chunks in bytes (from chunk metadata). + pub query_bytes: u64, + + /// Max number of chunks touched within any single segment in this query. + pub query_chunks_per_segment_max: u32, + + /// Mean number of chunks touched per segment in this query. + pub query_chunks_per_segment_mean: f32, + + /// Query shape + pub query_type: QueryType, + + /// Name of the sort/filter index (timeline) for this query, if any. + pub primary_index_name: Option, + + /// Wall-clock start..end of the scan planning phase. + pub time_range: Range, + + /// Time from sending `query_dataset` until the first response message + /// arrives (the chunk metadata, not actual chunk data). + pub time_to_first_chunk_info: Option, + + /// Server-side trace ID from the `x-request-trace-id` response header. + pub trace_id: Option, +} + +/// Accumulates fetch statistics from multiple partitions. +/// +/// Thread-safe — multiple IO loops can record stats concurrently. 
+/// +/// This is the final sink for per-query fetch counters. To avoid cross-core +/// cache-line contention during the hot fetch/retry loops, writers accumulate +/// into a task-local [`TaskFetchStats`] and flush into this shared struct +/// exactly once per outer fetch task via [`TaskFetchStats::flush_into`]. +#[derive(Default)] +pub(crate) struct SharedFetchStats { + grpc_requests: AtomicU64, + grpc_bytes: AtomicU64, + direct_requests: AtomicU64, + direct_bytes: AtomicU64, + + /// Total extra direct-fetch attempts across all merged requests (attempts beyond the first). + /// Note: gRPC retries happen at the transport layer and are not visible here — only direct + /// (HTTP Range) retries are counted. + direct_retries_total: AtomicU64, + + /// Number of distinct merged requests that ended up needing more than one attempt. + direct_requests_retried: AtomicU64, + + /// Total time spent in backoff sleeps across direct-fetch retries (microseconds). + direct_retry_sleep_us: AtomicU64, + + /// Worst-case attempt number reached for any single merged request. + direct_max_attempt: AtomicU64, + + /// Number of byte ranges generated by the batch splitter before gap-merging. + direct_original_ranges: AtomicU64, + + /// Number of merged HTTP Range requests actually issued after gap-merging. + direct_merged_ranges: AtomicU64, +} + +impl SharedFetchStats { + /// Take a snapshot of the counters after every outer fetch task has flushed. 
+ fn snapshot(&mut self) -> TaskFetchStats { + // &mut self means we can skip the atomic-load barriers + TaskFetchStats { + grpc_requests: *self.grpc_requests.get_mut(), + grpc_bytes: *self.grpc_bytes.get_mut(), + direct_requests: *self.direct_requests.get_mut(), + direct_bytes: *self.direct_bytes.get_mut(), + direct_retries_total: *self.direct_retries_total.get_mut(), + direct_requests_retried: *self.direct_requests_retried.get_mut(), + direct_retry_sleep_us: *self.direct_retry_sleep_us.get_mut(), + direct_max_attempt: *self.direct_max_attempt.get_mut(), + direct_original_ranges: *self.direct_original_ranges.get_mut(), + direct_merged_ranges: *self.direct_merged_ranges.get_mut(), + } + } +} + +/// Tracks a query in progress. Accumulates fetch stats from all partitions +/// and sends a single combined analytics event when the last clone is dropped. +/// +/// Cheap to clone (wraps an `Arc`). +#[derive(Clone)] +pub(crate) struct PendingQueryAnalytics { + inner: Arc, +} + +impl std::fmt::Debug for PendingQueryAnalytics { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PendingQueryAnalytics") + .finish_non_exhaustive() + } +} + +struct PendingInner { + connection: ConnectionAnalytics, + query_info: QueryInfo, + fetch_stats: SharedFetchStats, + + /// Monotonic start time of the query, for computing elapsed durations. + scan_start: web_time::Instant, + + /// Time from scan start until the first chunk is returned to datafusion. + time_to_first_chunk: OnceLock, + + /// First terminal direct-fetch failure reason encountered, if any. + /// Only set once. Stored as the bounded [`DirectFetchFailureReason`] enum and + /// converted to its label via [`DirectFetchFailureReason::as_str`] when the span is built. + direct_terminal_reason: std::sync::OnceLock, + + /// Error classification, if the query failed. `None` ⇒ success. + /// Stored as `&'static str` from [`QueryErrorKind::as_str`] so emission is zero-copy.
+ error_kind: std::sync::OnceLock<&'static str>, +} + +/// Bounded set of query-failure classifications for the analytics span. +/// +/// Kept as an enum (rather than free-form strings) so that adding a new call +/// site cannot silently introduce a new `error_kind` value and inflate the +/// analytics cardinality. Add a variant here if you need a new bucket. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg_attr(target_arch = "wasm32", expect(dead_code))] +pub enum QueryErrorKind { + /// A gRPC fetch (`FetchChunks` or the fast-path gRPC-only fetch) failed. + GrpcFetch, + + /// A direct (HTTP Range) fetch failed, non-retryable or retries exhausted. + DirectFetch, + + /// CPU-side decoding or execution error (chunk insertion, row materialization). + Decode, + + /// Generic / unclassified error (e.g. IO task join failure). + Other, +} + +impl QueryErrorKind { + /// Stable string label emitted into the analytics span. + pub fn as_str(self) -> &'static str { + match self { + Self::GrpcFetch => "grpc_fetch", + Self::DirectFetch => "direct_fetch", + Self::Decode => "decode", + Self::Other => "other", + } + } +} + +/// Bounded set of terminal failure reasons for direct fetches. +/// +/// These labels are emitted both into the per-process OTEL counter +/// (`chunk_fetch.direct.result`) and into the per-query `PostHog` span as +/// `fetch_direct_terminal_reason`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg_attr(target_arch = "wasm32", expect(dead_code))] +pub(crate) enum DirectFetchFailureReason { + Timeout, + Http4xx, + Http5xx, + Connection, + Decode, + Other, +} + +impl DirectFetchFailureReason { + /// Convert to the stable string label used in telemetry. 
+ pub(crate) fn as_str(self) -> &'static str { + match self { + Self::Timeout => "timeout", + Self::Http4xx => "http_4xx", + Self::Http5xx => "http_5xx", + Self::Connection => "connection", + Self::Decode => "decode", + Self::Other => "other", + } + } +} + +impl PendingQueryAnalytics { + /// Record that the first result chunk has been returned to the user. + /// Only the first call has any effect. + #[cfg_attr(target_arch = "wasm32", expect(dead_code))] + pub fn record_first_chunk(&self) { + self.inner + .time_to_first_chunk + .get_or_init(|| self.inner.scan_start.elapsed()); + } + + /// Access the shared [`SharedFetchStats`] sink. Passed to [`TaskFetchStats::flush_into`] + /// by [`TaskFetchStats::try_flush_into`]. + pub(crate) fn fetch_stats(&self) -> &SharedFetchStats { + &self.inner.fetch_stats + } + + /// Record the terminal failure reason for a direct fetch that exhausted retries + /// or hit a non-retryable error. Only the first call has effect. + /// + /// NOTE(review): this method is compiled out on wasm32, while the call in + /// `fetch_batch_direct` carries no matching `cfg` guard. Confirm that call site + /// is never built for wasm32. + #[cfg(not(target_arch = "wasm32"))] + pub fn record_direct_terminal_failure(&self, reason: DirectFetchFailureReason) { + #[expect(clippy::let_underscore_must_use)] + let _ = self.inner.direct_terminal_reason.set(reason); + } + + /// Mark the query as failed with the given error kind. + /// + /// Only the first call has effect. + pub fn record_error(&self, kind: QueryErrorKind) { + #[expect(clippy::let_underscore_must_use)] + let _ = self.inner.error_kind.set(kind.as_str()); + } +} + +/// Per-task accumulator for fetch counters. +/// +/// Each outer fetch task owns one of these and mutates it without synchronization. +/// At the end of the task it is folded into the shared [`SharedFetchStats`] via +/// [`TaskFetchStats::flush_into`], which is the only place the shared cache line +/// is touched. +/// +/// This avoids cross-core cache-line ping-pong on the shared atomics during the +/// hot fetch/retry loops, where worst-case contention would otherwise be +/// `inner_concurrency × outer_concurrency × num_partitions`.
+#[derive(Default)] +#[must_use] +pub(crate) struct TaskFetchStats { + grpc_requests: u64, + grpc_bytes: u64, + direct_requests: u64, + direct_bytes: u64, + direct_retries_total: u64, + direct_requests_retried: u64, + direct_retry_sleep_us: u64, + direct_max_attempt: u64, + direct_original_ranges: u64, + direct_merged_ranges: u64, +} + +#[cfg_attr(target_arch = "wasm32", expect(dead_code))] +impl TaskFetchStats { + /// Record a gRPC fetch. + pub fn record_grpc_fetch(&mut self, bytes: u64) { + self.grpc_requests += 1; + self.grpc_bytes += bytes; + } + + /// Record a direct (HTTP) fetch. + pub fn record_direct_fetch(&mut self, bytes: u64) { + self.direct_requests += 1; + self.direct_bytes += bytes; + } + + /// Record a single direct-fetch retry on one merged request. + /// + /// `sleep` is the backoff duration actually slept before the retry attempt. + /// `attempt` is the attempt number about to be made (starts at 2 for the first retry). + pub fn record_direct_retry(&mut self, sleep: Duration, attempt: u64) { + self.direct_retries_total += 1; + self.direct_retry_sleep_us += sleep.as_micros() as u64; + self.direct_max_attempt = self.direct_max_attempt.max(attempt); + } + + /// Record that a single merged request needed at least one retry (call once per + /// retried request, regardless of how many attempts it took). + pub fn record_direct_request_was_retried(&mut self) { + self.direct_requests_retried += 1; + } + + /// Record the range-merging efficiency for this batch. + pub fn record_direct_ranges(&mut self, original: u64, merged: u64) { + self.direct_original_ranges += original; + self.direct_merged_ranges += merged; + } + + /// Merge another task-local accumulator into this one. 
+ #[expect( + clippy::needless_pass_by_value, + reason = "Prevent double-counting stats" + )] + pub fn merge_from(&mut self, other: Self) { + let Self { + grpc_requests, + grpc_bytes, + direct_requests, + direct_bytes, + direct_retries_total, + direct_requests_retried, + direct_retry_sleep_us, + direct_max_attempt, + direct_original_ranges, + direct_merged_ranges, + } = other; + self.grpc_requests += grpc_requests; + self.grpc_bytes += grpc_bytes; + self.direct_requests += direct_requests; + self.direct_bytes += direct_bytes; + self.direct_retries_total += direct_retries_total; + self.direct_requests_retried += direct_requests_retried; + self.direct_retry_sleep_us += direct_retry_sleep_us; + self.direct_max_attempt = self.direct_max_attempt.max(direct_max_attempt); + self.direct_original_ranges += direct_original_ranges; + self.direct_merged_ranges += direct_merged_ranges; + } + + /// Fold this buffer into the shared atomic sink. + pub fn flush_into(self, shared: &SharedFetchStats) { + macro_rules! flush_stats { + {sum $($sum_id:ident),*; max $($max_id:ident),*;} => { + let Self { + $($sum_id,)* + $($max_id,)* + } = self; + $( + // Zero-valued fields are skipped so totally-idle tasks don't touch the + // shared cache line at all. + if $sum_id != 0 { + shared.$sum_id + .fetch_add($sum_id, Ordering::Relaxed); + } + )+ + $( + if $max_id != 0 { + shared.$max_id + .fetch_max($max_id, Ordering::Relaxed); + } + )* + }; + } + flush_stats! { + sum + grpc_requests, + grpc_bytes, + direct_requests, + direct_bytes, + direct_retries_total, + direct_requests_retried, + direct_retry_sleep_us, + direct_original_ranges, + direct_merged_ranges; + max + direct_max_attempt; + }; + } + + /// Flush this buffer into `analytics` if present, also recording an error (if any). 
+ pub fn try_flush_into( + self, + analytics: Option<&PendingQueryAnalytics>, + result: Result<(), QueryErrorKind>, + ) { + if let Some(analytics) = analytics { + self.flush_into(analytics.fetch_stats()); + if let Err(err) = result { + analytics.record_error(err); + } + } + } +} + +impl Drop for PendingInner { + fn drop(&mut self) { + let Self { + connection, + query_info, + fetch_stats, + scan_start, + time_to_first_chunk, + direct_terminal_reason, + error_kind, + } = self; + + let total_duration = scan_start.elapsed(); + + let QueryInfo { + ref dataset_id, + query_chunks, + query_segments, + query_layers, + query_columns, + query_entities, + query_bytes, + query_chunks_per_segment_max, + query_chunks_per_segment_mean, + query_type, + ref primary_index_name, + ref time_range, + time_to_first_chunk_info, + trace_id, + } = *query_info; + + let fetch = fetch_stats.snapshot(); + let time_to_first_chunk = time_to_first_chunk.get().copied(); + let direct_terminal_reason = direct_terminal_reason.get().copied(); + let error_kind = error_kind.get().copied(); + + let [start_ns, end_ns] = [ + nanos_since_epoch(&time_range.start), + nanos_since_epoch(&time_range.end), + ]; + + #[expect( + clippy::cast_possible_wrap, + reason = "OTLP proto uses i64 for int values" + )] + let mut attributes = vec![ + kv_string("dataset_id", dataset_id), + kv_int("query_chunks", query_chunks as i64), + kv_int("query_segments", query_segments as i64), + kv_int("query_layers", query_layers as i64), + kv_int("query_columns", query_columns as i64), + kv_int("query_entities", query_entities as i64), + kv_int("query_bytes", query_bytes as i64), + kv_int( + "query_chunks_per_segment_max", + i64::from(query_chunks_per_segment_max), + ), + kv_double( + "query_chunks_per_segment_mean", + f64::from(query_chunks_per_segment_mean), + ), + kv_string("query_type", query_type.as_str()), + kv_int("total_duration_us", total_duration.as_micros() as i64), + kv_bool("is_success", error_kind.is_none()), + // Fetch 
stats: gRPC + kv_int("fetch_grpc_requests", fetch.grpc_requests as i64), + kv_int("fetch_grpc_bytes", fetch.grpc_bytes as i64), + // Fetch stats: direct (HTTP). Note: gRPC retries happen at the transport + // layer and are not visible here — only direct-URL retries are counted. + kv_int("fetch_direct_requests", fetch.direct_requests as i64), + kv_int("fetch_direct_bytes", fetch.direct_bytes as i64), + kv_int("fetch_direct_retries", fetch.direct_retries_total as i64), + kv_int( + "fetch_direct_requests_retried", + fetch.direct_requests_retried as i64, + ), + kv_int( + "fetch_direct_retry_sleep_us", + fetch.direct_retry_sleep_us as i64, + ), + kv_int("fetch_direct_max_attempt", fetch.direct_max_attempt as i64), + kv_int( + "fetch_direct_original_ranges", + fetch.direct_original_ranges as i64, + ), + kv_int( + "fetch_direct_merged_ranges", + fetch.direct_merged_ranges as i64, + ), + ]; + + if let Some(name) = primary_index_name { + attributes.push(kv_string("primary_index_name", name)); + } + + if let Some(ttfci) = time_to_first_chunk_info { + attributes.push(kv_int( + "time_to_first_chunk_info_us", + ttfci.as_micros() as i64, + )); + } + + if let Some(ttfr) = time_to_first_chunk { + attributes.push(kv_int("time_to_first_chunk_us", ttfr.as_micros() as i64)); + } + + if let Some(reason) = direct_terminal_reason { + attributes.push(kv_string("fetch_direct_terminal_reason", reason.as_str())); + } + + if let Some(kind) = error_kind { + attributes.push(kv_string("error_kind", kind)); + } + + if let Some(version) = connection.server_version() { + attributes.push(kv_string("server_version", &version)); + } + + let links = trace_id + .map(|id| { + vec![Link { + trace_id: id.to_bytes().to_vec(), + ..Default::default() + }] + }) + .unwrap_or_default(); + + let span = Span { + name: "cloud_query_dataset".to_owned(), + kind: SpanKind::Client.into(), + start_time_unix_nano: start_ns, + end_time_unix_nano: end_ns, + attributes, + links, + ..Default::default() + }; + + 
connection.send_span(span, trace_id); + } +} + +// ---------------------------------------------------------------------------- + +fn nanos_since_epoch(time: &SystemTime) -> u64 { + time.duration_since(SystemTime::UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() as u64 +} + +fn kv_string(key: &str, value: &str) -> KeyValue { + KeyValue { + key: key.to_owned(), + value: Some(AnyValue { + value: Some(Value::StringValue(value.to_owned())), + }), + } +} + +fn kv_int(key: &str, value: i64) -> KeyValue { + KeyValue { + key: key.to_owned(), + value: Some(AnyValue { + value: Some(Value::IntValue(value)), + }), + } +} + +fn kv_bool(key: &str, value: bool) -> KeyValue { + KeyValue { + key: key.to_owned(), + value: Some(AnyValue { + value: Some(Value::BoolValue(value)), + }), + } +} + +fn kv_double(key: &str, value: f64) -> KeyValue { + KeyValue { + key: key.to_owned(), + value: Some(AnyValue { + value: Some(Value::DoubleValue(value)), + }), + } +} diff --git a/crates/store/re_datafusion/src/batch_coalescer/coalesce_exec.rs b/crates/store/re_datafusion/src/batch_coalescer/coalesce_exec.rs index 7a77e80efb2d..3ca41742ff83 100644 --- a/crates/store/re_datafusion/src/batch_coalescer/coalesce_exec.rs +++ b/crates/store/re_datafusion/src/batch_coalescer/coalesce_exec.rs @@ -172,7 +172,6 @@ impl ExecutionPlan for SizedCoalesceBatchesExec { fn partition_statistics(&self, partition: Option) -> Result { self.input.partition_statistics(partition)?.with_fetch( - self.schema(), self.coalescer_options.max_rows, 0, 1, diff --git a/crates/store/re_datafusion/src/catalog_provider.rs b/crates/store/re_datafusion/src/catalog_provider.rs index 2778ee5389b6..a0a432cb3e13 100644 --- a/crates/store/re_datafusion/src/catalog_provider.rs +++ b/crates/store/re_datafusion/src/catalog_provider.rs @@ -10,6 +10,7 @@ use parking_lot::Mutex; use re_redap_client::ConnectionClient; use tokio::runtime::Handle as RuntimeHandle; +use crate::IntoDfError as _; use crate::TableEntryTableProvider; // These are to 
match the defaults in datafusion. @@ -45,9 +46,9 @@ fn get_table_refs( .clone() .get_table_names() .await - .map_err(|err| DataFusionError::External(Box::new(err)))? + .map_err(|err| err.into_df_error())? .into_iter() - .map(TableReference::from) + .map(|name| TableReference::from(name.to_string())) .collect(), ) }) @@ -137,14 +138,14 @@ impl CatalogProvider for RedapCatalogProvider { fn schema_names(&self) -> Vec { self.get_schema_names().unwrap_or_else(|err| { - log::error!("Error attempting to get table references from server: {err}"); + re_log::error!("Error attempting to get table references from server: {err}"); vec![] }) } fn schema(&self, name: &str) -> Option> { if let Err(err) = self.update_from_server() { - log::error!("Error updating table references from server: {err}"); + re_log::error!("Error updating table references from server: {err}"); return None; } @@ -194,7 +195,7 @@ impl SchemaProvider for RedapSchemaProvider { fn table_names(&self) -> Vec { let table_refs = get_table_refs(&self.client, &self.runtime).unwrap_or_else(|err| { - log::error!("Error getting table references: {err}"); + re_log::error!("Error getting table references: {err}"); vec![] }); diff --git a/crates/store/re_datafusion/src/chunk_fetcher.rs b/crates/store/re_datafusion/src/chunk_fetcher.rs new file mode 100644 index 000000000000..22b4ec7f735b --- /dev/null +++ b/crates/store/re_datafusion/src/chunk_fetcher.rs @@ -0,0 +1,756 @@ +//! Chunk fetching strategies: direct URL (HTTP Range) and gRPC. 
+ +use std::collections::BTreeMap; +use std::{error::Error as _, fmt::Write as _}; + +use arrow::array::{Array as _, DictionaryArray, RecordBatch, StringArray, UInt64Array}; +use arrow::datatypes::Int32Type; +use futures::StreamExt as _; +use tonic::IntoRequest as _; +use tracing::Instrument as _; + +use re_dataframe::external::re_chunk::Chunk; +use re_protos::cloud::v1alpha1::{FetchChunksRequest, QueryDatasetResponse}; +use re_redap_client::ApiResult; + +use crate::analytics::{DirectFetchFailureReason, PendingQueryAnalytics, TaskFetchStats}; +use crate::dataframe_query_common::DataframeClientAPI; + +// --- Telemetry --- + +#[cfg(not(target_arch = "wasm32"))] +pub(crate) mod metrics { + use std::sync::OnceLock; + + use opentelemetry::{KeyValue, metrics::Counter}; + + struct ChunkFetchMetrics { + /// Counts direct fetch outcomes: result = `success` | `failure` + direct_result: Counter, + + /// Counts bytes fetched, method = `direct` | `grpc` + bytes_fetched: Counter, + + /// Counts gRPC fetches for chunks without direct URLs + grpc_no_direct_urls: Counter, + } + + fn get() -> &'static ChunkFetchMetrics { + static INSTANCE: OnceLock = OnceLock::new(); + INSTANCE.get_or_init(|| { + let meter = opentelemetry::global::meter("chunk_fetch"); + ChunkFetchMetrics { + direct_result: meter + .u64_counter("chunk_fetch.direct.result") + .with_description("Direct fetch outcomes") + .build(), + bytes_fetched: meter + .u64_counter("chunk_fetch.bytes") + .with_description("Bytes fetched for chunk data") + .with_unit("B") + .build(), + grpc_no_direct_urls: meter + .u64_counter("chunk_fetch.grpc_no_direct_urls") + .with_description("gRPC fetches for chunks without direct URLs") + .build(), + } + }) + } + + /// Record when some number of bytes has been successfully fetched directly from object storage. 
+ pub fn record_direct_success(bytes: u64) { + let m = get(); + m.direct_result + .add(1, &[KeyValue::new("result", "success")]); + m.bytes_fetched + .add(bytes, &[KeyValue::new("method", "direct")]); + } + + /// Record a terminal direct fetch failure (retries exhausted or a non-retryable error). + /// + /// `reason` should be one of: `"timeout"`, `"http_4xx"`, `"http_5xx"`, + /// `"connection"`, `"decode"`, `"other"` (the labels returned by + /// `DirectFetchFailureReason::as_str`). + pub fn record_direct_failure(reason: &str) { + let m = get(); + m.direct_result.add( + 1, + &[ + KeyValue::new("result", "failure"), + KeyValue::new("reason", reason.to_owned()), + ], + ); + } + + /// Record a gRPC fetch when no direct URLs were available in the batch. + pub fn record_grpc_no_direct_urls(bytes: u64) { + let m = get(); + m.grpc_no_direct_urls.add(1, &[]); + m.bytes_fetched + .add(bytes, &[KeyValue::new("method", "grpc")]); + } +} + +/// Chunks tagged with their segment ID. +pub type ChunksWithSegment = Vec<(Chunk, Option)>; + +pub type SortedChunksWithSegment = (String, Vec); + +/// Maximum size of a single merged HTTP Range request (16 MiB, matching server). +const MAX_MERGED_RANGE_SIZE: usize = 16 * 1024 * 1024; + +/// Number of times to retry direct fetch on transient errors before returning a hard error. +const DIRECT_FETCH_MAX_RETRIES: usize = 10; + +// --- Range merging types --- + +/// Where a single chunk lives within a merged range response. +struct ChunkInMergedRange { + /// Index of this chunk in the original `RecordBatch` (used to preserve ordering). + original_row_index: usize, + + /// Byte offset of this chunk within the merged response body. + offset_in_merged: usize, + + /// Byte length of this chunk. + length: usize, +} + +/// A single HTTP Range request that may cover multiple adjacent chunks. +struct MergedRangeRequest { + /// The presigned URL to fetch from. + url: String, + + /// Absolute byte range start within the file (inclusive). + file_range_start: usize, + + /// Absolute byte range end within the file (exclusive).
+ file_range_end: usize, + + /// Individual chunks to extract from the merged response. + chunks: Vec, +} + +/// Error from a direct URL fetch attempt. Retried up to [`DIRECT_FETCH_MAX_RETRIES`] times. +#[derive(Debug)] +pub struct DirectFetchError { + msg: String, + retryable: bool, +} + +impl DirectFetchError { + fn new(msg: String, retryable: bool) -> Self { + Self { msg, retryable } + } +} + +impl std::fmt::Display for DirectFetchError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.msg) + } +} + +impl std::error::Error for DirectFetchError {} + +/// Returns `true` if the batch contains at least one non-null direct URL. +pub fn batch_has_any_direct_urls(batch: &RecordBatch) -> bool { + batch + .column_by_name(QueryDatasetResponse::FIELD_DIRECT_URL) + .is_some_and(|col| col.null_count() < col.len()) +} + +/// Split a batch into (direct-URL rows, non-URL rows). +/// +/// Either half is `None` if it would have zero rows. +pub fn split_batch_by_direct_url( + batch: &RecordBatch, +) -> (Option, Option) { + use arrow::compute::{filter_record_batch, is_not_null, not}; + + let Some(url_col) = batch.column_by_name(QueryDatasetResponse::FIELD_DIRECT_URL) else { + return (None, Some(batch.clone())); + }; + + let has_url = is_not_null(url_col).expect("is_not_null on direct_url column"); + let no_url = not(&has_url).expect("boolean not"); + + let direct_batch = if has_url.true_count() > 0 { + Some(filter_record_batch(batch, &has_url).expect("filter_record_batch for direct URL rows")) + } else { + None + }; + + let grpc_batch = if no_url.true_count() > 0 { + Some(filter_record_batch(batch, &no_url).expect("filter_record_batch for gRPC rows")) + } else { + None + }; + + (direct_batch, grpc_batch) +} + +/// Sum of `chunk_byte_len` values in a batch (best-effort, returns 0 on missing column). 
+pub fn batch_byte_size(batch: &RecordBatch) -> u64 { + batch + .column_by_name(QueryDatasetResponse::FIELD_CHUNK_BYTE_LENGTH) + .and_then(|c| c.as_any().downcast_ref::()) + .map(|arr| arr.iter().map(|v| v.unwrap_or(0)).sum()) + .unwrap_or(0) +} + +/// Fetch a batch of chunks via direct URLs. +/// +/// Individual requests are retried up to [`DIRECT_FETCH_MAX_RETRIES`] times on transient errors. +/// +/// `stats` is the caller's per-task accumulator; `pending` is used only for +/// recording the one-shot terminal failure reason on the shared state. +#[tracing::instrument(level = "info", skip_all, fields(num_chunks, byte_size))] +pub async fn fetch_batch_direct( + batch: &RecordBatch, + http_client: &reqwest::Client, + stats: &mut TaskFetchStats, + pending: Option<&PendingQueryAnalytics>, +) -> ApiResult> { + #[cfg(not(target_arch = "wasm32"))] + let byte_size = batch_byte_size(batch); + + let span = tracing::Span::current(); + span.record("num_chunks", batch.num_rows()); + #[cfg(not(target_arch = "wasm32"))] + span.record("byte_size", byte_size); + + match fetch_batch_via_direct_urls(http_client, batch, stats).await { + Ok(chunks) => { + #[cfg(not(target_arch = "wasm32"))] + metrics::record_direct_success(byte_size); + Ok(chunks) + } + Err(err) => { + let reason = DirectFetchFailureReason::classify(&err); + if let Some(pending) = pending { + pending.record_direct_terminal_failure(reason); + } + #[cfg(not(target_arch = "wasm32"))] + metrics::record_direct_failure(reason.as_str()); + Err(re_redap_client::ApiError::connection_with_source( + None, + err, + "fetching chunks via direct URLs", + )) + } + } +} + +impl DirectFetchFailureReason { + /// Classify a `DirectFetchError` by matching on its error message. + /// + /// The checks are case-sensitive substring tests evaluated in order, so the first + /// matching bucket wins and anything unmatched becomes `Other`. + /// NOTE(review): confirm the patterns match the casing of real upstream messages.
+ fn classify(err: &DirectFetchError) -> Self { + let msg = &err.msg; + if msg.contains("timed out") || msg.contains("Timeout") { + Self::Timeout + } else if msg.contains("status 4") { + Self::Http4xx + } else if msg.contains("status 5") { + Self::Http5xx + } else if msg.contains("connection") || msg.contains("dns") || msg.contains("connect") { + Self::Connection + } else if msg.contains("decode") + || msg.contains("from_rrd_bytes") + || msg.contains("from_record_batch") + { + Self::Decode + } else { + Self::Other + } + } +} + +/// Fetch a group of batches using the gRPC `FetchChunks` proxy. +pub async fn fetch_batch_group_via_grpc( + batch_group: &[RecordBatch], + client: &T, +) -> ApiResult> { + let mut all_chunks = Vec::new(); + + let mut client = client.clone(); + for batch in batch_group { + let chunk_info: re_protos::common::v1alpha1::DataframePart = batch.clone().into(); + + let fetch_chunks_request = FetchChunksRequest { + chunk_infos: vec![chunk_info], + }; + + let response = client + .fetch_chunks(fetch_chunks_request.into_request()) + .instrument(tracing::trace_span!("batched_fetch_chunks")) + .await + .map_err(|err| re_redap_client::ApiError::tonic(err, "FetchChunks request failed"))?; + + let response_stream = + re_redap_client::ApiResponseStream::from_tonic_response(response, "/FetchChunks"); + + let chunk_stream = + re_redap_client::fetch_chunks_response_to_chunk_and_segment_id(response_stream); + + let batch_chunks: Vec> = chunk_stream.collect().await; + for chunk_result in batch_chunks { + all_chunks.push(chunk_result?); + } + } + + Ok(all_chunks) +} + +fn classify_http_status(status: reqwest::StatusCode) -> DirectFetchError { + DirectFetchError { + msg: format!("HTTP request returned status {status}"), + retryable: status_retryable(status), + } +} + +fn status_retryable(status: reqwest::StatusCode) -> bool { + !matches!( + status, + reqwest::StatusCode::BAD_REQUEST + | reqwest::StatusCode::UNAUTHORIZED + | reqwest::StatusCode::FORBIDDEN + | 
reqwest::StatusCode::METHOD_NOT_ALLOWED + ) +} + +impl From for DirectFetchError { + fn from(err: reqwest::Error) -> Self { + let mut msg = match err.status() { + Some(status) => { + format!("HTTP request failed with status {status}: {err}") + } + None => format!("HTTP request failed: {err}"), + }; + + let retryable = err.status().is_none_or(status_retryable); + + if let Some(source) = err.source() { + write!(msg, " ({source})").expect("Can append"); + } + + Self { msg, retryable } + } +} + +// --- Range merging helpers (ported from rrd_mapper.rs) --- + +/// Returns the optimal gap size for merging adjacent byte ranges. +/// Uses 25% of average chunk size — merging across a gap "wastes" at most 25% extra bandwidth. +fn calculate_optimal_gap_size(ranges: &[(u64, u64)]) -> usize { + if ranges.len() < 2 { + return 0; + } + let avg_chunk_size: f64 = + ranges.iter().map(|(_, len)| *len as f64).sum::() / ranges.len() as f64; + (avg_chunk_size * 0.25) as usize +} + +/// Merge adjacent byte ranges for a single URL into fewer, larger HTTP Range requests. +/// +/// Ranges are merged when the gap between them is <= `max_gap_size` and the resulting +/// merged range does not exceed [`MAX_MERGED_RANGE_SIZE`]. 
+fn merge_ranges_for_url( + url: String, + mut chunks: Vec<(usize, u64, u64)>, // (original_row_index, offset, length) + max_gap_size: usize, +) -> Vec { + if chunks.is_empty() { + return vec![]; + } + + // Sort by offset + chunks.sort_by_key(|&(_, offset, _)| offset); + // Deduplicate ranges with same offset, keeping the first one + chunks.dedup_by_key(|(_, offset, _)| *offset); + + let mut merged_ranges = Vec::new(); + let (first_row, first_offset, first_length) = chunks[0]; + let mut current_start = first_offset as usize; + let mut current_end = (first_offset + first_length) as usize; + let mut chunk_infos = vec![ChunkInMergedRange { + original_row_index: first_row, + offset_in_merged: 0, + length: first_length as usize, + }]; + + for (row_idx, offset, length) in chunks.into_iter().skip(1) { + let offset = offset as usize; + let length = length as usize; + let gap_size = offset.saturating_sub(current_end); + + let new_end = (offset + length).max(current_end); + let new_merged_size = new_end - current_start; + + let should_merge = gap_size <= max_gap_size && new_merged_size <= MAX_MERGED_RANGE_SIZE; + + if should_merge { + chunk_infos.push(ChunkInMergedRange { + original_row_index: row_idx, + offset_in_merged: offset - current_start, + length, + }); + current_end = new_end; + } else { + merged_ranges.push(MergedRangeRequest { + url: url.clone(), + file_range_start: current_start, + file_range_end: current_end, + chunks: chunk_infos, + }); + + current_start = offset; + current_end = offset + length; + chunk_infos = vec![ChunkInMergedRange { + original_row_index: row_idx, + offset_in_merged: 0, + length, + }]; + } + } + + // Don't forget the last range + merged_ranges.push(MergedRangeRequest { + url, + file_range_start: current_start, + file_range_end: current_end, + chunks: chunk_infos, + }); + + merged_ranges +} + +/// Calculate adaptive concurrency based on range sizes and total data volume. 
+/// +/// Small ranges are latency-bound (high concurrency helps), large ranges are +/// bandwidth-bound (fewer concurrent requests avoids contention). +fn calculate_adaptive_concurrency(ranges: &[(u64, u64)]) -> usize { + if ranges.is_empty() { + return 1; + } + let total_range_size: usize = ranges.iter().map(|(_, len)| *len as usize).sum(); + let avg_range_size = total_range_size / ranges.len(); + + // Factor 1: range size determines base concurrency + let base_concurrency = if avg_range_size <= 128 * 1024 { + 130 + } else if avg_range_size <= 2 * 1024 * 1024 { + 90 + } else { + 30 + }; + + // Factor 2: memory pressure limiter based on total data + let memory_limit = if total_range_size <= 50 * 1024 * 1024 { + base_concurrency + } else if total_range_size <= 200 * 1024 * 1024 { + 25 + } else { + 8 + }; + + base_concurrency.min(memory_limit) +} + +/// Decode a single chunk from raw RRD bytes (protobuf-encoded `ArrowMsg`). +fn decode_chunk_from_bytes(bytes: &[u8]) -> Result<(Chunk, Option), DirectFetchError> { + use re_log_encoding::Decodable; + let raw_msg = + as Decodable>::from_rrd_bytes(bytes) + .map_err(|err| { + DirectFetchError::new(format!("Msg::from_rrd_bytes failed: {err}"), false) + })? 
+ .ok_or_else(|| DirectFetchError::new("empty msg".to_owned(), false))?; + let re_protos::log_msg::v1alpha1::log_msg::Msg::ArrowMsg(arrow_msg) = raw_msg else { + return Err(DirectFetchError::new("invalid msg type".to_owned(), false)); + }; + + let segment_id_opt = arrow_msg.store_id.clone().map(|id| id.recording_id); + + use re_log_encoding::ToApplication as _; + let app_msg = arrow_msg.to_application(()).map_err(|err| { + DirectFetchError::new(format!("ArrowMsg::to_application() failed: {err}"), false) + })?; + + let chunk = Chunk::from_record_batch(&app_msg.batch).map_err(|err| { + DirectFetchError::new(format!("Chunk::from_record_batch failed: {err}"), false) + })?; + + Ok((chunk, segment_id_opt)) +} + +/// Fetches chunks for a single request batch using direct URLs and HTTP Range requests. +/// +/// Adjacent byte ranges targeting the same URL are merged into larger HTTP Range requests +/// to reduce round-trips. Concurrency is adapted based on range sizes and total data volume. +/// The bytes at those offsets are protobuf-encoded `ArrowMsg` payloads +/// (the 16-byte `MessageHeader` has already been excluded from the manifest offsets). 
+#[tracing::instrument( + level = "info", + skip_all, + fields(num_chunks, num_merged_requests, concurrency) +)] +async fn fetch_batch_via_direct_urls( + http_client: &reqwest::Client, + batch: &RecordBatch, + stats: &mut TaskFetchStats, +) -> Result, DirectFetchError> { + fn batch_column<'a, T: arrow::array::Array + 'static>( + batch: &'a RecordBatch, + column_name: &'static str, + ) -> Result<&'a T, DirectFetchError> { + let column = batch + .column_by_name(column_name) + .ok_or_else(|| DirectFetchError::new(format!("missing column {column_name}"), false))?; + column + .as_any() + .downcast_ref::() + .ok_or_else(|| DirectFetchError::new(format!("invalid column {column_name}"), false)) + } + + let byte_offsets: &UInt64Array = + batch_column(batch, QueryDatasetResponse::FIELD_CHUNK_BYTE_OFFSET)?; + let byte_lengths: &UInt64Array = + batch_column(batch, QueryDatasetResponse::FIELD_CHUNK_BYTE_LENGTH)?; + let direct_urls: &DictionaryArray = + batch_column(batch, QueryDatasetResponse::FIELD_DIRECT_URL)?; + + let num_rows = batch.num_rows(); + + // Step 1: Group chunks by URL and collect all ranges for gap/concurrency calculations. 
+ let mut url_groups: BTreeMap> = BTreeMap::new(); + let mut all_ranges: Vec<(u64, u64)> = Vec::with_capacity(num_rows); + + let url_values = direct_urls + .values() + .as_any() + .downcast_ref::() + .expect("direct_url dictionary values must be strings"); + + for i in 0..num_rows { + let offset = byte_offsets.value(i); + let length = byte_lengths.value(i); + + re_log::debug_assert!( + !direct_urls.is_null(i), + "split_batch_by_direct_url should have filtered null URLs" + ); + if direct_urls.is_null(i) { + return Err(DirectFetchError::new( + format!("no direct URL for chunk at row {i}"), + false, + )); + } + let key = direct_urls.keys().value(i); + let url = url_values.value(key as usize).to_owned(); + + url_groups.entry(url).or_default().push((i, offset, length)); + all_ranges.push((offset, length)); + } + + // Step 2: Merge adjacent ranges per URL. + let max_gap_size = calculate_optimal_gap_size(&all_ranges); + let merged_requests: Vec = url_groups + .into_iter() + .flat_map(|(url, chunks)| merge_ranges_for_url(url, chunks, max_gap_size)) + .collect(); + + // Step 3: Calculate adaptive concurrency from original (un-merged) ranges. + let concurrency = calculate_adaptive_concurrency(&all_ranges); + + let span = tracing::Span::current(); + span.record("num_chunks", num_rows); + span.record("num_merged_requests", merged_requests.len()); + span.record("concurrency", concurrency); + + stats.record_direct_ranges(all_ranges.len() as u64, merged_requests.len() as u64); + + re_log::debug!( + "Range merging: {num_rows} chunks → {} merged requests, concurrency={concurrency}", + merged_requests.len() + ); + + // Step 4: Fetch merged ranges concurrently and extract individual chunks. + // + // Each inner future owns its own `TaskFetchStats` so nothing touches a + // shared cache line across threads during the retry-heavy hot path. The + // per-future buffers are merged into the outer task's accumulator below. 
+ let fetches = merged_requests + .into_iter() + .enumerate() + .map(|(req_idx, request)| { + let MergedRangeRequest { + url, + file_range_start, + file_range_end, + chunks, + } = request; + + let http_client = http_client.clone(); + async move { + let mut local_stats = TaskFetchStats::default(); + // Range headers are inclusive + let range_end = file_range_end - 1; + re_log::debug!( + "Merged fetch [{req_idx}]: {file_range_start}..={range_end} ({} chunks)", + chunks.len() + ); + + // Backoff matching gRPC retry settings: base 100ms, max 3s, 50% jitter. + let mut backoff_gen = re_backoff::BackoffGenerator::new( + std::time::Duration::from_millis(100), + std::time::Duration::from_secs(3), + ) + .expect("base is less than max"); + + let mut last_err: Option = None; + for attempt in 1..=DIRECT_FETCH_MAX_RETRIES { + if last_err.is_some() { + let backoff = backoff_gen.gen_next(); + let jittered = backoff.jittered(); + re_log::debug!( + "Direct fetch [{req_idx}] retry attempt {attempt}/{DIRECT_FETCH_MAX_RETRIES} after {jittered:?}" + ); + if attempt == 2 { + // Count this merged request as "needed a retry" on the first retry only. 
+ local_stats.record_direct_request_was_retried(); + } + local_stats.record_direct_retry(jittered, attempt as u64); + backoff.sleep().await; + } + + let fetch_result = fetch_merged_range( + &http_client, + &url, + file_range_start, + range_end, + &chunks, + ) + .await; + + match fetch_result { + Ok(results) => { + if attempt > 1 { + re_log::debug!( + "Direct fetch [{req_idx}] succeeded on attempt {attempt}" + ); + } + return (Ok(results), local_stats); + } + Err(err) if err.retryable => { + re_log::debug!( + "Direct fetch [{req_idx}] failure (attempt {attempt}/{DIRECT_FETCH_MAX_RETRIES}): {err}" + ); + last_err = Some(err); + } + Err(err) => { + re_log::error!( + "Non-retryable direct fetch failure on attempt {attempt}: {err}" + ); + return (Err(err), local_stats); + } + } + } + + let err = last_err.expect("at least one attempt was made"); + ( + Err(DirectFetchError::new( + format!( + "request [{req_idx}] failed after {DIRECT_FETCH_MAX_RETRIES} attempts: {err}" + ), + false, + )), + local_stats, + ) + } + .instrument(tracing::info_span!( + "direct_fetch_request", + req = req_idx, + bytes = tracing::field::Empty + )) + }); + + // Fold every inner buffer into the outer task's accumulator before we bail + // on the first error — we want stats from successful fetches preserved. + let mut all_chunks: Vec<(usize, (Chunk, Option))> = Vec::new(); + let mut first_err: Option = None; + async { + let mut stream = futures::stream::iter(fetches).buffer_unordered(concurrency); + while let Some((result, local_stats)) = stream.next().await { + stats.merge_from(local_stats); + match result { + Ok(chunks) => all_chunks.extend(chunks), + Err(err) => { + if first_err.is_none() { + first_err = Some(err); + } + } + } + } + } + .instrument(tracing::info_span!("direct_fetch_all")) + .await; + if let Some(err) = first_err { + return Err(err); + } + + // Step 5: Reassemble in original row order. 
+ all_chunks.sort_by_key(|(idx, _)| *idx); + let ordered: Vec<(Chunk, Option)> = all_chunks + .into_iter() + .map(|(_, chunk_with_segment)| chunk_with_segment) + .collect(); + + Ok(vec![ordered]) +} + +type DecodedChunk = (usize, (Chunk, Option)); + +async fn fetch_merged_range( + http_client: &reqwest::Client, + url: &str, + range_start: usize, + range_end: usize, + chunks: &[ChunkInMergedRange], +) -> Result, DirectFetchError> { + let response = http_client + .get(url) + .header("Range", format!("bytes={range_start}-{range_end}")) + .send() + .await?; + + if !response.status().is_success() { + return Err(classify_http_status(response.status())); + } + + let merged_bytes = response + .bytes() + .await + .map_err(|err| DirectFetchError::new(format!("failed to read body: {err}"), true))?; + + tracing::Span::current().record("bytes", merged_bytes.len()); + + // Extract individual chunks from the merged response. + // Deep copy each chunk to avoid holding the entire merged buffer alive. + chunks + .iter() + .map(|info| { + let start = info.offset_in_merged; + let end = start + info.length; + // Deep copy: prevents holding entire 16MB merged buffer in memory + let chunk_bytes = merged_bytes.get(start..end).ok_or_else(|| { + DirectFetchError::new( + format!( + "merged range shorter than expected: need {end} bytes, got {}", + merged_bytes.len() + ), + false, + ) + })?; + decode_chunk_from_bytes(chunk_bytes) + .map(|chunk_with_segment| (info.original_row_index, chunk_with_segment)) + }) + .collect::, _>>() +} diff --git a/crates/store/re_datafusion/src/dataframe_query_common.rs b/crates/store/re_datafusion/src/dataframe_query_common.rs index 2914d7b40288..987c26c2fde2 100644 --- a/crates/store/re_datafusion/src/dataframe_query_common.rs +++ b/crates/store/re_datafusion/src/dataframe_query_common.rs @@ -1,7 +1,8 @@ +use crate::analytics::QueryType; use crate::batch_coalescer::coalesce_exec::SizedCoalesceBatchesExec; use crate::batch_coalescer::coalescer::CoalescerOptions; 
use crate::pushdown_expressions::{apply_filter_expr_to_queries, filter_expr_is_supported}; -use ahash::HashSet; +use ahash::{HashMap, HashMapExt as _, HashSet}; use arrow::array::{ Array as _, ArrayRef, DurationNanosecondArray, FixedSizeBinaryArray, Int64Array, RecordBatch, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, @@ -18,8 +19,8 @@ use datafusion::logical_expr::{Expr, Operator, TableProviderFilterPushDown}; use datafusion::physical_plan::ExecutionPlan; use futures::StreamExt as _; use re_dataframe::external::re_chunk_store::ChunkStore; -use re_dataframe::{Index, IndexValue, QueryExpression}; -use re_log_types::EntryId; +use re_dataframe::{Index, IndexValue, QueryExpression, SparseFillStrategy}; +use re_log_types::{EntityPath, EntryId}; use re_protos::cloud::v1alpha1::ext::{Query, QueryDatasetRequest, QueryLatestAt, QueryRange}; use re_protos::cloud::v1alpha1::{ FetchChunksRequest, GetDatasetSchemaRequest, GetDatasetSchemaResponse, QueryDatasetResponse, @@ -27,7 +28,9 @@ use re_protos::cloud::v1alpha1::{ }; use re_protos::common::v1alpha1::ext::ScanParameters; use re_protos::headers::RerunHeadersInjectorExt as _; -use re_redap_client::{ConnectionClient, ConnectionRegistryHandle}; +use re_redap_client::{ApiError, ApiResult, ConnectionClient, ConnectionRegistryHandle}; + +use crate::IntoDfError as _; use re_sorbet::{BatchType, ChunkColumnDescriptors, ColumnKind, ComponentColumnSelector}; use re_uri::Origin; use std::any::Any; @@ -35,6 +38,8 @@ use std::cmp::Ordering; use std::collections::{BTreeMap, BTreeSet}; use std::str::FromStr as _; use std::sync::Arc; +use tracing::Instrument as _; +use web_time::Instant; /// Sets the size for output record batches in rows. The last batch will likely be smaller. /// The default for Data Fusion is 8192, which leads to a 256Kb record batch on average for @@ -63,6 +68,9 @@ pub struct DataframeQueryTableProvider { /// the entire operation under a single trace. 
#[cfg(not(target_arch = "wasm32"))] trace_headers: Option, + + /// Per-connection analytics sender for query stats. + analytics: Option, } /// This trait provides the specific methods used when interacting with the @@ -75,22 +83,18 @@ pub trait DataframeClientAPI: std::fmt::Debug + Clone + Send + Sync + Unpin + 's async fn get_dataset_schema( &mut self, request: tonic::Request, - ) -> Result, tonic::Status>; + ) -> tonic::Result>; async fn query_dataset( &mut self, request: tonic::Request, - ) -> std::result::Result< - tonic::Response>, - tonic::Status, - >; + ) -> tonic::Result>>; async fn fetch_chunks( &mut self, request: tonic::Request, - ) -> std::result::Result< + ) -> tonic::Result< tonic::Response>, - tonic::Status, >; } @@ -99,26 +103,22 @@ impl DataframeClientAPI for ConnectionClient { async fn get_dataset_schema( &mut self, request: tonic::Request, - ) -> Result, tonic::Status> { + ) -> tonic::Result> { self.inner().get_dataset_schema(request).await } async fn query_dataset( &mut self, request: tonic::Request, - ) -> std::result::Result< - tonic::Response>, - tonic::Status, - > { + ) -> tonic::Result>> { self.inner().query_dataset(request).await } async fn fetch_chunks( &mut self, request: tonic::Request, - ) -> std::result::Result< + ) -> tonic::Result< tonic::Response>, - tonic::Status, > { self.inner().fetch_chunks(request).await } @@ -128,7 +128,11 @@ impl DataframeQueryTableProvider { /// Create a table provider for a gRPC query. This function is async /// because we need to make gRPC calls to determine the schema at the /// creation of the table provider. + /// + /// If `arrow_schema` is `Some`, it is used directly and the `/GetDatasetSchema` + /// RPC is skipped — useful when the caller has already fetched the schema. 
#[tracing::instrument(level = "info", skip_all)] + #[cfg_attr(not(target_arch = "wasm32"), expect(clippy::too_many_arguments))] pub async fn new( origin: Origin, connection: ConnectionRegistryHandle, @@ -136,23 +140,55 @@ impl DataframeQueryTableProvider { query_expression: &QueryExpression, segment_ids: &[impl AsRef + Sync], index_values: IndexValuesMap, + arrow_schema: Option, #[cfg(not(target_arch = "wasm32"))] trace_headers: Option, - ) -> Result { - let client = connection - .client(origin) - .await - .map_err(|err| exec_datafusion_err!("{err}"))?; + ) -> ApiResult { + let client = connection.client(origin.clone()).await?; - Self::new_from_client( + let mut provider = Self::new_from_client( client, dataset_id, query_expression, segment_ids, index_values, + arrow_schema, #[cfg(not(target_arch = "wasm32"))] trace_headers, ) - .await + .await?; + + let analytics = crate::ConnectionAnalytics::new(origin); + + // Kick off a background fetch of the server version so subsequent analytics + // spans can be filtered by cloud build. Lazy-cached on `analytics`; the + // first query will ship without it, the rest will have it. 
+ { + let analytics_bg = analytics.clone(); + let mut client_bg = provider.client.clone(); + let fetch_fut = async move { + match client_bg.version_info().await { + Ok(response) => { + analytics_bg.set_server_version(Some(response.version)); + } + Err(err) => { + re_log::debug_once!("Failed to fetch server version for analytics: {err}"); + analytics_bg.set_server_version(None); + } + } + }; + + #[cfg(target_arch = "wasm32")] + wasm_bindgen_futures::spawn_local(fetch_fut); + + #[cfg(not(target_arch = "wasm32"))] + if let Ok(handle) = tokio::runtime::Handle::try_current() { + handle.spawn(fetch_fut); + } + } + + provider.analytics = Some(analytics); + + Ok(provider) } } @@ -164,28 +200,43 @@ impl DataframeQueryTableProvider { query_expression: &QueryExpression, segment_ids: &[impl AsRef + Sync], index_values: IndexValuesMap, + arrow_schema: Option, #[cfg(not(target_arch = "wasm32"))] trace_headers: Option, - ) -> Result { - let schema = client - .get_dataset_schema( - tonic::Request::new(GetDatasetSchemaRequest {}) - .with_entry_id(dataset_id) - .map_err(|err| exec_datafusion_err!("{err}"))?, - ) - .await - .map_err(|err| exec_datafusion_err!("{err}"))? - .into_inner() - .schema() - .map_err(|err| exec_datafusion_err!("{err}"))?; + ) -> ApiResult { + // Either use the caller-provided schema or fetch it from the server. 
+ let (schema, trace_id) = if let Some(schema) = arrow_schema { + (schema, None) + } else { + let request = tonic::Request::new(GetDatasetSchemaRequest {}) + .with_entry_id(dataset_id) + .map_err(|err| { + ApiError::internal_with_source(None, err, "attaching dataset entry_id header") + })?; + let response = client + .get_dataset_schema(request) + .await + .map_err(|err| ApiError::tonic(err, "get_dataset_schema"))?; + let trace_id = re_redap_client::extract_trace_id(response.metadata()); + let schema = response.into_inner().schema().map_err(|err| { + ApiError::deserialization_with_source(trace_id, err, "decoding dataset schema") + })?; + (schema, trace_id) + }; - let schema = compute_schema_for_query(&schema, query_expression)?; + let schema = compute_schema_for_query(&schema, query_expression).map_err(|err| { + // `compute_schema_for_query` fails when the caller-provided query + // references columns/entity-paths not present in the dataset schema + ApiError::invalid_arguments_with_source(trace_id, err, "computing schema for query") + })?; let select_all_entity_paths = false; let entity_paths = query_expression .view_contents .as_ref() - .map_or(vec![], |contents| contents.keys().collect::>()); + .map_or(vec![], |contents| { + contents.keys().cloned().collect::>() + }); let query = query_from_query_expression(query_expression); let fuzzy_descriptors: Vec = query_expression @@ -208,7 +259,7 @@ impl DataframeQueryTableProvider { .map(|id| id.as_ref().to_owned().into()) .collect(), chunk_ids: vec![], - entity_paths: entity_paths.into_iter().map(|p| (*p).clone()).collect(), + entity_paths, select_all_entity_paths, fuzzy_descriptors, exclude_static_data: false, @@ -218,6 +269,7 @@ impl DataframeQueryTableProvider { columns: FetchChunksRequest::required_column_names(), ..Default::default() }), + generate_direct_urls: true, }; let schema = Arc::new(prepend_string_column_schema( @@ -235,6 +287,7 @@ impl DataframeQueryTableProvider { index_values, #[cfg(not(target_arch = 
"wasm32"))] trace_headers, + analytics: None, }) } @@ -302,7 +355,6 @@ impl TableProvider for DataframeQueryTableProvider { TableType::Base } - #[tracing::instrument(level = "info", skip_all)] async fn scan( &self, state: &dyn Session, @@ -310,84 +362,182 @@ impl TableProvider for DataframeQueryTableProvider { filters: &[Expr], limit: Option, ) -> datafusion::common::Result> { - let mut dataset_queries = vec![self.query_dataset_request.clone()]; - for filter in filters { - if let Some(updated_queries) = - apply_filter_expr_to_queries(dataset_queries.clone(), filter, &self.schema)? + let scan_span = { + // Attach trace context BEFORE creating the span so the span is + // parented under the propagated trace + #[cfg(not(target_arch = "wasm32"))] + let _trace_guard = + crate::dataframe_query_provider::attach_trace_context(self.trace_headers.as_ref()); + + tracing::info_span!("scan") + }; + + async { + let scan_start_wall = web_time::SystemTime::now(); + let scan_start = Instant::now(); + + let mut dataset_queries = vec![self.query_dataset_request.clone()]; + for filter in filters { + if let Some(updated_queries) = + apply_filter_expr_to_queries(dataset_queries.clone(), filter, &self.schema)? + { + dataset_queries = updated_queries; + } + } + + // Entity path projection pushdown: narrow the server request to only + // fetch chunks for entity paths that are actually needed by the projection + // and filters. Skip when fill_latest_at is enabled, because timestamps + // from excluded entities would produce rows with filled values that the + // user expects. 
+ if self.query_expression.sparse_fill_strategy == SparseFillStrategy::None + && let Some(projected_paths) = projection.map(|projection| { + extract_projected_entity_paths(&self.schema, projection, filters) + }) + && !projected_paths.is_empty() { - dataset_queries = updated_queries; + for query in &mut dataset_queries { + if !query.select_all_entity_paths && !query.entity_paths.is_empty() { + query + .entity_paths + .retain(|path| projected_paths.contains(path)); + } + } } - } - let mut query_expression = self.query_expression.clone(); + let mut query_expression = self.query_expression.clone(); - let mut chunk_info_batches = Vec::with_capacity(dataset_queries.len()); + let mut chunk_info_batches = Vec::with_capacity(dataset_queries.len()); + let mut time_to_first_chunk_info: Option = None; - for dataset_query in dataset_queries { - let response_stream = self - .client - .clone() - .query_dataset( - tonic::Request::new(dataset_query.into()) - .with_entry_id(self.dataset_id) - .map_err(|err| exec_datafusion_err!("{err}"))?, - ) - .await - .map_err(|err| exec_datafusion_err!("{err}"))? - .into_inner(); + let mut trace_id: Option = None; - let batches: Vec = response_stream - .collect::>() - .await - .into_iter() - .collect::, _>>() - .map_err(|err| exec_datafusion_err!("{err}"))? 
- .into_iter() - .filter_map(|response| response.data) - .map(|dataframe_part| { - dataframe_part - .try_into() - .map_err(|err| exec_datafusion_err!("{err}")) - }) - .collect::, _>>()?; + for dataset_query in dataset_queries { + let query_start = Instant::now(); - chunk_info_batches.push(batches); - } - let chunk_info_batches = Arc::new(compute_unique_chunk_info_ids(chunk_info_batches)?); - - // Find the first column selection that is a component - if query_expression.filtered_is_not_null.is_none() { - let filters = filters.iter().collect::>(); - query_expression.filtered_is_not_null = - Self::compute_column_is_neq_null_filter(&filters) - .into_iter() - .flatten() - .next(); - } + let request = tonic::Request::new(dataset_query.into()) + .with_entry_id(self.dataset_id) + .map_err(|err| { + ApiError::internal_with_source( + None, + err, + "attaching dataset entry_id header", + ) + .into_df_error() + })?; + let response = self + .client + .clone() + .query_dataset(request) + .await + .map_err(|err| ApiError::tonic(err, "query_dataset").into_df_error())?; + + // Capture the server-side trace-id from response metadata. 
+ if trace_id.is_none() { + trace_id = re_redap_client::extract_trace_id(response.metadata()); + } - crate::SegmentStreamExec::try_new( - &self.schema, - self.sort_index, - projection, - state.config().target_partitions(), - chunk_info_batches, - query_expression, - self.index_values.clone(), - self.client.clone(), - #[cfg(not(target_arch = "wasm32"))] - self.trace_headers.clone(), - ) - .map(Arc::new) - .map(|exec| { - Arc::new(SizedCoalesceBatchesExec::new( - exec, - CoalescerOptions { - target_batch_rows: DEFAULT_BATCH_ROWS, - target_batch_bytes: DEFAULT_BATCH_BYTES, - max_rows: limit, - }, - )) as Arc - }) + let mut response_stream = response.into_inner(); + + while let Some(response) = response_stream.next().await { + if time_to_first_chunk_info.is_none() { + time_to_first_chunk_info = Some(query_start.elapsed()); + } + + let response = response.map_err(|err| { + ApiError::tonic(err, "query_dataset response stream") + .with_trace_id(trace_id) + .into_df_error() + })?; + let Some(dataframe_part) = response.data else { + continue; + }; + let batch: RecordBatch = dataframe_part.try_into().map_err(|err| { + ApiError::deserialization_with_source( + trace_id, + err, + "decoding query_dataset response batch", + ) + .into_df_error() + })?; + + chunk_info_batches.push(batch); + } + } + let chunk_info_batches = compute_unique_chunk_info_ids(chunk_info_batches)?; + + // Begin per-connection analytics tracking. + // Fetch stats will be accumulated by the IO loops; the event is sent on drop. 
+ let pending_analytics = self.analytics.as_ref().map(|analytics| { + let agg = chunk_info_batches + .as_ref() + .map(compute_chunk_info_aggregates) + .unwrap_or_default(); + + analytics.begin_query( + crate::analytics::QueryInfo { + dataset_id: self.dataset_id.to_string(), + query_chunks: agg.chunks, + query_segments: agg.segments, + query_layers: agg.layers, + query_columns: self.schema.fields().len(), + query_entities: self.query_dataset_request.entity_paths.len(), + query_bytes: agg.bytes, + query_chunks_per_segment_max: agg.chunks_per_segment_max, + query_chunks_per_segment_mean: agg.chunks_per_segment_mean, + query_type: QueryType::classify(&self.query_expression), + primary_index_name: self + .query_expression + .filtered_index + .map(|i| i.as_str().to_owned()), + time_range: scan_start_wall..web_time::SystemTime::now(), + time_to_first_chunk_info, + trace_id, + }, + scan_start, + ) + }); + + // Find the first column selection that is a component + if query_expression.filtered_is_not_null.is_none() { + let filters = filters.iter().collect::>(); + query_expression.filtered_is_not_null = + Self::compute_column_is_neq_null_filter(&filters) + .into_iter() + .flatten() + .next(); + } + + crate::SegmentStreamExec::try_new( + &self.schema, + self.sort_index, + projection, + state.config().target_partitions(), + chunk_info_batches, + query_expression, + self.index_values.clone(), + self.client.clone(), + limit, + #[cfg(not(target_arch = "wasm32"))] + self.trace_headers.clone(), + #[cfg(not(target_arch = "wasm32"))] + trace_id, + pending_analytics, + ) + .map(Arc::new) + .map(|exec| { + Arc::new(SizedCoalesceBatchesExec::new( + exec, + CoalescerOptions { + target_batch_rows: DEFAULT_BATCH_ROWS, + target_batch_bytes: DEFAULT_BATCH_BYTES, + max_rows: limit, + }, + )) as Arc + }) + } + .instrument(scan_span) + .await } fn supports_filters_pushdown( @@ -424,6 +574,52 @@ impl TableProvider for DataframeQueryTableProvider { } } +/// Extract entity paths referenced by the 
projected columns and filter expressions. +/// +/// Returns `None` when no narrowing is possible (`projection` is `None`). +/// Returns `Some(empty set)` when projection contains only non-entity columns +/// (e.g. time / `segment_id`) — caller should not narrow in this case. +fn extract_projected_entity_paths( + schema: &SchemaRef, + projection: &Vec, + filters: &[Expr], +) -> BTreeSet { + let mut entity_paths = BTreeSet::new(); + + // Collect entity paths from projected columns. + for &idx in projection { + if let Some(path) = entity_path_from_field(schema.field(idx)) { + entity_paths.insert(path); + } + } + + // Collect entity paths from filter-referenced columns. Filters may reference + // columns that aren't in the projection (e.g. `WHERE t.b > 5` with only `t.a` + // projected) — we must still fetch data for those entities. + for filter in filters { + for col_ref in filter.column_refs() { + if let Ok(field) = schema.field_with_name(col_ref.name()) + && let Some(path) = entity_path_from_field(field) + { + entity_paths.insert(path); + } + } + } + + entity_paths +} + +/// Extract an [`EntityPath`] from an Arrow field's metadata, if present. +/// +/// Component columns carry `rerun:entity_path` metadata; time/index columns +/// and the prepended `rerun_segment_id` column do not. +fn entity_path_from_field(field: &Field) -> Option { + field + .metadata() + .get(re_sorbet::metadata::SORBET_ENTITY_PATH) + .map(|s| EntityPath::from(&**s)) +} + /// Compute the output schema for a query on a dataset. When we call `get_dataset_schema` /// on the Data Platform, we will get the schema for all entities and all components. This /// method is used to down select from that full schema based on `query_expression`. 
@@ -484,7 +680,7 @@ fn compute_schema_for_query( ))) } -#[tracing::instrument(level = "info", skip_all)] +#[tracing::instrument(level = "trace", skip_all)] pub(crate) fn prepend_string_column_schema(schema: &Schema, column_name: &str) -> Schema { let mut fields = vec![Field::new(column_name, DataType::Utf8, false)]; fields.extend(schema.fields().iter().map(|f| (**f).clone())); @@ -502,11 +698,11 @@ pub(crate) fn prepend_string_column_schema(schema: &Schema, column_name: &str) - /// see `SegmentStreamExec::try_new` for more details. #[tracing::instrument(level = "trace", skip_all)] pub(crate) fn group_chunk_infos_by_segment_id( - chunk_info_batches: &Arc>, + chunk_info_batches: &[RecordBatch], ) -> Result>>, DataFusionError> { - let mut results = BTreeMap::new(); + let mut results: BTreeMap> = BTreeMap::new(); - for batch in chunk_info_batches.as_ref() { + for batch in chunk_info_batches { let segment_ids = batch .column_by_name(QueryDatasetResponse::FIELD_CHUNK_SEGMENT_ID) .ok_or(exec_datafusion_err!( @@ -540,10 +736,7 @@ pub(crate) fn group_chunk_infos_by_segment_id( let segment_batch = re_arrow_util::take_record_batch(batch, &row_indices)?; - results - .entry(segment_id) - .or_insert_with(Vec::new) - .push(segment_batch); + results.entry(segment_id).or_default().push(segment_batch); } } @@ -584,6 +777,78 @@ pub(crate) fn time_array_ref_to_i64(time_array: &ArrayRef) -> Result ChunkInfoAggregates { + use arrow::array::UInt64Array; + + let chunks = batch.num_rows(); + + /// Downcasts `column_name` to array type `T` and iterates over its non-null values. + fn iter_column_values<'a, T: Any>( + batch: &'a RecordBatch, + column_name: &str, + ) -> Option::IntoIter>> + where + &'a T: IntoIterator, + { + let arr = batch + .column_by_name(column_name)? 
+ .as_any() + .downcast_ref::()?; + Some(arr.into_iter().flatten()) + } + + // Segment count + per-segment histogram in one pass + let mut per_segment: HashMap<&str, u32> = HashMap::new(); + if let Some(items) = + iter_column_values::(batch, QueryDatasetResponse::FIELD_CHUNK_SEGMENT_ID) + { + for v in items { + *per_segment.entry(v).or_default() += 1; + } + } + let segments = per_segment.len(); + let chunks_per_segment_max = per_segment.into_values().max().unwrap_or(0); + let chunks_per_segment_mean = if segments == 0 { + 0.0 + } else { + // chunks fits in u32 for realistic queries; precision loss is acceptable for analytics. + chunks as f32 / segments as f32 + }; + + let layers = + iter_column_values::(batch, QueryDatasetResponse::FIELD_CHUNK_LAYER_NAME) + .map(|iter| iter.collect::>().len()) + .unwrap_or(0); + + let bytes: u64 = + iter_column_values::(batch, QueryDatasetResponse::FIELD_CHUNK_BYTE_LENGTH) + .map_or(0, Iterator::sum); + + ChunkInfoAggregates { + chunks, + segments, + layers, + bytes, + chunks_per_segment_max, + chunks_per_segment_mean, + } +} + pub fn query_from_query_expression(query_expression: &QueryExpression) -> Query { let latest_at = if query_expression.is_static() { Some(QueryLatestAt::new_static()) @@ -604,7 +869,7 @@ pub fn query_from_query_expression(query_expression: &QueryExpression) -> Query }), columns_always_include_everything: false, columns_always_include_entity_paths: false, - columns_always_include_byte_offsets: false, + columns_always_include_byte_offsets: true, // so we know exactly what to fetch from direct URLs columns_always_include_static_indexes: false, columns_always_include_global_indexes: false, columns_always_include_component_indexes: false, @@ -612,19 +877,19 @@ pub fn query_from_query_expression(query_expression: &QueryExpression) -> Query } fn compute_unique_chunk_info_ids( - chunk_info_batches: Vec>, -) -> Result, DataFusionError> { - let batches: Vec<_> = chunk_info_batches.into_iter().flatten().collect(); - if 
batches.is_empty() { - return Ok(vec![]); + chunk_info_batches: Vec, +) -> Result, DataFusionError> { + if chunk_info_batches.is_empty() { + return Ok(None); } - let schema = batches[0].schema(); - let combined = concat_batches(&schema, &batches)?; + let schema = chunk_info_batches[0].schema(); + let combined = concat_batches(&schema, &chunk_info_batches)?; + drop(chunk_info_batches); // Find the chunk_id column let chunk_id_col = combined - .column_by_name("chunk_id") + .column_by_name(QueryDatasetResponse::FIELD_CHUNK_ID) .ok_or(exec_datafusion_err!("chunk_id column not found"))?; let chunk_id_array = chunk_id_col @@ -650,16 +915,16 @@ fn compute_unique_chunk_info_ids( let distinct_columns = arrow::compute::take_arrays(combined.columns(), &indices, None)?; - Ok(vec![RecordBatch::try_new_with_options( + Ok(Some(RecordBatch::try_new_with_options( schema, distinct_columns, &RecordBatchOptions::default(), - )?]) + )?)) } #[cfg(test)] mod tests { - use std::collections::HashMap; + use std::{collections::HashMap, iter::once}; use arrow::array::{Array as _, FixedSizeBinaryArray, FixedSizeBinaryBuilder}; @@ -724,7 +989,7 @@ mod tests { let group_a = grouped.get("A").unwrap(); assert_eq!(group_a.len(), 1); let chunk_ids_a = group_a[0] - .column_by_name("chunk_id") + .column_by_name(QueryDatasetResponse::FIELD_CHUNK_ID) .unwrap() .as_any() .downcast_ref::() @@ -736,7 +1001,7 @@ mod tests { let group_b = grouped.get("B").unwrap(); assert_eq!(group_b.len(), 2); let chunk_ids_b1 = group_b[0] - .column_by_name("chunk_id") + .column_by_name(QueryDatasetResponse::FIELD_CHUNK_ID) .unwrap() .as_any() .downcast_ref::() @@ -744,7 +1009,7 @@ mod tests { assert_eq!(chunk_ids_b1.len(), 1); assert_eq!(chunk_ids_b1.value(0), [1u8; 16]); let chunk_ids_b2 = group_b[1] - .column_by_name("chunk_id") + .column_by_name(QueryDatasetResponse::FIELD_CHUNK_ID) .unwrap() .as_any() .downcast_ref::() @@ -755,7 +1020,7 @@ mod tests { let group_c = grouped.get("C").unwrap(); assert_eq!(group_c.len(), 
2); let chunk_ids_c1 = group_c[0] - .column_by_name("chunk_id") + .column_by_name(QueryDatasetResponse::FIELD_CHUNK_ID) .unwrap() .as_any() .downcast_ref::() @@ -763,7 +1028,7 @@ mod tests { assert_eq!(chunk_ids_c1.len(), 1); assert_eq!(chunk_ids_c1.value(0), [3u8; 16]); let chunk_ids_c2 = group_c[1] - .column_by_name("chunk_id") + .column_by_name(QueryDatasetResponse::FIELD_CHUNK_ID) .unwrap() .as_any() .downcast_ref::() @@ -774,7 +1039,7 @@ mod tests { let group_d = grouped.get("D").unwrap(); assert_eq!(group_d.len(), 1); let chunk_ids_d = group_d[0] - .column_by_name("chunk_id") + .column_by_name(QueryDatasetResponse::FIELD_CHUNK_ID) .unwrap() .as_any() .downcast_ref::() @@ -782,4 +1047,208 @@ mod tests { assert_eq!(chunk_ids_d.len(), 1); assert_eq!(chunk_ids_d.value(0), [6u8; 16]); } + + // ==================== Entity path projection pushdown tests ==================== + + /// Build a schema mimicking `DataframeQueryTableProvider`'s output schema: + /// - Index 0: `rerun_segment_id` (Utf8, no entity path metadata) + /// - Index 1: `log_time` (Int64, with `rerun:kind=index` metadata) + /// - Index 2: `/points:Position3D:positions` (component, `entity_path=/points`) + /// - Index 3: `/points:Color:colors` (component, `entity_path=/points`) + /// - Index 4: `/cameras:Transform3D:transform` (component, `entity_path=/cameras`) + fn make_schema_with_entities() -> SchemaRef { + use re_sorbet::metadata::{RERUN_KIND, SORBET_ENTITY_PATH}; + + let index_metadata = HashMap::from([(RERUN_KIND.to_owned(), "index".to_owned())]); + let points_metadata = + HashMap::from([(SORBET_ENTITY_PATH.to_owned(), "/points".to_owned())]); + let cameras_metadata = + HashMap::from([(SORBET_ENTITY_PATH.to_owned(), "/cameras".to_owned())]); + + Arc::new(Schema::new_with_metadata( + vec![ + Field::new("rerun_segment_id", DataType::Utf8, false), + Field::new("log_time", DataType::Int64, false).with_metadata(index_metadata), + Field::new("/points:Position3D:positions", DataType::Utf8, true) + 
.with_metadata(points_metadata.clone()), + Field::new("/points:Color:colors", DataType::Utf8, true) + .with_metadata(points_metadata), + Field::new("/cameras:Transform3D:transform", DataType::Utf8, true) + .with_metadata(cameras_metadata), + ], + HashMap::new(), + )) + } + + #[test] + fn test_projection_single_entity() { + let schema = make_schema_with_entities(); + // Select seg_id + log_time + both /points columns + let projection = vec![0, 1, 2, 3]; + let paths = extract_projected_entity_paths(&schema, &projection, &[]); + assert_eq!(paths.len(), 1); + assert!(paths.contains(&EntityPath::from("/points"))); + } + + #[test] + fn test_projection_multiple_entities() { + let schema = make_schema_with_entities(); + // Select seg_id + one /points col + /cameras col + let projection = vec![0, 2, 4]; + let paths = extract_projected_entity_paths(&schema, &projection, &[]); + assert_eq!(paths.len(), 2); + assert!(paths.contains(&EntityPath::from("/points"))); + assert!(paths.contains(&EntityPath::from("/cameras"))); + } + + #[test] + fn test_projection_only_non_entity_cols() { + let schema = make_schema_with_entities(); + // Select only seg_id + log_time — no entity paths + let projection = vec![0, 1]; + let paths = extract_projected_entity_paths(&schema, &projection, &[]); + assert!(paths.is_empty()); + } + + #[test] + fn test_filter_adds_entity_paths() { + use datafusion::logical_expr::col; + + let schema = make_schema_with_entities(); + // Project only /points column + let projection = vec![0, 2]; + // Filter references /cameras column + let filters = vec![col("/cameras:Transform3D:transform").is_not_null()]; + let paths = extract_projected_entity_paths(&schema, &projection, &filters); + assert_eq!(paths.len(), 2); + assert!(paths.contains(&EntityPath::from("/points"))); + assert!(paths.contains(&EntityPath::from("/cameras"))); + } + + #[test] + fn test_filter_with_non_entity_cols_only() { + use datafusion::logical_expr::{col, lit}; + + let schema = 
make_schema_with_entities(); + // Project only /points column + let projection = vec![0, 2]; + // Filter references segment_id (no entity path) and time index (no entity path) + let filters = vec![ + col("rerun_segment_id").eq(lit("seg_a")), + col("log_time").gt(lit(100_i64)), + ]; + let paths = extract_projected_entity_paths(&schema, &projection, &filters); + // Only /points from projection — filters don't add entity paths + assert_eq!(paths.len(), 1); + assert!(paths.contains(&EntityPath::from("/points"))); + } + + #[test] + fn test_narrowing_intersects_with_original() { + let projected_paths: BTreeSet = once(EntityPath::from("/points")).collect(); + let mut query = QueryDatasetRequest { + entity_paths: vec![ + EntityPath::from("/points"), + EntityPath::from("/cameras"), + EntityPath::from("/meshes"), + ], + select_all_entity_paths: false, + ..Default::default() + }; + + query + .entity_paths + .retain(|path| projected_paths.contains(path)); + + assert_eq!(query.entity_paths, vec![EntityPath::from("/points")]); + } + + #[test] + fn test_narrowing_empty_projected_no_change() { + let projected_paths: BTreeSet = BTreeSet::new(); + let mut query = QueryDatasetRequest { + entity_paths: vec![EntityPath::from("/points"), EntityPath::from("/cameras")], + select_all_entity_paths: false, + ..Default::default() + }; + let original = query.entity_paths.clone(); + + // Empty projected_paths → caller should skip narrowing + if !projected_paths.is_empty() { + query + .entity_paths + .retain(|path| projected_paths.contains(path)); + } + + assert_eq!(query.entity_paths, original); + } + + #[test] + fn test_narrowing_select_all_no_change() { + let projected_paths: BTreeSet = once(EntityPath::from("/points")).collect(); + let mut query = QueryDatasetRequest { + entity_paths: vec![], + select_all_entity_paths: true, + ..Default::default() + }; + + // select_all_entity_paths=true → skip narrowing + if !query.select_all_entity_paths && !query.entity_paths.is_empty() { + query + 
.entity_paths + .retain(|path| projected_paths.contains(path)); + } + + assert!(query.entity_paths.is_empty()); + assert!(query.select_all_entity_paths); + } + + #[test] + fn test_narrowing_preserves_multiple_queries() { + let projected_paths: BTreeSet = once(EntityPath::from("/points")).collect(); + let mut queries = vec![ + QueryDatasetRequest { + entity_paths: vec![EntityPath::from("/points"), EntityPath::from("/cameras")], + select_all_entity_paths: false, + ..Default::default() + }, + QueryDatasetRequest { + entity_paths: vec![EntityPath::from("/points"), EntityPath::from("/meshes")], + select_all_entity_paths: false, + ..Default::default() + }, + ]; + + for query in &mut queries { + if !query.select_all_entity_paths && !query.entity_paths.is_empty() { + query + .entity_paths + .retain(|path| projected_paths.contains(path)); + } + } + + assert_eq!(queries[0].entity_paths, vec![EntityPath::from("/points")]); + assert_eq!(queries[1].entity_paths, vec![EntityPath::from("/points")]); + } + + #[test] + fn test_narrowing_skipped_with_fill_latest_at() { + let projected_paths: BTreeSet = once(EntityPath::from("/points")).collect(); + let mut query = QueryDatasetRequest { + entity_paths: vec![EntityPath::from("/points"), EntityPath::from("/cameras")], + select_all_entity_paths: false, + ..Default::default() + }; + let original = query.entity_paths.clone(); + + // Simulate fill_latest_at=true check + let sparse_fill_strategy = SparseFillStrategy::LatestAtGlobal; + if sparse_fill_strategy == SparseFillStrategy::None && !projected_paths.is_empty() { + query + .entity_paths + .retain(|path| projected_paths.contains(path)); + } + + assert_eq!(query.entity_paths, original); + } } diff --git a/crates/store/re_datafusion/src/dataframe_query_provider.rs b/crates/store/re_datafusion/src/dataframe_query_provider.rs index e2ef9c8c268f..c6305b5c7c67 100644 --- a/crates/store/re_datafusion/src/dataframe_query_provider.rs +++ 
b/crates/store/re_datafusion/src/dataframe_query_provider.rs @@ -1,10 +1,15 @@ use std::any::Any; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashSet}; use std::fmt::Debug; use std::pin::Pin; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use std::task::{Context, Poll}; +use crate::analytics::{QueryErrorKind, TaskFetchStats}; +use crate::chunk_fetcher::{ + SortedChunksWithSegment, batch_byte_size, batch_has_any_direct_urls, fetch_batch_direct, + fetch_batch_group_via_grpc, split_batch_by_direct_url, +}; use crate::dataframe_query_common::{ DataframeClientAPI, IndexValuesMap, group_chunk_infos_by_segment_id, prepend_string_column_schema, @@ -23,6 +28,7 @@ use datafusion::physical_expr::{ }; use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties}; +use futures::StreamExt as _; use futures_util::{FutureExt as _, Stream}; use re_dataframe::external::re_chunk::Chunk; use re_dataframe::external::re_chunk_store::ChunkStore; @@ -31,16 +37,19 @@ use re_dataframe::{ ChunkStoreHandle, Index, QueryCache, QueryEngine, QueryExpression, QueryHandle, StorageEngine, }; use re_log_types::{ApplicationId, StoreId, StoreKind}; -use re_protos::cloud::v1alpha1::{FetchChunksRequest, ScanSegmentTableResponse}; -use re_redap_client::ApiResult; +use re_protos::cloud::v1alpha1::ScanSegmentTableResponse; +use re_redap_client::{ApiError, ApiResult}; + +use crate::IntoDfError as _; use re_sorbet::{ColumnDescriptor, ColumnSelector}; use tokio::runtime::Handle; use tokio::sync::Notify; use tokio::sync::mpsc::{Receiver, Sender}; use tokio::task::JoinHandle; -use tonic::IntoRequest as _; use tracing::Instrument as _; +// TODO(zehiko) make these configurable + /// This parameter sets the back pressure that either the streaming provider /// can place on the CPU worker thread or the CPU worker thread can place on /// the IO stream. 
@@ -52,7 +61,18 @@ const CPU_THREAD_IO_CHANNEL_SIZE: usize = 32; const TARGET_BATCH_SIZE_BYTES: usize = 8 * 1024 * 1024; // 8 MB /// How many concurrent requests to make to the server when fetching chunks. -const TARGET_CONCURRENCY: usize = 12; +const GRPC_BATCH_SIZE: usize = 12; + +/// Max batch-level futures in-flight at once in the IO pipeline. +/// This bounds both concurrency and the reorder buffer size. +const IO_PIPELINE_BUFFER: usize = 24; + +/// Environment variable to force the client to go through the `FetchChunks` data fetching path. +static CHUNK_STRATEGY: LazyLock = LazyLock::new(|| { + std::env::var("RERUN_CHUNK_STRATEGY") + .unwrap_or_default() + .to_ascii_lowercase() +}); /// Helper to attach parent trace context if available. /// Returns a guard that must be kept alive for the duration of the traced scope. @@ -60,25 +80,16 @@ const TARGET_CONCURRENCY: usize = 12; /// parented by a single trace. #[cfg(not(target_arch = "wasm32"))] #[inline] -fn attach_trace_context( - trace_headers: &Option, +#[must_use] +pub(crate) fn attach_trace_context( + trace_headers: Option<&crate::TraceHeaders>, ) -> Option { - let headers = trace_headers.as_ref()?; - if !headers.traceparent.is_empty() { - let parent_ctx = - re_perf_telemetry::external::opentelemetry::global::get_text_map_propagator(|prop| { - prop.extract(headers) - }); - Some(parent_ctx.attach()) - } else { - None - } + trace_headers?.attach() } #[derive(Debug)] pub(crate) struct SegmentStreamExec { props: PlanProperties, - chunk_info_batches: Arc>, index_values: IndexValuesMap, /// Describes the chunks per partition, derived from `chunk_info_batches`. @@ -93,32 +104,60 @@ pub(crate) struct SegmentStreamExec { worker_runtime: Arc, client: T, - /// passing trace headers between phases of execution pipeline helps keep + /// Optional row limit pushed down from the scan. When set, background + /// threads will stop fetching/processing data once this many rows have + /// been produced. 
+ limit: Option, + + /// Request trace-headers. + /// Passing trace headers between phases of execution pipeline helps keep /// the entire operation under a single trace. trace_headers: Option, + + /// Server-assigned response trace-id for this scan. + /// This may or may not match request `trace_headers`. + server_trace_id: Option, + + /// Pending query analytics — fetch stats are accumulated here. + /// The event is sent when the last clone is dropped. + pending_analytics: Option, } -type ChunksWithSegment = Vec<(Chunk, Option)>; +use crate::chunk_fetcher::ChunksWithSegment; pub struct DataframeSegmentStreamInner { projected_schema: SchemaRef, client: T, chunk_infos: Vec, - chunk_tx: Option>>, + chunk_tx: Option>>, store_output_channel: Receiver, - io_join_handle: Option>>, + io_join_handle: Option>>, /// We must keep a handle on the cpu runtime because the execution plan /// is dropped during streaming. We need this handle to continue to exist /// so that our worker does not shut down unexpectedly. #[expect(dead_code)] cpu_runtime: Arc, - cpu_join_handle: Option>>, + cpu_join_handle: Option>>, - /// passing trace headers between phases of execution pipeline helps keep + /// Request trace-headers. + /// Passing trace headers between phases of execution pipeline helps keep /// the entire operation under a single trace. trace_headers: Option, + + /// The `execute` span, captured at stream creation so that work spawned + /// later from `poll_next` (notably `chunk_io_pipeline`) stays nested under + /// `execute` instead of surfacing as a sibling when `execute`'s entered + /// guard has already been dropped. + execute_span: tracing::Span, + + /// Server-assigned response trace-id for this scan. + /// This may or may not match request `trace_headers`. + server_trace_id: Option, + + /// Pending query analytics — keeps alive until stream completes. 
+ pending_analytics: Option, } /// This is a temporary fix to minimize the impact of leaking memory @@ -143,48 +182,83 @@ impl Stream for DataframeSegmentStream { }; #[cfg(not(target_arch = "wasm32"))] - let _trace_guard = attach_trace_context(&this.trace_headers); - let _span = tracing::info_span!("poll_next").entered(); + let _trace_guard = attach_trace_context(this.trace_headers.as_ref()); - // If we have any errors on the worker thread, we want to ensure we pass them up - // through the stream. - if this - .cpu_join_handle - .as_ref() - .map(|h| h.is_finished()) - .unwrap_or(false) + // If we have any errors on the worker threads, we want to ensure we pass them up + // through the stream. Any `ApiError` that didn't already carry a trace-id + // picks up the scan's server trace-id on the way out. + if let Some(join_handle) = this.cpu_join_handle.take_if(|h| h.is_finished()) + && let Some(cpu_join_result) = join_handle.now_or_never() { - let Some(join_handle) = this.cpu_join_handle.take() else { - return Poll::Ready(Some(exec_err!("CPU join handle is None"))); - }; - - // Below is safe because we have already checked is_finished - let cpu_join_result = join_handle.now_or_never().expect("is_finished is true"); - match cpu_join_result { - Err(err) => return Poll::Ready(Some(exec_err!("{err}"))), - Ok(Err(err)) => return Poll::Ready(Some(Err(err))), + Err(err) => { + if let Some(analytics) = &this.pending_analytics { + analytics.record_error(QueryErrorKind::Decode); + } + return Poll::Ready(Some(exec_err!("{err}"))); + } + Ok(Err(err)) => { + if let Some(analytics) = &this.pending_analytics { + analytics.record_error(QueryErrorKind::Decode); + } + return Poll::Ready(Some(Err(err + .with_trace_id(this.server_trace_id) + .into_df_error()))); + } Ok(Ok(())) => {} } } - // If this is the first call, we are uninitialized so create the io worker - if this.io_join_handle.is_none() { - let io_handle = Handle::current(); + // Also check the IO task — if it failed (e.g. 
gRPC 500 from FetchChunks), + // the error would otherwise be silently lost because the CPU task sees a + // closed channel and exits with Ok(()). + if let Some(join_handle) = this.io_join_handle.take_if(|h| h.is_finished()) + && let Some(io_join_result) = join_handle.now_or_never() + { + match io_join_result { + Err(err) => { + if let Some(analytics) = &this.pending_analytics { + analytics.record_error(QueryErrorKind::Other); + } + return Poll::Ready(Some(exec_err!("{err}"))); + } + Ok(Err(err)) => { + if let Some(analytics) = &this.pending_analytics { + // The IO task's own error-recording will have already set a + // more specific kind (direct_fetch / grpc_fetch) via OnceLock, + // so this call is a no-op fallback if that hasn't happened. + analytics.record_error(QueryErrorKind::Other); + } + return Poll::Ready(Some(Err(err + .with_trace_id(this.server_trace_id) + .into_df_error()))); + } + Ok(Ok(())) => {} + } + } - // In order to properly drop the tx so the channel closes, do not clone it. - let Some(chunk_tx) = this.chunk_tx.take() else { - return Poll::Ready(Some(exec_err!("No tx for chunks from CPU thread"))); - }; + // If this is the first call, we are uninitialized so create the io worker. + // We check both `io_join_handle` (not yet spawned) and `chunk_tx` (not yet taken) + // because the IO check above can consume `io_join_handle` after it finishes. + if this.io_join_handle.is_none() + && let Some(chunk_tx) = this.chunk_tx.take() + { + let io_handle = Handle::current(); let client = this.client.clone(); let chunk_infos = this.chunk_infos.clone(); - let current_span = tracing::Span::current(); + let pending_analytics = this.pending_analytics.clone(); + + // Parent the IO pipeline under `execute`, not under whichever + // caller happens to poll us first. 
+ let io_span = tracing::info_span!(parent: &this.execute_span, "chunk_io_pipeline"); this.io_join_handle = Some( io_handle.spawn( - async move { chunk_stream_io_loop(client, chunk_infos, chunk_tx).await } - .instrument(current_span.clone()), + async move { + chunk_stream_io_loop(client, chunk_infos, chunk_tx, pending_analytics).await + } + .instrument(io_span), ), ); } @@ -194,6 +268,15 @@ impl Stream for DataframeSegmentStream { .poll_recv(cx) .map(|result| Ok(result).transpose()); + if matches!(&result, Poll::Ready(Some(Ok(_)))) + && let Some(analytics) = &this.pending_analytics + { + // This could be the first time we return data that will + // actually be shown to the user. + // This is as close to the perceived latency as we're gonna come right now. + analytics.record_first_chunk(); + } + if matches!(&result, Poll::Ready(None)) { this_outer.inner = None; } @@ -219,20 +302,21 @@ impl SegmentStreamExec { sort_index: Option, projection: Option<&Vec>, num_partitions: usize, - chunk_info_batches: Arc>, + chunk_info_batches: Option, mut query_expression: QueryExpression, index_values: IndexValuesMap, client: T, + limit: Option, trace_headers: Option, + server_trace_id: Option, + pending_analytics: Option, ) -> datafusion::common::Result { let projected_schema = match projection { Some(p) => Arc::new(table_schema.project(p)?), None => Arc::clone(table_schema), }; - if let Some(projected_cols) = projection - && !projected_cols.is_empty() - { + if projection.is_some_and(|projection| !projection.is_empty()) { let selection = projected_schema .fields() .iter() @@ -310,13 +394,13 @@ impl SegmentStreamExec { Boundedness::Bounded, ); - let chunk_info = group_chunk_infos_by_segment_id(&chunk_info_batches)?; + let chunk_info = group_chunk_infos_by_segment_id(chunk_info_batches.as_slice())?; + drop(chunk_info_batches); let worker_runtime = Arc::new(CpuRuntime::try_new(num_partitions)?); Ok(Self { props, - chunk_info_batches, chunk_info, query_expression, index_values, @@ 
-324,7 +408,10 @@ impl SegmentStreamExec { target_partitions: num_partitions, worker_runtime, client, + limit, trace_headers, + server_trace_id, + pending_analytics, }) } } @@ -335,7 +422,14 @@ async fn send_next_row( segment_id: &str, target_schema: &Arc, output_channel: &Sender, -) -> Result, DataFusionError> { + rows_sent: &mut usize, + limit: Option, +) -> ApiResult> { + // If we have already sent enough rows, stop early. + if limit.is_some_and(|l| *rows_sent >= l) { + return Ok(None); + } + let query_schema = Arc::clone(query_handle.schema()); let num_fields = query_schema.fields.len(); @@ -348,7 +442,9 @@ async fn send_next_row( return Ok(None); } if num_fields != next_row.len() { - return plan_err!("Unexpected number of columns returned from query"); + return Err(ApiError::internal( + "Unexpected number of columns returned from query", + )); } let num_rows = next_row[0].len(); @@ -366,98 +462,182 @@ async fn send_next_row( batch_schema, next_row, &RecordBatchOptions::default().with_row_count(Some(num_rows)), - )?; + ) + .map_err(|err| { + ApiError::deserialization_with_source( + None, + err, + "building output record batch from chunk-store rows", + ) + })?; + + // align the batch to the target schema, this should be always possible + // by construction. 
+ let output_batch = align_record_batch_to_schema(&batch, target_schema).map_err(|err| { + ApiError::internal_with_source(None, err, "DataFusion schema mismatch error") + })?; + + // Slice the batch to respect the row limit + let output_batch = if let Some(limit) = limit { + let remaining = limit.saturating_sub(*rows_sent); + if remaining == 0 { + return Ok(None); + } + if output_batch.num_rows() > remaining { + output_batch.slice(0, remaining) + } else { + output_batch + } + } else { + output_batch + }; - let output_batch = align_record_batch_to_schema(&batch, target_schema)?; + *rows_sent += output_batch.num_rows(); output_channel .send(output_batch) .await - .map_err(|err| exec_datafusion_err!("{err}"))?; + .map_err(|err| ApiError::internal_with_source(None, err, "output channel closed"))?; Ok(Some(())) } // TODO(#10781) - support for sending intermediate results/chunks -#[tracing::instrument(level = "trace", skip_all)] async fn chunk_store_cpu_worker_thread( - mut input_channel: Receiver>, + mut input_channel: Receiver>, output_channel: Sender, query_expression: QueryExpression, projected_schema: Arc, index_values: IndexValuesMap, -) -> Result<(), DataFusionError> { - let mut current_stores: Option<(String, ChunkStoreHandle, QueryHandle)> = None; - while let Some(chunks_and_segment_ids) = input_channel.recv().await { - let chunks_and_segment_ids = - chunks_and_segment_ids.map_err(|err| exec_datafusion_err!("{err}"))?; - - for (chunk, segment_id) in chunks_and_segment_ids { - let segment_id = segment_id - .ok_or_else(|| exec_datafusion_err!("Received chunk without a segment id"))?; - if let Some(idx_values) = &index_values - && !idx_values.contains_key(&segment_id) - { - continue; + limit: Option, +) -> ApiResult<()> { + struct CurrentStores { + segment_id: String, + store: ChunkStoreHandle, + query_handle: QueryHandle, + } + + impl CurrentStores { + #[tracing::instrument(level = "info", skip_all, fields(segment_id = %segment_id))] + fn new( + segment_id: 
String, + query_expression: &QueryExpression, + index_values: &IndexValuesMap, + ) -> Self { + let store_id = StoreId::random( + StoreKind::Recording, + ApplicationId::from(segment_id.as_str()), + ); + let store = ChunkStore::new_handle(store_id.clone(), Default::default()); + + let query_engine = + QueryEngine::new(store.clone(), QueryCache::new_handle(store.clone())); + let mut individual_query = query_expression.clone(); + + let values = index_values + .as_ref() + .and_then(|index_values| index_values.get(&segment_id)); + if let Some(values) = values { + individual_query.using_index_values = Some(values.clone()); } - if let Some((current_segment, _, query_handle)) = &current_stores { - // When we change segments, flush the outputs - if current_segment != &segment_id { - while send_next_row( - query_handle, - current_segment.as_str(), - &projected_schema, - &output_channel, - ) - .await? - .is_some() - {} + let query_handle = query_engine.query(individual_query); - current_stores = None; - } + Self { + segment_id, + store, + query_handle, } + } - let current_stores = current_stores.get_or_insert_with(|| { - let store_id = StoreId::random( - StoreKind::Recording, - ApplicationId::from(segment_id.as_str()), - ); - let store = ChunkStore::new_handle(store_id.clone(), Default::default()); - - let query_engine = - QueryEngine::new(store.clone(), QueryCache::new_handle(store.clone())); - let mut individual_query = query_expression.clone(); - if let Some(values_map) = &index_values - && let Some(values) = values_map.get(&segment_id) - { - individual_query.using_index_values = Some(values.clone()); - } - let query_handle = query_engine.query(individual_query); + /// Flush all remaining rows from the query handle, respecting the row limit. 
+ #[tracing::instrument(level = "info", skip_all, fields(segment_id = %self.segment_id))] + async fn flush( + self, + projected_schema: &Arc, + output_channel: &Sender, + rows_sent: &mut usize, + limit: Option, + ) -> ApiResult<()> { + while send_next_row( + &self.query_handle, + &self.segment_id, + projected_schema, + output_channel, + rows_sent, + limit, + ) + .await? + .is_some() + {} + Ok(()) + } + } + let mut current_stores: Option = None; + let mut rows_sent: usize = 0; + loop { + // Time spent here = `cpu_worker` idle waiting for the IO pipeline to + // deliver the next batch of chunks. Short consecutive spans = healthy + // stream; one long dominating span = IO-starved worker. + let recv_span = tracing::info_span!("waiting_for_chunks"); + let Some(chunks_and_segment_ids) = input_channel.recv().instrument(recv_span).await else { + break; + }; + let (segment_id, chunks) = chunks_and_segment_ids?; + + if chunks.is_empty() { + continue; + } + + if index_values + .as_ref() + .is_some_and(|index_values| !index_values.contains_key(&segment_id)) + { + continue; + } + + // When we change segments, flush the outputs + if let Some(current_stores) = current_stores.take_if(|s| s.segment_id != segment_id) { + current_stores + .flush(&projected_schema, &output_channel, &mut rows_sent, limit) + .await?; - (segment_id.clone(), store, query_handle) - }); + if limit.is_some_and(|l| rows_sent >= l) { + return Ok(()); + } + } - let (_, store, _) = current_stores; + let CurrentStores { + store, segment_id, .. 
+ } = current_stores.get_or_insert_with(|| { + CurrentStores::new(segment_id, &query_expression, &index_values) + }); + let _insert_span = tracing::info_span!( + "insert_chunks", + segment_id = %segment_id, + n = chunks.len(), + ) + .entered(); + for chunk in chunks { store .write() .insert_chunk(&Arc::new(chunk)) - .map_err(|err| exec_datafusion_err!("{err}"))?; + .map_err(|err| { + ApiError::internal_with_source( + None, + err, + "inserting chunk into in-memory store", + ) + })?; } } // Flush out remaining of last segment - if let Some((final_segment, _, query_handle)) = &mut current_stores.as_mut() { - while send_next_row( - query_handle, - final_segment, - &projected_schema, - &output_channel, - ) - .await? - .is_some() - {} + if let Some(current_stores) = current_stores { + current_stores + .flush(&projected_schema, &output_channel, &mut rows_sent, limit) + .await?; } Ok(()) @@ -466,26 +646,26 @@ async fn chunk_store_cpu_worker_thread( /// Extract segment ID from a `chunk_info` `RecordBatch`. Each `chunk_info` batch contains /// chunks *for a single segment*, hence we can just take the first row's `segment_id`. This is /// guaranteed by the implementation in `group_chunk_infos_by_segment_id`. -fn extract_segment_id(chunk_info: &RecordBatch) -> Result { +fn extract_segment_id(chunk_info: &RecordBatch) -> ApiResult { let segment_ids = chunk_info .column_by_name(re_protos::cloud::v1alpha1::QueryDatasetResponse::FIELD_CHUNK_SEGMENT_ID) - .ok_or_else(|| exec_datafusion_err!("Missing segment_id column"))? + .ok_or_else(|| ApiError::internal("missing segment_id column in chunk_info batch"))? .as_any() .downcast_ref::() - .ok_or_else(|| exec_datafusion_err!("segment_id column is not a string array"))?; + .ok_or_else(|| ApiError::internal("segment_id column is not a string array"))?; Ok(segment_ids.value(0).to_owned()) } /// Extract chunk sizes from a `chunk_info` `RecordBatch`. /// Returns a reference to the arrow array containing `chunk_byte_len` values. 
-fn extract_chunk_sizes(chunk_info: &RecordBatch) -> Result<&UInt64Array, DataFusionError> { +fn extract_chunk_sizes(chunk_info: &RecordBatch) -> ApiResult<&UInt64Array> { let chunk_sizes = chunk_info .column_by_name(re_protos::cloud::v1alpha1::QueryDatasetResponse::FIELD_CHUNK_BYTE_LENGTH) - .ok_or_else(|| exec_datafusion_err!("Missing chunk_byte_len column"))? + .ok_or_else(|| ApiError::internal("missing chunk_byte_len column in chunk_info batch"))? .as_any() .downcast_ref::() - .ok_or_else(|| exec_datafusion_err!("chunk_byte_len column is not a uint64 array"))?; + .ok_or_else(|| ApiError::internal("chunk_byte_len column is not a uint64 array"))?; Ok(chunk_sizes) } @@ -502,11 +682,16 @@ type BatchingResult = (Vec, Vec); fn create_request_batches( chunk_infos: Vec, target_size_bytes: u64, -) -> Result { +) -> ApiResult { + let merge_err = |err: arrow::error::ArrowError, ctx: &'static str| { + ApiError::deserialization_with_source(None, err, ctx) + }; + let mut request_batches = Vec::new(); let mut current_batch = Vec::new(); let mut current_batch_size = 0u64; let mut segment_order = Vec::new(); + let mut segment_seen = HashSet::new(); for chunk_info in chunk_infos { let segment_id = extract_segment_id(&chunk_info)?; @@ -514,7 +699,7 @@ fn create_request_batches( let segment_size: u64 = chunk_sizes.iter().map(|v| v.unwrap_or(0)).sum(); // Track original segment order - if !segment_order.contains(&segment_id) { + if segment_seen.insert(segment_id.clone()) { segment_order.push(segment_id.clone()); } @@ -522,7 +707,7 @@ fn create_request_batches( if !current_batch.is_empty() && current_batch_size + segment_size > target_size_bytes { // Merge current batch and add to results let merged_batch = re_arrow_util::concat_polymorphic_batches(¤t_batch) - .map_err(|err| exec_datafusion_err!("Failed to merge batch: {err}"))?; + .map_err(|err| merge_err(err, "merging chunk-info batches"))?; request_batches.push(merged_batch); current_batch = Vec::new(); current_batch_size = 0; @@ 
-533,7 +718,7 @@ fn create_request_batches( // If current batch is not empty, merge and send it first if !current_batch.is_empty() { let merged_batch = re_arrow_util::concat_polymorphic_batches(¤t_batch) - .map_err(|err| exec_datafusion_err!("Failed to merge batch: {err}"))?; + .map_err(|err| merge_err(err, "merging chunk-info batches"))?; request_batches.push(merged_batch); current_batch = Vec::new(); current_batch_size = 0; @@ -555,7 +740,7 @@ fn create_request_batches( // Don't forget to merge the last batch if !current_batch.is_empty() { let merged_batch = re_arrow_util::concat_polymorphic_batches(¤t_batch) - .map_err(|err| exec_datafusion_err!("Failed to merge final batch: {err}"))?; + .map_err(|err| merge_err(err, "merging final chunk-info batch"))?; request_batches.push(merged_batch); } @@ -576,7 +761,11 @@ fn split_large_segments( chunk_info: &RecordBatch, target_size: u64, chunk_sizes: &UInt64Array, -) -> Result, DataFusionError> { +) -> ApiResult> { + let take_err = |err: arrow::error::ArrowError| { + ApiError::deserialization_with_source(None, err, "slicing large segment into sub-batches") + }; + let mut result_batches = Vec::new(); let mut current_indices = Vec::new(); let mut current_size = 0u64; @@ -590,7 +779,8 @@ fn split_large_segments( current_size += chunk_size; } else { // Create batch from current indices - let batch = re_arrow_util::take_record_batch(chunk_info, ¤t_indices)?; + let batch = + re_arrow_util::take_record_batch(chunk_info, ¤t_indices).map_err(take_err)?; result_batches.push(batch); // Start new batch with current chunk @@ -601,7 +791,8 @@ fn split_large_segments( // Don't forget the last batch if !current_indices.is_empty() { - let batch = re_arrow_util::take_record_batch(chunk_info, ¤t_indices)?; + let batch = + re_arrow_util::take_record_batch(chunk_info, ¤t_indices).map_err(take_err)?; result_batches.push(batch); } @@ -624,33 +815,76 @@ fn split_large_segments( fn sort_chunks_by_segment_order( chunks: Vec, segment_order: 
&[String], -) -> Vec { +) -> Vec { use std::collections::HashMap; // Collect all individual chunks grouped by segment ID (we don't care about ordering of individual // chunks within a segment here) - let mut segment_groups: HashMap)>> = HashMap::default(); + let mut segment_groups: HashMap> = HashMap::default(); // Extract all chunks and group by segment for chunks_with_segment in chunks { for (chunk, segment_id_opt) in chunks_with_segment { - let segment_id = segment_id_opt - .clone() - .unwrap_or_else(|| "unknown".to_owned()); - segment_groups - .entry(segment_id) - .or_default() - .push((chunk, segment_id_opt)); + let Some(segment_id) = segment_id_opt else { + continue; + }; + segment_groups.entry(segment_id).or_default().push(chunk); } } // Rebuild chunks in the correct segment order segment_order .iter() - .filter_map(|segment_id| segment_groups.remove(segment_id)) + .filter_map(|segment_id| segment_groups.remove_entry(segment_id)) .collect() } +/// Helper to sort and send chunks through the output channel, preserving segment order. +/// Returns `false` if the output channel is closed (consumer dropped). +async fn send_sorted_chunks( + chunks: Vec, + global_segment_order: &[String], + output_channel: &Sender>, +) -> bool { + let sorted = { + let _span = tracing::info_span!("sort_chunks").entered(); + sort_chunks_by_segment_order(chunks, global_segment_order) + }; + let n_sorted = sorted.len(); + async { + for chunk in sorted { + if output_channel.send(Ok(chunk)).await.is_err() { + return false; + } + } + true + } + .instrument(tracing::info_span!("send_chunks", n = n_sorted)) + .await +} + +/// Fetch remaining batches via batched gRPC (groups of `GRPC_BATCH_SIZE`), +/// preserving ordering. 
+async fn fetch_remaining_via_grpc( + batches: &[RecordBatch], + client: &T, + global_segment_order: &[String], + output_channel: &Sender>, +) -> ApiResult<()> { + for batch_group in batches.chunks(GRPC_BATCH_SIZE) { + #[cfg(not(target_arch = "wasm32"))] + { + let bytes: u64 = batch_group.iter().map(batch_byte_size).sum(); + crate::chunk_fetcher::metrics::record_grpc_no_direct_urls(bytes); + } + let all_chunks = fetch_batch_group_via_grpc(batch_group, client).await?; + if !send_sorted_chunks(all_chunks, global_segment_order, output_channel).await { + return Ok(()); + } + } + Ok(()) +} + /// This is the function that will run on the IO (main) tokio runtime that will listen /// to the gRPC channel for chunks coming in from the Data Platform. This loop is started /// up by the execute fn of the physical plan, so we will start one per output DataFusion partition, @@ -665,77 +899,196 @@ fn sort_chunks_by_segment_order( /// In order to improve performance, while maintaining ordering, we batch requests to the server /// and process them concurrently in groups. After data for each group is collected, it is sorted /// by the input segment order before being sent to the CPU worker thread. -#[tracing::instrument(level = "trace", skip_all)] +#[tracing::instrument( + level = "info", + skip_all, + fields(n_chunks, n_batches, n_segments, fetch_strategy,) +)] async fn chunk_stream_io_loop( client: T, chunk_infos: Vec, - output_channel: Sender>, -) -> Result<(), DataFusionError> { - #![expect(clippy::redundant_iter_cloned)] // False positive? Or requires smarter async code. 
- - // TODO(zehiko) make these configurable + output_channel: Sender>, + pending_analytics: Option, +) -> ApiResult<()> { let target_size_bytes = TARGET_BATCH_SIZE_BYTES as u64; - let target_concurrency = TARGET_CONCURRENCY; + let n_chunks = chunk_infos.len(); let (request_batches, global_segment_order) = create_request_batches(chunk_infos, target_size_bytes)?; - use futures::{StreamExt as _, TryStreamExt as _}; + let span = tracing::Span::current(); + span.record("n_chunks", n_chunks); + span.record("n_batches", request_batches.len()); + span.record("n_segments", global_segment_order.len()); - // Process batches in chunks for memory efficiency while preserving perfect ordering - for batch_group in request_batches.chunks(target_concurrency) { - // Execute all batch requests in this group concurrently - let group_results: Vec>> = - futures::stream::iter(batch_group.iter().cloned().map(|batch| { - let mut client = client.clone(); + re_log::debug!( + "Fetching {n_chunks} chunks in {} batches ({} segments)", + request_batches.len(), + global_segment_order.len() + ); - async move { - let chunk_info: re_protos::common::v1alpha1::DataframePart = batch.into(); + // Allow overriding the fetch strategy via environment variable. + let force_grpc = *CHUNK_STRATEGY == "grpc"; - let fetch_chunks_request = FetchChunksRequest { - chunk_infos: vec![chunk_info], - }; + // Fast path: if no batches contain direct URLs (or gRPC is forced), fetch everything via gRPC. 
+ if force_grpc || !request_batches.iter().any(batch_has_any_direct_urls) { + let reason = if force_grpc { + "grpc_forced" + } else { + "no_direct_urls" + }; + span.record("fetch_strategy", reason); + re_log::debug!( + "{reason}, fetching all {} chunks via FetchChunks gRPC", + request_batches.len() + ); + let result = fetch_remaining_via_grpc( + &request_batches, + &client, + &global_segment_order, + &output_channel, + ) + .await; + + if let Some(analytics) = &pending_analytics { + match &result { + Ok(()) => { + // All fetches were gRPC — record total bytes into a task-local + // buffer and flush once. No intermediate atomics. + let total_bytes: u64 = request_batches.iter().map(batch_byte_size).sum(); + let mut stats = TaskFetchStats::default(); + stats.record_grpc_fetch(total_bytes); + stats.flush_into(analytics.fetch_stats()); + } + Err(_) => { + analytics.record_error(QueryErrorKind::GrpcFetch); + } + } + } - let fetch_chunks_response_stream = client - .fetch_chunks(fetch_chunks_request.into_request()) - .instrument(tracing::trace_span!("batched_fetch_chunks")) - .await - .map_err(|err| exec_datafusion_err!("{err}"))? - .into_inner(); + return result; + } - // Collect all chunks from this single batch request - let chunk_stream = - re_redap_client::fetch_chunks_response_to_chunk_and_segment_id( - fetch_chunks_response_stream, - ); + // Split each batch into direct-URL rows and non-URL rows, producing independent work items. + // Each work item gets a sequential index for the reorder buffer. 
+ enum FetchTask { + Direct(RecordBatch), + Grpc(RecordBatch), + } - let batch_chunks: Vec> = - chunk_stream.collect().await; + let mut work_items: Vec = Vec::new(); + let mut n_direct = 0usize; + let mut n_grpc = 0usize; + for batch in &request_batches { + let (direct_batch, grpc_batch) = split_batch_by_direct_url(batch); + if let Some(b) = direct_batch { + n_direct += 1; + work_items.push(FetchTask::Direct(b)); + } + if let Some(b) = grpc_batch { + n_grpc += 1; + work_items.push(FetchTask::Grpc(b)); + } + } - Ok::>, DataFusionError>(batch_chunks) - } - })) - .buffer_unordered(target_concurrency) - .try_collect() - .await?; + if n_grpc == 0 { + span.record("fetch_strategy", "direct"); + } else { + span.record( + "fetch_strategy", + format!("hybrid(direct={n_direct},grpc={n_grpc})"), + ); + } + re_log::debug!("Fetch tasks: {n_direct} direct, {n_grpc} gRPC fallback"); + + let http_client = reqwest::Client::new(); + + let fetch_stream = futures::stream::iter(work_items.into_iter().enumerate()) + .map(|(task_idx, task)| { + let http_client = http_client.clone(); + let client = client.clone(); + let pending_analytics = pending_analytics.clone(); + async move { + // Task-local stats buffer — flushed once to the shared atomics + // at the end of this task to avoid cross-core cache-line + // contention on the hot counters. 
+ let mut stats = TaskFetchStats::default(); + let pending_analytics = pending_analytics.as_ref(); + + let chunks = match task { + FetchTask::Direct(batch) => { + let bytes = batch_byte_size(&batch); + let chunks = match fetch_batch_direct( + &batch, + &http_client, + &mut stats, + pending_analytics, + ) + .await + { + Ok(chunks) => chunks, + Err(err) => { + stats.try_flush_into( + pending_analytics, + Err(QueryErrorKind::DirectFetch), + ); + return Err(err); + } + }; + stats.record_direct_fetch(bytes); + chunks + } + FetchTask::Grpc(batch) => { + let bytes = batch_byte_size(&batch); + #[cfg(not(target_arch = "wasm32"))] + crate::chunk_fetcher::metrics::record_grpc_no_direct_urls(bytes); + let chunks = + match fetch_batch_group_via_grpc(std::slice::from_ref(&batch), &client) + .await + { + Ok(chunks) => chunks, + Err(err) => { + stats.try_flush_into( + pending_analytics, + Err(QueryErrorKind::GrpcFetch), + ); + return Err(err); + } + }; + stats.record_grpc_fetch(bytes); + chunks + } + }; + + stats.try_flush_into(pending_analytics, Ok(())); + + Ok::<_, ApiError>((task_idx, chunks)) + } + .instrument(tracing::info_span!("fetch_task", task_idx)) + }) + .buffer_unordered(IO_PIPELINE_BUFFER); - let all_chunks: Vec = group_results - .into_iter() - .flatten() - .collect::, _>>() - .map_err(|err| exec_datafusion_err!("Error fetching chunks: {err}"))?; + tokio::pin!(fetch_stream); - // Sort chunks from this group using the global segment order - let sorted_chunks = sort_chunks_by_segment_order(all_chunks, &global_segment_order); + let mut next_to_emit: usize = 0; + let mut reorder_buf: BTreeMap> = BTreeMap::new(); - // Send all chunks from this group before processing next group - for chunks_with_segment in sorted_chunks { - if output_channel.send(Ok(chunks_with_segment)).await.is_err() { + while let Some(result) = fetch_stream.next().await { + let (task_idx, chunks) = result?; + reorder_buf.insert(task_idx, chunks); + + // Drain contiguous completed tasks in order + while 
let Some(chunks) = reorder_buf.remove(&next_to_emit) { + if !send_sorted_chunks(chunks, &global_segment_order, &output_channel).await { return Ok(()); } + next_to_emit += 1; } } + // Fetch stats are already recorded per-task into pending_analytics. + // The combined event will be sent when the last PendingQueryAnalytics clone is dropped. + Ok(()) } @@ -773,28 +1126,32 @@ impl ExecutionPlan for SegmentStreamExec { _context: Arc, ) -> datafusion::common::Result { #[cfg(not(target_arch = "wasm32"))] - let _trace_guard = attach_trace_context(&self.trace_headers); - let _span = tracing::info_span!("execute").entered(); + let _trace_guard = attach_trace_context(self.trace_headers.as_ref()); + let execute_span = tracing::info_span!("execute", partition); + let _entered = execute_span.enter(); let (chunk_tx, chunk_rx) = tokio::sync::mpsc::channel(CPU_THREAD_IO_CHANNEL_SIZE); let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0); - let (_, chunk_infos): (Vec<_>, Vec<_>) = self + let chunk_infos = self .chunk_info .iter() .filter(|(segment_id, _)| { let hash_value = segment_id.hash_one(&random_state) as usize; hash_value % self.target_partitions == partition }) - .map(|(k, v)| (k.clone(), v.clone())) - .unzip(); - // we end up with 1 batch per (rerun) segment. Order is important and must be preserved. - // See SegmentStreamExec::try_new for details on ordering. - let chunk_infos = chunk_infos - .into_iter() - .map(|batches| re_arrow_util::concat_polymorphic_batches(&batches)) + // we end up with 1 batch per (rerun) segment. Order is important and must be preserved. + // See SegmentStreamExec::try_new for details on ordering. 
+ .map(|(_, batches)| re_arrow_util::concat_polymorphic_batches(batches)) .collect::, _>>() - .map_err(|err| exec_datafusion_err!("{err}"))?; + .map_err(|err| { + ApiError::deserialization_with_source( + None, + err, + "concatenating chunk-info batches per segment", + ) + .into_df_error() + })?; // if no chunks match this datafusion partition, return an empty stream if chunk_infos.is_empty() { @@ -807,15 +1164,20 @@ impl ExecutionPlan for SegmentStreamExec { let (batches_tx, batches_rx) = tokio::sync::mpsc::channel(CPU_THREAD_IO_CHANNEL_SIZE); let query_expression = self.query_expression.clone(); let projected_schema = self.projected_schema.clone(); - let cpu_join_handle = Some(self.worker_runtime.handle().spawn( - chunk_store_cpu_worker_thread( - chunk_rx, - batches_tx, - query_expression, - projected_schema, - self.index_values.clone(), + let limit = self.limit; + let cpu_join_handle = Some( + self.worker_runtime.handle().spawn( + chunk_store_cpu_worker_thread( + chunk_rx, + batches_tx, + query_expression, + projected_schema, + self.index_values.clone(), + limit, + ) + .instrument(tracing::info_span!("cpu_worker")), ), - )); + ); let stream = DataframeSegmentStreamInner { projected_schema: self.projected_schema.clone(), @@ -826,7 +1188,10 @@ impl ExecutionPlan for SegmentStreamExec { io_join_handle: None, cpu_join_handle, cpu_runtime: Arc::clone(&self.worker_runtime), + execute_span: execute_span.clone(), trace_headers: self.trace_headers.clone(), + server_trace_id: self.server_trace_id, + pending_analytics: self.pending_analytics.clone(), }; let stream = DataframeSegmentStream { inner: Some(stream), @@ -846,7 +1211,6 @@ impl ExecutionPlan for SegmentStreamExec { let mut plan = Self { props: self.props.clone(), - chunk_info_batches: self.chunk_info_batches.clone(), chunk_info: self.chunk_info.clone(), query_expression: self.query_expression.clone(), index_values: self.index_values.clone(), @@ -854,7 +1218,10 @@ impl ExecutionPlan for SegmentStreamExec { 
target_partitions, worker_runtime: Arc::new(CpuRuntime::try_new(target_partitions)?), client: self.client.clone(), + limit: self.limit, trace_headers: self.trace_headers.clone(), + server_trace_id: self.server_trace_id, + pending_analytics: self.pending_analytics.clone(), }; plan.props.partitioning = match plan.props.partitioning { @@ -898,7 +1265,7 @@ impl Drop for CpuRuntime { if let Some(thread_join_handle) = self.thread_join_handle.take() { // If the thread is still running, we wait for it to finish if thread_join_handle.join().is_err() { - log::error!("Error joining CPU runtime thread"); + re_log::error!("Error joining CPU runtime thread"); } } } @@ -950,8 +1317,8 @@ mod tests { use super::*; /// Extract segment ID from a chunk result (test helper) - fn extract_segment_id_from_chunk(chunk: &ChunksWithSegment) -> Option { - chunk.first()?.1.clone() + fn extract_segment_id_from_chunk((segment_id, _chunks): &SortedChunksWithSegment) -> &str { + segment_id } /// Helper to create a test `RecordBatch` with chunk info for testing @@ -1108,9 +1475,9 @@ mod tests { let sorted_chunks = sort_chunks_by_segment_order(chunks, &segment_order); // Verify chunks are sorted according to segment order - let sorted_segments: Vec = sorted_chunks + let sorted_segments: Vec<&str> = sorted_chunks .iter() - .map(|chunk| extract_segment_id_from_chunk(chunk).unwrap_or_default()) + .map(extract_segment_id_from_chunk) .collect(); assert_eq!(sorted_segments, vec!["segA", "segB", "segC"]); @@ -1142,17 +1509,17 @@ mod tests { // After sorting, we should have segments in correct order: segA, segB, segC // And the function should have split the multi-segment response into separate responses assert_eq!(sorted_chunks.len(), 3); - let sorted_segments: Vec = sorted_chunks + let sorted_segments: Vec<&str> = sorted_chunks .iter() - .map(|chunk| extract_segment_id_from_chunk(chunk).unwrap_or_default()) + .map(extract_segment_id_from_chunk) .collect(); assert_eq!(sorted_segments, vec!["segA", "segB", 
"segC"]); // Verify each segment has the correct number of chunks - let seg_a_chunks = sorted_chunks[0].len(); - let seg_b_chunks = sorted_chunks[1].len(); - let seg_c_chunks = sorted_chunks[2].len(); + let seg_a_chunks = sorted_chunks[0].1.len(); + let seg_b_chunks = sorted_chunks[1].1.len(); + let seg_c_chunks = sorted_chunks[2].1.len(); assert_eq!(seg_a_chunks, 2); assert_eq!(seg_b_chunks, 3); @@ -1185,15 +1552,15 @@ mod tests { // Should be sorted: segA, segB (grouped together), segC assert_eq!(sorted_chunks.len(), 3); - let sorted_segments: Vec = sorted_chunks + let sorted_segments: Vec<&str> = sorted_chunks .iter() - .map(|chunk| extract_segment_id_from_chunk(chunk).unwrap_or_default()) + .map(extract_segment_id_from_chunk) .collect(); assert_eq!(sorted_segments, vec!["segA", "segB", "segC"]); // Verify segB has 2 chunks (they should be grouped together) - let seg_b_chunks = sorted_chunks[1].len(); + let seg_b_chunks = sorted_chunks[1].1.len(); assert_eq!(seg_b_chunks, 2); } } diff --git a/crates/store/re_datafusion/src/dataframe_query_provider_wasm.rs b/crates/store/re_datafusion/src/dataframe_query_provider_wasm.rs index c8ecb51e523b..d4650662a5d5 100644 --- a/crates/store/re_datafusion/src/dataframe_query_provider_wasm.rs +++ b/crates/store/re_datafusion/src/dataframe_query_provider_wasm.rs @@ -11,9 +11,12 @@ use arrow::array::{Array, RecordBatch, RecordBatchOptions, StringArray}; use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::common::hash_utils::HashValue as _; -use datafusion::common::{exec_datafusion_err, exec_err, plan_err}; +use datafusion::common::plan_err; use datafusion::config::ConfigOptions; use datafusion::error::DataFusionError; +use re_redap_client::{ApiError, ApiResult}; + +use crate::IntoDfError as _; use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}; use datafusion::physical_expr::expressions::Column; use datafusion::physical_expr::{ @@ -35,7 
+38,6 @@ use tonic::IntoRequest as _; #[derive(Debug)] pub(crate) struct SegmentStreamExec { props: PlanProperties, - chunk_info_batches: Arc>, /// Describes the chunks per segment, derived from `chunk_info_batches`. /// We keep both around so that we only have to process once, but we may @@ -47,6 +49,9 @@ pub(crate) struct SegmentStreamExec { projected_schema: Arc, target_partitions: usize, client: T, + + /// Pending query analytics — keeps alive until all streams complete. + pending_analytics: Option, } pub struct DataframeSegmentStream { @@ -56,28 +61,32 @@ pub struct DataframeSegmentStream { current_query: Option<(String, QueryHandle)>, query_expression: QueryExpression, remaining_segment_ids: Vec, + + /// Pending query analytics — kept alive so the event fires on drop. + _pending_analytics: Option, } impl DataframeSegmentStream { async fn get_chunk_store_for_single_rerun_segment( &mut self, segment_id: &str, - ) -> Result { + ) -> ApiResult { let chunk_infos = self.chunk_infos.iter().map(Into::into).collect::>(); let fetch_chunks_request = FetchChunksRequest { chunk_infos }; - let fetch_chunks_response_stream = self + let response = self .client .fetch_chunks(fetch_chunks_request.into_request()) .await - .map_err(|err| exec_datafusion_err!("{err}"))? - .into_inner(); + .map_err(|err| ApiError::tonic(err, "fetch_chunks"))?; + + let response_stream = + re_redap_client::ApiResponseStream::from_tonic_response(response, "/FetchChunks"); // Then we need to fully decode these chunks, i.e. both the transport layer (Protobuf) // and the app layer (Arrow). - let mut chunk_stream = re_redap_client::fetch_chunks_response_to_chunk_and_segment_id( - fetch_chunks_response_stream, - ); + let mut chunk_stream = + re_redap_client::fetch_chunks_response_to_chunk_and_segment_id(response_stream); // Note: using segment id as the store id, shouldn't really // matter since this is just a temporary store. 
@@ -85,8 +94,7 @@ impl DataframeSegmentStream { let store = ChunkStore::new_handle(store_id, Default::default()); while let Some(chunks_and_segment_ids) = chunk_stream.next().await { - let chunks_and_segment_ids = - chunks_and_segment_ids.map_err(|err| exec_datafusion_err!("{err}"))?; + let chunks_and_segment_ids = chunks_and_segment_ids?; let _span = tracing::trace_span!( "fetch_chunks::batch_insert", @@ -97,16 +105,32 @@ impl DataframeSegmentStream { for chunk_and_segment_id in chunks_and_segment_ids { let (chunk, received_segment_id) = chunk_and_segment_id; - let received_segment_id = received_segment_id - .ok_or_else(|| exec_datafusion_err!("Received chunk without a segment id"))?; + let received_segment_id = received_segment_id.ok_or_else(|| { + ApiError::deserialization( + None, + "server returned chunk without a segment id in fetch_chunks response", + ) + })?; if received_segment_id != segment_id { - return exec_err!("Unexpected segment id: {received_segment_id}"); + return Err(ApiError::deserialization( + None, + format!( + "server returned chunk for unexpected segment id `{received_segment_id}` \ + while fetching chunks for `{segment_id}`" + ), + )); } store .write() .insert_chunk(&Arc::new(chunk)) - .map_err(|err| exec_datafusion_err!("{err}"))?; + .map_err(|err| { + ApiError::internal_with_source( + None, + err, + "inserting chunk into in-memory store", + ) + })?; } } @@ -133,7 +157,8 @@ impl Stream for DataframeSegmentStream { let runtime = Handle::current(); let store = runtime - .block_on(this.get_chunk_store_for_single_rerun_segment(segment_id.as_str()))?; + .block_on(this.get_chunk_store_for_single_rerun_segment(segment_id.as_str())) + .map_err(|err| err.into_df_error())?; let query_engine = QueryEngine::new(store.clone(), QueryCache::new_handle(store)); @@ -150,7 +175,9 @@ impl Stream for DataframeSegmentStream { .expect("current_query should be Some"); // If the following returns none, we have exhausted that rerun segment id - match 
create_next_row(query, segment_id, &this.projected_schema)? { + match create_next_row(query, segment_id, &this.projected_schema) + .map_err(|err| err.into_df_error())? + { Some(rb) => return Poll::Ready(Some(Ok(rb))), None => this.current_query = None, } @@ -179,10 +206,12 @@ impl SegmentStreamExec { sort_index: Option, projection: Option<&Vec>, num_partitions: usize, - chunk_info_batches: Arc>, + chunk_info_batches: Option, query_expression: QueryExpression, _index_values: IndexValuesMap, client: T, + _limit: Option, + pending_analytics: Option, ) -> datafusion::common::Result { let projected_schema = match projection { Some(p) => Arc::new(table_schema.project(p)?), @@ -243,16 +272,17 @@ impl SegmentStreamExec { Boundedness::Bounded, ); - let chunk_info = group_chunk_infos_by_segment_id(&chunk_info_batches)?; + let chunk_info = group_chunk_infos_by_segment_id(chunk_info_batches.as_slice())?; + drop(chunk_info_batches); Ok(Self { props, - chunk_info_batches, chunk_info, query_expression, projected_schema, target_partitions: num_partitions, client, + pending_analytics, }) } } @@ -262,7 +292,7 @@ fn create_next_row( query_handle: &QueryHandle, segment_id: &str, target_schema: &Arc, -) -> Result, DataFusionError> { +) -> ApiResult> { let query_schema = Arc::clone(query_handle.schema()); let num_fields = query_schema.fields.len(); @@ -275,7 +305,9 @@ fn create_next_row( return Ok(None); } if num_fields != next_row.len() { - return plan_err!("Unexpected number of columns returned from query"); + return Err(ApiError::internal( + "Unexpected number of columns returned from query", + )); } let num_rows = next_row[0].len(); @@ -295,9 +327,18 @@ fn create_next_row( batch_schema, arrays, &RecordBatchOptions::default().with_row_count(Some(num_rows)), - )?; + ) + .map_err(|err| { + ApiError::deserialization_with_source( + None, + err, + "building output record batch from chunk-store rows", + ) + })?; - let output_batch = align_record_batch_to_schema(&batch, target_schema)?; + 
let output_batch = align_record_batch_to_schema(&batch, target_schema).map_err(|err| { + ApiError::deserialization_with_source(None, err, "DataFusion schema mismatch error") + })?; Ok(Some(output_batch)) } @@ -341,12 +382,12 @@ impl ExecutionPlan for SegmentStreamExec { let mut plan = Self { props: self.props.clone(), - chunk_info_batches: self.chunk_info_batches.clone(), chunk_info: self.chunk_info.clone(), query_expression: self.query_expression.clone(), projected_schema: self.projected_schema.clone(), target_partitions, client: self.client.clone(), + pending_analytics: self.pending_analytics.clone(), }; plan.props.partitioning = match plan.props.partitioning { @@ -397,6 +438,7 @@ impl ExecutionPlan for SegmentStreamExec { remaining_segment_ids, current_query: None, query_expression, + _pending_analytics: self.pending_analytics.clone(), }; Ok(Box::pin(stream)) diff --git a/crates/store/re_datafusion/src/dataset_manifest.rs b/crates/store/re_datafusion/src/dataset_manifest.rs index 5e8fb14d7e5a..26b843993e6f 100644 --- a/crates/store/re_datafusion/src/dataset_manifest.rs +++ b/crates/store/re_datafusion/src/dataset_manifest.rs @@ -4,11 +4,11 @@ use arrow::array::RecordBatch; use arrow::datatypes::SchemaRef; use async_trait::async_trait; use datafusion::catalog::TableProvider; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; +use datafusion::error::Result as DataFusionResult; use re_log_types::EntryId; use re_protos::cloud::v1alpha1::{ScanDatasetManifestRequest, ScanDatasetManifestResponse}; use re_protos::headers::RerunHeadersInjectorExt as _; -use re_redap_client::ConnectionClient; +use re_redap_client::{ApiError, ApiResult, ConnectionClient}; use tracing::instrument; use crate::grpc_streaming_provider::{GrpcStreamProvider, GrpcStreamToTable}; @@ -19,19 +19,27 @@ use crate::wasm_compat::make_future_send; pub struct DatasetManifestProvider { client: ConnectionClient, dataset_id: EntryId, + + /// Captured at construction so DataFusion-spawned 
execution tasks can re-attach + /// the caller's tracing span — otherwise gRPC spans below surface as root traces. + parent_span: tracing::Span, } impl std::fmt::Debug for DatasetManifestProvider { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("DatasetManifestProvider") .field("dataset_id", &self.dataset_id) - .finish() + .finish_non_exhaustive() } } impl DatasetManifestProvider { pub fn new(client: ConnectionClient, dataset_id: EntryId) -> Self { - Self { client, dataset_id } + Self { + client, + dataset_id, + parent_span: tracing::Span::current(), + } } /// This is a convenience function @@ -44,56 +52,59 @@ impl DatasetManifestProvider { impl GrpcStreamToTable for DatasetManifestProvider { type GrpcStreamData = ScanDatasetManifestResponse; - #[instrument(skip(self), err)] - async fn fetch_schema(&mut self) -> DataFusionResult { + #[instrument(skip(self), err, parent = &self.parent_span)] + async fn fetch_schema(&mut self) -> ApiResult { let mut client = self.client.clone(); - let dataset_id = self.dataset_id; Ok(Arc::new( - make_future_send(async move { - client - .get_dataset_manifest_schema(dataset_id) - .await - .map_err(|err| { - DataFusionError::External( - format!("Couldn't get dataset manifest schema: {err}").into(), - ) - }) - }) - .await?, + make_future_send(async move { client.get_dataset_manifest_schema(dataset_id).await }) + .await?, )) } // TODO(ab): what `GrpcStreamToTable` attempts to simplify should probably be handled by // `ConnectionClient` - #[instrument(skip(self), err)] + #[instrument(skip(self), err, parent = &self.parent_span)] async fn send_streaming_request( &mut self, - ) -> DataFusionResult>> { + ) -> ApiResult> { let request = tonic::Request::new(ScanDatasetManifestRequest { columns: vec![], // all of them }) .with_entry_id(self.dataset_id) - .map_err(|err| DataFusionError::External(Box::new(err)))?; + .map_err(|err| ApiError::tonic(err, "failed building /ScanDatasetManifest request"))?; let mut 
client = self.client.clone(); - make_future_send(async move { Ok(client.inner().scan_dataset_manifest(request).await) }) - .await? - .map_err(|err| DataFusionError::External(Box::new(err))) + let response = make_future_send(async move { + client + .inner() + .scan_dataset_manifest(request) + .await + .map_err(|err| ApiError::tonic(err, "/ScanDatasetManifest failed")) + }) + .await?; + + Ok(re_redap_client::ApiResponseStream::from_tonic_response( + response, + "/ScanDatasetManifest", + )) } - fn process_response( - &mut self, - response: Self::GrpcStreamData, - ) -> DataFusionResult { + fn process_response(&mut self, response: Self::GrpcStreamData) -> ApiResult { response .data - .ok_or(DataFusionError::Execution( - "DataFrame missing from DatasetManifest response".to_owned(), - ))? + .ok_or_else(|| { + ApiError::deserialization(None, "DataFrame missing from DatasetManifest response") + })? .try_into() - .map_err(|err| DataFusionError::External(Box::new(err))) + .map_err(|err: re_protos::TypeConversionError| { + ApiError::deserialization_with_source( + None, + err, + "failed decoding /ScanDatasetManifest response", + ) + }) } } diff --git a/crates/store/re_datafusion/src/errors.rs b/crates/store/re_datafusion/src/errors.rs new file mode 100644 index 000000000000..6ecd4d20a5dc --- /dev/null +++ b/crates/store/re_datafusion/src/errors.rs @@ -0,0 +1,16 @@ +use datafusion::error::DataFusionError; +use re_redap_client::ApiError; + +/// Extension trait for converting an [`ApiError`] into a [`DataFusionError::External`]. +/// +/// In general, we want [`DataFusionError::External`] to always wrap an [`ApiError`], +/// and never anything else. 
+pub(crate) trait IntoDfError { + fn into_df_error(self) -> DataFusionError; +} + +impl IntoDfError for ApiError { + fn into_df_error(self) -> DataFusionError { + DataFusionError::External(Box::new(self)) + } +} diff --git a/crates/store/re_datafusion/src/grpc_streaming_provider.rs b/crates/store/re_datafusion/src/grpc_streaming_provider.rs index 2dd369fd6c64..c1252ad41133 100644 --- a/crates/store/re_datafusion/src/grpc_streaming_provider.rs +++ b/crates/store/re_datafusion/src/grpc_streaming_provider.rs @@ -2,6 +2,7 @@ use std::any::Any; use std::pin::Pin; use std::sync::Arc; +use crate::IntoDfError as _; use crate::batch_coalescer::coalesce_exec::SizedCoalesceBatchesExec; use crate::batch_coalescer::coalescer::CoalescerOptions; use arrow::array::RecordBatch; @@ -17,6 +18,7 @@ use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::streaming::{PartitionStream, StreamingTableExec}; use datafusion::prelude::Expr; use futures_util::StreamExt as _; +use re_redap_client::{ApiResponseStream, ApiResult}; use tokio_stream::Stream; #[async_trait] @@ -25,14 +27,13 @@ pub trait GrpcStreamToTable: { type GrpcStreamData; - async fn fetch_schema(&mut self) -> DataFusionResult; + async fn fetch_schema(&mut self) -> ApiResult; - fn process_response(&mut self, response: Self::GrpcStreamData) - -> DataFusionResult; + fn process_response(&mut self, response: Self::GrpcStreamData) -> ApiResult; async fn send_streaming_request( &mut self, - ) -> DataFusionResult>>; + ) -> ApiResult>; fn supports_filters_pushdown( &self, @@ -62,7 +63,10 @@ pub struct GrpcStreamProvider { impl GrpcStreamProvider { pub async fn prepare(mut client: T) -> Result, DataFusionError> { - let schema = client.fetch_schema().await?; + let schema = client + .fetch_schema() + .await + .map_err(|err| err.into_df_error())?; Ok(Arc::new(Self { schema, client })) } } @@ -174,17 +178,14 @@ impl GrpcStream { T: GrpcStreamToTable + Send + 'static, { let adapted_stream = 
Box::pin(async_stream::try_stream! { - let mut stream = client.send_streaming_request().await.map_err(|err| DataFusionError::External(Box::new( - tonic::Status::internal(err.to_string()) - )))?.into_inner(); + let mut stream = client.send_streaming_request().await.map_err(|err| err.into_df_error())?; + let trace_id = stream.trace_id(); while let Some(msg) = stream.next().await { - let msg = msg.map_err(|err| DataFusionError::External(Box::new(err)))?; + let msg = msg.map_err(|err| err.into_df_error())?; let processed = client .process_response(msg) - .map_err(|err| DataFusionError::External(Box::new( - tonic::Status::internal(err.to_string()) - )))?; + .map_err(|err| err.with_trace_id(trace_id).into_df_error())?; yield processed; } }); diff --git a/crates/store/re_datafusion/src/lib.rs b/crates/store/re_datafusion/src/lib.rs index 387a0f122c7b..a1701af777b3 100644 --- a/crates/store/re_datafusion/src/lib.rs +++ b/crates/store/re_datafusion/src/lib.rs @@ -1,13 +1,17 @@ //! The Rerun public data APIs. Access `DataFusion` `TableProviders`. 
+mod analytics; mod batch_coalescer; mod catalog_provider; +#[cfg(not(target_arch = "wasm32"))] +mod chunk_fetcher; mod dataframe_query_common; #[cfg(not(target_arch = "wasm32"))] mod dataframe_query_provider; #[cfg(target_arch = "wasm32")] mod dataframe_query_provider_wasm; mod dataset_manifest; +mod errors; mod grpc_streaming_provider; pub(crate) mod pushdown_expressions; mod search_provider; @@ -15,6 +19,8 @@ mod segment_table; mod table_entry_provider; mod wasm_compat; +pub(crate) use self::errors::IntoDfError; +pub(crate) use analytics::{ConnectionAnalytics, PendingQueryAnalytics}; pub use catalog_provider::{DEFAULT_CATALOG_NAME, RedapCatalogProvider, get_all_catalog_names}; pub use dataframe_query_common::{ DataframeClientAPI, DataframeQueryTableProvider, query_from_query_expression, diff --git a/crates/store/re_datafusion/src/pushdown_expressions.rs b/crates/store/re_datafusion/src/pushdown_expressions.rs index caf83652d31e..9798efc2590f 100644 --- a/crates/store/re_datafusion/src/pushdown_expressions.rs +++ b/crates/store/re_datafusion/src/pushdown_expressions.rs @@ -104,27 +104,25 @@ pub(crate) fn apply_filter_expr_to_queries( // have (leftA OR leftB) AND (rightC OR rightD). We need to // consider the combinatorial for the final output. - let Some(left_queries) = - apply_filter_expr_to_queries(queries.clone(), &left, schema)? - else { - return apply_filter_expr_to_queries(queries.clone(), &right, schema); - }; - let Some(right_queries) = - apply_filter_expr_to_queries(queries.clone(), &right, schema)? 
- else { - return Ok(Some(left_queries)); - }; - - let final_exprs = left_queries - .iter() - .flat_map(|left| { - right_queries + match ( + apply_filter_expr_to_queries(queries.clone(), &left, schema)?, + apply_filter_expr_to_queries(queries, &right, schema)?, + ) { + (None, None) => None, + (Some(queries), None) | (None, Some(queries)) => Some(queries), + (Some(left_queries), Some(right_queries)) => { + let final_exprs = left_queries .iter() - .map(|right| merge_queries_and(left, right)) - }) - .collect::, _>>()?; - - Some(final_exprs) + .flat_map(|left| { + right_queries + .iter() + .map(|right| merge_queries_and(left, right)) + }) + .collect::, _>>()?; + + Some(final_exprs) + } + } } Operator::Or => { let Some(mut left_queries) = @@ -145,8 +143,8 @@ pub(crate) fn apply_filter_expr_to_queries( match known_filter_column(left.as_ref(), right.as_ref(), schema) { KnownFilterColumn::Index(index_name, time) => Some( queries - .iter() - .map(|query| replace_time_in_query(query, &index_name, time, op)) + .into_iter() + .map(|query| replace_time_in_query(&query, &index_name, time, op)) .collect::, _>>()?, ), KnownFilterColumn::SegmentId(segment_id) => { diff --git a/crates/store/re_datafusion/src/search_provider.rs b/crates/store/re_datafusion/src/search_provider.rs index 02cab48f4800..dcfa81df28e1 100644 --- a/crates/store/re_datafusion/src/search_provider.rs +++ b/crates/store/re_datafusion/src/search_provider.rs @@ -4,12 +4,12 @@ use arrow::array::RecordBatch; use arrow::datatypes::SchemaRef; use async_trait::async_trait; use datafusion::catalog::TableProvider; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; +use datafusion::error::DataFusionError; use re_log_types::EntryId; use re_protos::cloud::v1alpha1::{SearchDatasetRequest, SearchDatasetResponse}; use re_protos::common::v1alpha1::ScanParameters; use re_protos::headers::RerunHeadersInjectorExt as _; -use re_redap_client::ConnectionClient; +use re_redap_client::{ApiError, ApiResult, 
ConnectionClient}; use tokio_stream::StreamExt as _; use tracing::instrument; @@ -21,13 +21,18 @@ pub struct SearchResultsTableProvider { client: ConnectionClient, dataset_id: EntryId, request: SearchDatasetRequest, + + /// Captured at construction so DataFusion-spawned execution tasks can re-attach + /// the caller's tracing span — otherwise gRPC spans below surface as root traces. + parent_span: tracing::Span, } impl std::fmt::Debug for SearchResultsTableProvider { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("SearchResultsTableProvider") + .field("dataset_id", &self.dataset_id) .field("request", &self.request) - .finish() + .finish_non_exhaustive() } } @@ -47,6 +52,7 @@ impl SearchResultsTableProvider { client, dataset_id, request, + parent_span: tracing::Span::current(), }) } @@ -60,8 +66,8 @@ impl SearchResultsTableProvider { impl GrpcStreamToTable for SearchResultsTableProvider { type GrpcStreamData = SearchDatasetResponse; - #[instrument(skip(self), err)] - async fn fetch_schema(&mut self) -> DataFusionResult { + #[instrument(skip(self), err, parent = &self.parent_span)] + async fn fetch_schema(&mut self) -> ApiResult { let mut request = self.request.clone(); request.scan_parameters = Some(ScanParameters { limit_len: Some(0), @@ -71,62 +77,86 @@ impl GrpcStreamToTable for SearchResultsTableProvider { let mut client = self.client.clone(); let dataset_id = self.dataset_id; - let rb: RecordBatch = make_future_send(async move { - Ok::<_, DataFusionError>( - client - .inner() - .search_dataset( - tonic::Request::new(request) - .with_entry_id(dataset_id) - .map_err(|err| DataFusionError::External(Box::new(err)))?, - ) - .await - .map_err(|err| DataFusionError::External(Box::new(err)))? 
- .into_inner() - .next() - .await, - ) + let mut stream = make_future_send(async move { + let response = client + .inner() + .search_dataset( + tonic::Request::new(request) + .with_entry_id(dataset_id) + .map_err(|err| { + ApiError::tonic(err, "failed building /SearchDataset schema request") + })?, + ) + .await + .map_err(|err| ApiError::tonic(err, "/SearchDataset schema request failed"))?; + Ok(re_redap_client::ApiResponseStream::from_tonic_response( + response, + "/SearchDataset", + )) }) - .await? - .ok_or(DataFusionError::Execution( - "Empty stream from search results".to_owned(), - ))? - .map_err(|err| DataFusionError::External(Box::new(err)))? - .data - .ok_or(DataFusionError::Execution( - "Empty data from search results".to_owned(), - ))? - .try_into() - .map_err(|err| DataFusionError::External(Box::new(err)))?; + .await?; + + let trace_id = stream.trace_id(); + + let rb: RecordBatch = stream + .next() + .await + .ok_or_else(|| ApiError::deserialization(None, "Empty stream from search results"))?? + .data + .ok_or_else(|| ApiError::deserialization(None, "Empty data from search results"))? + .try_into() + .map_err(|err: re_protos::TypeConversionError| { + ApiError::deserialization_with_source( + trace_id, + err, + "failed decoding /SearchDataset schema response", + ) + })?; Ok(rb.schema()) } - #[instrument(skip(self), err)] + #[instrument(skip(self), err, parent = &self.parent_span)] async fn send_streaming_request( &mut self, - ) -> DataFusionResult>> { + ) -> ApiResult> { let request = tonic::Request::new(self.request.clone()) .with_entry_id(self.dataset_id) - .map_err(|err| DataFusionError::External(Box::new(err)))?; + .map_err(|err| ApiError::tonic(err, "failed building /SearchDataset request"))?; let mut client = self.client.clone(); - make_future_send(async move { Ok(client.inner().search_dataset(request).await) }) - .await? 
- .map_err(|err| DataFusionError::External(Box::new(err))) + let response = make_future_send(async move { + client + .inner() + .search_dataset(request) + .await + .map_err(|err| ApiError::tonic(err, "/SearchDataset failed")) + }) + .await?; + + Ok(re_redap_client::ApiResponseStream::from_tonic_response( + response, + "/SearchDataset", + )) } - fn process_response( - &mut self, - response: Self::GrpcStreamData, - ) -> DataFusionResult { + fn process_response(&mut self, response: Self::GrpcStreamData) -> ApiResult { response .data - .ok_or(DataFusionError::Execution( - "DataFrame missing from SearchDataResponse response".to_owned(), - ))? + .ok_or_else(|| { + ApiError::deserialization( + None, + "DataFrame missing from SearchDataResponse response", + ) + })? .try_into() - .map_err(|err| DataFusionError::External(Box::new(err))) + .map_err(|err: re_protos::TypeConversionError| { + ApiError::deserialization_with_source( + None, + err, + "failed decoding /SearchDataset response", + ) + }) } } diff --git a/crates/store/re_datafusion/src/segment_table.rs b/crates/store/re_datafusion/src/segment_table.rs index 1f8dc51c778e..f6aca22b732d 100644 --- a/crates/store/re_datafusion/src/segment_table.rs +++ b/crates/store/re_datafusion/src/segment_table.rs @@ -4,11 +4,11 @@ use arrow::array::RecordBatch; use arrow::datatypes::SchemaRef; use async_trait::async_trait; use datafusion::catalog::TableProvider; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; +use datafusion::error::Result as DataFusionResult; use re_log_types::EntryId; use re_protos::cloud::v1alpha1::{ScanSegmentTableRequest, ScanSegmentTableResponse}; use re_protos::headers::RerunHeadersInjectorExt as _; -use re_redap_client::ConnectionClient; +use re_redap_client::{ApiError, ApiResult, ConnectionClient}; use tracing::instrument; use crate::grpc_streaming_provider::{GrpcStreamProvider, GrpcStreamToTable}; @@ -19,19 +19,27 @@ use crate::wasm_compat::make_future_send; pub struct 
SegmentTableProvider { client: ConnectionClient, dataset_id: EntryId, + + /// Captured at construction so DataFusion-spawned execution tasks can re-attach + /// the caller's tracing span — otherwise gRPC spans below surface as root traces. + parent_span: tracing::Span, } impl std::fmt::Debug for SegmentTableProvider { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("SegmentTableProvider") .field("dataset_id", &self.dataset_id) - .finish() + .finish_non_exhaustive() } } impl SegmentTableProvider { pub fn new(client: ConnectionClient, dataset_id: EntryId) -> Self { - Self { client, dataset_id } + Self { + client, + dataset_id, + parent_span: tracing::Span::current(), + } } /// This is a convenience function @@ -44,56 +52,59 @@ impl SegmentTableProvider { impl GrpcStreamToTable for SegmentTableProvider { type GrpcStreamData = ScanSegmentTableResponse; - #[instrument(skip(self), err)] - async fn fetch_schema(&mut self) -> DataFusionResult { + #[instrument(skip(self), err, parent = &self.parent_span)] + async fn fetch_schema(&mut self) -> ApiResult { let mut client = self.client.clone(); - let dataset_id = self.dataset_id; Ok(Arc::new( - make_future_send(async move { - client - .get_segment_table_schema(dataset_id) - .await - .map_err(|err| { - DataFusionError::External( - format!("Couldn't get segment table schema: {err}").into(), - ) - }) - }) - .await?, + make_future_send(async move { client.get_segment_table_schema(dataset_id).await }) + .await?, )) } // TODO(ab): what `GrpcStreamToTable` attempts to simplify should probably be handled by // `ConnectionClient` - #[instrument(skip(self), err)] + #[instrument(skip(self), err, parent = &self.parent_span)] async fn send_streaming_request( &mut self, - ) -> DataFusionResult>> { + ) -> ApiResult> { let request = tonic::Request::new(ScanSegmentTableRequest { columns: vec![], // all of them }) .with_entry_id(self.dataset_id) - .map_err(|err| DataFusionError::External(Box::new(err)))?; + 
.map_err(|err| ApiError::tonic(err, "failed building /ScanSegmentTable request"))?; let mut client = self.client.clone(); - make_future_send(async move { Ok(client.inner().scan_segment_table(request).await) }) - .await? - .map_err(|err| DataFusionError::External(Box::new(err))) + let response = make_future_send(async move { + client + .inner() + .scan_segment_table(request) + .await + .map_err(|err| ApiError::tonic(err, "/ScanSegmentTable failed")) + }) + .await?; + + Ok(re_redap_client::ApiResponseStream::from_tonic_response( + response, + "/ScanSegmentTable", + )) } - fn process_response( - &mut self, - response: Self::GrpcStreamData, - ) -> DataFusionResult { + fn process_response(&mut self, response: Self::GrpcStreamData) -> ApiResult { response .data - .ok_or(DataFusionError::Execution( - "DataFrame missing from SegmentTable response".to_owned(), - ))? + .ok_or_else(|| { + ApiError::deserialization(None, "DataFrame missing from SegmentTable response") + })? .try_into() - .map_err(|err| DataFusionError::External(Box::new(err))) + .map_err(|err: re_protos::TypeConversionError| { + ApiError::deserialization_with_source( + None, + err, + "failed decoding /ScanSegmentTable response", + ) + }) } } diff --git a/crates/store/re_datafusion/src/table_entry_provider.rs b/crates/store/re_datafusion/src/table_entry_provider.rs index a0d1335d4c4d..2fee1f3ce7ec 100644 --- a/crates/store/re_datafusion/src/table_entry_provider.rs +++ b/crates/store/re_datafusion/src/table_entry_provider.rs @@ -21,10 +21,11 @@ use re_protos::cloud::v1alpha1::{ EntryFilter, EntryKind, FindEntriesRequest, GetTableSchemaRequest, ScanTableRequest, ScanTableResponse, }; -use re_redap_client::ConnectionClient; +use re_redap_client::{ApiError, ApiResult, ConnectionClient}; use tokio::runtime::Handle; use tracing::instrument; +use crate::IntoDfError as _; use crate::grpc_streaming_provider::{GrpcStreamProvider, GrpcStreamToTable}; use crate::wasm_compat::make_future_send; @@ -36,6 +37,10 @@ pub struct 
TableEntryTableProvider { // cache the table id when resolved table_id: Option, + + /// Captured at construction so DataFusion-spawned execution tasks can re-attach + /// the caller's tracing span — otherwise gRPC spans below surface as root traces. + parent_span: tracing::Span, } impl std::fmt::Debug for TableEntryTableProvider { @@ -43,7 +48,7 @@ impl std::fmt::Debug for TableEntryTableProvider { f.debug_struct("TableEntryTableProvider") .field("table", &self.table) .field("table_id", &self.table_id) - .finish() + .finish_non_exhaustive() } } @@ -58,6 +63,7 @@ impl TableEntryTableProvider { table: table.into(), table_id: None, runtime, + parent_span: tracing::Span::current(), } } @@ -70,8 +76,8 @@ impl TableEntryTableProvider { Ok(GrpcStreamProvider::prepare(self).await?) } - #[instrument(skip(self), err)] - async fn table_id(&mut self) -> Result { + #[instrument(skip(self), err, parent = &self.parent_span)] + async fn table_id(&mut self) -> ApiResult { if let Some(table_id) = self.table_id { return Ok(table_id); } @@ -83,8 +89,8 @@ impl TableEntryTableProvider { let mut client = self.client.clone(); let table_name_copy = table_name.clone(); - let entry_details: EntryDetails = make_future_send(async move { - Ok(client + let response = make_future_send(async move { + client .inner() .find_entries(FindEntriesRequest { filter: Some(EntryFilter { @@ -93,21 +99,31 @@ impl TableEntryTableProvider { entry_kind: Some(EntryKind::Table as i32), }), }) - .await) + .await + .map_err(|err| ApiError::tonic(err, "/FindEntries failed")) }) - .await? - .map_err(|err| DataFusionError::External(Box::new(err)))? - .into_inner() - .entries - .first() - .ok_or_else(|| { - DataFusionError::External( - format!("No entry found with name: {table_name}").into(), - ) - })? 
- .clone() - .try_into() - .map_err(|err| DataFusionError::External(Box::new(err)))?; + .await?; + let trace_id = re_redap_client::extract_trace_id(response.metadata()); + + let entry_details: EntryDetails = response + .into_inner() + .entries + .first() + .ok_or_else(|| { + ApiError::deserialization( + trace_id, + format!("No entry found with name: {table_name}"), + ) + })? + .clone() + .try_into() + .map_err(|err: re_protos::TypeConversionError| { + ApiError::deserialization_with_source( + trace_id, + err, + "failed decoding /FindEntries response", + ) + })?; entry_details.id } @@ -122,53 +138,84 @@ impl TableEntryTableProvider { impl GrpcStreamToTable for TableEntryTableProvider { type GrpcStreamData = ScanTableResponse; - #[instrument(skip(self), err)] - async fn fetch_schema(&mut self) -> DataFusionResult { + #[instrument(skip(self), err, parent = &self.parent_span)] + async fn fetch_schema(&mut self) -> ApiResult { let request = GetTableSchemaRequest { table_id: Some(self.table_id().await?.into()), }; let mut client = self.client.clone(); + let response = make_future_send(async move { + client + .inner() + .get_table_schema(request) + .await + .map_err(|err| ApiError::tonic(err, "/GetTableSchema failed")) + }) + .await?; + let trace_id = re_redap_client::extract_trace_id(response.metadata()); + Ok(Arc::new( - make_future_send(async move { Ok(client.inner().get_table_schema(request).await) }) - .await? - .map_err(|err| DataFusionError::External(Box::new(err)))? + response .into_inner() .schema - .ok_or(DataFusionError::External( - "Schema missing from GetTableSchema response".into(), - ))? - .try_into()?, + .ok_or_else(|| { + ApiError::deserialization( + trace_id, + "Schema missing from GetTableSchema response", + ) + })? 
+ .try_into() + .map_err(|err: arrow::error::ArrowError| { + ApiError::deserialization_with_source( + trace_id, + err, + "failed decoding /GetTableSchema response", + ) + })?, )) } - #[instrument(skip(self), err)] + #[instrument(skip(self), err, parent = &self.parent_span)] async fn send_streaming_request( &mut self, - ) -> DataFusionResult>> { + ) -> ApiResult> { let request = ScanTableRequest { table_id: Some(self.table_id().await?.into()), }; let mut client = self.client.clone(); - make_future_send(async move { Ok(client.inner().scan_table(request).await) }) - .await? - .map_err(|err| DataFusionError::External(Box::new(err))) + let response = make_future_send(async move { + client + .inner() + .scan_table(request) + .await + .map_err(|err| ApiError::tonic(err, "/ScanTable failed")) + }) + .await?; + + Ok(re_redap_client::ApiResponseStream::from_tonic_response( + response, + "/ScanTable", + )) } - fn process_response( - &mut self, - response: Self::GrpcStreamData, - ) -> DataFusionResult { + fn process_response(&mut self, response: Self::GrpcStreamData) -> ApiResult { response .dataframe_part - .ok_or(DataFusionError::Execution( - "DataFrame missing from PartitionList response".to_owned(), - ))? + .ok_or_else(|| { + ApiError::deserialization(None, "DataFrame missing from PartitionList response") + })? 
.try_into() - .map_err(|err| DataFusionError::External(Box::new(err))) + .map_err(|err: re_protos::TypeConversionError| { + ApiError::deserialization_with_source( + None, + err, + "failed decoding /ScanTable response", + ) + }) } async fn insert_into( @@ -178,7 +225,11 @@ impl GrpcStreamToTable for TableEntryTableProvider { insert_op: InsertOp, ) -> DataFusionResult> { let num_partitions = input.properties().output_partitioning().partition_count(); - let entry_id = self.clone().table_id().await?; + let entry_id = self + .clone() + .table_id() + .await + .map_err(|err| err.into_df_error())?; let insert_op = match insert_op { InsertOp::Append => TableInsertMode::Append, InsertOp::Replace => { @@ -297,9 +348,9 @@ impl ExecutionPlan for TableEntryWriterExec { struct RecordBatchGrpcOutputStream { input_stream: SendableRecordBatchStream, grpc_sender: Option, - thread_status: tokio::sync::oneshot::Receiver, + thread_status: tokio::sync::oneshot::Receiver, complete: bool, - grpc_error: Option, + grpc_error: Option, } struct GrpcStreamSender { @@ -355,12 +406,10 @@ impl Stream for RecordBatchGrpcOutputStream { // Check for gRPC errors first (only if we haven't already stored one) if self.grpc_error.is_none() { match Pin::new(&mut self.thread_status).poll(cx) { - Poll::Ready(Ok(Err(status))) => { + Poll::Ready(Ok(Err(err))) => { // Store the error for potential future use - // Not ideal to throw out the ApiError, but it doesn't impl Clone - self.grpc_error = Some(tonic::Status::internal(status.to_string())); - // Return the error immediately - return Poll::Ready(Some(Err(DataFusionError::External(Box::new(status))))); + self.grpc_error = Some(err.clone()); + return Poll::Ready(Some(Err(err.into_df_error()))); } Poll::Ready(Ok(Ok(())) | Err(_)) => { self.complete = true; @@ -384,18 +433,14 @@ impl Stream for RecordBatchGrpcOutputStream { Err(tokio::sync::mpsc::error::TrySendError::Closed(_)) => { // Channel closed - the gRPC task may have failed // Check if we have a stored 
error - if let Some(status) = self.grpc_error.take() { - return Poll::Ready(Some(Err(DataFusionError::External( - Box::new(status), - )))); + if let Some(err) = self.grpc_error.take() { + return Poll::Ready(Some(Err(err.into_df_error()))); } else { // Channel closed without error - treat as broken pipe - return Poll::Ready(Some(Err(DataFusionError::External( - Box::new(std::io::Error::new( - std::io::ErrorKind::BrokenPipe, - "gRPC stream closed unexpectedly", - )), - )))); + return Poll::Ready(Some(Err(ApiError::connection( + "/WriteTable gRPC stream closed unexpectedly", + ) + .into_df_error()))); } } } diff --git a/crates/store/re_datafusion/src/wasm_compat.rs b/crates/store/re_datafusion/src/wasm_compat.rs index 3f864248ece8..50e61e2c5b03 100644 --- a/crates/store/re_datafusion/src/wasm_compat.rs +++ b/crates/store/re_datafusion/src/wasm_compat.rs @@ -1,12 +1,12 @@ -use datafusion::common::DataFusionError; +use re_redap_client::ApiResult; /// This is a no-op on non-Wasm target, because the `tonic` future are already `Send`. See wasm /// version for information. 
#[cfg(not(target_arch = "wasm32"))] #[inline] -pub async fn make_future_send(f: F) -> Result +pub async fn make_future_send(f: F) -> ApiResult where - F: std::future::Future> + Send + 'static, + F: std::future::Future> + Send + 'static, T: Send + 'static, { f.await @@ -20,9 +20,9 @@ where #[cfg(target_arch = "wasm32")] pub fn make_future_send( f: F, -) -> impl std::future::Future> + Send + 'static +) -> impl std::future::Future> + Send + 'static where - F: std::future::Future> + 'static, + F: std::future::Future> + 'static, T: Send + 'static, { use futures::{FutureExt as _, pin_mut}; @@ -48,5 +48,11 @@ where } }); - rx.map(|result| result.unwrap_or_else(|err| Err(DataFusionError::External(err.into())))) + rx.map(|result| { + result.unwrap_or_else(|_cancelled| { + Err(re_redap_client::ApiError::internal( + "wasm task cancelled unexpectedly", + )) + }) + }) } diff --git a/crates/store/re_entity_db/Cargo.toml b/crates/store/re_entity_db/Cargo.toml index caa5d246ba05..c23bab7ed72a 100644 --- a/crates/store/re_entity_db/Cargo.toml +++ b/crates/store/re_entity_db/Cargo.toml @@ -22,24 +22,19 @@ all-features = true [features] default = [] -## Enable (de)serialization using serde. 
-serde = ["dep:serde", "re_log_types/serde"] - ## Enable some feature only used for tests testing = [] [dependencies] re_arrow_util.workspace = true -re_build_info.workspace = true re_byte_size.workspace = true re_chunk = { workspace = true, features = ["serde"] } re_chunk_store.workspace = true re_format.workspace = true -re_int_histogram.workspace = true re_log_channel.workspace = true re_log_encoding = { workspace = true, features = ["decoder"] } -re_log_types.workspace = true +re_log_types = { workspace = true, features = ["serde"] } re_log.workspace = true re_mutex.workspace = true re_query.workspace = true @@ -56,16 +51,19 @@ indexmap.workspace = true itertools.workspace = true nohash-hasher.workspace = true poll-promise.workspace = true -serde = { workspace = true, features = ["derive", "rc"], optional = true } +serde = { workspace = true, features = ["derive", "rc"] } static_assertions.workspace = true tap.workspace = true thiserror.workspace = true +vec1.workspace = true web-time.workspace = true [dev-dependencies] +re_build_info.workspace = true re_log_encoding = { workspace = true, features = ["decoder", "encoder"] } anyhow.workspace = true +insta.workspace = true similar-asserts.workspace = true [lib] diff --git a/crates/store/re_entity_db/src/chunk_requests.rs b/crates/store/re_entity_db/src/chunk_requests.rs index 8919eeae93b4..c84a4f8b6a89 100644 --- a/crates/store/re_entity_db/src/chunk_requests.rs +++ b/crates/store/re_entity_db/src/chunk_requests.rs @@ -116,7 +116,7 @@ impl ChunkRequests { pub fn num_on_wire_bytes_pending(&self) -> u64 { self.requests .iter() - .map(|b| b.info.size_bytes_uncompressed) + .map(|b| b.info.size_bytes_on_wire) .sum() } diff --git a/crates/store/re_entity_db/src/data_meta_per_timeline.rs b/crates/store/re_entity_db/src/data_meta_per_timeline.rs new file mode 100644 index 000000000000..b0197446c008 --- /dev/null +++ b/crates/store/re_entity_db/src/data_meta_per_timeline.rs @@ -0,0 +1,471 @@ +use 
std::collections::BTreeMap; + +use re_chunk::TimelineName; +use re_chunk_store::{ChunkStore, ChunkStoreDiff, ChunkStoreEvent}; + +use crate::RrdManifestIndex; + +#[derive(Default, Clone, Copy)] +struct RowCount { + /// Row count from a rrd manifest. + from_rrd_manifest: u64, + + /// Row counts from volatile chunks, i.e chunks that aren't in a rrd manifest. + from_volatile_chunks: u64, +} + +impl RowCount { + fn is_empty(&self) -> bool { + self.from_rrd_manifest == 0 && self.from_volatile_chunks == 0 + } +} + +impl re_byte_size::SizeBytes for RowCount { + fn heap_size_bytes(&self) -> u64 { + let Self { + from_rrd_manifest: _, + from_volatile_chunks: _, + } = self; + + 0 + } + + fn is_pod() -> bool { + true + } +} + +/// Helper to track row counts and time ranges across all entities and components per timeline. +#[derive(Default, Clone)] +pub struct DataMetaPerTimeline { + row_count_per_timeline: BTreeMap, +} + +impl re_byte_size::SizeBytes for DataMetaPerTimeline { + fn heap_size_bytes(&self) -> u64 { + let Self { + row_count_per_timeline, + } = self; + + row_count_per_timeline.heap_size_bytes() + } +} + +impl DataMetaPerTimeline { + pub fn row_count_for_timeline(&self, timeline: &TimelineName) -> u64 { + self.row_count_per_timeline + .get(timeline) + .map( + |RowCount { + from_rrd_manifest, + from_volatile_chunks, + }| from_rrd_manifest + from_volatile_chunks, + ) + .unwrap_or(0) + } + + fn handle_event_for_row_count( + &mut self, + manifest_index: &RrdManifestIndex, + store: &ChunkStore, + event: &ChunkStoreEvent, + ) { + match &event.diff { + ChunkStoreDiff::Addition(addition) => { + // If this addition comes from a root chunk in the rrd manifest, + // then don't count it since we've already counted that with a virtual + // addition. 
+ if store + .find_root_manifest_chunks(&addition.delta_chunk().id()) + .is_empty() + { + for (timeline, col) in addition.delta_chunk().timelines() { + let row_count = self.row_count_per_timeline.entry(*timeline).or_default(); + + row_count.from_volatile_chunks += col.num_rows() as u64; + } + } + } + ChunkStoreDiff::VirtualAddition(addition) => { + for per_timeline in addition.rrd_manifest.temporal_map().values() { + for (timeline, per_component) in per_timeline { + let row_count = self + .row_count_per_timeline + .entry(*timeline.name()) + .or_default(); + + for chunks in per_component.values() { + for entry in chunks.values() { + row_count.from_rrd_manifest += entry.num_rows; + } + } + } + } + } + ChunkStoreDiff::Deletion(deletion) => { + let mut rrd_manifest_row_counts = BTreeMap::new(); + + // We don't want to subtract rows that were in the rrd manifest + // since those are tracked separately and never deleted. + // + // So we collect the count of all root chunks in the rrd manifest + // for the deleted chunk. 
+ let rrd_manifest_row_counts_iter = store + .find_root_manifest_chunks(&deletion.chunk.id()) + .into_iter() + .filter_map(|c| manifest_index.root_chunk_info(&c)) + .flat_map(|info| { + info.temporals.iter().map(|(timeline, info)| { + (*timeline, info.num_rows_for_all_entities_all_components) + }) + }); + + for (timeline, row_count) in rrd_manifest_row_counts_iter { + *rrd_manifest_row_counts.entry(timeline).or_insert(0) += row_count; + } + + for (timeline, col) in deletion.chunk.timelines() { + let row_count = self.row_count_per_timeline.entry(*timeline).or_default(); + + let chunk_volatile_chunk_count = (col.num_rows() as u64).saturating_sub( + rrd_manifest_row_counts.get(timeline).copied().unwrap_or(0), + ); + + row_count.from_volatile_chunks = row_count + .from_volatile_chunks + .saturating_sub(chunk_volatile_chunk_count); + + if row_count.is_empty() { + self.row_count_per_timeline.remove(timeline); + } + } + } + ChunkStoreDiff::SchemaAddition(_) => {} + } + } + + pub fn on_events( + &mut self, + manifest_index: &RrdManifestIndex, + store: &ChunkStore, + events: &[ChunkStoreEvent], + ) { + re_tracing::profile_function!(); + + for event in events { + self.handle_event_for_row_count(manifest_index, store, event); + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use re_chunk::{Chunk, RowId, TimelineName}; + use re_chunk_store::{ChunkStore, ChunkStoreConfig}; + use re_log_types::example_components::{MyPoint, MyPoints}; + use re_log_types::{EntityPath, StoreId, TimePoint, Timeline}; + use re_types_core::ComponentBatch; + + use super::*; + use crate::RrdManifestIndex; + + /// Insert a single-row chunk and feed the resulting events into `meta`. 
+ fn insert_and_update( + store: &mut ChunkStore, + meta: &mut DataMetaPerTimeline, + manifest: &RrdManifestIndex, + entity: &EntityPath, + timeline: Timeline, + time: i64, + ) { + let chunk = Arc::new( + Chunk::builder(entity.clone()) + .with_component_batch( + RowId::new(), + TimePoint::from_iter([(timeline, time)]), + ( + MyPoints::descriptor_points(), + &[MyPoint::new(1.0, 1.0)] as &dyn ComponentBatch, + ), + ) + .build() + .unwrap(), + ); + let events = store.insert_chunk(&chunk).unwrap(); + meta.on_events(manifest, store, &events); + } + + #[test] + fn row_count_tracks_additions() { + let mut store = ChunkStore::new( + StoreId::random(re_log_types::StoreKind::Recording, "test"), + ChunkStoreConfig::ALL_DISABLED, + ); + let mut meta = DataMetaPerTimeline::default(); + let manifest = RrdManifestIndex::default(); + + let entity: EntityPath = "entity".into(); + let tl_frame = Timeline::new_sequence("frame"); + let tl_log = Timeline::new_sequence("log_time"); + + // Unknown timeline returns 0. + assert_eq!( + meta.row_count_for_timeline(&TimelineName::from("missing")), + 0 + ); + + // Insert three rows on tl_frame. + for t in [10, 20, 30] { + insert_and_update(&mut store, &mut meta, &manifest, &entity, tl_frame, t); + } + assert_eq!(meta.row_count_for_timeline(tl_frame.name()), 3); + + // Different timeline is still 0. + assert_eq!(meta.row_count_for_timeline(tl_log.name()), 0); + + // Insert on a second timeline. + insert_and_update(&mut store, &mut meta, &manifest, &entity, tl_log, 100); + assert_eq!(meta.row_count_for_timeline(tl_log.name()), 1); + // First timeline unchanged. 
+ assert_eq!(meta.row_count_for_timeline(tl_frame.name()), 3); + } + + #[test] + fn row_count_tracks_deletions() { + let mut store = ChunkStore::new( + StoreId::random(re_log_types::StoreKind::Recording, "test"), + ChunkStoreConfig::ALL_DISABLED, + ); + let mut meta = DataMetaPerTimeline::default(); + let manifest = RrdManifestIndex::default(); + + let entity: EntityPath = "entity".into(); + let tl = Timeline::new_sequence("frame"); + + for t in [10, 20, 30] { + insert_and_update(&mut store, &mut meta, &manifest, &entity, tl, t); + } + assert_eq!(meta.row_count_for_timeline(tl.name()), 3); + + // GC everything and feed deletion events. + let (gc_events, _stats) = + store.gc(&re_chunk_store::GarbageCollectionOptions::gc_everything()); + meta.on_events(&manifest, &store, &gc_events); + + assert_eq!(meta.row_count_for_timeline(tl.name()), 0); + } + + #[test] + fn multiple_entities_contribute_to_same_timeline() { + let mut store = ChunkStore::new( + StoreId::random(re_log_types::StoreKind::Recording, "test"), + ChunkStoreConfig::ALL_DISABLED, + ); + let mut meta = DataMetaPerTimeline::default(); + let manifest = RrdManifestIndex::default(); + + let tl = Timeline::new_sequence("frame"); + let entity_a: EntityPath = "a".into(); + let entity_b: EntityPath = "b".into(); + + insert_and_update(&mut store, &mut meta, &manifest, &entity_a, tl, 10); + insert_and_update(&mut store, &mut meta, &manifest, &entity_b, tl, 20); + + // Row count sums across entities. 
+ assert_eq!(meta.row_count_for_timeline(tl.name()), 2); + } + + #[test] + fn multi_row_chunk_counted_correctly() { + let mut store = ChunkStore::new( + StoreId::random(re_log_types::StoreKind::Recording, "test"), + ChunkStoreConfig::ALL_DISABLED, + ); + let mut meta = DataMetaPerTimeline::default(); + let manifest = RrdManifestIndex::default(); + + let entity: EntityPath = "entity".into(); + let tl = Timeline::new_sequence("frame"); + let point = MyPoint::new(1.0, 1.0); + + let chunk = Arc::new( + Chunk::builder(entity.clone()) + .with_component_batch( + RowId::new(), + TimePoint::from_iter([(tl, 10)]), + ( + MyPoints::descriptor_points(), + &[point] as &dyn ComponentBatch, + ), + ) + .with_component_batch( + RowId::new(), + TimePoint::from_iter([(tl, 20)]), + ( + MyPoints::descriptor_points(), + &[point] as &dyn ComponentBatch, + ), + ) + .with_component_batch( + RowId::new(), + TimePoint::from_iter([(tl, 30)]), + ( + MyPoints::descriptor_points(), + &[point] as &dyn ComponentBatch, + ), + ) + .build() + .unwrap(), + ); + let events = store.insert_chunk(&chunk).unwrap(); + meta.on_events(&manifest, &store, &events); + + assert_eq!(meta.row_count_for_timeline(tl.name()), 3); + } + + /// Build chunks at given times, create an RRD manifest from them, and return both. 
+ fn build_manifest_chunks( + entity: &EntityPath, + timeline: Timeline, + times: &[i64], + store_id: &StoreId, + ) -> (Vec>, Arc) { + let point = MyPoint::new(1.0, 1.0); + let chunks: Vec> = times + .iter() + .map(|&t| { + Arc::new( + Chunk::builder(entity.clone()) + .with_component_batch( + RowId::new(), + TimePoint::from_iter([(timeline, t)]), + ( + MyPoints::descriptor_points(), + &[point] as &dyn ComponentBatch, + ), + ) + .build() + .unwrap(), + ) + }) + .collect(); + + let manifest = re_log_encoding::RrdManifest::build_in_memory_from_chunks( + store_id.clone(), + chunks.iter().map(|c| &**c), + ) + .unwrap(); + + (chunks, manifest) + } + + #[test] + fn virtual_addition_row_count() { + let store_id = StoreId::random(re_log_types::StoreKind::Recording, "test"); + let mut store = ChunkStore::new(store_id.clone(), ChunkStoreConfig::ALL_DISABLED); + let mut meta = DataMetaPerTimeline::default(); + let mut manifest_index = RrdManifestIndex::default(); + + let entity: EntityPath = "entity".into(); + let tl = Timeline::new_sequence("frame"); + + let (_, rrd_manifest) = build_manifest_chunks(&entity, tl, &[10, 20, 30], &store_id); + + // Insert the manifest virtually. + let events = store.insert_rrd_manifest(rrd_manifest.clone()); + manifest_index + .append(rrd_manifest, store.entity_tree()) + .unwrap(); + meta.on_events(&manifest_index, &store, &events); + + // Virtual rows should be counted. 
+ assert_eq!(meta.row_count_for_timeline(tl.name()), 3); + } + + #[test] + fn virtual_then_physical_no_double_count() { + let store_id = StoreId::random(re_log_types::StoreKind::Recording, "test"); + let mut store = ChunkStore::new(store_id.clone(), ChunkStoreConfig::ALL_DISABLED); + let mut meta = DataMetaPerTimeline::default(); + let mut manifest_index = RrdManifestIndex::default(); + + let entity: EntityPath = "entity".into(); + let tl = Timeline::new_sequence("frame"); + + let (chunks, rrd_manifest) = build_manifest_chunks(&entity, tl, &[10, 20, 30], &store_id); + + // Load virtually first. + let events = store.insert_rrd_manifest(rrd_manifest.clone()); + manifest_index + .append(rrd_manifest, store.entity_tree()) + .unwrap(); + manifest_index.on_events(&store, &events); + meta.on_events(&manifest_index, &store, &events); + + assert_eq!(meta.row_count_for_timeline(tl.name()), 3); + + // Now load the same chunks physically. + for chunk in &chunks { + let events = store.insert_chunk(chunk).unwrap(); + manifest_index.on_events(&store, &events); + meta.on_events(&manifest_index, &store, &events); + } + + // Physical additions for chunks that are in the manifest should not be double-counted. + assert_eq!(meta.row_count_for_timeline(tl.name()), 3); + } + + #[test] + fn virtual_addition_multiple_entities() { + let store_id = StoreId::random(re_log_types::StoreKind::Recording, "test"); + let mut store = ChunkStore::new(store_id.clone(), ChunkStoreConfig::ALL_DISABLED); + let mut meta = DataMetaPerTimeline::default(); + let mut manifest_index = RrdManifestIndex::default(); + + let tl = Timeline::new_sequence("frame"); + let entity_a: EntityPath = "a".into(); + let entity_b: EntityPath = "b".into(); + + // Build one manifest that contains chunks for both entities. 
+ let point = MyPoint::new(1.0, 1.0); + let chunks: Vec> = [ + (entity_a.clone(), 10), + (entity_a.clone(), 20), + (entity_b.clone(), 30), + (entity_b.clone(), 40), + ] + .into_iter() + .map(|(e, t)| { + Arc::new( + Chunk::builder(e) + .with_component_batch( + RowId::new(), + TimePoint::from_iter([(tl, t)]), + ( + MyPoints::descriptor_points(), + &[point] as &dyn ComponentBatch, + ), + ) + .build() + .unwrap(), + ) + }) + .collect(); + + let rrd_manifest = re_log_encoding::RrdManifest::build_in_memory_from_chunks( + store_id.clone(), + chunks.iter().map(|c| &**c), + ) + .unwrap(); + + let events = store.insert_rrd_manifest(rrd_manifest.clone()); + manifest_index + .append(rrd_manifest, store.entity_tree()) + .unwrap(); + meta.on_events(&manifest_index, &store, &events); + + assert_eq!(meta.row_count_for_timeline(tl.name()), 4); + } +} diff --git a/crates/store/re_entity_db/src/entity_db.rs b/crates/store/re_entity_db/src/entity_db.rs index e6b44e39da8e..3824d0a26480 100644 --- a/crates/store/re_entity_db/src/entity_db.rs +++ b/crates/store/re_entity_db/src/entity_db.rs @@ -1,11 +1,10 @@ use std::{ collections::BTreeSet, - fmt::{Debug, Formatter}, + fmt::{Debug, Formatter, Write as _}, ops::Deref, sync::Arc, }; -use itertools::Itertools as _; use nohash_hasher::IntMap; use re_byte_size::{MemUsageNode, MemUsageTree, MemUsageTreeCapture, SizeBytes as _}; use re_chunk::{ @@ -28,9 +27,9 @@ use re_query::{ QueryCache, QueryCacheHandle, StorageEngine, StorageEngineArcReadGuard, StorageEngineReadGuard, }; +use crate::Error; use crate::ingestion_statistics::IngestionStatistics; use crate::rrd_manifest_index::RrdManifestIndex; -use crate::{Error, TimeHistogramPerTimeline}; // ---------------------------------------------------------------------------- @@ -72,10 +71,16 @@ pub enum RedapConnectionState { /// We are not connected to redap NotConnected, - DownloadingManifest, + /// We are downloading the manifest (no parts received yet). 
+ DownloadingFirstManifestPart, + /// Connected but manifest is missing or failed to download. MissingManifest, + /// We have some manifest data, but more parts are still arriving. + PartialManifest, + + /// The full manifest has been received and we are ready to fetch chunks. Ready, } @@ -139,8 +144,11 @@ pub struct EntityDb { /// In many places we just store the hashes, so we need a way to translate back. entity_path_from_hash: IntMap, - /// A time histogram of all entities, for every timeline. - time_histogram_per_timeline: crate::TimeHistogramPerTimeline, + /// Keeps track of meta per timeline. + /// + /// This includes: + /// - Row count. + data_meta_per_timeline: crate::data_meta_per_timeline::DataMetaPerTimeline, /// The [`StorageEngine`] that backs this [`EntityDb`]. /// @@ -156,12 +164,6 @@ pub struct EntityDb { /// Lazily calculated store_size_bytes: StoreSizeBytes, - /// How much RAM the whole application uses beyond the raw physical chunks in this recording. - /// - /// This is estimated by the viewer after a GC pass, when there is only one recording loaded. - /// Includes primary and secondary indices, (purged) caches, fonts, icons, and other overhead. 
- pub estimated_application_overhead_bytes: Option, - stats: IngestionStatistics, } @@ -172,7 +174,7 @@ impl Debug for EntityDb { .field("store_id", &self.store_id) .field("data_source", &self.data_source) .field("set_store_info", &self.set_store_info) - .finish() + .finish_non_exhaustive() } } @@ -216,19 +218,13 @@ impl EntityDb { latest_row_id: None, entity_paths: Default::default(), entity_path_from_hash: Default::default(), - time_histogram_per_timeline: Default::default(), + data_meta_per_timeline: Default::default(), storage_engine, store_size_bytes: StoreSizeBytes(Mutex::new(None)), - estimated_application_overhead_bytes: None, stats: IngestionStatistics::default(), } } - #[inline] - pub fn tree(&self) -> &crate::EntityTree { - &self.rrd_manifest_index.entity_tree - } - /// Formats the entity tree into a human-readable text representation with component schema information. pub fn format_with_components(&self) -> String { let mut text = String::new(); @@ -236,33 +232,31 @@ impl EntityDb { let storage_engine = self.storage_engine(); let store = storage_engine.store(); - self.tree().visit_children_recursively(|entity_path| { - if entity_path.is_root() { - return; - } - let depth = entity_path.len() - 1; - let indent = " ".repeat(depth); - text.push_str(&format!("{indent}{entity_path}\n")); - let Some(components) = store.all_components_for_entity_sorted(entity_path) else { - return; - }; - for component in components { - let component_indent = " ".repeat(depth + 1); - if let Some((component_type, datatype)) = - store.lookup_component_type(entity_path, component) - { - let name = component_type - .map_or_else(|| component.to_string(), |ct| ct.short_name().to_owned()); - text.push_str(&format!( - "{component_indent}{name}: {}\n", - re_arrow_util::format_data_type(&datatype) - )); - } else { - // Fallback to component identifier - text.push_str(&format!("{component_indent}{component}\n")); + store + .entity_tree() + .visit_children_recursively(|entity_path| { + if 
entity_path.is_root() { + return; } - } - }); + let depth = entity_path.len() - 1; + let indent = " ".repeat(depth); + writeln!(text, "{indent}{entity_path}").ok(); + let Some(components) = store.schema().all_components_for_entity(entity_path) else { + return; + }; + for &component in components { + let component_indent = " ".repeat(depth + 1); + if let Some((component_type, datatype)) = + store.schema().lookup_component_type(entity_path, component) + { + let name = component_type + .map_or_else(|| component.to_string(), |ct| ct.short_name().to_owned()); + writeln!(text, "{component_indent}{name}: {datatype}").ok(); + } else { + writeln!(text, "{component_indent}{component}").ok(); + } + } + }); text } @@ -335,7 +329,7 @@ impl EntityDb { &mut self.rrd_manifest_index } - fn redap_connection_state(&self) -> RedapConnectionState { + pub fn redap_connection_state(&self) -> RedapConnectionState { // TODO(RR-3670): Check that connection is healthy and pick the correct icon to show the user based on that let is_connected_to_redap = self .data_source @@ -344,9 +338,13 @@ impl EntityDb { if is_connected_to_redap { if self.rrd_manifest_index.has_manifest() { - RedapConnectionState::Ready + if self.rrd_manifest_index.is_manifest_complete() { + RedapConnectionState::Ready + } else { + RedapConnectionState::PartialManifest + } } else if self.num_physical_chunks() == 0 { - RedapConnectionState::DownloadingManifest + RedapConnectionState::DownloadingFirstManifestPart } else { // This handles the case where we tried and failed to download the manifest, // but managed to download the data anyhow. @@ -357,17 +355,27 @@ impl EntityDb { } } + /// True if this recording has chunks we're actively keeping in memory. + /// + /// Recordings with protected chunks should not be auto-closed during memory pressure, + /// since we'd immediately need to re-download them. 
+ pub fn has_protected_chunks(&self) -> bool { + self.rrd_manifest_index.has_protected_chunks() + } + /// Are we connected to redap, and can fetch missing chunks? pub fn can_fetch_chunks_from_redap(&self) -> bool { match self.redap_connection_state() { RedapConnectionState::NotConnected | RedapConnectionState::MissingManifest => false, - RedapConnectionState::DownloadingManifest | RedapConnectionState::Ready => true, + RedapConnectionState::DownloadingFirstManifestPart + | RedapConnectionState::PartialManifest + | RedapConnectionState::Ready => true, } } /// Are we currently in the process of downloading the RRD Manifest? - pub fn is_currently_downloading_manifest(&self) -> bool { - self.redap_connection_state() == RedapConnectionState::DownloadingManifest + pub fn is_downloading_first_part_of_manifest(&self) -> bool { + self.redap_connection_state() == RedapConnectionState::DownloadingFirstManifestPart } /// True if we're are currently waiting for necessary @@ -375,7 +383,8 @@ impl EntityDb { pub fn is_buffering(&self) -> bool { match self.redap_connection_state() { RedapConnectionState::NotConnected | RedapConnectionState::MissingManifest => false, - RedapConnectionState::DownloadingManifest => true, + RedapConnectionState::DownloadingFirstManifestPart + | RedapConnectionState::PartialManifest => true, RedapConnectionState::Ready => { if let Some(state) = self @@ -383,7 +392,7 @@ impl EntityDb { .chunk_prioritizer() .latest_result() { - !state.all_required_are_loaded + state.all_required_are_loaded != Some(true) } else { true // no prefetch done yet } @@ -497,7 +506,7 @@ impl EntityDb { pub fn timeline_type(&self, timeline_name: &TimelineName) -> TimeType { self.storage_engine() - .store() + .schema() .time_column_type(timeline_name) .unwrap_or_else(|| { if timeline_name == &TimelineName::log_time() { @@ -628,22 +637,46 @@ impl EntityDb { } pub fn timelines(&self) -> std::collections::BTreeMap { - self.storage_engine().store().timelines() - } - - /// When do we 
have data on each timeline? - pub fn timeline_histograms(&self) -> &TimeHistogramPerTimeline { - &self.time_histogram_per_timeline + self.storage_engine().schema().timelines() } /// Returns the time range of data on the given timeline, ignoring any static times. pub fn time_range_for(&self, timeline: &TimelineName) -> Option { - self.storage_engine().store().time_range(timeline) + self.storage_engine.read().store().time_range(timeline) + } + + /// Data time ranges for gap collapsing in the time panel. + /// + /// Only available for redap connections with manifests. + pub fn data_time_ranges_for(&self, timeline: &TimelineName) -> Option<&[AbsoluteTimeRange]> { + self.rrd_manifest_index.data_time_ranges_for(timeline) + } + + /// Returns the total number of temporal rows on the given timeline across all entities. + pub fn num_temporal_rows_on_timeline(&self, timeline: &TimelineName) -> u64 { + self.data_meta_per_timeline.row_count_for_timeline(timeline) } - /// Histogram of all events on the timeeline, of all entities. - pub fn time_histogram(&self, timeline: &TimelineName) -> Option<&crate::TimeHistogram> { - self.time_histogram_per_timeline.get(timeline) + /// Returns the next time with data on the given timeline, strictly after `after`. + pub fn next_time_on_timeline( + &self, + timeline: &TimelineName, + after: TimeInt, + ) -> Option { + self.storage_engine() + .store() + .next_time_on_timeline(timeline, after) + } + + /// Returns the previous time with data on the given timeline, strictly before `before`. + pub fn prev_time_on_timeline( + &self, + timeline: &TimelineName, + before: TimeInt, + ) -> Option { + self.storage_engine() + .store() + .prev_time_on_timeline(timeline, before) } /// Return the current `ChunkStoreGeneration`. This can be used to determine whether the @@ -684,7 +717,12 @@ impl EntityDb { /// Returns `true` also for entities higher up in the hierarchy. 
#[inline] pub fn is_known_entity(&self, entity_path: &EntityPath) -> bool { - self.tree().subtree(entity_path).is_some() + self.storage_engine + .read() + .store() + .entity_tree() + .subtree(entity_path) + .is_some() } /// If you log `world/points`, then that is a logged entity, but `world` is not, @@ -694,29 +732,32 @@ impl EntityDb { self.entity_path_from_hash.contains_key(&entity_path.hash()) } - pub fn add_rrd_manifest_message(&mut self, rrd_manifest: Arc) { + pub fn add_rrd_manifest_message( + &mut self, + rrd_manifest: Arc, + ) -> Vec { re_tracing::profile_function!(); - re_log::debug!("Received RrdManifest for {:?}", self.store_id()); - let event = self + let events = self .storage_engine .write() .store() .insert_rrd_manifest(rrd_manifest.clone()); - match event { - Ok(event) => { - self.on_store_events(&[event]); - } - Err(err) => { - re_log::error!("Failed to load RRD Manifest into store: {err}"); - } + self.on_store_events(&events); + + let engine = self.storage_engine.read(); + let entity_tree = engine.store().entity_tree(); + if let Err(err) = self.rrd_manifest_index.append(rrd_manifest, entity_tree) { + re_log::error!("Failed to append RRD manifest: {err}"); } - self.rrd_manifest_index.append(rrd_manifest); + events + } - self.time_histogram_per_timeline - .on_rrd_manifest(&self.rrd_manifest_index); + /// Mark the RRD manifest as complete (all parts have been received). + pub fn mark_rrd_manifest_complete(&mut self) { + self.rrd_manifest_index.set_manifest_complete(); } /// Insert new data into the store. @@ -816,33 +857,11 @@ impl EntityDb { .on_events(engine.store(), store_events); // Update our internal views by notifying them of resulting [`ChunkStoreEvent`]s. 
- self.time_histogram_per_timeline.on_events( - engine.store(), + self.data_meta_per_timeline.on_events( &self.rrd_manifest_index, + engine.store(), store_events, ); - self.rrd_manifest_index - .entity_tree - .on_store_additions(store_events.iter().filter_map(|e| e.to_addition())); - - let dels = store_events - .iter() - .filter_map(|e| e.to_deletion()) - .collect_vec(); - - // It is possible for writes to trigger deletions: specifically in the case of - // overwritten static data leading to dangling chunks. - let entity_paths_with_deletions = - dels.iter().map(|e| e.chunk.entity_path().clone()).collect(); - - { - re_tracing::profile_scope!("on_store_deletions"); - self.rrd_manifest_index.entity_tree.on_store_deletions( - &engine, - &entity_paths_with_deletions, - &dels, - ); - } } pub fn set_store_info(&mut self, store_info: SetStoreInfo) { @@ -935,6 +954,7 @@ impl EntityDb { &mut self, timeline: &TimelineName, drop_range: AbsoluteTimeRange, + reason: re_chunk_store::ChunkDeletionReason, ) -> Vec { re_tracing::profile_function!(); @@ -942,7 +962,7 @@ impl EntityDb { .storage_engine .write() .store() - .drop_time_range_deep(timeline, drop_range); + .drop_time_range_deep(timeline, drop_range, reason); self.on_store_events(&store_events); @@ -972,10 +992,14 @@ impl EntityDb { let mut to_drop = vec![entity_path.clone()]; - if let Some(tree) = self.tree().subtree(entity_path) { - tree.visit_children_recursively(|path| { - to_drop.push(path.clone()); - }); + { + let storage_engine = self.storage_engine(); + let tree = storage_engine.store().entity_tree(); + if let Some(subtree) = tree.subtree(entity_path) { + subtree.visit_children_recursively(|path| { + to_drop.push(path.clone()); + }); + } } for entity_path in to_drop { @@ -1105,13 +1129,13 @@ impl EntityDb { /// /// This excludes temporal data. 
pub fn subtree_stats_static( - &self, engine: &StorageEngineReadGuard<'_>, entity_path: &EntityPath, ) -> ChunkStoreChunkStats { re_tracing::profile_function!(); - let Some(subtree) = self.tree().subtree(entity_path) else { + let entity_tree = engine.store().entity_tree(); + let Some(subtree) = entity_tree.subtree(entity_path) else { return Default::default(); }; @@ -1127,14 +1151,14 @@ impl EntityDb { /// /// This excludes static data. pub fn subtree_stats_on_timeline( - &self, engine: &StorageEngineReadGuard<'_>, entity_path: &EntityPath, timeline: &TimelineName, ) -> ChunkStoreChunkStats { re_tracing::profile_function!(); - let Some(subtree) = self.tree().subtree(entity_path) else { + let entity_tree = engine.store().entity_tree(); + let Some(subtree) = entity_tree.subtree(entity_path) else { return Default::default(); }; @@ -1157,7 +1181,8 @@ impl EntityDb { ) -> bool { re_tracing::profile_function!(); - let Some(subtree) = self.tree().subtree(entity_path) else { + let entity_tree = engine.store().entity_tree(); + let Some(subtree) = entity_tree.subtree(entity_path) else { return false; }; @@ -1183,7 +1208,8 @@ impl EntityDb { ) -> bool { re_tracing::profile_function!(); - let Some(subtree) = self.tree().subtree(entity_path) else { + let entity_tree = engine.store().entity_tree(); + let Some(subtree) = entity_tree.subtree(entity_path) else { return false; }; @@ -1258,10 +1284,9 @@ impl re_byte_size::SizeBytes for EntityDb { latest_row_id: _, entity_paths, entity_path_from_hash, - time_histogram_per_timeline, + data_meta_per_timeline, storage_engine, store_size_bytes, - estimated_application_overhead_bytes: _, stats: _, } = self; @@ -1282,7 +1307,7 @@ impl re_byte_size::SizeBytes for EntityDb { + set_store_info.heap_size_bytes() + entity_paths.heap_size_bytes() + entity_path_from_hash.heap_size_bytes() - + time_histogram_per_timeline.heap_size_bytes() + + data_meta_per_timeline.heap_size_bytes() + storage_engine_size } } @@ -1293,7 +1318,7 @@ impl 
MemUsageTreeCapture for EntityDb { let Self { rrd_manifest_index, - time_histogram_per_timeline, + data_meta_per_timeline, storage_engine, entity_paths, entity_path_from_hash, @@ -1306,7 +1331,6 @@ impl MemUsageTreeCapture for EntityDb { last_modified_at: _, latest_row_id: _, store_size_bytes: _, - estimated_application_overhead_bytes: _, stats: _, } = self; @@ -1328,8 +1352,8 @@ impl MemUsageTreeCapture for EntityDb { ); node.add( - "time_histogram_per_timeline", - time_histogram_per_timeline.capture_mem_usage_tree(), + "data_meta_per_timeline", + data_meta_per_timeline.total_size_bytes(), ); node.add( "rrd_manifest_index", @@ -1377,10 +1401,12 @@ mod tests { db.add_chunk(&Arc::new(chunk))?; } - assert_eq!( - db.format_with_components(), - "/parent\n /parent/child1\n /parent/child1/grandchild\n example.MyPoint: Struct[2]\n" - ); + insta::assert_snapshot!(db.format_with_components(), @r###" + /parent + /parent/child1 + /parent/child1/grandchild + example.MyPoint: Struct("x": non-null Float32, "y": non-null Float32) + "###); Ok(()) } diff --git a/crates/store/re_entity_db/src/entity_tree.rs b/crates/store/re_entity_db/src/entity_tree.rs deleted file mode 100644 index b7c325089c50..000000000000 --- a/crates/store/re_entity_db/src/entity_tree.rs +++ /dev/null @@ -1,303 +0,0 @@ -use std::collections::BTreeMap; - -use nohash_hasher::IntSet; -use re_chunk_store::{ - ChunkStoreDiffAddition, ChunkStoreDiffDeletion, ChunkStoreEvent, ChunkStoreSubscriber, -}; -use re_log_types::{EntityPath, EntityPathPart}; -use re_query::StorageEngineReadGuard; - -// ---------------------------------------------------------------------------- - -/// A recursive, manually updated [`ChunkStoreSubscriber`] that maintains the entity hierarchy. -/// -/// The tree contains a list of subtrees, and so on recursively. -#[derive(Debug, Clone)] -pub struct EntityTree { - /// Full path prefix to the root of this (sub)tree. - pub path: EntityPath, - - /// Direct descendants of this (sub)tree. 
- pub children: BTreeMap, -} - -impl Default for EntityTree { - fn default() -> Self { - Self::root() - } -} - -// NOTE: This is only to let people know that this is in fact a [`ChunkStoreSubscriber`], so they A) don't try -// to implement it on their own and B) don't try to register it. -impl ChunkStoreSubscriber for EntityTree { - fn name(&self) -> String { - "rerun.store_subscribers.EntityTree".into() - } - - fn as_any(&self) -> &dyn std::any::Any { - self - } - - fn as_any_mut(&mut self) -> &mut dyn std::any::Any { - self - } - - #[expect(clippy::unimplemented)] - fn on_events(&mut self, _events: &[ChunkStoreEvent]) { - unimplemented!( - r"EntityTree view is maintained manually, see `EntityTree::on_store_{{additions|deletions}}`" - ); - } -} - -impl EntityTree { - pub fn root() -> Self { - Self::new(EntityPath::root()) - } - - pub fn new(path: EntityPath) -> Self { - Self { - path, - children: Default::default(), - } - } - - /// Has no child entities. - pub fn is_leaf(&self) -> bool { - self.children.is_empty() - } - - /// Returns `true` if this entity has no children and no data. - /// - /// Checking for the absence of data is neither costly nor totally free: do it a few hundreds or - /// thousands times a frame and it will absolutely kill framerate. - /// Don't blindly call this on every existing entity every frame: use [`ChunkStoreEvent`]s to make - /// sure anything changed at all first. - pub fn check_is_empty(&self, engine: &StorageEngineReadGuard<'_>) -> bool { - self.children.is_empty() && !engine.store().entity_has_physical_data(&self.path) - } - - /// Updates the [`EntityTree`] by applying a batch of [`ChunkStoreDiffAddition`]s, adding any - /// new entities to the tree. 
- pub fn on_store_additions<'a>( - &mut self, - events: impl Iterator, - ) { - re_tracing::profile_function!(); - for event in events { - self.on_store_addition(event); - } - } - - fn on_store_addition(&mut self, event: &ChunkStoreDiffAddition) { - self.on_new_entity(event.delta_chunk().entity_path()); - } - - pub fn on_new_entity(&mut self, entity_path: &EntityPath) { - re_tracing::profile_function!(); - - // Book-keeping for each level in the hierarchy: - let mut tree = self; - for (i, part) in entity_path.iter().enumerate() { - tree = tree - .children - .entry(part.clone()) - .or_insert_with(|| Self::new(entity_path.as_slice()[..=i].into())); - } - } - - /// Updates the [`EntityTree`] by removing any entities which have no data and no children. - pub fn on_store_deletions( - &mut self, - engine: &StorageEngineReadGuard<'_>, - entity_paths_with_deletions: &IntSet, - events: &[&ChunkStoreDiffDeletion], - ) { - // NOTE: no re_tracing here because this is a recursive function - if entity_paths_with_deletions.is_empty() { - return; // early-out - } - - // We don't actually use the events for anything, we just want to - // have a direct dependency on the chunk store which must have - // produced them by the time this function was called. - let _ = events; - - self.children.retain(|_, entity| { - // this is placed first, because we'll only know if the child entity is empty after telling it to clear itself. - entity.on_store_deletions(engine, entity_paths_with_deletions, events); - - let has_children = || !entity.children.is_empty(); - // Checking for lack of data is not free, so make sure there is any reason to believe - // that any relevant data has changed first. 
- let has_recursive_deletion_events = || { - entity_paths_with_deletions - .iter() - .any(|removed_entity_path| removed_entity_path.starts_with(&entity.path)) - }; - let has_data = || engine.store().entity_has_physical_data(&entity.path); - - let should_be_removed = - !has_children() && (has_recursive_deletion_events() && !has_data()); - - !should_be_removed - }); - } - - pub fn subtree(&self, path: &EntityPath) -> Option<&Self> { - fn subtree_recursive<'tree>( - this: &'tree EntityTree, - path: &[EntityPathPart], - ) -> Option<&'tree EntityTree> { - match path { - [] => Some(this), - [first, rest @ ..] => { - let child = this.children.get(first)?; - subtree_recursive(child, rest) - } - } - } - - subtree_recursive(self, path.as_slice()) - } - - /// Invokes visitor for `self` and all children recursively. - pub fn visit_children_recursively(&self, mut visitor: impl FnMut(&EntityPath)) { - fn visit(this: &EntityTree, visitor: &mut impl FnMut(&EntityPath)) { - visitor(&this.path); - for child in this.children.values() { - visit(child, visitor); - } - } - - visit(self, &mut visitor); - } - - /// Invokes the `predicate` for `self` and all children recursively, - /// returning the _first_ entity for which the `predicate` returns `true`. - /// - /// Note that this function has early return semantics, meaning if multiple - /// entities would return `true`, only the first is returned. - /// The entities are yielded in order of their entity paths. 
- pub fn find_first_child_recursive( - &self, - mut predicate: impl FnMut(&EntityPath) -> bool, - ) -> Option<&Self> { - fn visit<'a>( - this: &'a EntityTree, - predicate: &mut impl FnMut(&EntityPath) -> bool, - ) -> Option<&'a EntityTree> { - if predicate(&this.path) { - return Some(this); - } - - for child in this.children.values() { - if let Some(subtree) = visit(child, predicate) { - // Early return - return Some(subtree); - } - } - - None - } - - visit(self, &mut predicate) - } -} - -impl re_byte_size::SizeBytes for EntityTree { - fn heap_size_bytes(&self) -> u64 { - let Self { path, children } = self; - path.heap_size_bytes() + children.heap_size_bytes() - } -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use re_chunk::{Chunk, RowId}; - use re_log_types::example_components::{MyPoint, MyPoints}; - use re_log_types::{EntityPath, StoreId, TimePoint, Timeline}; - - use crate::EntityDb; - - #[test] - fn deleting_descendants() -> anyhow::Result<()> { - re_log::setup_logging(); - - let mut db = EntityDb::new(StoreId::random( - re_log_types::StoreKind::Recording, - "test_app", - )); - - let timeline_frame = Timeline::new_sequence("frame"); - - let entity_path_parent: EntityPath = "parent".into(); - let entity_path_child: EntityPath = "parent/child1".into(); - let entity_path_grandchild: EntityPath = "parent/child1/grandchild".into(); - - assert!(db.tree().check_is_empty(&db.storage_engine())); - - { - let row_id = RowId::new(); - let timepoint = TimePoint::from_iter([(timeline_frame, 10)]); - let point = MyPoint::new(1.0, 2.0); - let chunk = Chunk::builder(entity_path_grandchild.clone()) - .with_component_batches( - row_id, - timepoint, - [(MyPoints::descriptor_points(), &[point] as _)], - ) - .build()?; - - db.add_chunk(&Arc::new(chunk))?; - } - - { - let parent = db - .tree() - .find_first_child_recursive(|entity_path| *entity_path == entity_path_parent) - .unwrap(); - let child = db - .tree() - .find_first_child_recursive(|entity_path| *entity_path == 
entity_path_child) - .unwrap(); - let grandchild = db - .tree() - .find_first_child_recursive(|entity_path| *entity_path == entity_path_grandchild) - .unwrap(); - - assert_eq!(1, parent.children.len()); - assert_eq!(1, child.children.len()); - assert_eq!(0, grandchild.children.len()); - - assert!(!db.tree().check_is_empty(&db.storage_engine())); - assert!(!parent.check_is_empty(&db.storage_engine())); - assert!(!child.check_is_empty(&db.storage_engine())); - assert!(!grandchild.check_is_empty(&db.storage_engine())); - } - - let store_events = db.gc(&re_chunk_store::GarbageCollectionOptions::gc_everything()); - db.on_store_events(&store_events); - - { - let parent = db - .tree() - .find_first_child_recursive(|entity_path| *entity_path == entity_path_parent); - let child = db - .tree() - .find_first_child_recursive(|entity_path| *entity_path == entity_path_child); - let grandchild = db - .tree() - .find_first_child_recursive(|entity_path| *entity_path == entity_path_grandchild); - - assert!(db.tree().check_is_empty(&db.storage_engine())); - assert!(parent.is_none()); - assert!(child.is_none()); - assert!(grandchild.is_none()); - } - - Ok(()) - } -} diff --git a/crates/store/re_entity_db/src/instance_path.rs b/crates/store/re_entity_db/src/instance_path.rs index 4b0e96fbe5dd..2472d6adcab7 100644 --- a/crates/store/re_entity_db/src/instance_path.rs +++ b/crates/store/re_entity_db/src/instance_path.rs @@ -9,8 +9,9 @@ use crate::{EntityDb, VersionedInstancePath, VersionedInstancePathHash}; // ---------------------------------------------------------------------------- /// The path to either a specific instance of an entity, or the whole entity. 
-#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +#[derive( + Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Deserialize, serde::Serialize, +)] pub struct InstancePath { pub entity_path: EntityPath, diff --git a/crates/store/re_entity_db/src/lib.rs b/crates/store/re_entity_db/src/lib.rs index 53e8ce6cd878..2a9ee0e16e73 100644 --- a/crates/store/re_entity_db/src/lib.rs +++ b/crates/store/re_entity_db/src/lib.rs @@ -5,29 +5,29 @@ //! mod chunk_requests; +mod data_meta_per_timeline; pub mod entity_db; -pub mod entity_tree; mod ingestion_statistics; mod instance_path; mod rrd_manifest_index; mod sorted_range_map; mod store_bundle; -mod time_histogram_per_timeline; mod versioned_instance_path; #[doc(no_inline)] pub use re_log_types::{EntityPath, EntityPathPart, TimeInt, Timeline}; pub use self::entity_db::{DEFAULT_GC_TIME_BUDGET, EntityDb}; -pub use self::entity_tree::EntityTree; pub use self::ingestion_statistics::{IngestionStatistics, LatencySnapshot, LatencyStats}; pub use self::instance_path::{InstancePath, InstancePathHash}; pub use self::rrd_manifest_index::{ - ChunkPrefetchOptions, ChunkPromise, ChunkRequests, RequestInfo, RrdManifestIndex, + ChunkFetcher, ChunkPrefetchOptions, ChunkPromise, ChunkRequests, FetchStage, PrefetchError, + PrefetchTimeCursor, PrioritizationState, ProtectedChunks, RemainingByteBudget, RequestInfo, + RrdManifestIndex, }; pub use self::store_bundle::{StoreBundle, StoreLoadError}; -pub use self::time_histogram_per_timeline::{TimeHistogram, TimeHistogramPerTimeline}; pub use self::versioned_instance_path::{VersionedInstancePath, VersionedInstancePathHash}; +pub use re_chunk_store::EntityTree; pub mod external { pub use {re_chunk_store, re_query}; diff --git a/crates/store/re_entity_db/src/rrd_manifest_index.rs b/crates/store/re_entity_db/src/rrd_manifest_index.rs index eb2ff53ba671..f5dcff9b6ff8 100644 --- 
a/crates/store/re_entity_db/src/rrd_manifest_index.rs +++ b/crates/store/re_entity_db/src/rrd_manifest_index.rs @@ -7,19 +7,20 @@ use nohash_hasher::IntSet; use re_byte_size::{MemUsageTree, MemUsageTreeCapture}; use re_chunk::{ChunkId, EntityPath, Timeline, TimelineName}; use re_chunk_store::{ChunkStore, ChunkStoreDiff, ChunkStoreEvent}; -use re_log::debug_assert; -use re_log_encoding::RrdManifest; -use re_log_types::{AbsoluteTimeRange, StoreKind, TimelinePoint}; -use re_mutex::Mutex; +use re_log_encoding::{CodecResult, RrdManifest}; +use re_log_types::{AbsoluteTimeRange, StoreKind}; -use crate::chunk_requests::ChunkBatchRequest; pub use crate::chunk_requests::{ChunkPromise, ChunkRequests, RequestInfo}; mod chunk_prioritizer; +mod collapsed_time_ranges; mod sorted_temporal_chunks; mod time_range_merger; -pub use chunk_prioritizer::{ChunkPrefetchOptions, ChunkPrioritizer, PrefetchError}; +pub use chunk_prioritizer::{ + ChunkFetcher, ChunkPrefetchOptions, ChunkPrioritizer, FetchStage, PrefetchError, + PrefetchTimeCursor, PrioritizationState, ProtectedChunks, RemainingByteBudget, +}; pub use sorted_temporal_chunks::ChunkCountInfo; use sorted_temporal_chunks::SortedTemporalChunks; @@ -137,16 +138,18 @@ impl re_byte_size::SizeBytes for LoadedRanges { /// A secondary index that keeps track of which chunks have been loaded into memory. /// /// This is constructed from an [`RrdManifest`], which is what the server sends to the client/viewer. -// -// TODO(RR-3383): support multiple manifests per index. +/// The manifest may be received in parts and concatenated together. #[derive(Default)] #[cfg_attr(feature = "testing", derive(Clone))] pub struct RrdManifestIndex { - /// The raw manifest. + /// The raw manifest (accumulated from possibly multiple parts). /// /// This is known ahead-of-time for _some_ data sources. manifest: Option>, + /// True once all parts of the manifest have been received. 
+ manifest_complete: bool, + /// These are the chunks known to exist in the data source (e.g. remote server). /// /// The chunk store may split and/or merge root chunks, producing _derived_ chunks. @@ -168,7 +171,10 @@ pub struct RrdManifestIndex { /// Full time range per timeline timelines: BTreeMap, - pub entity_tree: crate::EntityTree, + /// Cached data time ranges per timeline, used for gap collapsing in the time panel. + /// Computed from chunk time ranges when the manifest is complete. + data_time_ranges: BTreeMap>, + entity_has_static_data: IntSet, full_uncompressed_size: u64, @@ -181,33 +187,36 @@ impl std::fmt::Debug for RrdManifestIndex { } impl RrdManifestIndex { - pub fn append(&mut self, manifest: Arc) { + pub fn append( + &mut self, + delta: Arc, + entity_tree: &re_chunk_store::EntityTree, + ) -> CodecResult<()> { re_tracing::profile_function!(); - if self.manifest.is_some() { - re_log::warn!( - "Received a second RRD manifest schema for the same recording. This is not yet supported." 
- ); - } + self.update_timeline_stats(&delta); + self.update_entity_static_data(&delta); + self.chunk_prioritizer.on_rrd_manifest(&delta); - self.full_uncompressed_size = manifest.col_chunk_byte_size_uncompressed().iter().sum(); + self.full_uncompressed_size += delta.col_chunk_byte_size_uncompressed().iter().sum::(); - self.update_timeline_stats(&manifest); - self.update_entity_tree(&manifest); - self.update_entity_static_data(&manifest); - self.chunk_prioritizer.on_rrd_manifest(&manifest); + self.loaded_ranges = None; // invalidate and recompute - self.sorted_chunks - .update(&self.entity_tree, manifest.temporal_map()); + let row_offset = self + .manifest + .as_ref() + .map_or(0, |manifest| manifest.chunk_fetcher_rb().num_rows()); - for (row_idx, (&root_chunk_id, entity_path)) in - izip!(manifest.col_chunk_ids(), manifest.col_chunk_entity_path()).enumerate() + for (delta_row_idx, (&root_chunk_id, entity_path)) in + izip!(delta.col_chunk_ids(), delta.col_chunk_entity_path()).enumerate() { - self.root_chunks - .insert(root_chunk_id, RootChunkInfo::new(entity_path, row_idx)); + self.root_chunks.insert( + root_chunk_id, + RootChunkInfo::new(entity_path, row_offset + delta_row_idx), + ); } - for timelines in manifest.temporal_map().values() { + for timelines in delta.temporal_map().values() { for (&timeline, comps) in timelines { for chunks in comps.values() { for (&chunk_id, entry) in chunks { @@ -232,7 +241,18 @@ impl RrdManifestIndex { } } - self.manifest = Some(manifest); + let new_full_manifest = if let Some(existing) = self.manifest.take() { + Arc::new(RrdManifest::concat(&[&existing, &delta])?) + } else { + delta + }; + + self.sorted_chunks = + SortedTemporalChunks::new(entity_tree, new_full_manifest.temporal_map()); + + self.manifest = Some(new_full_manifest); + + Ok(()) } /// Iterate over all chunks in the manifest. 
@@ -267,16 +287,6 @@ impl RrdManifestIndex { } } - fn update_entity_tree(&mut self, manifest: &RrdManifest) { - for entity in manifest - .static_map() - .keys() - .chain(manifest.temporal_map().keys()) - { - self.entity_tree.on_new_entity(entity); - } - } - fn update_entity_static_data(&mut self, manifest: &RrdManifest) { for entity in manifest.static_map().keys() { self.entity_has_static_data.insert(entity.clone()); @@ -384,11 +394,35 @@ impl RrdManifestIndex { } /// False for recordings streamed from SDK via proxy + /// + /// This is true as soon as the first piece of the manifest is available. pub fn has_manifest(&self) -> bool { self.manifest.is_some() } - /// The full manifest, if known. + /// Have all parts of the manifest been received? + pub fn is_manifest_complete(&self) -> bool { + self.manifest_complete + } + + /// Mark the manifest as complete (all parts have been received). + pub fn set_manifest_complete(&mut self) { + self.manifest_complete = true; + + let num_root_chunks = self.root_chunks.len(); + if 25_000 < num_root_chunks { + re_log::debug_warn!( + "There are {} root chunks in this recording. Consider running `rerun rrd optimize` on the original.", + re_format::format_uint(num_root_chunks) + ); + } + + self.data_time_ranges = collapsed_time_ranges::compute_data_time_ranges(&self.root_chunks); + } + + /// The manifest as it currently stands. + /// + /// More pieces of it may still arrive unless [`Self::is_manifest_complete`] is true. 
pub fn manifest(&self) -> Option<&RrdManifest> { self.manifest.as_deref() } @@ -422,12 +456,14 @@ impl RrdManifestIndex { self.mark_roots_as(store, &del.chunk.id(), LoadState::Unloaded); } - ChunkStoreDiff::VirtualAddition(_) => {} + ChunkStoreDiff::VirtualAddition(_) | ChunkStoreDiff::SchemaAddition(_) => {} } } } fn mark_roots_as(&mut self, store: &ChunkStore, chunk_id: &ChunkId, new_state: LoadState) { + re_tracing::profile_function!(); + let store_kind = store.id().kind(); let root_chunk_ids = store.find_root_chunks(chunk_id); @@ -438,6 +474,9 @@ impl RrdManifestIndex { "Added chunk that was not part of the chunk index", ); } else { + // Track which timelines had a large chunk transition to loaded + let mut timelines_to_recalculate: Vec = Vec::new(); + for chunk_id in root_chunk_ids { if let Some(chunk_info) = self.root_chunks.get_mut(&chunk_id) { let old_state = chunk_info.state; @@ -464,6 +503,16 @@ impl RrdManifestIndex { } } } + + // When a large chunk gets loaded, recalculate data ranges for its timelines + if new_state == LoadState::FullyLoaded && old_state != LoadState::FullyLoaded { + timelines_to_recalculate.extend( + collapsed_time_ranges::should_recalculate_for_chunk( + chunk_info, + &self.timelines, + ), + ); + } } else { warn_when_editing_recording( store_kind, @@ -471,9 +520,27 @@ impl RrdManifestIndex { ); } } + + for timeline_name in timelines_to_recalculate { + if let Some(ranges) = collapsed_time_ranges::calculate_data_ranges_for_timeline( + &self.root_chunks, + &self.timelines, + store, + &timeline_name, + ) { + self.data_time_ranges.insert(timeline_name, ranges); + } + } } } + /// Data time ranges for the given timeline, used for gap collapsing in the time panel. + /// + /// Returns `None` if the manifest is not yet complete or if there are no ranges. 
+ pub fn data_time_ranges_for(&self, timeline: &TimelineName) -> Option<&[AbsoluteTimeRange]> { + self.data_time_ranges.get(timeline).map(|v| v.as_slice()) + } + /// When do we have data on this timeline? pub fn timeline_range(&self, timeline: &TimelineName) -> Option { self.timelines.get(timeline).copied() @@ -516,67 +583,80 @@ impl RrdManifestIndex { &mut self, store: &ChunkStore, options: &ChunkPrefetchOptions, - time_cursor: Option, + time_cursor: Option, + budget: &mut RemainingByteBudget, load_chunks: &dyn Fn(RecordBatch) -> ChunkPromise, ) -> Result<(), PrefetchError> { re_tracing::profile_function!(); - let used_and_missing = store.take_tracked_chunk_ids(); // Note: this mutates the store (kind of). - - if let Some(manifest) = &self.manifest { - let to_load = self.chunk_prioritizer.prioritize_and_prefetch( - store, - &used_and_missing, - options, - time_cursor, - manifest, - &self.root_chunks, - )?; - - // Start loading all batches we prepared: - for (rb, batch_info) in to_load { - for root_chunk_id in &batch_info.root_chunk_ids { - if let Some(root_chunk) = self.root_chunks.get_mut(root_chunk_id) { - root_chunk.state = LoadState::InTransit; - } - } + let Some(mut fetcher) = self.prepare_chunk_fetcher(store, options, time_cursor, budget) + else { + return Ok(()); + }; - let promise = load_chunks(rb); - let batch = ChunkBatchRequest { - promise: Mutex::new(Some(promise)), - info: batch_info.into(), - }; - self.chunk_prioritizer.chunk_requests_mut().add(batch); - } + fetcher.fetch(budget, options.max_fetch_stage)?; - if let Some(time_cursor) = time_cursor { - self.update_loaded_ranges(time_cursor.name); - } + let res = fetcher.finish(load_chunks)?; - // Sanity checking: - if let Some(state) = self.chunk_prioritizer.latest_result() - && state.all_chunks_loaded_or_in_transit() - { - for (root_chunk_id, chunk_info) in &self.root_chunks { - debug_assert!( - chunk_info.state != LoadState::Unloaded, - "All root chunks should be either loading or already loaded" - 
); - debug_assert!( - self.chunk_prioritizer - .protected_chunks() - .roots - .contains(root_chunk_id) - ); - } + self.handle_fetch_result(res); + + Ok(()) + } + + pub fn handle_fetch_result(&mut self, res: chunk_prioritizer::ChunkFetchResult) { + for chunk_id in res.new_in_transit_chunks { + if let Some(chunk) = self.root_chunks.get_mut(&chunk_id) { + chunk.state = LoadState::InTransit; } + } - Ok(()) - } else { - Err(PrefetchError::NoManifest) + if let Some(time_cursor) = res.time_cursor { + self.update_loaded_ranges(*time_cursor.timeline().name()); } } + /// Handle initial chunk prioritization and build a [`ChunkFetcher`]. + /// + /// This should be called once per frame per recording, because it + /// clears tracked missing & used chunks from the chunk store, so that can be populated again next frame. + /// + /// Subtracts already loaded physical chunks from the memory budget. + /// + /// Then call [`ChunkFetcher::fetch`] to actually fetch chunks, + /// and [`ChunkFetcher::finish`] when done. + pub fn prepare_chunk_fetcher<'a>( + &'a mut self, + store: &'a ChunkStore, + options: &ChunkPrefetchOptions, + time_cursor: Option, + budget: &mut RemainingByteBudget, + ) -> Option> { + let manifest = self.manifest.as_ref()?; + Some(self.chunk_prioritizer.prepare_chunk_fetcher( + store, + manifest, + options, + time_cursor.map(|mut time_cursor| { + if let Some(loop_range) = time_cursor.loop_range + && let Some(timeline_range) = self.timeline_range(time_cursor.name()) + { + time_cursor.loop_range = loop_range.intersection(timeline_range); + } + + time_cursor + }), + &self.root_chunks, + budget, + )) + } + + /// True if there are any protected chunks (chunks we're keeping in memory). + /// + /// Recordings with protected chunks should not be auto-closed. + pub fn has_protected_chunks(&self) -> bool { + !self.chunk_prioritizer.protected_chunks().roots.is_empty() + } + /// Creates an iterator of time ranges which are loaded on a specific timeline. 
/// /// The ranges are guaranteed to be ordered and non-overlapping. @@ -607,23 +687,32 @@ impl RrdManifestIndex { ) -> impl Iterator { re_tracing::profile_function!(); - fn iterate_unloaded<'a>( - index: &RrdManifestIndex, - chunks: &'a [ChunkCountInfo], - ) -> impl Iterator { - chunks - .iter() - .filter(|info| index.is_chunk_unloaded(&info.id)) - } + self.temporal_entries_for(timeline, entity, component) + .iter() + .filter(|info| self.is_chunk_unloaded(&info.id)) + } + + /// If `component` is some, this returns all temporal entries for that specific + /// component on the given timeline. + /// + /// If not, this returns all temporal entries for `entity`'s components and its + /// descendants' unloaded temporal entries. + pub fn temporal_entries_for( + &self, + timeline: &re_chunk::TimelineName, + entity: &re_chunk::EntityPath, + component: Option, + ) -> &[ChunkCountInfo] { + re_tracing::profile_function!(); let Some(entry) = self.sorted_chunks.get(timeline, &entity.hash()) else { - return iterate_unloaded(self, &[]); + return &[]; }; if let Some(component) = component { - iterate_unloaded(self, entry.component_chunks(&component)) + entry.component_chunks(&component) } else { - iterate_unloaded(self, entry.per_entity()) + entry.per_entity() } } @@ -663,24 +752,25 @@ impl re_byte_size::SizeBytes for RrdManifestIndex { let Self { entity_has_static_data, - entity_tree, manifest, sorted_chunks, loaded_ranges, root_chunks: virtual_chunks, chunk_prioritizer, timelines, + data_time_ranges, full_uncompressed_size: _, + manifest_complete: _, } = self; entity_has_static_data.heap_size_bytes() - + entity_tree.heap_size_bytes() + manifest.heap_size_bytes() + sorted_chunks.heap_size_bytes() + loaded_ranges.heap_size_bytes() + virtual_chunks.heap_size_bytes() + chunk_prioritizer.heap_size_bytes() + timelines.heap_size_bytes() + + data_time_ranges.heap_size_bytes() } } @@ -692,14 +782,15 @@ impl MemUsageTreeCapture for RrdManifestIndex { let Self { entity_has_static_data, - 
entity_tree, sorted_chunks, loaded_ranges, manifest, root_chunks: virtual_chunks, chunk_prioritizer, timelines, + data_time_ranges: _, full_uncompressed_size: _, + manifest_complete: _, } = self; let mut node = re_byte_size::MemUsageNode::new(); @@ -708,7 +799,6 @@ impl MemUsageTreeCapture for RrdManifestIndex { "entity_has_static_data", entity_has_static_data.total_size_bytes(), ); - node.add("entity_tree", entity_tree.total_size_bytes()); node.add("sorted_chunks", sorted_chunks.total_size_bytes()); node.add("loaded_ranges", loaded_ranges.total_size_bytes()); node.add("manifest", manifest.total_size_bytes()); diff --git a/crates/store/re_entity_db/src/rrd_manifest_index/chunk_prioritizer.rs b/crates/store/re_entity_db/src/rrd_manifest_index/chunk_prioritizer.rs index b1127125b51b..bc1e67173af4 100644 --- a/crates/store/re_entity_db/src/rrd_manifest_index/chunk_prioritizer.rs +++ b/crates/store/re_entity_db/src/rrd_manifest_index/chunk_prioritizer.rs @@ -1,20 +1,21 @@ use std::collections::{BTreeMap, BTreeSet}; use std::ops::RangeInclusive; +use std::time::Duration; use ahash::{HashMap, HashSet}; use arrow::array::RecordBatch; -use itertools::chain; use re_byte_size::SizeBytes as _; use re_chunk::{Chunk, ChunkId, ComponentIdentifier, TimeInt, Timeline, TimelineName}; use re_chunk_store::{ChunkStore, QueriedChunkIdTracker}; use re_log::debug_assert; use re_log_encoding::RrdManifest; use re_log_types::{AbsoluteTimeRange, EntityPathHash, TimelinePoint}; +use re_mutex::Mutex; use crate::{ chunk_requests::{ChunkRequests, RequestInfo}, rrd_manifest_index::{LoadState, RootChunkInfo}, - sorted_range_map::SortedRangeMap, + sorted_range_map::{OverlapIterState, SortedRangeMap}, }; #[derive(Clone, Copy, Default)] @@ -31,8 +32,10 @@ pub struct PrioritizationState { /// Are all required chunks fully loaded? /// - /// If true, there are no missing chunks. - pub all_required_are_loaded: bool, + /// `None` means we haven't run a fetch yet, so we don't know. 
+ /// `Some(true)` means no required chunk was found to be missing. + /// `Some(false)` means at least one required chunk is missing or in transit. + pub all_required_are_loaded: Option, } impl PrioritizationState { @@ -62,12 +65,15 @@ pub enum PrefetchError { /// How to calculate which chunks to prefetch. #[derive(Clone, Debug, PartialEq, Eq)] pub struct ChunkPrefetchOptions { + /// Only prefetch chunks up to (and including) this stage. + /// + /// Useful for debugging and for users who want to limit how aggressively + /// we prefetch data ahead of what is strictly needed. + pub max_fetch_stage: FetchStage, + /// Batch together requests until we reach this size. pub max_on_wire_bytes_per_batch: u64, - /// Total budget for all physical chunks. - pub total_uncompressed_byte_budget: u64, - /// Maximum number of bytes in transit at once. pub max_bytes_on_wire_at_once: u64, } @@ -75,7 +81,7 @@ pub struct ChunkPrefetchOptions { impl Default for ChunkPrefetchOptions { fn default() -> Self { Self { - total_uncompressed_byte_budget: u64::MAX, + max_fetch_stage: FetchStage::default(), // Batch small chunks together. max_on_wire_bytes_per_batch: 256 * 1024, @@ -97,24 +103,6 @@ struct HighPrioChunks { temporal_chunks: BTreeMap>, } -impl HighPrioChunks { - /// All static chunks, plus all temporal chunks on this timeline before the given time. - /// With chunks closest to the time cursor ordered first. - fn all_before(&self, timeline_point: TimelinePoint) -> impl Iterator + '_ { - self.temporal_chunks - .get(timeline_point.name()) - .into_iter() - .flat_map(move |chunks| { - let idx = - chunks.partition_point(|chunk| chunk.time_range.min <= timeline_point.time); - - // Start loading closest to the time cursor. 
- chunks[..idx].iter().rev() - }) - .map(|chunk| chunk.chunk_id) - } -} - impl re_byte_size::SizeBytes for HighPrioChunks { fn heap_size_bytes(&self) -> u64 { let Self { temporal_chunks } = self; @@ -164,13 +152,12 @@ impl CurrentBatch { /// Helper struct responsible for batching requests and creating /// promises for missing chunks. -struct ChunkRequestBatcher<'a> { +pub(crate) struct ChunkRequestBatcher<'a> { manifest: &'a RrdManifest, chunk_byte_size_uncompressed: &'a [u64], chunk_byte_size: &'a [u64], max_on_wire_bytes_per_batch: u64, - remaining_bytes_in_on_wire_budget: u64, current_batch: CurrentBatch, // Output @@ -178,20 +165,13 @@ struct ChunkRequestBatcher<'a> { } impl<'a> ChunkRequestBatcher<'a> { - fn new( - manifest: &'a RrdManifest, - requests: &ChunkRequests, - options: &ChunkPrefetchOptions, - ) -> Self { + pub(crate) fn new(manifest: &'a RrdManifest, options: &ChunkPrefetchOptions) -> Self { Self { chunk_byte_size_uncompressed: manifest.col_chunk_byte_size_uncompressed(), chunk_byte_size: manifest.col_chunk_byte_size(), manifest, max_on_wire_bytes_per_batch: options.max_on_wire_bytes_per_batch, - remaining_bytes_in_on_wire_budget: options - .max_bytes_on_wire_at_once - .saturating_sub(requests.num_on_wire_bytes_pending()), current_batch: Default::default(), to_load: Vec::new(), @@ -213,7 +193,7 @@ impl<'a> ChunkRequestBatcher<'a> { } let rb = re_arrow_util::take_record_batch( - self.manifest.data(), + self.manifest.chunk_fetcher_rb(), &std::mem::take(&mut self.current_batch.row_indices), )?; self.to_load.push(( @@ -230,13 +210,18 @@ impl<'a> ChunkRequestBatcher<'a> { } /// Add a chunk to be fetched. 
- fn try_fetch(&mut self, chunk_row_idx: usize) -> Result { - if self.remaining_bytes_in_on_wire_budget == 0 { + fn try_fetch( + &mut self, + chunk_row_idx: usize, + budget: &mut RemainingByteBudget, + ) -> Result { + let on_wire_byte_size = self.chunk_byte_size[chunk_row_idx]; + + if !budget.try_fit_on_wire(on_wire_byte_size) { return Ok(false); } let uncompressed_chunk_size = self.chunk_byte_size_uncompressed[chunk_row_idx]; - let on_wire_byte_size = self.chunk_byte_size[chunk_row_idx]; self.current_batch.row_indices.push(chunk_row_idx); self.current_batch.uncompressed_bytes += uncompressed_chunk_size; @@ -245,9 +230,7 @@ impl<'a> ChunkRequestBatcher<'a> { if self.max_on_wire_bytes_per_batch <= self.current_batch.on_wire_bytes { self.finish_batch()?; } - self.remaining_bytes_in_on_wire_budget = self - .remaining_bytes_in_on_wire_budget - .saturating_sub(on_wire_byte_size); + Ok(true) } @@ -272,18 +255,41 @@ fn warn_entity_exceeds_memory(entity_path: &str) { } } -struct RemainingByteBudget { - remaining_bytes: u64, +pub struct RemainingByteBudget { + /// Fixed total — used to check if a single chunk is too large to ever fit. + pub total_bytes_in_memory: u64, + remaining_bytes_in_memory: u64, + + /// The amount of bytes left to download on wire. + /// + /// This is allowed to go in the negatives, since we allow downloading + /// chunks larger than the budget. But if it's 0 or less, no more chunks + /// will be requested. + pub remaining_bytes_on_wire: i64, } impl RemainingByteBudget { - /// Try to fit `bytes` into the remaining budget. + /// If either the wire budget, or memory budget is filled. + pub fn full(&self) -> bool { + self.remaining_bytes_in_memory == 0 || self.remaining_bytes_on_wire <= 0 + } + + /// Create a new budget with the given memory and on-wire limits. 
+ pub fn new(total_bytes_in_memory: u64, max_bytes_on_wire: u64) -> Self { + Self { + total_bytes_in_memory, + remaining_bytes_in_memory: total_bytes_in_memory, + remaining_bytes_on_wire: max_bytes_on_wire.cast_signed(), + } + } + + /// Try to fit `bytes` into the remaining memory budget. /// /// Returns `true` if it fits (even partially), `false` if the budget is exhausted. - fn try_fit_into_budget(&mut self, bytes: u64, required: bool) -> bool { - self.remaining_bytes = self.remaining_bytes.saturating_sub(bytes); + fn try_fit_in_memory(&mut self, bytes: u64, required: bool) -> bool { + self.remaining_bytes_in_memory = self.remaining_bytes_in_memory.saturating_sub(bytes); - if self.remaining_bytes == 0 { + if self.remaining_bytes_in_memory == 0 { if required { if cfg!(target_arch = "wasm32") { re_log::warn_once!( @@ -302,13 +308,25 @@ impl RemainingByteBudget { true } } + + /// Try to fit `bytes` into the remaining on-wire budget. + /// + /// Returns `true` if it fits (even partially), `false` if the budget is exhausted. + fn try_fit_on_wire(&mut self, bytes: u64) -> bool { + let fit_on_wire = self.remaining_bytes_on_wire > 0; + + self.remaining_bytes_on_wire = self.remaining_bytes_on_wire.saturating_sub_unsigned(bytes); + + fit_on_wire + } } /// Chunk that we've prioritized in `chunks_in_priority`. -struct PrioritizedRootChunk { +#[derive(Clone, Copy)] +pub struct PrioritizedRootChunk { /// If this chunk came from `used_physical` or `missing_virtual` it's required /// and we log a warning if we can't fit it. 
- required: bool, + stage: FetchStage, root_chunk_id: ChunkId, } @@ -316,14 +334,21 @@ struct PrioritizedRootChunk { impl PrioritizedRootChunk { fn required(root_chunk_id: ChunkId) -> Self { Self { - required: true, + stage: FetchStage::Required, root_chunk_id, } } - fn optional(chunk_id: ChunkId) -> Self { + fn similar(chunk_id: ChunkId, time_cursor_offset: Option) -> Self { Self { - required: false, + stage: FetchStage::Similar(time_cursor_offset), + root_chunk_id: chunk_id, + } + } + + fn everything(chunk_id: ChunkId) -> Self { + Self { + stage: FetchStage::Everything, root_chunk_id: chunk_id, } } @@ -388,7 +413,7 @@ impl re_byte_size::SizeBytes for ProtectedChunks { pub struct ChunkPrioritizer { protected_chunks: ProtectedChunks, - /// Result of the latest call to [`Self::prioritize_and_prefetch`]. + /// Result of the latest fetch pass (set by [`ChunkFetcher::finish`]). latest_result: Option, /// Chunks that are in the progress of being downloaded. @@ -398,7 +423,7 @@ pub struct ChunkPrioritizer { root_chunk_intervals: BTreeMap>, /// All static root chunks in the rrd manifest. - static_chunk_ids: HashSet, + static_chunk_ids: Vec, /// Chunks that should be downloaded before any else. high_priority_chunks: HighPrioChunks, @@ -407,6 +432,12 @@ pub struct ChunkPrioritizer { /// Component paths that were reported either as being used or missing. pub components_of_interest: HashSet, + + /// Root chunks visited during the required pass of the current frame. + /// + /// Carried into the optional pass so those chunks are skipped (not double-counted). + /// Reset at the start of each required pass. 
+ frame_visited: HashSet, } impl re_byte_size::SizeBytes for ChunkPrioritizer { @@ -420,6 +451,7 @@ impl re_byte_size::SizeBytes for ChunkPrioritizer { high_priority_chunks, component_paths_from_root_id, components_of_interest, + frame_visited, } = self; protected_chunks.heap_size_bytes() @@ -428,17 +460,38 @@ impl re_byte_size::SizeBytes for ChunkPrioritizer { + high_priority_chunks.heap_size_bytes() + component_paths_from_root_id.heap_size_bytes() + components_of_interest.heap_size_bytes() + + frame_visited.heap_size_bytes() + } +} + +#[derive(Clone, Copy)] +pub struct PrefetchTimeCursor { + pub time_cursor: TimelinePoint, + + /// How fast the time cursor would move in `TimeInt / real second` if + /// not paused. + pub speed_if_unpaused: f64, + + /// If the time playing is looped this defines what range is looped. + pub loop_range: Option, +} + +impl std::ops::Deref for PrefetchTimeCursor { + type Target = TimelinePoint; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.time_cursor } } impl ChunkPrioritizer { - pub fn on_rrd_manifest(&mut self, manifest: &RrdManifest) { - self.update_static_chunks(manifest); - self.update_chunk_intervals(manifest); - self.update_high_priority_chunks(manifest); + pub fn on_rrd_manifest(&mut self, delta: &RrdManifest) { + self.update_static_chunks(delta); + self.update_chunk_intervals(delta); + self.update_high_priority_chunks(delta); - self.component_paths_from_root_id.clear(); - for (entity, per_component) in manifest.static_map() { + for (entity, per_component) in delta.static_map() { for (component, chunk) in per_component { self.component_paths_from_root_id .entry(*chunk) @@ -450,7 +503,7 @@ impl ChunkPrioritizer { } } - for (entity, per_timeline) in manifest.temporal_map() { + for (entity, per_timeline) in delta.temporal_map() { for per_component in per_timeline.values() { for (component, chunks) in per_component { for chunk in chunks.keys() { @@ -467,7 +520,7 @@ impl ChunkPrioritizer { } } - /// Result of the latest 
call to [`Self::prioritize_and_prefetch`]. + /// Result of the latest fetch pass (set by [`ChunkFetcher::finish`]). pub fn latest_result(&self) -> Option { self.latest_result } @@ -510,18 +563,27 @@ impl ChunkPrioritizer { // parts of a hierarchy, and not all of the transform are required to be // available at each time point. // More here: https://linear.app/rerun/issue/RR-3441/required-transform-frames-arent-always-loaded - self.high_priority_chunks = Self::find_chunks_with_component_prefix( + let new_chunks = Self::find_chunks_with_component_prefix( manifest, "Transform3D:", // Hard-coding this here is VERY hacky, but I want to ship MVP ); + for (timeline, mut chunks) in new_chunks.temporal_chunks { + let existing = self + .high_priority_chunks + .temporal_chunks + .entry(timeline) + .or_default(); + existing.append(&mut chunks); + existing.sort_by_key(|chunk| chunk.time_range.min); + } } fn update_static_chunks(&mut self, manifest: &RrdManifest) { for entity_chunks in manifest.static_map().values() { - for &chunk_id in entity_chunks.values() { - self.static_chunk_ids.insert(chunk_id); - } + self.static_chunk_ids.extend(entity_chunks.values()); } + self.static_chunk_ids.sort(); + self.static_chunk_ids.dedup(); } fn update_chunk_intervals(&mut self, manifest: &RrdManifest) { @@ -539,10 +601,11 @@ impl ChunkPrioritizer { } } - self.root_chunk_intervals.clear(); for (timeline, chunks) in per_timeline_chunks { self.root_chunk_intervals - .insert(timeline, SortedRangeMap::new(chunks)); + .entry(timeline) + .or_default() + .extend(chunks); } } @@ -558,282 +621,607 @@ impl ChunkPrioritizer { &self.protected_chunks } - /// An iterator over root chunks in priority order. + /// Handle initial chunk prioritization and build a [`ChunkFetcher`]. /// - /// May return duplicates! + /// This should be called once per frame per recording, because it + /// clears tracked missing & used chunks from the chunk store, so that can be populated again next frame. 
/// - /// See [`Self::prioritize_and_prefetch`] for more details. - #[expect(clippy::too_many_arguments)] // TODO(emilk): refactor to simplify - fn root_chunks_in_priority<'a>( - components_of_interest: &'a HashSet, - component_paths_from_root_id: &'a HashMap>, - static_chunk_ids: &'a HashSet, - high_priority_chunks: &'a HighPrioChunks, + /// Subtracts already loaded physical chunks from the memory budget. + pub fn prepare_chunk_fetcher<'a>( + &'a mut self, store: &'a ChunkStore, - used_and_missing: &QueriedChunkIdTracker, - time_cursor: Option, + manifest: &'a RrdManifest, + options: &ChunkPrefetchOptions, + time_cursor: Option, root_chunks: &'a HashMap, - root_chunks_on_timeline: Option<&'a SortedRangeMap>, - ) -> impl Iterator + use<'a> { - re_tracing::profile_function!(); + budget: &mut RemainingByteBudget, + ) -> ChunkFetcher<'a> { + let used_and_missing = store.take_tracked_chunk_ids(); - let mut missing_roots = Vec::new(); - for missing_virtual_chunk_id in &used_and_missing.missing_virtual { - store.collect_root_ids(missing_virtual_chunk_id, &mut missing_roots); - } - missing_roots.sort(); - missing_roots.dedup(); - - let chunks_ids_after_time_cursor = move || { - time_cursor - .zip(root_chunks_on_timeline) - .map(|(time_cursor, root_chunks_on_timeline)| { - root_chunks_on_timeline - .query(time_cursor.time..=TimeInt::MAX) - .map(|(_, chunk_id)| *chunk_id) - }) - .into_iter() - .flatten() - }; - let chunks_ids_before_time_cursor = move || { - time_cursor - .zip(root_chunks_on_timeline) - .map(|(time_cursor, root_chunks_on_timeline)| { - root_chunks_on_timeline - .query(TimeInt::MIN..=time_cursor.time.saturating_sub(1)) - .map(|(_, chunk_id)| *chunk_id) - }) - .into_iter() - .flatten() - }; - - // Note: we do NOT take `components_of_interest` for high-priority transform chunks, - // because that seems to cause bugs for unknown reasons. 
- let high_prio_chunks_before_time_cursor = time_cursor - .map(|time_cursor| high_priority_chunks.all_before(time_cursor)) - .into_iter() - .flatten(); - - // Chunks that are required for the current view. - let required_chunks = chain!( - missing_roots, - static_chunk_ids.iter().copied(), - high_prio_chunks_before_time_cursor, - ); + self.frame_visited.clear(); + self.update_components_of_interest(store, &used_and_missing); + self.protected_chunks.roots.clear(); + self.protected_chunks.physical.clear(); + self.protect_used_and_missing(store, &used_and_missing); - // Chunks that aren't currently required. Pure prefetching: - let optional_chunks = { - // Chunks for components we are interested in. - let is_interesting_chunk = |chunk_id: &ChunkId| { - component_paths_from_root_id[chunk_id] - .iter() - .any(|path| components_of_interest.contains(path)) - }; - let is_uninteresting_chunk = |chunk_id: &ChunkId| { - !component_paths_from_root_id[chunk_id] - .iter() - .any(|path| components_of_interest.contains(path)) - }; + for &physical_chunk_id in &used_and_missing.used_physical { + debug_assert!( + self.protected_chunks.physical.contains(&physical_chunk_id), + "We added it earlier" + ); + if let Some(chunk) = store.physical_chunk(&physical_chunk_id) { + budget.try_fit_in_memory(Chunk::total_size_bytes(chunk.as_ref()), true); + } else { + re_log::debug_warn_once!("Couldn't get physical chunk from chunk store"); + } + } - // Extra chunks we try to prefetch, that may _soon_ be needed: - let optional_interesting_chunks = chain!( - std::iter::once_with(chunks_ids_after_time_cursor).flatten(), - std::iter::once_with(chunks_ids_before_time_cursor).flatten(), - ) - .filter(is_interesting_chunk); - - // Extra chunks at the current time (or after), that the user is not _currently_ - // looking at, but they may switch views. 
- let optional_uninteresting_chunks = std::iter::once_with(chunks_ids_after_time_cursor) - .flatten() - .filter(is_uninteresting_chunk); - - // Finally: backfill with ALL unloaded chunks. - // If we have the memory budget for it, we always want to load the full recording: - let all_chunks = root_chunks.keys().copied(); - - chain!( - optional_interesting_chunks, - optional_uninteresting_chunks, - all_chunks, - ) - }; + ChunkFetcher { + visited_root_chunks: std::mem::take(&mut self.frame_visited), + chunk_id_scratch: Vec::new(), + state: PrioritizationState::default(), + prioritizer: self, + root_chunks, + time_cursor, + store, + next_chunk: None, + fetch_stage: ChunkPriorityStage::Start(used_and_missing), - chain!( - required_chunks.map(PrioritizedRootChunk::required), - optional_chunks.map(PrioritizedRootChunk::optional), - ) + request_batcher: Some(ChunkRequestBatcher::new(manifest, options)), + } } - /// Prioritize which chunk (loaded & unloaded) we want to fit in the - /// current memory budget. And prefetch some amount of those chunks. - /// - /// This prioritizes chunks in the order of: - /// - Physical chunks that were used since last time this was ran. - /// - Virtual chunks that would've been hit by queries since last time - /// this was ran. - /// - Static chunks. - /// - Chunks after the time cursor in rising temporal order. - /// - Chunks before the time cursor in rising temporal order. - /// - /// We go through these chunks until we hit [`ChunkPrefetchOptions::total_uncompressed_byte_budget`] - /// and prefetch missing chunks until we hit [`ChunkPrefetchOptions::max_bytes_on_wire_at_once`]. - /// Returns all batches that should be loaded. 
- #[must_use = "Load the returned batches"] - pub fn prioritize_and_prefetch( + fn update_components_of_interest( &mut self, store: &ChunkStore, used_and_missing: &QueriedChunkIdTracker, - options: &ChunkPrefetchOptions, - time_cursor: Option, - manifest: &RrdManifest, - root_chunks: &HashMap, - ) -> Result, PrefetchError> { + ) { re_tracing::profile_function!(); - let mut chunk_batcher = ChunkRequestBatcher::new(manifest, &self.chunk_requests, options); + // Basically: what components of which entities are currently being viewed by the user? + self.components_of_interest.clear(); - if let Some(latest_result) = &mut self.latest_result - && chunk_batcher.remaining_bytes_in_on_wire_budget == 0 - { - // Early-out: too many bytes already in-transit. + let QueriedChunkIdTracker { + used_physical, + missing_virtual, + } = used_and_missing; - if !used_and_missing.missing_virtual.is_empty() { - latest_result.all_required_are_loaded = false; + for physical_chunk_id in used_physical { + if let Some(chunk) = store.physical_chunk(physical_chunk_id) { + for component in chunk.components_identifiers() { + self.components_of_interest.insert(ComponentPathKey { + entity_path: chunk.entity_path().hash(), + component, + }); + } + } + } + for missing_virtual_chunk_id in missing_virtual { + for root_id in store.find_root_chunks(missing_virtual_chunk_id) { + if let Some(components) = self.component_paths_from_root_id.get(&root_id) { + self.components_of_interest + .extend(components.iter().copied()); + } } + } + } + + /// Prevent these chunks from being canceled or GC:ed. + fn protect_used_and_missing( + &mut self, + store: &ChunkStore, + used_and_missing: &QueriedChunkIdTracker, + ) { + let QueriedChunkIdTracker { + used_physical, + missing_virtual, + } = used_and_missing; + + for physical_chunk_id in used_physical { + // We don't need to add the root(s) of this to the `protected_root_chunks`. 
+ // It is fine to cancel the download of the root(s), + // as long as we don't GC this particular physical chunk. + self.protected_chunks.physical.insert(*physical_chunk_id); + } - self.protect_used_and_missing(store, used_and_missing); - return Ok(vec![]); + for chunk_id in missing_virtual { + // Do not cancel any downloads of any roots of this missing chunk: + for root_id in store.find_root_chunks(chunk_id) { + self.protected_chunks.roots.insert(root_id); + } } + } - self.update_components_of_interest(store, used_and_missing); + /// Cancel all fetches of things that are not currently needed. + #[must_use = "Returns root chunks whose download got cancelled. Mark them as unloaded!"] + pub fn cancel_outdated_requests(&mut self, egui_now_time: f64) -> Vec { + self.chunk_requests + .cancel_outdated_requests(egui_now_time, &self.protected_chunks.roots) + } +} - // We will re-calculate these: - self.protected_chunks.roots.clear(); - self.protected_chunks.physical.clear(); // <- Things we put in here will also be subtracted from remaining_byte_budget +/// How much we should prefetch. A higher stage also includes all lower stages. +#[derive(PartialEq, Eq, Clone, Copy, Debug, serde::Deserialize, serde::Serialize)] +#[repr(u32)] +pub enum FetchStage { + /// Fetch all required chunks, which includes: + /// - Static chunks. + /// - Missing chunks. + /// - High-prio chunks (e.g Transform ones). + Required = 0, - self.protect_used_and_missing(store, used_and_missing); + /// Fetches all chunks on the component paths of chunks that were reported + /// as used or missing within the given time range. + /// + /// This is in number of seconds ahead if the timeline were to be played. + Similar(Option) = 1, - let mut remaining_byte_budget = RemainingByteBudget { - remaining_bytes: options.total_uncompressed_byte_budget, - }; + /// Fetches everything. Starting at the time cursor. 
+ Everything = 2, +} - // Start by going through the actually used physical chunks: - for &physical_chunk_id in &used_and_missing.used_physical { - debug_assert!( - self.protected_chunks.physical.contains(&physical_chunk_id), - "We added it earlier" - ); +impl PartialOrd for FetchStage { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} - if let Some(chunk) = store.physical_chunk(&physical_chunk_id) { - let required = true; - remaining_byte_budget - .try_fit_into_budget(Chunk::total_size_bytes(chunk.as_ref()), required); - } else { - re_log::debug_warn_once!("Couldn't get physical chunk from chunk store"); +impl Ord for FetchStage { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + use std::cmp::Ordering; + match (self, other) { + (Self::Required, Self::Required) | (Self::Everything, Self::Everything) => { + Ordering::Equal } + + (Self::Similar(a), Self::Similar(b)) => match (a, b) { + (Some(a), Some(b)) => a.cmp(b), + (Some(_), None) => Ordering::Less, + (None, Some(_)) => Ordering::Greater, + (None, None) => Ordering::Equal, + }, + + (Self::Required, _) | (_, Self::Everything) => Ordering::Less, + (_, Self::Required) | (Self::Everything, _) => Ordering::Greater, } + } +} - let root_chunks_on_timeline = time_cursor - .and_then(|time_cursor| self.root_chunk_intervals.get(&time_cursor.timeline())); +impl Default for FetchStage { + fn default() -> Self { + Self::Similar(Some(Duration::from_secs(30))) + } +} - let root_chunk_ids_in_priority_order = Self::root_chunks_in_priority( - &self.components_of_interest, - &self.component_paths_from_root_id, - &self.static_chunk_ids, - &self.high_priority_chunks, - store, - used_and_missing, - time_cursor, - root_chunks, - root_chunks_on_timeline, - ); +impl FetchStage { + pub fn is_required(&self) -> bool { + match self { + Self::Required => true, + Self::Similar(_) | Self::Everything => false, + } + } - let state = Self::fill_byte_budget( - &mut self.protected_chunks, - store, - options, - 
manifest, - root_chunks, - &mut chunk_batcher, - &mut remaining_byte_budget, - root_chunk_ids_in_priority_order, - )?; - self.latest_result = Some(state); + pub fn is_everything(&self) -> bool { + match self { + Self::Required | Self::Similar(_) => false, + Self::Everything => true, + } + } +} + +enum IterState { + Uninited, + Idx(usize), + Done, +} + +#[derive(Clone, Copy)] +enum TimeRangeStage { + AfterCursor, + BeforeCursor, + AfterCursorOutsideLoop, + BeforeCursorOutsideLoop, +} - chunk_batcher.finish() +impl TimeRangeStage { + fn next(&self) -> Option { + match self { + Self::AfterCursor => Some(Self::BeforeCursor), + Self::BeforeCursor => Some(Self::AfterCursorOutsideLoop), + Self::AfterCursorOutsideLoop => Some(Self::BeforeCursorOutsideLoop), + Self::BeforeCursorOutsideLoop => None, + } } +} - #[expect(clippy::too_many_arguments)] - fn fill_byte_budget( - protected_chunks: &mut ProtectedChunks, - store: &ChunkStore, - options: &ChunkPrefetchOptions, - manifest: &RrdManifest, - root_chunks: &HashMap, - chunk_batcher: &mut ChunkRequestBatcher<'_>, - remaining_byte_budget: &mut RemainingByteBudget, - mut root_chunk_ids_in_priority_order: impl Iterator, - ) -> Result { - re_tracing::profile_function!(); +/// Chunk fetching stages, defined in the order they're done. +enum ChunkPriorityStage<'a> { + /// Initial state. + Start(QueriedChunkIdTracker), + + /// Fetches all missing chunks. + Missing(std::vec::IntoIter), + + /// Fetches all static chunks. + Static(usize), + + /// Fetches high prio chunks before the time cursor in reverse order. + HighPrio(IterState), + + /// Fetches chunks in temporal order within a specific range. + /// + /// If `interesting` is true, this only fetches chunks if they contain a component path + /// that has been marked as used/missing. + TimeQuery { + stage: TimeRangeStage, + iter_state: Option, + interesting: bool, + }, + + /// All chunks in no particular order. 
+ /// + /// This will make sure we fetch chunks that aren't on the current timeline. + Everything(std::collections::hash_map::Keys<'a, ChunkId, RootChunkInfo>), - let entity_paths = manifest.col_chunk_entity_path_raw(); + /// No more chunks to check. + Done, +} - let mut visited_root_chunks: HashSet = Default::default(); +/// Per-recording state for a pre-fetch pass. +/// +/// Constructed by calling `ChunkPrioritizer::prepare_chunk_fetcher`, and +/// [`Self::finish`] must be called when completed. +#[must_use] +pub struct ChunkFetcher<'a> { + time_cursor: Option, + visited_root_chunks: HashSet, + chunk_id_scratch: Vec, + pub state: PrioritizationState, + + store: &'a ChunkStore, + prioritizer: &'a mut ChunkPrioritizer, + root_chunks: &'a HashMap, + + next_chunk: Option, + fetch_stage: ChunkPriorityStage<'a>, + + request_batcher: Option>, +} - let mut physical_chunks_scratch = Vec::new(); // scratch space to save on reallocations +impl Drop for ChunkFetcher<'_> { + fn drop(&mut self) { + if self.request_batcher.is_some() { + re_log::debug_warn_once!("`ChunkFetcher::finish` not called for `ChunkFetcher`"); + } + } +} + +impl ChunkFetcher<'_> { + fn peek_chunk(&mut self) -> Option { + if self.next_chunk.is_none() { + self.next_chunk = self.next_chunk(); + } + + self.next_chunk + } - let mut state = PrioritizationState { - transit_budget_filled: false, - memory_budget_filled: false, - some_chunks_too_big: false, - all_required_are_loaded: true, + /// Get the next root chunk in priority order. + /// + /// This may return duplicates! 
+ fn next_chunk(&mut self) -> Option { + if let Some(chunk) = self.next_chunk.take() { + return Some(chunk); + } + + loop { + match &mut self.fetch_stage { + ChunkPriorityStage::Start(tracker) => { + let mut missing_roots = Vec::new(); + for missing_virtual_chunk_id in &tracker.missing_virtual { + self.store + .collect_root_ids(missing_virtual_chunk_id, &mut missing_roots); + } + missing_roots.sort(); + missing_roots.dedup(); + + self.fetch_stage = ChunkPriorityStage::Missing(missing_roots.into_iter()); + } + ChunkPriorityStage::Missing(missing) => { + if let Some(missing) = missing.next() { + return Some(PrioritizedRootChunk::required(missing)); + } else { + self.fetch_stage = ChunkPriorityStage::Static(0); + } + } + ChunkPriorityStage::Static(idx) => { + if let Some(c) = self.prioritizer.static_chunk_ids.get(*idx) { + *idx += 1; + + return Some(PrioritizedRootChunk::required(*c)); + } else { + self.fetch_stage = ChunkPriorityStage::HighPrio(IterState::Uninited); + } + } + ChunkPriorityStage::HighPrio(idx) => { + if let Some(time_cursor) = self.time_cursor + && let Some(chunks_on_timeline) = self + .prioritizer + .high_priority_chunks + .temporal_chunks + .get(time_cursor.timeline().name()) + && let Some(current_idx) = match idx { + IterState::Uninited => { + let (new_idx, res) = if let Some(idx) = chunks_on_timeline + .partition_point(|c| c.time_range.min <= time_cursor.time) + .checked_sub(1) + { + (IterState::Idx(idx), Some(idx)) + } else { + (IterState::Done, None) + }; + + *idx = new_idx; + + res + } + IterState::Idx(idx) => Some(*idx), + IterState::Done => None, + } + && let Some(c) = chunks_on_timeline.get(current_idx) + { + *idx = if let Some(idx) = current_idx.checked_sub(1) { + IterState::Idx(idx) + } else { + IterState::Done + }; + + return Some(PrioritizedRootChunk::required(c.chunk_id)); + } else { + self.fetch_stage = ChunkPriorityStage::TimeQuery { + stage: TimeRangeStage::AfterCursor, + iter_state: None, + interesting: true, + }; + } + } + 
ChunkPriorityStage::TimeQuery { + stage, + iter_state, + interesting, + } => { + let stage = *stage; + let interesting = *interesting; + let mut iter_state = *iter_state; + if let Some(chunk) = + self.next_in_time_query(stage, &mut iter_state, interesting) + { + self.fetch_stage = ChunkPriorityStage::TimeQuery { + stage, + iter_state, + interesting, + }; + + return Some(chunk); + } else if let Some(stage) = stage.next() { + self.fetch_stage = ChunkPriorityStage::TimeQuery { + stage, + iter_state: None, + interesting, + }; + } else if interesting { + self.fetch_stage = ChunkPriorityStage::TimeQuery { + stage: TimeRangeStage::AfterCursor, + iter_state: None, + interesting: false, + }; + } else { + self.fetch_stage = ChunkPriorityStage::Everything(self.root_chunks.keys()); + } + } + ChunkPriorityStage::Everything(chunks) => { + if let Some(chunk_id) = chunks.next() { + return Some(PrioritizedRootChunk::everything(*chunk_id)); + } else { + self.fetch_stage = ChunkPriorityStage::Done; + } + } + ChunkPriorityStage::Done => return None, + } + } + } + + fn next_in_time_query( + &self, + stage: TimeRangeStage, + cursor: &mut Option, + interesting: bool, + ) -> Option { + let time_cursor = self.time_cursor?; + let query = match stage { + TimeRangeStage::AfterCursor => { + let loop_range = time_cursor.loop_range?; + AbsoluteTimeRange::new(loop_range.min.max(time_cursor.time), loop_range.max) + } + TimeRangeStage::BeforeCursor => { + let loop_range = time_cursor.loop_range?; + AbsoluteTimeRange::new( + loop_range.min, + loop_range.max.min(time_cursor.time.saturating_sub(1)), + ) + } + TimeRangeStage::AfterCursorOutsideLoop => AbsoluteTimeRange::new( + time_cursor + .loop_range + .map(|r| r.max + TimeInt::new_temporal(1)) + .unwrap_or(time_cursor.time), + TimeInt::MAX, + ), + TimeRangeStage::BeforeCursorOutsideLoop => AbsoluteTimeRange::new( + TimeInt::MIN, + time_cursor + .loop_range + .map(|r| r.min.saturating_sub(1)) + .unwrap_or_else(|| time_cursor.time.saturating_sub(1)), 
+ ), + }; + + if query.is_empty() { + return None; + } + + let map = self + .prioritizer + .root_chunk_intervals + .get(&time_cursor.timeline())?; + + let mut iter = match *cursor { + Some(c) => map.resume_query(query.min..=query.max, c), + None => map.query(query.min..=query.max), + }; + + // Skip chunks that don't match the current interest filter. + let chunk = iter.find(|(_, c)| { + let is_interesting = self + .prioritizer + .component_paths_from_root_id + .get(c) + .is_some_and(|k| { + k.iter() + .any(|k| self.prioritizer.components_of_interest.contains(k)) + }); + + is_interesting == interesting + }); + + *cursor = Some(iter.cursor()); + + let (range, chunk_id) = chunk?; + let range = AbsoluteTimeRange::new(*range.start(), *range.end()); + + let chunk = if interesting { + let after = Duration::try_from_secs_f64( + (range.min - time_cursor.time).max(TimeInt::ZERO).as_f64() + / time_cursor.speed_if_unpaused, + ) + .ok(); + + // The time it would take (in real time), for the time cursor to get to this chunk. + // + // `None` if it would never reach, if for example outside of the current loop section. + let real_time_offset = match stage { + TimeRangeStage::AfterCursor => after, + TimeRangeStage::BeforeCursor => time_cursor.loop_range.and_then(|loop_range| { + Duration::try_from_secs_f64( + ((loop_range.max - time_cursor.time).max(TimeInt::ZERO) + + (range.min - loop_range.min).max(TimeInt::ZERO)) + .as_f64() + / time_cursor.speed_if_unpaused, + ) + .ok() + }), + TimeRangeStage::AfterCursorOutsideLoop => { + if time_cursor.loop_range.is_some() { + None + } else { + after + } + } + TimeRangeStage::BeforeCursorOutsideLoop => None, + }; + + PrioritizedRootChunk::similar(*chunk_id, real_time_offset) + } else { + PrioritizedRootChunk::everything(*chunk_id) }; - for next in root_chunk_ids_in_priority_order.by_ref() { - let PrioritizedRootChunk { - required, + Some(chunk) + } + + /// Iterate through prioritized chunks, consuming budget. 
+ /// + /// `to_state` determines how many chunks we process before stopping (within budget). + pub fn fetch( + &mut self, + budget: &mut RemainingByteBudget, + to_state: FetchStage, + ) -> Result<(), PrefetchError> { + let Some(mut batcher) = self.request_batcher.take() else { + return Ok(()); + }; + + let res = self.fetch_inner(&mut batcher, budget, to_state); + + self.request_batcher = Some(batcher); + + res + } + + fn fetch_inner( + &mut self, + batcher: &mut ChunkRequestBatcher<'_>, + budget: &mut RemainingByteBudget, + to_state: FetchStage, + ) -> Result<(), PrefetchError> { + if self.state.all_required_are_loaded.is_none() { + self.state.all_required_are_loaded = Some(true); + } + + let entity_paths = batcher.manifest.col_chunk_entity_path_raw(); + + loop { + // Peek before consuming so we can stop without eating the first optional + // chunk when doing the required-only pass. + if self.peek_chunk().is_some_and(|next| next.stage > to_state) { + break; + } + + let Some(PrioritizedRootChunk { + stage, root_chunk_id, - } = next; + }) = self.next_chunk() + else { + break; + }; - if !visited_root_chunks.insert(root_chunk_id) { - continue; // We've already handled this chunk earlier in the priority order. + if !self.visited_root_chunks.insert(root_chunk_id) { + continue; // Already handled earlier in the priority order. 
} - let Some(root_chunk) = root_chunks.get(&root_chunk_id) else { + let Some(root_chunk) = self.root_chunks.get(&root_chunk_id) else { re_log::debug_warn_once!("Missing root chunk"); continue; }; - store.collect_physical_descendents_of(&root_chunk_id, &mut physical_chunks_scratch); + self.store + .collect_physical_descendents_of(&root_chunk_id, &mut self.chunk_id_scratch); match root_chunk.state { LoadState::Unloaded | LoadState::InTransit => { - if required { - state.all_required_are_loaded = false; + if stage.is_required() { + self.state.all_required_are_loaded = Some(false); } let row_idx = root_chunk.row_id; // We count only the chunks we are interested in as being part of the memory budget. // The others can/will be evicted as needed. - let uncompressed_chunk_size = - chunk_batcher.chunk_byte_size_uncompressed[row_idx]; + let uncompressed_chunk_size = batcher.chunk_byte_size_uncompressed[row_idx]; - if options.total_uncompressed_byte_budget < uncompressed_chunk_size { + if budget.total_bytes_in_memory < uncompressed_chunk_size { warn_entity_exceeds_memory(entity_paths.value(row_idx)); - state.some_chunks_too_big = true; + self.state.some_chunks_too_big = true; + self.chunk_id_scratch.clear(); continue; } - if !remaining_byte_budget.try_fit_into_budget(uncompressed_chunk_size, required) - { - state.memory_budget_filled = true; + if !budget.try_fit_in_memory(uncompressed_chunk_size, stage.is_required()) { + self.state.memory_budget_filled = true; + self.chunk_id_scratch.clear(); break; } if root_chunk.state == LoadState::Unloaded - && !chunk_batcher.try_fetch(row_idx)? + && !batcher.try_fetch(row_idx, budget)? { // If we don't have anything more to fetch we stop looking. // @@ -843,25 +1231,38 @@ impl ChunkPrioritizer { // chunks inbetween we have to download first. After // which we won't stop prioritizing which chunks should // be in memory here. 
- state.transit_budget_filled = true; + self.state.transit_budget_filled = true; + self.chunk_id_scratch.clear(); break; } - protected_chunks.roots.insert(root_chunk_id); - protected_chunks + self.prioritizer + .protected_chunks + .roots + .insert(root_chunk_id); + self.prioritizer + .protected_chunks .physical - .extend(physical_chunks_scratch.drain(..)); + .extend(self.chunk_id_scratch.drain(..)); } LoadState::FullyLoaded => { - protected_chunks.roots.insert(root_chunk_id); - - for chunk_id in physical_chunks_scratch.drain(..) { - if protected_chunks.physical.contains(&chunk_id) { - continue; // Already counted as part of our byte budget + self.prioritizer + .protected_chunks + .roots + .insert(root_chunk_id); + + for chunk_id in self.chunk_id_scratch.drain(..) { + if self + .prioritizer + .protected_chunks + .physical + .contains(&chunk_id) + { + continue; // Already counted as part of our byte budget. } - let Some(chunk) = store.physical_chunk(&chunk_id) else { + let Some(chunk) = self.store.physical_chunk(&chunk_id) else { re_log::debug_warn_once!( "Couldn't get physical chunk from chunk store" ); @@ -869,92 +1270,78 @@ impl ChunkPrioritizer { }; let bytes = Chunk::total_size_bytes(chunk.as_ref()); - if !remaining_byte_budget.try_fit_into_budget(bytes, required) { - state.memory_budget_filled = true; + if !budget.try_fit_in_memory(bytes, stage.is_required()) { + self.state.memory_budget_filled = true; break; } - protected_chunks.physical.insert(chunk_id); + self.prioritizer.protected_chunks.physical.insert(chunk_id); + } + // `drain` drops remaining elements on break, but clear to be explicit. + self.chunk_id_scratch.clear(); + + // Don't continue if we already hit the limit with this. + if self.state.memory_budget_filled { + break; } } } } - if root_chunk_ids_in_priority_order - .next() - .is_some_and(|next| next.required) + // If budget ran out before all required chunks were seen, flag it. 
+ if self + .peek_chunk() + .is_some_and(|next| next.stage.is_required()) { - state.all_required_are_loaded = false; + self.state.all_required_are_loaded = Some(false); } - Ok(state) + Ok(()) } - fn update_components_of_interest( - &mut self, - store: &ChunkStore, - used_and_missing: &QueriedChunkIdTracker, - ) { - re_tracing::profile_function!(); - - // Basically: what components of which entities are currently being viewed by the user? - self.components_of_interest.clear(); - - let QueriedChunkIdTracker { - used_physical, - missing_virtual, - } = used_and_missing; - - for physical_chunk_id in used_physical { - if let Some(chunk) = store.physical_chunk(physical_chunk_id) { - for component in chunk.components_identifiers() { - self.components_of_interest.insert(ComponentPathKey { - entity_path: chunk.entity_path().hash(), - component, - }); - } - } + /// Handle the result of a [`ChunkFetcher`]. + pub fn finish( + mut self, + load_chunks: &dyn Fn(RecordBatch) -> super::ChunkPromise, + ) -> Result { + let prioritizer = &mut *self.prioritizer; + + prioritizer.frame_visited = std::mem::take(&mut self.visited_root_chunks); + let mut state = self.state; + if state.all_required_are_loaded.is_none() { + // `fetch` was never called, preserve the previous value. + state.all_required_are_loaded = prioritizer + .latest_result + .as_ref() + .and_then(|prev| prev.all_required_are_loaded); } - for missing_virtual_chunk_id in missing_virtual { - for root_id in store.find_root_chunks(missing_virtual_chunk_id) { - if let Some(components) = self.component_paths_from_root_id.get(&root_id) { - self.components_of_interest - .extend(components.iter().copied()); - } - } - } - } + prioritizer.latest_result = Some(state); - /// Prevent these chunks from being canceled or GC:ed. 
- fn protect_used_and_missing( - &mut self, - store: &ChunkStore, - used_and_missing: &QueriedChunkIdTracker, - ) { - let QueriedChunkIdTracker { - used_physical, - missing_virtual, - } = used_and_missing; - - for physical_chunk_id in used_physical { - // We don't need to add the root(s) of this to the `protected_root_chunks`. - // It is fine to cancel the download of the root(s), - // as long as we don't GC this particular physical chunk. - self.protected_chunks.physical.insert(*physical_chunk_id); - } + let mut res = ChunkFetchResult { + new_in_transit_chunks: Vec::new(), + time_cursor: self.time_cursor.as_deref().copied(), + }; - for chunk_id in missing_virtual { - // Do not cancel any downloads of any roots of this missing chunk: - for root_id in store.find_root_chunks(chunk_id) { - self.protected_chunks.roots.insert(root_id); + if let Some(batcher) = self.request_batcher.take() { + let to_load = batcher.finish()?; + for (rb, batch_info) in to_load { + res.new_in_transit_chunks + .extend(batch_info.root_chunk_ids.iter().copied()); + let promise = load_chunks(rb); + let batch = crate::chunk_requests::ChunkBatchRequest { + promise: Mutex::new(Some(promise)), + info: batch_info.into(), + }; + self.prioritizer.chunk_requests_mut().add(batch); } } - } - /// Cancel all fetches of things that are not currently needed. - #[must_use = "Returns root chunks whose download got cancelled. 
Mark them as unloaded!"] - pub fn cancel_outdated_requests(&mut self, egui_now_time: f64) -> Vec { - self.chunk_requests - .cancel_outdated_requests(egui_now_time, &self.protected_chunks.roots) + Ok(res) } } + +#[must_use] +pub struct ChunkFetchResult { + pub(super) new_in_transit_chunks: Vec, + pub(super) time_cursor: Option, +} diff --git a/crates/store/re_entity_db/src/rrd_manifest_index/collapsed_time_ranges.rs b/crates/store/re_entity_db/src/rrd_manifest_index/collapsed_time_ranges.rs new file mode 100644 index 000000000000..2f29fb05abd3 --- /dev/null +++ b/crates/store/re_entity_db/src/rrd_manifest_index/collapsed_time_ranges.rs @@ -0,0 +1,153 @@ +use std::collections::BTreeMap; + +use ahash::HashMap; +use re_chunk::{ChunkId, TimelineName}; +use re_chunk_store::ChunkStore; +use re_log_types::AbsoluteTimeRange; + +use super::RootChunkInfo; + +/// Chunks spanning more than 20% of the full timeline are considered "large" +/// and will be scanned for internal gaps once loaded. +fn large_chunk_threshold(timeline_range: AbsoluteTimeRange) -> u64 { + (timeline_range.abs_length() / 5).max(10) +} + +/// Sort ranges by start time and merge overlapping/adjacent ones. +fn merge_and_sort_ranges(ranges: &[AbsoluteTimeRange]) -> Vec { + let Some(sorted) = vec1::Vec1::try_from_vec({ + let mut v = ranges.to_vec(); + v.sort_by_key(|r| r.min.as_i64()); + v + }) + .ok() else { + return Vec::new(); + }; + + let (first, rest) = sorted.split_off_first(); + let mut merged = vec1::vec1![first]; + for range in rest { + let last = merged.last_mut(); + if range.min.as_i64() <= last.max.as_i64() + 1 { + if range.max.as_i64() > last.max.as_i64() { + last.max = range.max; + } + } else { + merged.push(range); + } + } + + merged.into() +} + +/// Split a time column into sub-ranges at gaps larger than `gap_threshold`. 
+fn split_time_column_at_gaps( + time_column: &re_chunk::TimeColumn, + gap_threshold: u64, +) -> Vec { + let times = time_column.times_raw(); + if times.len() < 2 || !time_column.is_sorted() { + return vec![time_column.time_range()]; + } + + let mut ranges = Vec::new(); + let mut start = times[0]; + let mut prev = times[0]; + + for &t in ×[1..] { + if prev.abs_diff(t) > gap_threshold { + ranges.push(AbsoluteTimeRange::new(start, prev)); + start = t; + } + prev = t; + } + ranges.push(AbsoluteTimeRange::new(start, prev)); + ranges +} + +/// Compute data time ranges from manifest chunk ranges. +/// +/// This merges all chunk time ranges per timeline to detect gaps between chunks. +/// Chunks spanning large durations are tracked for recalculation when they get loaded. +pub fn compute_data_time_ranges( + root_chunks: &HashMap, +) -> BTreeMap> { + re_tracing::profile_function!(); + + let mut ranges_per_timeline: BTreeMap> = BTreeMap::new(); + + for chunk_info in root_chunks.values() { + for (timeline_name, temporal_info) in &chunk_info.temporals { + ranges_per_timeline + .entry(*timeline_name) + .or_default() + .push(temporal_info.time_range); + } + } + + let mut result = BTreeMap::new(); + for (timeline_name, ranges) in &ranges_per_timeline { + let merged = merge_and_sort_ranges(ranges); + result.insert(*timeline_name, merged); + } + result +} + +/// Refine data time ranges for a timeline by scanning all loaded physical chunks +/// for internal gaps within large time ranges. 
+pub fn calculate_data_ranges_for_timeline( + root_chunks: &HashMap, + timelines: &BTreeMap, + store: &ChunkStore, + timeline_name: &TimelineName, +) -> Option> { + re_tracing::profile_function!(); + + let &timeline_range = timelines.get(timeline_name)?; + let threshold = large_chunk_threshold(timeline_range); + + let mut all_ranges: Vec = Vec::new(); + + // Loaded chunks: scan for internal gaps in large ones + for chunk in store.physical_chunks() { + if let Some(time_col) = chunk.timelines().get(timeline_name) { + let range = time_col.time_range(); + let duration = range.abs_length(); + if duration > threshold { + all_ranges.extend(split_time_column_at_gaps(time_col, threshold)); + } else { + all_ranges.push(range); + } + } + } + + // Unloaded chunks: include their manifest ranges as-is + for chunk_info in root_chunks.values() { + if !chunk_info.is_fully_loaded() + && let Some(temporal_info) = chunk_info.temporals.get(timeline_name) + { + all_ranges.push(temporal_info.time_range); + } + } + + Some(merge_and_sort_ranges(&all_ranges)) +} + +/// Check if a newly loaded chunk is "large" enough to warrant recalculating data ranges +/// for its timelines. 
+pub fn should_recalculate_for_chunk( + chunk_info: &RootChunkInfo, + timelines: &BTreeMap, +) -> Vec { + let mut result = Vec::new(); + for (timeline_name, temporal_info) in &chunk_info.temporals { + if let Some(&timeline_range) = timelines.get(timeline_name) { + let threshold = large_chunk_threshold(timeline_range); + let duration = temporal_info.time_range.abs_length(); + if duration > threshold { + result.push(*timeline_name); + } + } + } + result +} diff --git a/crates/store/re_entity_db/src/rrd_manifest_index/sorted_temporal_chunks.rs b/crates/store/re_entity_db/src/rrd_manifest_index/sorted_temporal_chunks.rs index 38edd7abb0d2..5bc8d656444c 100644 --- a/crates/store/re_entity_db/src/rrd_manifest_index/sorted_temporal_chunks.rs +++ b/crates/store/re_entity_db/src/rrd_manifest_index/sorted_temporal_chunks.rs @@ -17,6 +17,7 @@ use std::collections::BTreeMap; use nohash_hasher::IntMap; use re_chunk::{ChunkId, TimelineName}; +use re_chunk_store::EntityTree; use re_log_types::{AbsoluteTimeRange, EntityPathHash}; /// Summary information about a chunk for display/query purposes. @@ -121,12 +122,23 @@ impl re_byte_size::SizeBytes for SortedTemporalChunks { } impl SortedTemporalChunks { + pub fn new( + entity_tree: &EntityTree, + native_temporal_map: &re_log_encoding::RrdManifestTemporalMap, + ) -> Self { + re_tracing::profile_function!(); + let mut slf = Self::default(); + slf.update(entity_tree, native_temporal_map); + slf + } + /// Update the cache from the manifest's temporal map and entity tree. /// /// Should be called when a new rrd manifest is appended. 
- pub fn update( + // TODO(emilk): handle incremental ingestion + fn update( &mut self, - entity_tree: &crate::EntityTree, + entity_tree: &EntityTree, native_temporal_map: &re_log_encoding::RrdManifestTemporalMap, ) { re_tracing::profile_function!(); @@ -157,14 +169,14 @@ impl SortedTemporalChunks { } /// Bottom-up entity traversal - fn visit(current: &crate::EntityTree, visitor: &mut impl FnMut(&crate::EntityTree)) { + fn visit(current: &EntityTree, visitor: &mut impl FnMut(&EntityTree)) { for child in current.children.values() { visit(child, visitor); } visitor(current); } - visit(entity_tree, &mut |node: &crate::EntityTree| { + visit(entity_tree, &mut |node: &EntityTree| { for per_entity in self.per_timeline.values_mut() { // Collect all chunks from direct children which now already includes // their descendants and components @@ -250,8 +262,8 @@ mod tests { use re_chunk::EntityPath; use re_log_types::TimeInt; - fn make_entity_tree(paths: &[&EntityPath]) -> crate::EntityTree { - let mut tree = crate::EntityTree::root(); + fn make_entity_tree(paths: &[&EntityPath]) -> EntityTree { + let mut tree = EntityTree::root(); for path in paths { tree.on_new_entity(path); } diff --git a/crates/store/re_entity_db/src/sorted_range_map.rs b/crates/store/re_entity_db/src/sorted_range_map.rs index 1c09cffe07fa..baa39bba2b58 100644 --- a/crates/store/re_entity_db/src/sorted_range_map.rs +++ b/crates/store/re_entity_db/src/sorted_range_map.rs @@ -3,7 +3,7 @@ use std::ops::RangeInclusive; /// A sorted, immutable collection of inclusive ranges mapped to values. /// /// Supports O(log N) queries for overlapping ranges. -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone)] pub struct SortedRangeMap { /// Entries sorted by `range.start()`. 
entries: Vec<(RangeInclusive, V)>, @@ -13,6 +13,15 @@ pub struct SortedRangeMap { max_end: Vec, } +impl Default for SortedRangeMap { + fn default() -> Self { + Self { + entries: Vec::new(), + max_end: Vec::new(), + } + } +} + impl re_byte_size::SizeBytes for SortedRangeMap where K: re_byte_size::SizeBytes + Ord + Copy, @@ -25,24 +34,6 @@ where } impl SortedRangeMap { - pub fn new(mut entries: Vec<(RangeInclusive, V)>) -> Self { - entries.sort_by(|a, b| a.0.start().cmp(b.0.start())); - - let mut max_end = Vec::with_capacity(entries.len()); - let mut running_max = None::; - - for (range, _) in &entries { - let new_max = match running_max { - Some(m) => m.max(*range.end()), - None => *range.end(), - }; - running_max = Some(new_max); - max_end.push(new_max); - } - - Self { entries, max_end } - } - /// Returns an iterator over all (range, value) pairs that overlap with `query`. /// Results are yielded in order of `range.start()` (ascending). /// @@ -58,6 +49,20 @@ impl SortedRangeMap { } } + /// Resume a previously suspended overlap query from a saved cursor. + #[inline] + pub fn resume_query( + &self, + query: RangeInclusive, + cursor: OverlapIterState, + ) -> OverlapIter<'_, K, V> { + OverlapIter { + map: self, + query, + idx: cursor.0, + } + } + /// Find the first index that could possibly overlap with the query. #[inline] fn find_first_possible(&self, query: &RangeInclusive) -> usize { @@ -65,6 +70,27 @@ impl SortedRangeMap { self.max_end.partition_point(|max| *max < *query.start()) } + /// Append new entries and re-sort. 
+ pub fn extend(&mut self, new_entries: Vec<(RangeInclusive, V)>) { + self.entries.extend(new_entries); + self.entries.sort_by(|a, b| a.0.start().cmp(b.0.start())); + self.update_max_end(); + } + + fn update_max_end(&mut self) { + self.max_end.clear(); + self.max_end.reserve_exact(self.entries.len()); + let mut running_max = None::; + for (range, _) in &self.entries { + let new_max = match running_max { + Some(m) => m.max(*range.end()), + None => *range.end(), + }; + running_max = Some(new_max); + self.max_end.push(new_max); + } + } + #[inline] #[cfg_attr(not(test), expect(dead_code))] // only used in tests pub fn len(&self) -> usize { @@ -78,6 +104,13 @@ impl SortedRangeMap { } } +/// Opaque position within an [`OverlapIter`]. +/// +/// Obtain one via [`OverlapIter::cursor`] and pass it to +/// [`SortedRangeMap::resume_query`] to continue iteration later. +#[derive(Debug, Clone, Copy)] +pub struct OverlapIterState(usize); + /// Non-allocating iterator over overlapping ranges. #[derive(Debug, Clone)] pub struct OverlapIter<'a, K, V> { @@ -86,6 +119,15 @@ pub struct OverlapIter<'a, K, V> { idx: usize, } +impl OverlapIter<'_, K, V> { + /// Snapshot the current position so the query can be resumed later + /// via [`SortedRangeMap::resume_query`]. 
+ #[inline] + pub fn cursor(&self) -> OverlapIterState { + OverlapIterState(self.idx) + } +} + impl<'a, K: Ord + Copy, V> Iterator for OverlapIter<'a, K, V> { type Item = (&'a RangeInclusive, &'a V); @@ -122,6 +164,19 @@ impl std::iter::FusedIterator for OverlapIter<'_, K, V> {} mod tests { use super::*; + impl SortedRangeMap { + pub fn new(mut entries: Vec<(RangeInclusive, V)>) -> Self { + entries.sort_by(|a, b| a.0.start().cmp(b.0.start())); + + let mut slf = Self { + entries, + max_end: vec![], + }; + slf.update_max_end(); + slf + } + } + #[test] fn test_basic_overlap() { let map = SortedRangeMap::new(vec![ diff --git a/crates/store/re_entity_db/src/store_bundle.rs b/crates/store/re_entity_db/src/store_bundle.rs index 8ee972b3df00..2aabe11c7f48 100644 --- a/crates/store/re_entity_db/src/store_bundle.rs +++ b/crates/store/re_entity_db/src/store_bundle.rs @@ -20,7 +20,7 @@ pub enum StoreLoadError { #[derive(Default)] pub struct StoreBundle { // `indexmap` is used to keep track of the insertion order. - recording_store: indexmap::IndexMap, + stores: indexmap::IndexMap, } impl StoreBundle { @@ -28,6 +28,7 @@ impl StoreBundle { /// It can theoretically contain multiple recordings, and blueprints. pub fn from_rrd( reader: std::io::BufReader, + data_source: &re_log_channel::LogSource, ) -> Result { re_tracing::profile_function!(); @@ -37,43 +38,50 @@ impl StoreBundle { for msg in decoder { let msg = msg?; - slf.entry(msg.store_id()).add_log_msg(&msg)?; + let entity_db = slf.stores.entry(msg.store_id().clone()).or_insert_with(|| { + let mut db = EntityDb::new(msg.store_id().clone()); + db.data_source = Some(data_source.clone()); + db + }); + entity_db.add_log_msg(&msg)?; } Ok(slf) } /// All loaded [`EntityDb`], both recordings and blueprints, in insertion order. pub fn entity_dbs(&self) -> impl Iterator { - self.recording_store.values() + self.stores.values() } /// All loaded [`EntityDb`], both recordings and blueprints, in insertion order. 
pub fn entity_dbs_mut(&mut self) -> impl Iterator { - self.recording_store.values_mut() + self.stores.values_mut() } pub fn remove(&mut self, id: &StoreId) -> Option { - self.recording_store.shift_remove(id) + self.stores.shift_remove(id) } // -- pub fn contains(&self, id: &StoreId) -> bool { - self.recording_store.contains_key(id) + self.stores.contains_key(id) } pub fn get(&self, id: &StoreId) -> Option<&EntityDb> { - self.recording_store.get(id) + self.stores.get(id) } pub fn get_mut(&mut self, id: &StoreId) -> Option<&mut EntityDb> { - self.recording_store.get_mut(id) + self.stores.get_mut(id) } /// Returns either a recording or blueprint [`EntityDb`]. /// One is created if it doesn't already exist. + // NOTE(grtlr): We should clean this up, it's much too easy to create an + // entry in without the required book-keeping for new stores. pub fn entry(&mut self, id: &StoreId) -> &mut EntityDb { - self.recording_store.entry(id.clone()).or_insert_with(|| { + self.stores.entry(id.clone()).or_insert_with(|| { re_log::trace!("Creating new store: '{id:?}'"); EntityDb::new(id.clone()) }) @@ -85,7 +93,7 @@ impl StoreBundle { pub fn blueprint_entry(&mut self, id: &StoreId) -> &mut EntityDb { re_log::debug_assert!(id.is_blueprint()); - self.recording_store.entry(id.clone()).or_insert_with(|| { + self.stores.entry(id.clone()).or_insert_with(|| { // TODO(jleibs): If the blueprint doesn't exist this probably means we are // initializing a new default-blueprint for the application in question. // Make sure it's marked as a blueprint. @@ -107,32 +115,45 @@ impl StoreBundle { } pub fn insert(&mut self, entity_db: EntityDb) { - self.recording_store - .insert(entity_db.store_id().clone(), entity_db); + self.stores.insert(entity_db.store_id().clone(), entity_db); } /// In insertion order. pub fn recordings(&self) -> impl Iterator { - self.recording_store + self.stores .values() .filter(|log| log.store_kind() == StoreKind::Recording) } /// In insertion order. 
pub fn recordings_mut(&mut self) -> impl Iterator { - self.recording_store + self.stores .values_mut() .filter(|log| log.store_kind() == StoreKind::Recording) } + /// Recordings for a redap origin + pub fn recordings_for_origin( + &self, + origin: &re_uri::Origin, + ) -> impl Iterator { + self.recordings() + .filter(|db| { + matches!( + &db.data_source, + Some(re_log_channel::LogSource::RedapGrpcStream { uri, .. }) if uri.origin == *origin + ) + }) + } + // -- pub fn retain(&mut self, mut f: impl FnMut(&EntityDb) -> bool) { - self.recording_store.retain(|_, db| f(db)); + self.stores.retain(|_, db| f(db)); } /// In insertion order. pub fn drain_entity_dbs(&mut self) -> impl Iterator + '_ { - self.recording_store.drain(..).map(|(_, store)| store) + self.stores.drain(..).map(|(_, store)| store) } } diff --git a/crates/store/re_entity_db/src/time_histogram_per_timeline.rs b/crates/store/re_entity_db/src/time_histogram_per_timeline.rs deleted file mode 100644 index ebc1210bf227..000000000000 --- a/crates/store/re_entity_db/src/time_histogram_per_timeline.rs +++ /dev/null @@ -1,483 +0,0 @@ -use std::collections::BTreeMap; -use std::ops::Bound; - -use emath::lerp; -use itertools::Itertools as _; -use re_byte_size::{MemUsageNode, MemUsageTree, MemUsageTreeCapture, SizeBytes as _}; -use re_chunk::{TimeInt, Timeline, TimelineName}; -use re_chunk_store::{ChunkDirectLineage, ChunkStore, ChunkStoreDiff, ChunkStoreEvent}; -use re_log_types::{AbsoluteTimeRange, AbsoluteTimeRangeF, TimeReal}; - -use crate::RrdManifestIndex; - -// --- - -/// Number of messages per time. 
-// TODO(RR-3784): get rid of TimeHistogram completely -#[derive(Clone)] -pub struct TimeHistogram { - timeline: Timeline, - hist: re_int_histogram::Int64Histogram, -} - -impl std::ops::Deref for TimeHistogram { - type Target = re_int_histogram::Int64Histogram; - - #[inline] - fn deref(&self) -> &Self::Target { - &self.hist - } -} - -impl TimeHistogram { - pub fn new(timeline: Timeline) -> Self { - Self { - timeline, - hist: Default::default(), - } - } - - pub fn timeline(&self) -> Timeline { - self.timeline - } - - pub fn num_rows(&self) -> u64 { - self.hist.total_count() - } - - pub fn insert(&mut self, time: TimeInt, count: u64) { - self.hist.increment(time.as_i64(), count as _); - } - - pub fn increment(&mut self, time: i64, n: u32) { - self.hist.increment(time, n); - } - - pub fn decrement(&mut self, time: i64, n: u32) { - self.hist.decrement(time, n); - } - - fn min_opt(&self) -> Option { - self.min_key().map(TimeInt::new_temporal) - } - - pub fn min(&self) -> TimeInt { - self.min_opt().unwrap_or(TimeInt::MIN) - } - - fn max_opt(&self) -> Option { - self.max_key().map(TimeInt::new_temporal) - } - - pub fn max(&self) -> TimeInt { - self.max_opt().unwrap_or(TimeInt::MIN) - } - - pub fn full_range(&self) -> AbsoluteTimeRange { - AbsoluteTimeRange::new(self.min(), self.max()) - } - - pub fn step_fwd_time(&self, time: TimeReal) -> TimeInt { - self.next_key_after(time.floor().as_i64()) - .map(TimeInt::new_temporal) - .unwrap_or_else(|| self.min()) - } - - pub fn step_back_time(&self, time: TimeReal) -> TimeInt { - self.prev_key_before(time.ceil().as_i64()) - .map(TimeInt::new_temporal) - .unwrap_or_else(|| self.max()) - } - - pub fn step_fwd_time_looped( - &self, - time: TimeReal, - loop_range: &AbsoluteTimeRangeF, - ) -> TimeReal { - if time < loop_range.min || loop_range.max <= time { - loop_range.min - } else if let Some(next) = self - .range( - ( - Bound::Excluded(time.floor().as_i64()), - Bound::Included(loop_range.max.floor().as_i64()), - ), - 1, - ) - 
.next() - .map(|(r, _)| r.min) - { - TimeReal::from(next) - } else { - self.step_fwd_time(time).into() - } - } - - pub fn step_back_time_looped( - &self, - time: TimeReal, - loop_range: &AbsoluteTimeRangeF, - ) -> TimeReal { - re_tracing::profile_function!(); - - if time <= loop_range.min || loop_range.max < time { - loop_range.max - } else { - // Collect all keys in the range and take the last one. - // Yes, this could be slow :/ - let mut prev_key = None; - for (range, _) in self.range( - ( - Bound::Included(loop_range.min.ceil().as_i64()), - Bound::Excluded(time.ceil().as_i64()), - ), - 1, - ) { - prev_key = Some(range.max); - } - if let Some(prev) = prev_key { - TimeReal::from(TimeInt::new_temporal(prev)) - } else { - self.step_back_time(time).into() - } - } - } -} - -/// Number of messages per time per timeline. -/// -/// Does NOT include static data. -#[derive(Default, Clone)] -pub struct TimeHistogramPerTimeline { - /// When do we have data? Ignores static data. - times: BTreeMap, - - /// Extra bookkeeping used to seed any timelines that include static msgs. - has_static: bool, -} - -impl TimeHistogramPerTimeline { - #[inline] - pub fn is_empty(&self) -> bool { - self.times.is_empty() && !self.has_static - } - - #[inline] - pub fn timelines(&self) -> impl ExactSizeIterator { - self.times.values().map(|h| h.timeline()) - } - - pub fn histograms(&self) -> impl ExactSizeIterator { - self.times.values() - } - - #[inline] - pub fn get(&self, timeline: &TimelineName) -> Option<&TimeHistogram> { - self.times.get(timeline) - } - - #[inline] - pub fn has_timeline(&self, timeline: &TimelineName) -> bool { - self.times.contains_key(timeline) - } - - #[inline] - pub fn iter(&self) -> impl ExactSizeIterator { - self.times.iter() - } - - /// Total number of temporal messages over all timelines. - pub fn num_temporal_messages(&self) -> u64 { - self.times.values().map(|hist| hist.total_count()).sum() - } - - /// Increments `n` for each specified time. - /// - /// I.e. 
this adds a total of `n*times.len()`. - fn add_temporal(&mut self, timeline: &Timeline, times: &[i64], n: u32) { - re_tracing::profile_function!(); - - let histogram = self - .times - .entry(*timeline.name()) - .or_insert_with(|| TimeHistogram::new(*timeline)); - for &time in times { - histogram.increment(time, n); - } - } - - /// Decrements `n` for each specified time. - /// - /// I.e. this removes a total of `n*times.len()`. - fn remove_temporal(&mut self, timeline: &Timeline, times: &[i64], n: u32) { - re_tracing::profile_function!(); - - if let Some(histogram) = self.times.get_mut(timeline.name()) { - for &time in times { - histogram.decrement(time, n); - } - if histogram.is_empty() { - self.times.remove(timeline.name()); - } - } - } - - /// If we know the manifest ahead of time, we can pre-populate - /// the histogram with a rough estimate of the final form. - pub fn on_rrd_manifest(&mut self, rrd_manifest_index: &RrdManifestIndex) { - re_tracing::profile_function!(); - - for chunk in rrd_manifest_index.root_chunks() { - if chunk.temporals.is_empty() { - self.has_static = true; - } - - for info in chunk.temporals.values() { - let histogram = self - .times - .entry(*info.timeline.name()) - .or_insert_with(|| TimeHistogram::new(info.timeline)); - - apply_estimate( - Application::Add, - histogram, - info.time_range, - info.num_rows_for_all_entities_all_components, - ); - } - } - } - - pub fn on_events( - &mut self, - store: &ChunkStore, - rrd_manifest_index: &RrdManifestIndex, - events: &[ChunkStoreEvent], - ) { - re_tracing::profile_function!(); - - for event in events { - match &event.diff { - ChunkStoreDiff::Addition(add) => { - let delta_chunk = add.delta_chunk(); - - let root_chunk_id = add.chunk_before_processing.id(); - let root_chunk_info = rrd_manifest_index.root_chunk_info(&root_chunk_id); - - if delta_chunk.is_static() { - self.has_static = true; - } else { - for time_column in delta_chunk.timelines().values() { - let times = time_column.times_raw(); - 
let timeline = time_column.timeline(); - - if let Some(chunk_info) = root_chunk_info - && let Some(timeline_info) = - &chunk_info.temporals.get(timeline.name()) - { - // We added an estimated value for this before, based on the RRD manifest. - // Now that we have the whole chunk we need to subtract those fake values again, - // before we add in the actual contents of the chunk: - - let histogram = self - .times - .entry(*timeline.name()) - .or_insert_with(|| TimeHistogram::new(*timeline)); - - apply_estimate( - Application::Remove, - histogram, - timeline_info.time_range, - timeline_info.num_rows_for_all_entities_all_components, - ); - } - - self.add_temporal( - time_column.timeline(), - times, - // This value is incorrect since it doesn't account for the potential sparseness - // of individual components. - // I.e. this will over-count. For what we use this datastructure for, this is fine. - delta_chunk.num_components() as _, - ); - } - } - } - - ChunkStoreDiff::Deletion(del) => { - if del.chunk.is_static() { - // we don't care - } else { - // We want to explicitly look for root chunks here, even if that means walking recursively - // through the lineage tree. - // We will need them in order to re-fill the estimates as best as we can. - let root_chunk_ids = store.find_root_chunks(&del.chunk.id()); - let root_chunk_infos = root_chunk_ids - .iter() - .filter_map(|cid| rrd_manifest_index.root_chunk_info(cid)) - .collect_vec(); - - for time_column in del.chunk.timelines().values() { - let times = time_column.times_raw(); - let timeline = time_column.timeline(); - - self.remove_temporal( - time_column.timeline(), - times, - // This value is incorrect since it doesn't account for the potential sparseness - // of individual components. - // I.e. this will over-count. For what we use this datastructure for, this is fine. 
- del.chunk.num_components() as _, - ); - - #[expect(clippy::match_same_arms)] // readability - let undo_factor: f64 = match store.direct_lineage(&del.chunk.id()) { - // If the removed chunk was part of split lineage of siblings, then only bring that - // much of the estimate back. - Some(ChunkDirectLineage::SplitFrom(_, sibling_ids)) => { - 1.0 / (sibling_ids.len() + 1) as f64 - } - - Some(ChunkDirectLineage::CompactedFrom(_)) => 1.0, - - _ => 1.0, - }; - - for chunk_info in &root_chunk_infos { - if let Some(timeline_info) = - chunk_info.temporals.get(timeline.name()) - { - let histogram = self - .times - .entry(*timeline.name()) - .or_insert_with(|| TimeHistogram::new(*timeline)); - - let n = timeline_info.num_rows_for_all_entities_all_components - as f64; - let n = n * undo_factor; - let n = n as u64; - - apply_estimate( - Application::Add, - histogram, - timeline_info.time_range, - n, - ); - } - } - } - } - } - - ChunkStoreDiff::VirtualAddition(_) => { - // TODO(cmc): this should probably replace the `on_rrd_manifest` impl above. 
- } - } - } - } -} - -#[derive(Clone, Copy, Debug)] -enum Application { - Add, - Remove, -} - -impl Application { - fn apply(self, histogram: &mut TimeHistogram, position: i64, inc: u32) { - match self { - Self::Add => { - histogram.increment(position, inc); - } - Self::Remove => { - histogram.decrement(position, inc); - } - } - } -} - -fn apply_estimate( - application: Application, - histogram: &mut TimeHistogram, - time_range: re_log_types::AbsoluteTimeRange, - num_rows_for_all_entities_all_components: u64, -) { - if num_rows_for_all_entities_all_components == 0 { - return; - } - - // Assume even spread of chunk (for now): - let num_pieces = u64::min(num_rows_for_all_entities_all_components, 10); - - if num_pieces == 1 || time_range.min == time_range.max { - let position = time_range.center(); - application.apply( - histogram, - position.as_i64(), - num_rows_for_all_entities_all_components as u32, - ); - } else { - let inc = (num_rows_for_all_entities_all_components / num_pieces) as u32; - for i in 0..num_pieces { - let position = lerp( - time_range.min.as_f64()..=time_range.max.as_f64(), - i as f64 / (num_pieces as f64 - 1.0), - ) - .round() as i64; - - application.apply( - histogram, - position, - inc + (i < num_rows_for_all_entities_all_components % num_pieces) as u32, - ); - } - } -} - -impl re_byte_size::SizeBytes for TimeHistogram { - fn heap_size_bytes(&self) -> u64 { - // Calculating the memory use of the time histogram can be slow - // (tens of ms for 1h-recording). - // But it can also use a lot of memory - // TODO(RR-3784): get rid of TimeHistogram completely - - re_tracing::profile_function!(); - - let Self { timeline: _, hist } = self; - - let accurate_but_slow = true; - - if accurate_but_slow { - hist.heap_size_bytes() - } else { - // VERY rouch estimate. Can easily be wrong by a factor of 4 in either direction. 
- hist.total_count() * (std::mem::size_of::() as u64) - } - } -} - -impl re_byte_size::SizeBytes for TimeHistogramPerTimeline { - fn heap_size_bytes(&self) -> u64 { - re_tracing::profile_function!(); - let Self { times, has_static } = self; - times.heap_size_bytes() + has_static.heap_size_bytes() - } -} - -impl MemUsageTreeCapture for TimeHistogramPerTimeline { - fn capture_mem_usage_tree(&self) -> MemUsageTree { - re_tracing::profile_function!(); - - let Self { times, has_static } = self; - _ = has_static; - - let mut node = MemUsageNode::new(); - for (timeline_name, histogram) in times { - node.add( - timeline_name.as_str().to_owned(), - histogram.total_size_bytes(), - ); - } - node.into_tree() - } -} diff --git a/crates/store/re_entity_db/src/versioned_instance_path.rs b/crates/store/re_entity_db/src/versioned_instance_path.rs index 85d9746f00c6..9f9985481a35 100644 --- a/crates/store/re_entity_db/src/versioned_instance_path.rs +++ b/crates/store/re_entity_db/src/versioned_instance_path.rs @@ -10,8 +10,9 @@ use crate::{InstancePath, InstancePathHash}; /// or the whole entity. /// /// The easiest way to construct this type is via [`crate::InstancePath::versioned`]. 
-#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +#[derive( + Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Deserialize, serde::Serialize, +)] pub struct VersionedInstancePath { pub instance_path: InstancePath, pub row_id: RowId, diff --git a/crates/store/re_grpc_client/Cargo.toml b/crates/store/re_grpc_client/Cargo.toml index 19baaee98b1a..3fc445083ebf 100644 --- a/crates/store/re_grpc_client/Cargo.toml +++ b/crates/store/re_grpc_client/Cargo.toml @@ -23,6 +23,7 @@ all-features = true [dependencies] re_chunk.workspace = true +re_error.workspace = true re_log_channel.workspace = true re_log_encoding = { workspace = true, features = ["encoder", "decoder"] } re_log_types.workspace = true diff --git a/crates/store/re_grpc_client/src/lib.rs b/crates/store/re_grpc_client/src/lib.rs index d4f1ea5e66c3..1996311308fb 100644 --- a/crates/store/re_grpc_client/src/lib.rs +++ b/crates/store/re_grpc_client/src/lib.rs @@ -39,26 +39,31 @@ impl TonicStatusError { impl std::fmt::Display for TonicStatusError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // TODO(emilk): duplicated in `re_grpc_server` - let status = &self.0; + // NOTE: duplicated in `re_grpc_server` and `re_redap_client` + fmt_tonic_status(f, &self.0) + } +} +fn fmt_tonic_status(f: &mut std::fmt::Formatter<'_>, status: &tonic::Status) -> std::fmt::Result { + if status.message().is_empty() { write!(f, "gRPC error")?; + } else { + write!(f, "{}", status.message())?; + } + + if status.code() != tonic::Code::Unknown { + write!(f, " ({})", status.code())?; + } - if status.code() != tonic::Code::Unknown { - write!(f, ", code: '{}'", status.code())?; - } - if !status.message().is_empty() { - write!(f, ", message: {:?}", status.message())?; - } - // Binary data - not useful. 
- // if !status.details().is_empty() { - // write!(f, ", details: {:?}", status.details())?; - // } - if !status.metadata().is_empty() { - write!(f, ", metadata: {:?}", status.metadata().as_ref())?; - } - Ok(()) + if !status.metadata().is_empty() { + write!( + f, + "{} metadata: {:?}", + re_error::DETAILS_SEPARATOR, + status.metadata().as_ref() + )?; } + Ok(()) } impl From for TonicStatusError { diff --git a/crates/store/re_grpc_client/src/read.rs b/crates/store/re_grpc_client/src/read.rs index 29a6fe00b31f..ea311e5e6e0c 100644 --- a/crates/store/re_grpc_client/src/read.rs +++ b/crates/store/re_grpc_client/src/read.rs @@ -90,9 +90,8 @@ async fn stream_async( break; } - Err(_) => { - re_log::debug!("gRPC stream timed out"); - break; + Err(err) => { + return Err(err.into()); } } } diff --git a/crates/store/re_grpc_client/src/write.rs b/crates/store/re_grpc_client/src/write.rs index 7148d1484e08..90e75a8c1e94 100644 --- a/crates/store/re_grpc_client/src/write.rs +++ b/crates/store/re_grpc_client/src/write.rs @@ -190,7 +190,7 @@ impl Client { } } else { self.cmd_tx.blocking_send(cmd) - }.map_err(|_ignored_details| ()) + }.map_err(|_ignored_err| ()) } /// Whether the client is connected to a remote server. 
@@ -256,15 +256,15 @@ impl Client { }); } - if !has_emitted_slow_warning && very_slow <= start.elapsed() { + if !has_emitted_slow_warning && very_slow <= elapsed { if timeout < Duration::from_secs(10_000) { - re_log::info!( + re_log::warn!( "Flushing the gRPC stream has taken over {:.1}s seconds (timeout: {:.0}s); will keep waiting…", elapsed.as_secs_f32(), timeout.as_secs_f32(), ); } else { - re_log::info!( + re_log::warn!( "Flushing the gRPC stream has taken over {:.1}s seconds; will keep waiting…", elapsed.as_secs_f32() ); @@ -365,7 +365,7 @@ async fn message_proxy_client( re_log::debug!("Shutting down client without flush"); return; } - _ = tokio::time::sleep(Duration::from_millis(100)) => { + () = tokio::time::sleep(Duration::from_millis(100)) => { } } } @@ -438,18 +438,13 @@ async fn message_proxy_client( } }; - let disconnect_result = if let Err(status) = client.write_messages(stream).await { + let disconnect_result = if let Err(err) = client.write_messages(stream).await { re_log::error!( "Write messages call failed: {}", - TonicStatusError::from(status.clone()) + TonicStatusError::from(err.clone()) ); - // Ignore status code "Unknown" since this was observed to happen on regular Viewer shutdowns. 
- if status.code() != tonic::Code::Ok && status.code() != tonic::Code::Unknown { - Err(ClientConnectionFailure::FailedToSendMessages(status.code())) - } else { - Ok(()) - } + Err(ClientConnectionFailure::FailedToSendMessages(err.code())) } else { Ok(()) }; diff --git a/crates/store/re_grpc_server/Cargo.toml b/crates/store/re_grpc_server/Cargo.toml index c97492a02d33..6b2af72c9195 100644 --- a/crates/store/re_grpc_server/Cargo.toml +++ b/crates/store/re_grpc_server/Cargo.toml @@ -22,6 +22,7 @@ all-features = true [dependencies] re_byte_size.workspace = true re_chunk.workspace = true +re_error.workspace = true re_format.workspace = true re_log = { workspace = true, features = ["setup"] } re_log_channel.workspace = true @@ -43,6 +44,7 @@ parking_lot.workspace = true tonic = { workspace = true, default-features = false, features = ["transport", "router"] } tonic-web.workspace = true tower-http = { workspace = true, features = ["cors"] } +wildmatch.workspace = true # Native dependencies: [target.'cfg(not(target_arch = "wasm32"))'.dependencies] diff --git a/crates/store/re_grpc_server/src/lib.rs b/crates/store/re_grpc_server/src/lib.rs index a95b84fe87f1..bce87947b175 100644 --- a/crates/store/re_grpc_server/src/lib.rs +++ b/crates/store/re_grpc_server/src/lib.rs @@ -26,7 +26,7 @@ use tokio::sync::oneshot; use tokio_stream::{Stream, StreamExt as _}; use tonic::transport::Server; use tonic::transport::server::TcpIncoming; -use tower_http::cors::CorsLayer; +use tower_http::cors::{AllowOrigin, CorsLayer}; use crate::priority_stream::PriorityMerge; @@ -48,7 +48,7 @@ const CHANNEL_SIZE_MESSAGES: usize = 1024; // TODO(emilk): move into `ServerOpti const CHANNEL_SIZE_BYTES: u64 = 128 * 1024 * 1024; // TODO(emilk): move into `ServerOptions` after the patch release. /// Options for the gRPC Proxy Server -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Debug)] pub struct ServerOptions { /// When a client connect, should they be sent the oldest data first, or the newest? 
pub playback_behavior: PlaybackBehavior, @@ -57,6 +57,18 @@ pub struct ServerOptions { /// /// It will start garbage collecting old data when we reach this. pub memory_limit: MemoryLimit, // TODO(emilk): rename `history_limit` + + /// Additional origin patterns allowed to make cross-origin requests to the server. + /// + /// By default, only `localhost`, `127.0.0.1`, and `rerun.io` are allowed. + /// Patterns are matched against the full `Origin` header value (e.g. `https://example.com:8080`), + /// using glob-style matching where `*` matches any sequence of characters. + /// + /// Examples: + /// - `"https://*.example.com"` — all subdomains on the default port (443) + /// - `"https://example.com:8080"` — exact origin with a specific port + /// - `"https://example.com:*"` — any port on example.com + pub cors_allowed_origins: Vec, } impl Default for ServerOptions { @@ -64,6 +76,7 @@ impl Default for ServerOptions { Self { playback_behavior: PlaybackBehavior::OldestFirst, memory_limit: MemoryLimit::from_bytes(1024 * 1024 * 1024), // Be very conservative by default + cors_allowed_origins: Vec::new(), } } } @@ -96,26 +109,31 @@ pub struct TonicStatusError(pub tonic::Status); impl std::fmt::Display for TonicStatusError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // TODO(emilk): duplicated in `re_grpc_client` - let status = &self.0; + // NOTE: duplicated in `re_grpc_client` and `re_redap_client` + fmt_tonic_status(f, &self.0) + } +} +fn fmt_tonic_status(f: &mut std::fmt::Formatter<'_>, status: &tonic::Status) -> std::fmt::Result { + if status.message().is_empty() { write!(f, "gRPC error")?; + } else { + write!(f, "{}", status.message())?; + } - if status.code() != tonic::Code::Unknown { - write!(f, ", code: '{}'", status.code())?; - } - if !status.message().is_empty() { - write!(f, ", message: {:?}", status.message())?; - } - // Binary data - not useful. 
- // if !status.details().is_empty() { - // write!(f, ", details: {:?}", status.details())?; - // } - if !status.metadata().is_empty() { - write!(f, ", metadata: {:?}", status.metadata().as_ref())?; - } - Ok(()) + if status.code() != tonic::Code::Unknown { + write!(f, " ({})", status.code())?; } + + if !status.metadata().is_empty() { + write!( + f, + "{} metadata: {:?}", + re_error::DETAILS_SEPARATOR, + status.metadata().as_ref() + )?; + } + Ok(()) } impl From for TonicStatusError { @@ -124,6 +142,42 @@ impl From for TonicStatusError { } } +const DEFAULT_CORS_PATTERNS: &[&str] = &[ + "*://localhost", + "*://localhost:*", + "*://127.0.0.1", + "*://127.0.0.1:*", + "*://rerun.io", + "*://rerun.io:*", +]; + +/// Returns true if the given origin is allowed by the given patterns. +fn is_origin_allowed(origin: &str, patterns: &[wildmatch::WildMatch]) -> bool { + patterns.iter().any(|pat| pat.matches(origin)) +} + +/// Build a CORS layer that allows only localhost, 127.0.0.1, rerun.io, +/// and any additional user-specified origin patterns. +/// +/// Patterns are matched against the full `Origin` header value, +/// using glob-style matching where `*` matches any sequence of characters. +pub fn cors_layer(extra_allowed_origins: &[String]) -> CorsLayer { + let allowed_origin_patterns: Vec = DEFAULT_CORS_PATTERNS + .iter() + .copied() + .chain(extra_allowed_origins.iter().map(String::as_str)) + .map(wildmatch::WildMatch::new) + .collect(); + CorsLayer::very_permissive().allow_origin(AllowOrigin::predicate( + move |origin, _request_parts| { + let Ok(origin) = origin.to_str() else { + return false; + }; + is_origin_allowed(origin, &allowed_origin_patterns) + }, + )) +} + // TODO(jan): Refactor `serve`/`spawn` variants into a builder? /// Start a Rerun server, listening on `addr`. 
@@ -147,7 +201,8 @@ pub async fn serve( options: ServerOptions, shutdown: shutdown::Shutdown, ) -> anyhow::Result<()> { - serve_impl(addr, options, MessageProxy::new(options), shutdown).await + let message_proxy = MessageProxy::new(options.clone()); + serve_impl(addr, options, message_proxy, shutdown).await } async fn serve_impl( @@ -205,7 +260,7 @@ async fn serve_impl( re_log::debug!("Server memory limit set at {}", options.memory_limit); - let cors = CorsLayer::very_permissive(); + let cors = cors_layer(&options.cors_allowed_origins); let grpc_web = tonic_web::GrpcWebLayer::new(); let routes = { @@ -247,7 +302,7 @@ pub async fn serve_from_channel( shutdown: shutdown::Shutdown, channel_rx: re_log_channel::LogReceiver, ) { - let message_proxy = MessageProxy::new(options); + let message_proxy = MessageProxy::new(options.clone()); let event_tx = message_proxy.event_tx.clone(); tokio::task::spawn_blocking(move || { @@ -322,7 +377,7 @@ pub fn spawn_from_rx_set( shutdown: shutdown::Shutdown, rxs: re_log_channel::LogReceiverSet, ) { - let message_proxy = MessageProxy::new(options); + let message_proxy = MessageProxy::new(options.clone()); let event_tx = message_proxy.event_tx.clone(); tokio::spawn(async move { @@ -416,7 +471,7 @@ pub fn spawn_with_recv( let (channel_log_tx, channel_log_rx) = re_log_channel::log_channel(re_log_channel::LogSource::MessageProxy(uri)); - let (message_proxy, mut broadcast_log_rx) = MessageProxy::new_with_recv(options); + let (message_proxy, mut broadcast_log_rx) = MessageProxy::new_with_recv(options.clone()); tokio::spawn(async move { if let Err(err) = serve_impl(addr, options, message_proxy, shutdown).await { @@ -891,10 +946,13 @@ impl MessageProxy { async_mpsc_channel::channel("re_grpc_server events", message_queue_capacity) }; - let task_handle = tokio::spawn(async move { - EventLoop::new(options, event_rx, broadcast_log_tx) - .run_in_place() - .await; + let task_handle = tokio::spawn({ + let options = options.clone(); + async move { + 
EventLoop::new(options, event_rx, broadcast_log_tx) + .run_in_place() + .await; + } }); ( @@ -1247,6 +1305,7 @@ mod tests { setup_opt(ServerOptions { playback_behavior: PlaybackBehavior::OldestFirst, memory_limit: MemoryLimit::UNLIMITED, + cors_allowed_origins: vec![], }) .await } @@ -1255,6 +1314,7 @@ mod tests { setup_opt(ServerOptions { playback_behavior: PlaybackBehavior::OldestFirst, memory_limit, + cors_allowed_origins: vec![], }) .await } @@ -1582,6 +1642,7 @@ mod tests { let (completion, addr) = setup_opt(ServerOptions { playback_behavior: PlaybackBehavior::NewestFirst, // this is what we want to test memory_limit: MemoryLimit::UNLIMITED, + cors_allowed_origins: vec![], }) .await; let mut client = make_client(addr).await; @@ -1614,4 +1675,58 @@ mod tests { completion.finish(); } + + mod cors_tests { + use super::super::{DEFAULT_CORS_PATTERNS, is_origin_allowed}; + + fn check(origin: &str, extra: &[&str]) -> bool { + let patterns: Vec = DEFAULT_CORS_PATTERNS + .iter() + .copied() + .chain(extra.iter().copied()) + .map(wildmatch::WildMatch::new) + .collect(); + is_origin_allowed(origin, &patterns) + } + + #[test] + fn default_allowed_origins() { + assert!(check("http://localhost", &[])); + assert!(check("http://localhost:8080", &[])); + assert!(check("https://127.0.0.1", &[])); + assert!(check("https://127.0.0.1:9090", &[])); + assert!(check("https://rerun.io", &[])); + assert!(check("https://rerun.io:443", &[])); + } + + #[test] + fn default_rejected_origins() { + assert!(!check("https://evil.com", &[])); + assert!(!check("https://notlocalhost.com", &[])); + assert!(!check("https://localhost.evil.com", &[])); + } + + #[test] + fn extra_patterns() { + assert!(check("https://app.example.com", &["https://*.example.com"])); + assert!(!check("https://evil.com", &["https://*.example.com"])); + + // `?` is a bit of a footgun, you might think this could work but it doesn't: + assert!(check("https://example.com", &["http?://example.com"])); + 
assert!(!check("http://example.com", &["http?://example.com"])); + + // Port wildcard + assert!(check( + "https://example.com:8080", + &["https://example.com:*"] + )); + } + + #[test] + fn edge_cases() { + assert!(!check("", &[])); + assert!(!check("localhost", &[])); + assert!(!check("evil.com", &[])); + } + } } diff --git a/crates/store/re_grpc_server/src/main.rs b/crates/store/re_grpc_server/src/main.rs index 54efd16d1eed..37b499a6f69e 100644 --- a/crates/store/re_grpc_server/src/main.rs +++ b/crates/store/re_grpc_server/src/main.rs @@ -14,6 +14,7 @@ async fn main() -> anyhow::Result<()> { ServerOptions { playback_behavior: re_grpc_server::PlaybackBehavior::OldestFirst, memory_limit: re_grpc_server::MemoryLimit::from_fraction_of_total(0.75), + cors_allowed_origins: vec![], }, shutdown::never(), ) diff --git a/crates/store/re_data_loader/Cargo.toml b/crates/store/re_importer/Cargo.toml similarity index 86% rename from crates/store/re_data_loader/Cargo.toml rename to crates/store/re_importer/Cargo.toml index 07721d33dcc5..e7f9124a7e41 100644 --- a/crates/store/re_data_loader/Cargo.toml +++ b/crates/store/re_importer/Cargo.toml @@ -1,7 +1,7 @@ [package] -name = "re_data_loader" +name = "re_importer" authors.workspace = true -description = "Handles loading of Rerun data from file using data loader plugins" +description = "Handles importing of Rerun data from file using importer plugins" edition.workspace = true homepage.workspace = true include.workspace = true @@ -24,20 +24,21 @@ default = [] [dependencies] -re_arrow_combinators.workspace = true re_arrow_util.workspace = true re_build_info.workspace = true re_chunk.workspace = true re_error.workspace = true +re_format.workspace = true +re_lenses_core.workspace = true re_lenses.workspace = true -re_log.workspace = true +re_log_channel.workspace = true re_log_encoding = { workspace = true, features = ["decoder"] } re_log_types.workspace = true +re_log.workspace = true re_mcap.workspace = true 
-re_log_channel.workspace = true re_quota_channel.workspace = true -re_tracing.workspace = true re_sdk_types = { workspace = true, features = ["ecolor", "glam", "image", "video"] } +re_tracing.workspace = true re_video.workspace = true ahash.workspace = true @@ -60,8 +61,9 @@ urdf-rs.workspace = true walkdir.workspace = true [target.'cfg(not(any(target_arch = "wasm32")))'.dependencies] -parquet = { workspace = true, features = ["arrow", "snap"] } +parquet = { workspace = true, features = ["arrow", "snap", "zstd"] } re_crash_handler.workspace = true +re_parquet.workspace = true [dev-dependencies] re_chunk_store.workspace = true diff --git a/crates/store/re_importer/README.md b/crates/store/re_importer/README.md new file mode 100644 index 000000000000..b22832302903 --- /dev/null +++ b/crates/store/re_importer/README.md @@ -0,0 +1,10 @@ +# re_importer + +Part of the [`rerun`](https://github.com/rerun-io/rerun) family of crates. + +[![Latest version](https://img.shields.io/crates/v/re_importer.svg)](https://crates.io/crates/re_importer?speculative-link) +[![Documentation](https://docs.rs/re_importer/badge.svg)](https://docs.rs/re_importer?speculative-link) +![MIT](https://img.shields.io/badge/license-MIT-blue.svg) +![Apache](https://img.shields.io/badge/license-Apache-blue.svg) + +Handles importing of Rerun data from file using importer plugins diff --git a/crates/store/re_data_loader/src/load_file.rs b/crates/store/re_importer/src/import_file.rs similarity index 63% rename from crates/store/re_data_loader/src/load_file.rs rename to crates/store/re_importer/src/import_file.rs index 6660ad780695..c173e1129d6a 100644 --- a/crates/store/re_data_loader/src/load_file.rs +++ b/crates/store/re_importer/src/import_file.rs @@ -4,25 +4,25 @@ use ahash::{HashMap, HashMapExt as _}; use re_log_channel::LogSender; use re_log_types::{ApplicationId, FileSource, LogMsg}; -use crate::{DataLoader as _, DataLoaderError, LoadedData, RrdLoader}; +use crate::{ImportedData, Importer as _, 
ImporterError, RrdImporter}; // --- -/// Loads the given `path` using all [`crate::DataLoader`]s available. +/// Imports from the given `path` using all [`crate::Importer`]s available. /// -/// A single `path` might be handled by more than one loader. +/// A single `path` might be handled by more than one importer. /// /// Synchronously checks whether the file exists and can be loaded. Beyond that, all -/// errors are asynchronous and handled directly by the [`crate::DataLoader`]s themselves +/// errors are asynchronous and handled directly by the [`crate::Importer`]s themselves /// (i.e. they're logged). #[cfg(not(target_arch = "wasm32"))] -pub fn load_from_path( - settings: &crate::DataLoaderSettings, +pub fn import_from_path( + settings: &crate::ImporterSettings, file_source: FileSource, path: &std::path::Path, // NOTE: This channel must be unbounded since we serialize all operations when running on wasm. tx: &LogSender, -) -> Result<(), DataLoaderError> { +) -> Result<(), ImporterError> { re_tracing::profile_function!(path.to_string_lossy()); if !path.exists() { @@ -41,36 +41,36 @@ pub fn load_from_path( .map(|f| f.to_string_lossy().to_string()) .map(ApplicationId::from) }); - let settings = crate::DataLoaderSettings { - // When loading a LeRobot dataset, avoid sending a `SetStoreInfo` message since the LeRobot loader handles this automatically. + let settings = crate::ImporterSettings { + // When importing a LeRobot dataset, avoid sending a `SetStoreInfo` message since the LeRobot importer handles this automatically. force_store_info: !crate::lerobot::is_lerobot_dataset(path), application_id, ..settings.clone() }; - let rx = load(&settings, path, None)?; + let rx = import(&settings, path, None)?; send(settings, file_source, rx, tx); Ok(()) } -/// Loads the given `contents` using all [`crate::DataLoader`]s available. +/// Imports from the given `contents` using all [`crate::Importer`]s available. /// -/// A single file might be handled by more than one loader. 
+/// A single file might be handled by more than one importer. /// /// Synchronously checks that the file can be loaded. Beyond that, all errors are asynchronous -/// and handled directly by the [`crate::DataLoader`]s themselves (i.e. they're logged). +/// and handled directly by the [`crate::Importer`]s themselves (i.e. they're logged). /// /// `path` is only used for informational purposes, no data is ever read from the filesystem. -pub fn load_from_file_contents( - settings: &crate::DataLoaderSettings, +pub fn import_from_file_contents( + settings: &crate::ImporterSettings, file_source: FileSource, filepath: &std::path::Path, contents: std::borrow::Cow<'_, [u8]>, // NOTE: This channel must be unbounded since we serialize all operations when running on wasm. tx: &LogSender, -) -> Result<(), DataLoaderError> { +) -> Result<(), ImporterError> { re_tracing::profile_function!(filepath.to_string_lossy()); re_log::info!("Loading {filepath:?}…"); @@ -83,12 +83,12 @@ pub fn load_from_file_contents( .map(ApplicationId::from) }); - let settings = crate::DataLoaderSettings { + let settings = crate::ImporterSettings { application_id, ..settings.clone() }; - let data = load(&settings, filepath, Some(contents))?; + let data = import(&settings, filepath, Some(contents))?; send(settings, file_source, data, tx); @@ -98,10 +98,7 @@ pub fn load_from_file_contents( // --- /// Prepares an adequate [`re_log_types::StoreInfo`] [`LogMsg`] given the input. -pub(crate) fn prepare_store_info( - store_id: &re_log_types::StoreId, - file_source: FileSource, -) -> LogMsg { +pub fn prepare_store_info(store_id: &re_log_types::StoreId, file_source: FileSource) -> LogMsg { re_tracing::profile_function!(); use re_log_types::SetStoreInfo; @@ -114,147 +111,151 @@ pub(crate) fn prepare_store_info( }) } -/// Loads the data at `path` using all available [`crate::DataLoader`]s. +/// Imports data at `path` using all available [`crate::Importer`]s. 
/// -/// On success, returns a channel with all the [`LoadedData`]: +/// On success, returns a channel with all the [`ImportedData`]: /// - On native, this is filled asynchronously from other threads. /// - On wasm, this is pre-filled synchronously. /// -/// There is only one way this function can return an error: not a single [`crate::DataLoader`] +/// There is only one way this function can return an error: not a single [`crate::Importer`] /// (whether it is builtin, custom or external) was capable of loading the data, in which case -/// [`DataLoaderError::Incompatible`] will be returned. +/// [`ImporterError::Incompatible`] will be returned. #[cfg(not(target_arch = "wasm32"))] -pub(crate) fn load( - settings: &crate::DataLoaderSettings, +pub(crate) fn import( + settings: &crate::ImporterSettings, path: &std::path::Path, contents: Option>, -) -> Result, DataLoaderError> { +) -> Result, ImporterError> { re_tracing::profile_function!(path.display().to_string()); - // On native we run loaders in parallel so this needs to become static. + // On native we run importers in parallel so this needs to become static. let contents: Option>> = contents.map(|contents| std::sync::Arc::new(Cow::Owned(contents.into_owned()))); - let rx_loader = { - let (tx_loader, rx_loader) = crossbeam::channel::bounded(1024); + let rx_importer = { + let (tx_importer, rx_importer) = crossbeam::channel::bounded(1024); - let any_compatible_loader = { + let any_compatible_importer = { #[derive(Debug, PartialEq, Eq)] - struct CompatibleLoaderFound; + struct CompatibleImporterFound; let (tx_feedback, rx_feedback) = - crossbeam::channel::bounded::(128); + crossbeam::channel::bounded::(128); // When loading a file type with native support (.rrd, .mcap, .png, …) - // then we don't need the overhead and noise of external data loaders: + // then we don't need the overhead and noise of external importers: // See . 
- let loaders = { + let importers = { use rayon::iter::Either; - use crate::DataLoader as _; + use crate::Importer as _; let extension = crate::extension(path); if crate::is_supported_file_extension(&extension) { Either::Left( - crate::iter_loaders() - .filter(|loader| loader.name() != crate::ExternalLoader.name()), + crate::iter_importers() + .filter(|importer| importer.name() != crate::ExternalImporter.name()), ) } else { - // We need to use an external dataloader - Either::Right(crate::iter_loaders()) + // We need to use an external importer + Either::Right(crate::iter_importers()) } }; - for loader in loaders { - let loader = std::sync::Arc::clone(&loader); + for importer in importers { + let importer = std::sync::Arc::clone(&importer); let settings = settings.clone(); let path = path.to_owned(); let contents = contents.clone(); // arc - let tx_loader = tx_loader.clone(); + let tx_importer = tx_importer.clone(); let tx_feedback = tx_feedback.clone(); rayon::spawn(move || { - re_tracing::profile_scope!("inner", loader.name()); + re_tracing::profile_scope!("inner", importer.name()); if let Some(contents) = contents.as_deref() { let contents = Cow::Borrowed(contents.as_ref()); - if let Err(err) = loader.load_from_file_contents( + if let Err(err) = importer.import_from_file_contents( &settings, path.clone(), contents, - tx_loader, + tx_importer, ) { if err.is_incompatible() { return; } - re_log::error!(?path, loader = loader.name(), %err, "Failed to load data"); + re_log::error!(?path, importer = importer.name(), %err, "Failed to import data"); } } else if let Err(err) = - loader.load_from_path(&settings, path.clone(), tx_loader) + importer.import_from_path(&settings, path.clone(), tx_importer) { if err.is_incompatible() { return; } - re_log::error!(?path, loader = loader.name(), %err, "Failed to load data from file"); + re_log::error!(?path, importer = importer.name(), %err, "Failed to import data from file"); } - re_log::debug!(loader = loader.name(), ?path, 
"compatible loader found"); - re_quota_channel::send_crossbeam(&tx_feedback, CompatibleLoaderFound).ok(); + re_log::debug!( + importer = importer.name(), + ?path, + "compatible importer found" + ); + re_quota_channel::send_crossbeam(&tx_feedback, CompatibleImporterFound).ok(); }); } - re_tracing::profile_wait!("compatible_loader"); + re_tracing::profile_wait!("compatible_importer"); drop(tx_feedback); - rx_feedback.recv() == Ok(CompatibleLoaderFound) + rx_feedback.recv() == Ok(CompatibleImporterFound) }; - // Implicitly closing `tx_loader`! + // Implicitly closing `tx_importer`! - any_compatible_loader.then_some(rx_loader) + any_compatible_importer.then_some(rx_importer) }; - if let Some(rx_loader) = rx_loader { - Ok(rx_loader) + if let Some(rx_importer) = rx_importer { + Ok(rx_importer) } else { - Err(DataLoaderError::Incompatible(path.to_owned())) + Err(ImporterError::Incompatible(path.to_owned())) } } -/// Loads the data at `path` using all available [`crate::DataLoader`]s. +/// Imports data at `path` using all available [`crate::Importer`]s. /// -/// On success, returns a channel (pre-filled synchronously) with all the [`LoadedData`]. +/// On success, returns a channel (pre-filled synchronously) with all the [`ImportedData`]. /// -/// There is only one way this function can return an error: not a single [`crate::DataLoader`] +/// There is only one way this function can return an error: not a single [`crate::Importer`] /// (whether it is builtin, custom or external) was capable of loading the data, in which case -/// [`DataLoaderError::Incompatible`] will be returned. +/// [`ImporterError::Incompatible`] will be returned. 
#[cfg(target_arch = "wasm32")] #[expect(clippy::needless_pass_by_value)] -pub(crate) fn load( - settings: &crate::DataLoaderSettings, +pub(crate) fn import( + settings: &crate::ImporterSettings, path: &std::path::Path, contents: Option>, -) -> Result, DataLoaderError> { +) -> Result, ImporterError> { re_tracing::profile_function!(path.display().to_string()); - let rx_loader = { - let (tx_loader, rx_loader) = crossbeam::channel::unbounded(); + let rx_importer = { + let (tx_importer, rx_importer) = crossbeam::channel::unbounded(); - let any_compatible_loader = crate::iter_loaders().any(|loader| { + let any_compatible_importer = crate::iter_importers().any(|importer| { if let Some(contents) = contents.as_deref() { let settings = settings.clone(); - let tx_loader = tx_loader.clone(); + let tx_importer = tx_importer.clone(); let path = path.to_owned(); let contents = Cow::Borrowed(contents); - if let Err(err) = loader.load_from_file_contents(&settings, path.clone(), contents, tx_loader) { + if let Err(err) = importer.import_from_file_contents(&settings, path.clone(), contents, tx_importer) { if err.is_incompatible() { return false; } - re_log::error!(?path, loader = loader.name(), %err, "Failed to load data from file"); + re_log::error!(?path, importer = importer.name(), %err, "Failed to import data from file"); } true @@ -263,25 +264,25 @@ pub(crate) fn load( } }); - // Implicitly closing `tx_loader`! + // Implicitly closing `tx_importer`! - any_compatible_loader.then_some(rx_loader) + any_compatible_importer.then_some(rx_importer) }; - if let Some(rx_loader) = rx_loader { - Ok(rx_loader) + if let Some(rx_importer) = rx_importer { + Ok(rx_importer) } else { - Err(DataLoaderError::Incompatible(path.to_owned())) + Err(ImporterError::Incompatible(path.to_owned())) } } -/// Forwards the data in `rx_loader` to `tx`, taking care of necessary conversions, if any. +/// Forwards the data in `rx_importer` to `tx`, taking care of necessary conversions, if any. 
/// /// Runs asynchronously from another thread on native, synchronously on wasm. pub(crate) fn send( - settings: crate::DataLoaderSettings, + settings: crate::ImporterSettings, file_source: FileSource, - rx_loader: crossbeam::channel::Receiver, + rx_importer: crossbeam::channel::Receiver, tx: &LogSender, ) { spawn({ @@ -302,8 +303,8 @@ pub(crate) fn send( // Not our problem whether or not the other end has hung up, but we still want to // poll the channel in any case so as to make sure that the data producer // doesn't get stuck. - for data in rx_loader { - let data_loader_name = data.data_loader_name().clone(); + for data in rx_importer { + let importer_name = data.importer_name().clone(); let msg = match data.into_log_msg() { Ok(msg) => { let store_info = match &msg { @@ -319,7 +320,7 @@ pub(crate) fn send( if let Some((store_id, store_info_created)) = store_info { let tracked = store_info_tracker.entry(store_id).or_default(); tracked.is_rrd_or_rbl = - *data_loader_name == RrdLoader::name(&RrdLoader); + *importer_name == RrdImporter::name(&RrdImporter); tracked.already_has_store_info |= store_info_created; } @@ -364,7 +365,16 @@ fn spawn(f: F) where F: FnOnce() + Send + 'static, { - rayon::spawn(f); + if 1 < rayon::current_num_threads() { + rayon::spawn(f); + } else { + // Avoids a deadlock when send-channel gets full. + // We usually only use `-j1` for profiling the main application; not data loading. 
+ std::thread::Builder::new() + .name("importer".to_owned()) + .spawn(f) + .expect("Failed to spawn a thread"); + } } #[cfg(target_arch = "wasm32")] diff --git a/crates/store/re_data_loader/src/loader_archetype.rs b/crates/store/re_importer/src/importer_archetype.rs similarity index 83% rename from crates/store/re_data_loader/src/loader_archetype.rs rename to crates/store/re_importer/src/importer_archetype.rs index 49a65f82c79e..9cd95d1dabfc 100644 --- a/crates/store/re_data_loader/src/loader_archetype.rs +++ b/crates/store/re_importer/src/importer_archetype.rs @@ -5,44 +5,44 @@ use re_sdk_types::ComponentBatch; use re_sdk_types::archetypes::{AssetVideo, VideoFrameReference}; use re_sdk_types::components::VideoTimestamp; -use crate::{DataLoader, DataLoaderError, LoadedData}; +use crate::{ImportedData, Importer, ImporterError}; // --- -/// Loads data from any supported file or in-memory contents as native [`re_sdk_types::Archetype`]s. +/// Imports data from any supported file or in-memory contents as native [`re_sdk_types::Archetype`]s. /// -/// This is a simple generic [`DataLoader`] for filetypes that match 1-to-1 with our builtin +/// This is a simple generic [`Importer`] for filetypes that match 1-to-1 with our builtin /// archetypes. 
-pub struct ArchetypeLoader; +pub struct ArchetypeImporter; -impl DataLoader for ArchetypeLoader { +impl Importer for ArchetypeImporter { #[inline] fn name(&self) -> String { - "rerun.data_loaders.Archetype".into() + "rerun.importers.Archetype".into() } #[cfg(not(target_arch = "wasm32"))] - fn load_from_path( + fn import_from_path( &self, - settings: &crate::DataLoaderSettings, + settings: &crate::ImporterSettings, filepath: std::path::PathBuf, - tx: crossbeam::channel::Sender, - ) -> Result<(), crate::DataLoaderError> { + tx: crossbeam::channel::Sender, + ) -> Result<(), crate::ImporterError> { use anyhow::Context as _; // NOTE: We're not just checking whether this is or isn't any kind of file here: we // are specifically checking whether this is a vanilla, run-of-the-mill, boring file. // Not a socket, not a fifo, not some obscure named pipe, and certainly not a symlink to // any of these things: just a basic file. Anything other than a vanilla file is assumed to - // be an RRD stream by default, and therefore will be handled by the RRD data loader. + // be an RRD stream by default, and therefore will be handled by the RRD importer. // // This is super important because, if that thing does turn out to be a fifo or something of - // that nature (e.g. `rerun <(curl …)`), and we end up reading from it, then the RRD data loader + // that nature (e.g. `rerun <(curl …)`), and we end up reading from it, then the RRD importer // will end up executing on top of a racy, partial RRD stream (because these virtual streams // have process-global state). The end result will be what looks like a bunch of corrupt data and // the decoder which will start spewing random confusing errors. 
if !filepath.is_file() { - return Err(crate::DataLoaderError::Incompatible(filepath.clone())); + return Err(crate::ImporterError::Incompatible(filepath.clone())); } re_tracing::profile_function!(filepath.display().to_string()); @@ -53,19 +53,19 @@ impl DataLoader for ArchetypeLoader { }; let contents = std::borrow::Cow::Owned(contents); - self.load_from_file_contents(settings, filepath, contents, tx) + self.import_from_file_contents(settings, filepath, contents, tx) } - fn load_from_file_contents( + fn import_from_file_contents( &self, - settings: &crate::DataLoaderSettings, + settings: &crate::ImporterSettings, filepath: std::path::PathBuf, contents: std::borrow::Cow<'_, [u8]>, - tx: crossbeam::channel::Sender, - ) -> Result<(), crate::DataLoaderError> { + tx: crossbeam::channel::Sender, + ) -> Result<(), crate::ImporterError> { let extension = crate::extension(&filepath); if !crate::is_supported_file_extension(&extension) { - return Err(crate::DataLoaderError::Incompatible(filepath.clone())); + return Err(crate::ImporterError::Incompatible(filepath.clone())); } re_tracing::profile_function!(filepath.display().to_string()); @@ -110,7 +110,7 @@ impl DataLoader for ArchetypeLoader { let mut rows = Vec::new(); if crate::SUPPORTED_IMAGE_EXTENSIONS.contains(&extension.as_str()) { - re_log::debug!(?filepath, loader = self.name(), "Loading image…",); + re_log::debug!(?filepath, importer = self.name(), "Loading image…",); rows.extend(load_image( &filepath, timepoint, @@ -118,7 +118,7 @@ impl DataLoader for ArchetypeLoader { contents.into_owned(), )?); } else if crate::SUPPORTED_DEPTH_IMAGE_EXTENSIONS.contains(&extension.as_str()) { - re_log::debug!(?filepath, loader = self.name(), "Loading depth image…",); + re_log::debug!(?filepath, importer = self.name(), "Loading depth image…",); rows.extend(load_depth_image( &filepath, timepoint, @@ -126,7 +126,7 @@ impl DataLoader for ArchetypeLoader { contents.into_owned(), )?); } else if 
crate::SUPPORTED_VIDEO_EXTENSIONS.contains(&extension.as_str()) { - re_log::debug!(?filepath, loader = self.name(), "Loading video…",); + re_log::debug!(?filepath, importer = self.name(), "Loading video…",); rows.extend(load_video( &filepath, timepoint, @@ -134,7 +134,7 @@ impl DataLoader for ArchetypeLoader { contents.into_owned(), )?); } else if crate::SUPPORTED_MESH_EXTENSIONS.contains(&extension.as_str()) { - re_log::debug!(?filepath, loader = self.name(), "Loading 3D model…",); + re_log::debug!(?filepath, importer = self.name(), "Loading 3D model…",); rows.extend(load_mesh( filepath.clone(), timepoint, @@ -142,10 +142,10 @@ impl DataLoader for ArchetypeLoader { contents.into_owned(), )?); } else if crate::SUPPORTED_POINT_CLOUD_EXTENSIONS.contains(&extension.as_str()) { - re_log::debug!(?filepath, loader = self.name(), "Loading 3D point cloud…",); + re_log::debug!(?filepath, importer = self.name(), "Loading 3D point cloud…",); rows.extend(load_point_cloud(timepoint, entity_path, &contents)?); } else if crate::SUPPORTED_TEXT_EXTENSIONS.contains(&extension.as_str()) { - re_log::debug!(?filepath, loader = self.name(), "Loading text document…",); + re_log::debug!(?filepath, importer = self.name(), "Loading text document…",); rows.extend(load_text_document( filepath.clone(), timepoint, @@ -153,7 +153,7 @@ impl DataLoader for ArchetypeLoader { contents.into_owned(), )?); } else { - return Err(crate::DataLoaderError::Incompatible(filepath.clone())); + return Err(crate::ImporterError::Incompatible(filepath.clone())); } if rows.is_empty() { @@ -170,7 +170,7 @@ impl DataLoader for ArchetypeLoader { ) }); for row in rows { - let data = LoadedData::Chunk(Self::name(&Self), store_id.clone(), row); + let data = ImportedData::Chunk(Self::name(&Self), store_id.clone(), row); if re_quota_channel::send_crossbeam(&tx, data).is_err() { break; // The other end has decided to hang up, not our problem. 
} @@ -187,7 +187,7 @@ fn load_image( timepoint: TimePoint, entity_path: EntityPath, contents: Vec, -) -> Result + use<>, DataLoaderError> { +) -> Result + use<>, ImporterError> { re_tracing::profile_function!(); let rows = [ @@ -213,7 +213,7 @@ fn load_depth_image( timepoint: TimePoint, entity_path: EntityPath, contents: Vec, -) -> Result + use<>, DataLoaderError> { +) -> Result + use<>, ImporterError> { re_tracing::profile_function!(); let rows = [{ @@ -242,7 +242,7 @@ fn load_video( mut timepoint: TimePoint, entity_path: &EntityPath, contents: Vec, -) -> Result + use<>, DataLoaderError> { +) -> Result + use<>, ImporterError> { re_tracing::profile_function!(); let video_timeline = re_log_types::Timeline::new_duration("video"); @@ -319,7 +319,7 @@ fn load_mesh( timepoint: TimePoint, entity_path: EntityPath, contents: Vec, -) -> Result, DataLoaderError> { +) -> Result, ImporterError> { re_tracing::profile_function!(); let rows = [ @@ -342,12 +342,12 @@ fn load_point_cloud( timepoint: TimePoint, entity_path: EntityPath, contents: &[u8], -) -> Result + use<>, DataLoaderError> { +) -> Result + use<>, ImporterError> { re_tracing::profile_function!(); let rows = [ { - // TODO(#4532): `.ply` data loader should support 2D point cloud & meshes + // TODO(#4532): `.ply` importer should support 2D point cloud & meshes let points3d = re_sdk_types::archetypes::Points3D::from_file_contents(contents) .map_err(anyhow::Error::from)?; Chunk::builder(entity_path) @@ -365,7 +365,7 @@ fn load_text_document( timepoint: TimePoint, entity_path: EntityPath, contents: Vec, -) -> Result, DataLoaderError> { +) -> Result, ImporterError> { re_tracing::profile_function!(); let rows = [ diff --git a/crates/store/re_data_loader/src/loader_directory.rs b/crates/store/re_importer/src/importer_directory.rs similarity index 70% rename from crates/store/re_data_loader/src/loader_directory.rs rename to crates/store/re_importer/src/importer_directory.rs index 32d68fa0d23c..6082a6979c88 100644 --- 
a/crates/store/re_data_loader/src/loader_directory.rs +++ b/crates/store/re_importer/src/importer_directory.rs @@ -1,46 +1,46 @@ -/// Recursively loads entire directories, using the appropriate [`crate::DataLoader`]:s for each -/// files within. +/// Recursively imports entire directories, using the appropriate [`crate::Importer`]s for each +/// file within. // // TODO(cmc): There are a lot more things than can be done be done when it comes to the semantics // of a folder, e.g.: HIVE-like partitioning, similarly named files with different indices and/or // timestamps (e.g. a folder of video frames), etc. // We could support some of those at some point, or at least add examples to show users how. -pub struct DirectoryLoader; +pub struct DirectoryImporter; -impl crate::DataLoader for DirectoryLoader { +impl crate::Importer for DirectoryImporter { #[inline] fn name(&self) -> String { - "rerun.data_loaders.Directory".into() + "rerun.importers.Directory".into() } #[cfg(not(target_arch = "wasm32"))] - fn load_from_path( + fn import_from_path( &self, - settings: &crate::DataLoaderSettings, + settings: &crate::ImporterSettings, dirpath: std::path::PathBuf, - tx: crossbeam::channel::Sender, - ) -> Result<(), crate::DataLoaderError> { + tx: crossbeam::channel::Sender, + ) -> Result<(), crate::ImporterError> { // NOTE: Checking whether this is a file is _not_ enough. It could also be a fifo, a // socket, a named pipe, a symlink to any of these things, etc. // So make sure to check whether it's a directory, and nothing else. 
if !dirpath.is_dir() { - return Err(crate::DataLoaderError::Incompatible(dirpath.clone())); + return Err(crate::ImporterError::Incompatible(dirpath.clone())); } if crate::lerobot::is_lerobot_dataset(&dirpath) { - // LeRobot dataset is loaded by LeRobotDatasetLoader - return Err(crate::DataLoaderError::Incompatible(dirpath.clone())); + // LeRobot dataset is loaded by LeRobotDatasetImporter + return Err(crate::ImporterError::Incompatible(dirpath.clone())); } re_tracing::profile_function!(dirpath.display().to_string()); - re_log::debug!(?dirpath, loader = self.name(), "Loading directory…",); + re_log::debug!(?dirpath, importer = self.name(), "Loading directory…",); for entry in walkdir::WalkDir::new(&dirpath) { let entry = match entry { Ok(entry) => entry, Err(err) => { - re_log::error!(loader = self.name(), ?dirpath, %err, "Failed to open filesystem entry"); + re_log::error!(importer = self.name(), ?dirpath, %err, "Failed to open filesystem entry"); continue; } }; @@ -59,7 +59,7 @@ impl crate::DataLoader for DirectoryLoader { _ = std::thread::Builder::new() .name(format!("load_dir_entry({filepath:?})")) .spawn(move || { - let data = match crate::load_file::load(&settings, &filepath, None) { + let data = match crate::import_file::import(&settings, &filepath, None) { Ok(data) => data, Err(err) => { re_log::error!(?filepath, %err, "Failed to load directory entry"); @@ -80,14 +80,14 @@ impl crate::DataLoader for DirectoryLoader { } #[inline] - fn load_from_file_contents( + fn import_from_file_contents( &self, - _settings: &crate::DataLoaderSettings, + _settings: &crate::ImporterSettings, path: std::path::PathBuf, _contents: std::borrow::Cow<'_, [u8]>, - _tx: crossbeam::channel::Sender, - ) -> Result<(), crate::DataLoaderError> { + _tx: crossbeam::channel::Sender, + ) -> Result<(), crate::ImporterError> { // TODO(cmc): This could make sense to implement for e.g. 
archive formats (zip, tar, …) - Err(crate::DataLoaderError::Incompatible(path)) + Err(crate::ImporterError::Incompatible(path)) } } diff --git a/crates/store/re_data_loader/src/loader_external.rs b/crates/store/re_importer/src/importer_external.rs similarity index 63% rename from crates/store/re_data_loader/src/loader_external.rs rename to crates/store/re_importer/src/importer_external.rs index 36fb70d54be4..842a4923036e 100644 --- a/crates/store/re_data_loader/src/loader_external.rs +++ b/crates/store/re_importer/src/importer_external.rs @@ -6,30 +6,36 @@ use std::sync::{Arc, LazyLock}; use ahash::HashMap; use indexmap::IndexSet; -use crate::{DataLoader as _, LoadedData}; +use crate::{ImportedData, Importer as _}; // --- -/// To register a new external data loader, simply add an executable in your $PATH whose name +/// To register a new external importer, simply add an executable in your $PATH whose name /// starts with this prefix. // NOTE: this constant is duplicated in `rerun` to avoid an extra dependency there. -pub const EXTERNAL_DATA_LOADER_PREFIX: &str = "rerun-loader-"; +pub const EXTERNAL_IMPORTER_PREFIX: &str = "rerun-importer-"; -/// When an external [`crate::DataLoader`] is asked to load some data that it doesn't know +/// Legacy prefix for external importers (deprecated since 0.32.0). +/// +/// Executables with this prefix are still discovered for backwards compatibility, +/// but a deprecation warning is logged. +const EXTERNAL_IMPORTER_PREFIX_DEPRECATED: &str = "rerun-loader-"; + +/// When an external [`crate::Importer`] is asked to load some data that it doesn't know /// how to load, it should exit with this exit code. // NOTE: Always keep in sync with other languages. // NOTE: this constant is duplicated in `rerun` to avoid an extra dependency there. 
-pub const EXTERNAL_DATA_LOADER_INCOMPATIBLE_EXIT_CODE: i32 = 66; +pub const EXTERNAL_IMPORTER_INCOMPATIBLE_EXIT_CODE: i32 = 66; -/// Keeps track of the paths all external executable [`crate::DataLoader`]s. +/// Keeps track of the paths all external executable [`crate::Importer`]s. /// /// Lazy initialized the first time a file is opened by running a full scan of the `$PATH`. /// -/// External loaders are _not_ registered on a per-extension basis: we want users to be able to -/// filter data on a much more fine-grained basis that just file extensions (e.g. checking the file +/// External importers are _not_ registered on a per-extension basis: we want users to be able to +/// filter data on a much more fine-grained basis than just file extensions (e.g. checking the file /// itself for magic bytes). -pub static EXTERNAL_LOADER_PATHS: LazyLock> = LazyLock::new(|| { - re_tracing::profile_scope!("initialize-external-loaders"); +pub static EXTERNAL_IMPORTER_PATHS: LazyLock> = LazyLock::new(|| { + re_tracing::profile_scope!("initialize-external-importers"); let dir_separator = if cfg!(target_os = "windows") { ';' @@ -60,12 +66,26 @@ pub static EXTERNAL_LOADER_PATHS: LazyLock> = LazyLock::new(|| { return None; }; let filepath = entry.path(); - let is_rerun_loader = filepath.file_name().is_some_and(|filename| { - filename - .to_string_lossy() - .starts_with(EXTERNAL_DATA_LOADER_PREFIX) - }); - (is_executable(&filepath) && is_rerun_loader).then_some(filepath) + let filename_str = filepath + .file_name() + .map(|f| f.to_string_lossy().to_string()) + .unwrap_or_default(); + + let is_new_prefix = filename_str.starts_with(EXTERNAL_IMPORTER_PREFIX); + let is_legacy_prefix = filename_str.starts_with(EXTERNAL_IMPORTER_PREFIX_DEPRECATED); + + if is_executable(&filepath) && (is_new_prefix || is_legacy_prefix) { + if is_legacy_prefix { + re_log::warn_once!( + "Found external loader with deprecated `rerun-loader-` prefix: {filename_str}. 
\ + Please rename it to use the `rerun-importer-` prefix instead. \ + Support for the `rerun-loader-` prefix will be removed in a future release." + ); + } + Some(filepath) + } else { + None + } }); for path in paths { @@ -82,7 +102,7 @@ pub static EXTERNAL_LOADER_PATHS: LazyLock> = LazyLock::new(|| { .into_iter() .filter_map(|(name, paths)| { if paths.len() > 1 { - re_log::debug!(name, ?paths, "Found duplicated data-loader in $PATH"); + re_log::debug!(name, ?paths, "Found duplicated external importer in $PATH"); } // Only keep the first entry according to PATH order. @@ -91,53 +111,54 @@ pub static EXTERNAL_LOADER_PATHS: LazyLock> = LazyLock::new(|| { .collect() }); -/// Iterator over all registered external [`crate::DataLoader`]s. +/// Iterator over all registered external [`crate::Importer`]s. #[inline] -pub fn iter_external_loaders() -> impl ExactSizeIterator { - re_tracing::profile_wait!("EXTERNAL_LOADER_PATHS"); - EXTERNAL_LOADER_PATHS.iter().cloned() +pub fn iter_external_importers() -> impl ExactSizeIterator { + re_tracing::profile_wait!("EXTERNAL_IMPORTER_PATHS"); + EXTERNAL_IMPORTER_PATHS.iter().cloned() } // --- -/// A [`crate::DataLoader`] that forwards the path to load to all executables present in -/// the user's `PATH` with a name that starts with [`EXTERNAL_DATA_LOADER_PREFIX`]. +/// An [`crate::Importer`] that forwards the path to load to all executables present in +/// the user's `PATH` with a name that starts with [`EXTERNAL_IMPORTER_PREFIX`] +/// (or the legacy `rerun-loader-` prefix). /// -/// The external loaders are expected to log rrd data to their standard output. +/// The external importers are expected to log rrd data to their standard output. /// -/// Refer to our `external_data_loader` example for more information. +/// Refer to our `external_importer` example for more information. /// -/// Checkout our [guide](https://www.rerun.io/docs/reference/data-loaders/overview) on -/// how to implement external loaders. 
-pub struct ExternalLoader; +/// Checkout our [guide](https://www.rerun.io/docs/concepts/logging-and-ingestion/importers/overview?speculative-link) on +/// how to implement external importers. +pub struct ExternalImporter; -impl crate::DataLoader for ExternalLoader { +impl crate::Importer for ExternalImporter { #[inline] fn name(&self) -> String { - "rerun.data_loaders.External".into() + "rerun.importers.External".into() } - fn load_from_path( + fn import_from_path( &self, - settings: &crate::DataLoaderSettings, + settings: &crate::ImporterSettings, filepath: PathBuf, - tx: crossbeam::channel::Sender, - ) -> Result<(), crate::DataLoaderError> { + tx: crossbeam::channel::Sender, + ) -> Result<(), crate::ImporterError> { use std::process::{Command, Stdio}; re_tracing::profile_function!(filepath.display().to_string()); - let external_loaders = { - re_tracing::profile_wait!("EXTERNAL_LOADER_PATHS"); - EXTERNAL_LOADER_PATHS.iter() + let external_importers = { + re_tracing::profile_wait!("EXTERNAL_IMPORTER_PATHS"); + EXTERNAL_IMPORTER_PATHS.iter() }; #[derive(Debug, PartialEq, Eq)] - struct CompatibleLoaderFound; - let (tx_feedback, rx_feedback) = crossbeam::channel::bounded::(64); + struct CompatibleImporterFound; + let (tx_feedback, rx_feedback) = crossbeam::channel::bounded::(64); let args = settings.to_cli_args(); - for exe in external_loaders { + for exe in external_importers { let args = args.clone(); let filepath = filepath.clone(); let tx = tx.clone(); @@ -149,7 +170,7 @@ impl crate::DataLoader for ExternalLoader { re_tracing::profile_function!(exe.to_string_lossy()); let child = Command::new(exe) - // Make sure the child dataloader doesn't think it's a Rerun Viewer, otherwise + // Make sure the child importer doesn't think it's a Rerun Viewer, otherwise // it's never gonna be able to log anything. 
.env_remove("RERUN_APP_ONLY") .arg(filepath.clone()) @@ -161,23 +182,23 @@ impl crate::DataLoader for ExternalLoader { let mut child = match child { Ok(child) => child, Err(err) => { - re_log::error!(?filepath, loader = ?exe, %err, "Failed to execute external loader"); + re_log::error!(?filepath, importer = ?exe, %err, "Failed to execute external importer"); return; } }; let Some(stdout) = child.stdout.take() else { let reason = "stdout unreachable"; - re_log::error!(?filepath, loader = ?exe, %reason, "Failed to execute external loader"); + re_log::error!(?filepath, importer = ?exe, %reason, "Failed to execute external importer"); return; }; let Some(stderr) = child.stderr.take() else { let reason = "stderr unreachable"; - re_log::error!(?filepath, loader = ?exe, %reason, "Failed to execute external loader"); + re_log::error!(?filepath, importer = ?exe, %reason, "Failed to execute external importer"); return; }; - re_log::debug!(?filepath, loader = ?exe, "Loading data from filesystem using external loader…",); + re_log::debug!(?filepath, importer = ?exe, "Importing data from filesystem using external importer…",); // A single value will be sent on this channel as soon as the child process starts // streaming data to stdout. @@ -198,7 +219,7 @@ impl crate::DataLoader for ExternalLoader { move || decode_and_stream(&filepath, &tx, is_sending_data, decoder) }) { - re_log::error!(?filepath, loader = ?exe, %err, "Failed to open spawn IO thread"); + re_log::error!(?filepath, importer = ?exe, %err, "Failed to open spawn IO thread"); return; } } @@ -209,12 +230,12 @@ impl crate::DataLoader for ExternalLoader { // down, still. } Err(err) => { - re_log::error!(?filepath, loader = ?exe, %err, "Failed to decode external loader's output"); + re_log::error!(?filepath, importer = ?exe, %err, "Failed to decode external importer's output"); return; } } - // We have to wait in order to know whether the child process is a compatible loader. 
+ // We have to wait in order to know whether the child process is a compatible importer. // // This can manifest itself in two distinct ways: // 1. If it exits immediately with an INCOMPATIBLE exit code, then we have our @@ -229,8 +250,8 @@ impl crate::DataLoader for ExternalLoader { if is_sending_data.load(std::sync::atomic::Ordering::Relaxed) { // The child process has started streaming data, it is therefore compatible. // Let's get out ASAP. - re_log::debug!(loader = ?exe, ?filepath, "compatible external loader found"); - re_quota_channel::send_crossbeam(&tx_feedback, CompatibleLoaderFound).ok(); + re_log::debug!(importer = ?exe, ?filepath, "compatible external importer found"); + re_quota_channel::send_crossbeam(&tx_feedback, CompatibleImporterFound).ok(); break; // we still want to check for errors once it finally exits! } @@ -238,7 +259,7 @@ impl crate::DataLoader for ExternalLoader { std::thread::yield_now(); } Err(err) => { - re_log::error!(?filepath, loader = ?exe, %err, "Failed to execute external loader"); + re_log::error!(?filepath, importer = ?exe, %err, "Failed to execute external importer"); return; } } @@ -248,14 +269,14 @@ impl crate::DataLoader for ExternalLoader { let status = match child.wait() { Ok(output) => output, Err(err) => { - re_log::error!(?filepath, loader = ?exe, %err, "Failed to execute external loader"); + re_log::error!(?filepath, importer = ?exe, %err, "Failed to execute external importer"); return; } }; // NOTE: We assume that plugins are compatible until proven otherwise. 
let is_compatible = - status.code() != Some(crate::EXTERNAL_DATA_LOADER_INCOMPATIBLE_EXIT_CODE); + status.code() != Some(crate::EXTERNAL_IMPORTER_INCOMPATIBLE_EXIT_CODE); if is_compatible { let mut stderr = std::io::BufReader::new(stderr); @@ -263,52 +284,52 @@ impl crate::DataLoader for ExternalLoader { stderr.read_to_string(&mut stderr_str).ok(); if status.success() { - re_log::debug!(loader = ?exe, ?filepath, "Compatible external loader found"); + re_log::debug!(importer = ?exe, ?filepath, "Compatible external importer found"); - // Include any log output of the external loader in the console, because maybe it has useful information: + // Include any log output of the external importer in the console, because maybe it has useful information: let stderr_indented = stderr_str.lines().map(|line| format!(" {line}")).collect::>().join("\n"); - re_log::debug!("Dataloader stderr:\n{stderr_indented}"); + re_log::debug!("External importer stderr:\n{stderr_indented}"); - re_quota_channel::send_crossbeam(&tx_feedback, CompatibleLoaderFound).ok(); + re_quota_channel::send_crossbeam(&tx_feedback, CompatibleImporterFound).ok(); } else { - re_log::error!(?filepath, loader = ?exe, %stderr_str, "Failed to execute external loader"); + re_log::error!(?filepath, importer = ?exe, %stderr_str, "Failed to execute external importer"); } } })?; } - re_tracing::profile_wait!("compatible_loader"); + re_tracing::profile_wait!("compatible_importer"); drop(tx_feedback); - let any_compatible_loader = rx_feedback.recv() == Ok(CompatibleLoaderFound); - if !any_compatible_loader { - // NOTE: The only way to get here is if all loaders closed then sending end of the + let any_compatible_importer = rx_feedback.recv() == Ok(CompatibleImporterFound); + if !any_compatible_importer { + // NOTE: The only way to get here is if all importers closed the sending end of the // channel without sending anything, i.e. none of them are compatible. 
- return Err(crate::DataLoaderError::Incompatible(filepath.clone())); + return Err(crate::ImporterError::Incompatible(filepath.clone())); } Ok(()) } #[inline] - fn load_from_file_contents( + fn import_from_file_contents( &self, - _settings: &crate::DataLoaderSettings, + _settings: &crate::ImporterSettings, path: PathBuf, _contents: std::borrow::Cow<'_, [u8]>, - _tx: crossbeam::channel::Sender, - ) -> Result<(), crate::DataLoaderError> { + _tx: crossbeam::channel::Sender, + ) -> Result<(), crate::ImporterError> { // TODO(#5324): You could imagine a world where plugins can be streamed rrd data via their // standard input… but today is not world. - Err(crate::DataLoaderError::Incompatible(path)) + Err(crate::ImporterError::Incompatible(path)) } } #[expect(clippy::needless_pass_by_value)] fn decode_and_stream( filepath: &std::path::Path, - tx: &crossbeam::channel::Sender, + tx: &crossbeam::channel::Sender, is_sending_data: Arc, msgs: impl Iterator>, ) { @@ -325,7 +346,7 @@ fn decode_and_stream( } }; - let data = LoadedData::LogMsg(ExternalLoader::name(&ExternalLoader), msg); + let data = ImportedData::LogMsg(ExternalImporter::name(&ExternalImporter), msg); if re_quota_channel::send_crossbeam(tx, data).is_err() { break; // The other end has decided to hang up, not our problem. 
} diff --git a/crates/store/re_data_loader/src/loader_lerobot.rs b/crates/store/re_importer/src/importer_lerobot.rs similarity index 78% rename from crates/store/re_data_loader/src/loader_lerobot.rs rename to crates/store/re_importer/src/importer_lerobot.rs index 6f4798633747..4e4c6e4d2eb5 100644 --- a/crates/store/re_data_loader/src/loader_lerobot.rs +++ b/crates/store/re_importer/src/importer_lerobot.rs @@ -4,26 +4,26 @@ use anyhow::{Context as _, anyhow}; use crossbeam::channel::Sender; use crate::lerobot::{LeRobotDatasetVersion, datasetv2, datasetv3, is_lerobot_dataset}; -use crate::{DataLoader, DataLoaderError, LoadedData}; +use crate::{ImportedData, Importer, ImporterError}; -/// A [`DataLoader`] for `LeRobot` datasets. +/// An [`Importer`] for `LeRobot` datasets. /// /// An example dataset which can be loaded can be found on Hugging Face: [lerobot/pusht_image](https://huggingface.co/datasets/lerobot/pusht_image) -pub struct LeRobotDatasetLoader; +pub struct LeRobotDatasetImporter; -impl DataLoader for LeRobotDatasetLoader { +impl Importer for LeRobotDatasetImporter { fn name(&self) -> String { - "LeRobotDatasetLoader".into() + "rerun.importers.LeRobotDataset".into() } - fn load_from_path( + fn import_from_path( &self, - settings: &crate::DataLoaderSettings, + settings: &crate::ImporterSettings, filepath: std::path::PathBuf, - tx: Sender, - ) -> Result<(), DataLoaderError> { + tx: Sender, + ) -> Result<(), ImporterError> { if !is_lerobot_dataset(&filepath) { - return Err(DataLoaderError::Incompatible(filepath)); + return Err(ImporterError::Incompatible(filepath)); } let version = LeRobotDatasetVersion::find_version(&filepath) @@ -39,23 +39,23 @@ impl DataLoader for LeRobotDatasetLoader { } } - fn load_from_file_contents( + fn import_from_file_contents( &self, - _settings: &crate::DataLoaderSettings, + _settings: &crate::ImporterSettings, filepath: std::path::PathBuf, _contents: std::borrow::Cow<'_, [u8]>, - _tx: Sender, - ) -> Result<(), DataLoaderError> { - 
Err(DataLoaderError::Incompatible(filepath)) + _tx: Sender, + ) -> Result<(), ImporterError> { + Err(ImporterError::Incompatible(filepath)) } } -impl LeRobotDatasetLoader { +impl LeRobotDatasetImporter { fn load_v2_dataset( - settings: &crate::DataLoaderSettings, + settings: &crate::ImporterSettings, filepath: impl AsRef, - tx: Sender, - ) -> Result<(), DataLoaderError> { + tx: Sender, + ) -> Result<(), ImporterError> { let filepath = filepath.as_ref().to_owned(); let dataset = datasetv2::LeRobotDatasetV2::load_from_directory(&filepath) .map_err(|err| anyhow!("Loading LeRobot v2 dataset failed: {err}"))?; @@ -69,7 +69,7 @@ impl LeRobotDatasetLoader { // NOTE(1): `spawn` is fine, this whole function is native-only. // NOTE(2): this must spawned on a dedicated thread to avoid a deadlock! - // `load` will spawn a bunch of loaders on the common rayon thread pool and wait for + // `load` will spawn a bunch of importers on the common rayon thread pool and wait for // their response via channels: we cannot be waiting for these responses on the // common rayon thread pool. thread::Builder::new() @@ -90,10 +90,10 @@ impl LeRobotDatasetLoader { } fn load_v3_dataset( - settings: &crate::DataLoaderSettings, + settings: &crate::ImporterSettings, filepath: impl AsRef, - tx: Sender, - ) -> Result<(), DataLoaderError> { + tx: Sender, + ) -> Result<(), ImporterError> { let filepath = filepath.as_ref().to_owned(); let dataset = datasetv3::LeRobotDatasetV3::load_from_directory(&filepath) .map_err(|err| anyhow!("Loading LeRobot v3 dataset failed: {err}"))?; @@ -107,7 +107,7 @@ impl LeRobotDatasetLoader { // NOTE(1): `spawn` is fine, this whole function is native-only. // NOTE(2): this must spawned on a dedicated thread to avoid a deadlock! 
- // `load` will spawn a bunch of loaders on the common rayon thread pool and wait for + // `load` will spawn a bunch of importers on the common rayon thread pool and wait for // their response via channels: we cannot be waiting for these responses on the // common rayon thread pool. thread::Builder::new() diff --git a/crates/store/re_importer/src/importer_mcap/importer.rs b/crates/store/re_importer/src/importer_mcap/importer.rs new file mode 100644 index 000000000000..15ed14137101 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/importer.rs @@ -0,0 +1,363 @@ +//! MCAP file importer implementation. + +use std::collections::HashMap; +use std::io::Cursor; +use std::path::Path; +use std::sync::Arc; + +use crossbeam::channel::Sender; +use re_chunk::RowId; +use re_lenses::Lenses; +use re_log_types::{SetStoreInfo, StoreId, StoreInfo}; +use re_mcap::{DecoderIdentifier, DecoderRegistry, SelectedDecoders, TopicFilter}; +use re_quota_channel::send_crossbeam; + +use crate::{ImportedData, Importer, ImporterError, ImporterSettings, URDF_DECODER_IDENTIFIER}; + +const MCAP_IMPORTER_NAME: &str = "McapImporter"; + +/// An [`Importer`] for MCAP files. +/// +/// There are many different ways to extract and interpret information from MCAP files. +/// For example, it might be interesting to query for particular fields of messages, +/// or show information directly in the Rerun viewer. Because use-cases can vary, the +/// [`McapImporter`] is made up of [`re_mcap::Decoder`]s, each representing different views of the +/// underlying data. +/// +/// These decoders can be specified in the CLI when converting an MCAP file +/// to an .rrd. Here are a few examples: +/// - [`re_mcap::decoders::McapProtobufDecoder`] +/// - [`re_mcap::decoders::McapRawDecoder`] +#[derive(Clone)] +pub struct McapImporter { + selected_decoders: SelectedDecoders, + // TODO(RR-3491): We don't need the fallback logic anymore; use `OutputMode` instead. 
+ raw_fallback_enabled: bool, + topic_filter: TopicFilter, + lenses_by_time_type: HashMap>, +} + +impl Default for McapImporter { + fn default() -> Self { + Self::new(&SelectedDecoders::All) + } +} + +impl McapImporter { + /// Creates a new [`McapImporter`] that uses the specified decoders. + pub fn new(selected_decoders: &SelectedDecoders) -> Self { + // Cache lenses for each supported timeline type. + let mut lenses_by_time_type = HashMap::new(); + for time_type in [ + re_log_types::TimeType::TimestampNs, + re_log_types::TimeType::DurationNs, + ] { + if let Some(lenses) = Self::build_lenses(selected_decoders, time_type) { + lenses_by_time_type.insert(time_type, lenses); + } + } + Self { + selected_decoders: selected_decoders.clone(), + raw_fallback_enabled: true, + topic_filter: TopicFilter::default(), + lenses_by_time_type, + } + } + + /// Configures whether the raw decoder is used as a fallback for unsupported channels. + pub fn with_raw_fallback(mut self, raw_fallback_enabled: bool) -> Self { + self.raw_fallback_enabled = raw_fallback_enabled; + self + } + + /// Configures a regex-based topic filter. + /// + /// See [`TopicFilter`] for matching semantics. + pub fn with_topic_filter(mut self, topic_filter: TopicFilter) -> Self { + self.topic_filter = topic_filter; + self + } + + /// Returns the cached lenses for the given [`re_log_types::TimeType`]. + fn lenses_for(&self, time_type: re_log_types::TimeType) -> Option> { + if time_type == re_log_types::TimeType::Sequence { + re_log::error_once!("Sequence is not a supported timeline type for MCAP lenses"); + return None; + } + self.lenses_by_time_type.get(&time_type).cloned() + } + + fn build_lenses( + selected_decoders: &SelectedDecoders, + time_type: re_log_types::TimeType, + ) -> Option> { + match super::lenses::mcap_lenses(selected_decoders, time_type) { + Ok(Some(lenses)) => Some(Arc::new(lenses)), + Ok(None) => None, + Err(err) => { + re_log::error_once!( + "Failed to build MCAP lenses: {err}. 
MCAP importer will run without them." + ); + None + } + } + } + + /// Load chunks from MCAP bytes, calling `emit_chunk` for each produced chunk. + /// + /// Bypasses the [`Importer`] / [`ImportedData`] / `SetStoreInfo` ceremony. + /// Uses the decoders, raw fallback, and lenses already configured on this importer. + pub fn emit_chunks( + &self, + mcap: &[u8], + timeline_type: re_log_types::TimeType, + timestamp_offset_ns: Option, + emit_chunk: &mut dyn FnMut(re_chunk::Chunk), + ) -> Result<(), ImporterError> { + re_tracing::profile_function!(); + + let lenses = self.lenses_for(timeline_type); + + let mut on_chunk_with_transforms = |chunk: re_chunk::Chunk| { + if let Some(ref lenses) = lenses { + for result in lenses.apply(&chunk) { + match result { + Ok(c) => emit_chunk(apply_timestamp_offset(c, timestamp_offset_ns)), + Err(partial) => { + for error in partial.errors() { + re_log::error_once!("Lens error: {error}"); + } + if let Some(c) = partial.take() { + emit_chunk(apply_timestamp_offset(c, timestamp_offset_ns)); + } + } + } + } + } else { + emit_chunk(apply_timestamp_offset(chunk, timestamp_offset_ns)); + } + }; + + let reader = Cursor::new(&mcap); + let summary = re_mcap::read_summary(reader)? + .ok_or_else(|| anyhow::anyhow!("MCAP file does not contain a summary"))?; + + DecoderRegistry::all_builtin(self.raw_fallback_enabled) + .select(&self.selected_decoders) + .plan(mcap, &summary, &self.topic_filter)? 
+ .run(mcap, &summary, timeline_type, &mut on_chunk_with_transforms)?; + + if self + .selected_decoders + .contains(&DecoderIdentifier::from(URDF_DECODER_IDENTIFIER)) + && let Err(err) = super::robot_description::extract_urdf_from_robot_descriptions( + mcap, + &summary, + &self.topic_filter, + &mut on_chunk_with_transforms, + ) + { + re_log::warn_once!("Failed to extract URDF from robot_description topics: {err}"); + } + + Ok(()) + } +} + +impl Importer for McapImporter { + fn name(&self) -> crate::ImporterName { + MCAP_IMPORTER_NAME.into() + } + + #[cfg(not(target_arch = "wasm32"))] + fn import_from_path( + &self, + settings: &crate::ImporterSettings, + path: std::path::PathBuf, + tx: Sender, + ) -> Result<(), ImporterError> { + if !path.is_file() || !has_mcap_extension(&path) { + return Err(ImporterError::Incompatible(path)); // simply not interested + } + + re_tracing::profile_function!(); + + // NOTE(1): `spawn` is fine, this whole function is native-only. + // NOTE(2): this must spawned on a dedicated thread to avoid a deadlock! + // `load` will spawn a bunch of importers on the common rayon thread pool and wait for + // their response via channels: we cannot be waiting for these responses on the + // common rayon thread pool. + let loader = self.clone(); + let settings = settings.clone(); + std::thread::Builder::new() + .name(format!("load_mcap({path:?})")) + .spawn(move || { + let file = match std::fs::File::open(&path) { + Ok(f) => f, + Err(err) => { + re_log::error!("Failed to open MCAP file: {err}"); + return; + } + }; + + // SAFETY: file-backed mmap; we don't modify the file while mapped. 
+ #[expect(unsafe_code)] + let mmap = match unsafe { memmap2::Mmap::map(&file) } { + Ok(m) => m, + Err(err) => { + re_log::error!("Failed to mmap MCAP file: {err}"); + return; + } + }; + + if let Err(err) = loader.load_and_send(&mmap, &settings, &tx) { + re_log::error!("Failed to load MCAP file: {err}"); + } + }) + .map_err(|err| ImporterError::Other(err.into()))?; + + Ok(()) + } + + fn import_from_file_contents( + &self, + settings: &crate::ImporterSettings, + filepath: std::path::PathBuf, + contents: std::borrow::Cow<'_, [u8]>, + tx: Sender, + ) -> Result<(), crate::ImporterError> { + if !has_mcap_extension(&filepath) { + return Err(ImporterError::Incompatible(filepath)); // simply not interested + } + + re_tracing::profile_function!(); + + let contents = contents.into_owned(); + let loader = self.clone(); + let settings = settings.clone(); + + // NOTE: this must be spawned on a dedicated thread to avoid a deadlock! + // `load` will spawn a bunch of importers on the common rayon thread pool and wait for + // their response via channels: we cannot be waiting for these responses on the + // common rayon thread pool. + cfg_if::cfg_if! { + if #[cfg(target_arch = "wasm32")] { + loader.load_and_send(&contents, &settings, &tx)?; + } else { + std::thread::Builder::new() + .name(format!("load_mcap({filepath:?})")) + .spawn(move || { + if let Err(err) = loader.load_and_send(&contents, &settings, &tx) { + re_log::error!("Failed to load MCAP file: {err}"); + } + }) + .map_err(|err| ImporterError::Other(err.into()))?; + } + } + + Ok(()) + } +} + +impl McapImporter { + /// Send `SetStoreInfo` then decode chunks via [`Self::emit_chunks`], + /// forwarding each chunk to the [`Importer`] channel. 
+ pub fn load_and_send( + &self, + mcap: &[u8], + settings: &ImporterSettings, + tx: &Sender, + ) -> Result<(), ImporterError> { + re_log::debug!( + "Loading MCAP with timeline type {:?}", + settings.timeline_type + ); + let store_id = settings.recommended_store_id(); + + if send_crossbeam( + tx, + ImportedData::LogMsg( + MCAP_IMPORTER_NAME.to_owned(), + re_log_types::LogMsg::SetStoreInfo(store_info(store_id.clone())), + ), + ) + .is_err() + { + re_log::debug_once!( + "Failed to send `SetStoreInfo` because smart channel closed unexpectedly." + ); + return Ok(()); + } + + self.emit_chunks( + mcap, + settings.timeline_type, + settings.timestamp_offset_ns, + &mut |chunk| { + send_chunk_to_channel(tx, &store_id, chunk); + }, + ) + } +} + +fn apply_timestamp_offset(mut chunk: re_chunk::Chunk, offset_ns: Option) -> re_chunk::Chunk { + if let Some(offset_ns) = offset_ns { + let offset_timelines: Vec<_> = chunk + .timelines() + .values() + .filter(|time_col| time_col.timeline().typ() == re_log_types::TimeType::TimestampNs) + .map(|time_col| time_col.offset_by_nanos(offset_ns)) + .collect(); + for time_col in offset_timelines { + chunk.add_timeline(time_col).ok(); + } + } + chunk +} + +fn send_chunk_to_channel( + tx: &Sender, + store_id: &StoreId, + mut chunk: re_chunk::Chunk, +) { + chunk.sort_if_unsorted(); + + for (name, column) in chunk.timelines() { + if !column.is_sorted() { + let entity_path = chunk.entity_path(); + re_log::warn_once!( + "Found unsorted timeline '{name}' for entity '{entity_path}'. This may lead to suboptimal performance.", + ); + } + } + + if send_crossbeam( + tx, + ImportedData::Chunk(MCAP_IMPORTER_NAME.to_owned(), store_id.clone(), chunk), + ) + .is_err() + { + // If the other side decided to hang up this is not our problem. + re_log::debug_once!( + "Failed to send chunk because the smart channel has been closed unexpectedly." 
+ ); + } +} + +fn store_info(store_id: StoreId) -> SetStoreInfo { + SetStoreInfo { + row_id: *RowId::new(), + info: StoreInfo::new( + store_id, + re_log_types::StoreSource::Other(MCAP_IMPORTER_NAME.to_owned()), + ), + } +} + +/// Checks if a path has the `.mcap` extension. +fn has_mcap_extension(filepath: &Path) -> bool { + filepath + .extension() + .map(|ext| ext.eq_ignore_ascii_case("mcap")) + .unwrap_or(false) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/foxglove/camera_calibration.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/camera_calibration.rs new file mode 100644 index 000000000000..59dc8ba268a3 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/camera_calibration.rs @@ -0,0 +1,39 @@ +use re_lenses::{Lens, LensBuilderError, op}; +use re_lenses_core::Selector; +use re_log_types::TimeType; +use re_sdk_types::archetypes::Pinhole; + +use crate::importer_mcap::lenses::helpers::row_major_3x3_to_column_major; + +use super::{FOXGLOVE_TIMESTAMP, IMAGE_PLANE_SUFFIX}; + +/// Creates a lens for [`foxglove.CameraCalibration`] messages. +/// +/// [`foxglove.CameraCalibration`]: https://docs.foxglove.dev/docs/sdk/schemas/camera-calibration +pub fn camera_calibration(time_type: TimeType) -> Result { + Ok(Lens::for_input_column("foxglove.CameraCalibration:message") + .output_columns(|out| { + out.time( + FOXGLOVE_TIMESTAMP, + time_type, + Selector::parse(".timestamp")?.pipe(op::timespec_to_nanos()), + )? + .component( + Pinhole::descriptor_child_frame(), + Selector::parse(".frame_id")?.pipe(op::string_suffix_nonempty(IMAGE_PLANE_SUFFIX)), + )? + .component( + Pinhole::descriptor_resolution(), + Selector::parse(".")?.pipe(op::struct_to_fixed_size_list_f32(["width", "height"])), + )? + .component( + Pinhole::descriptor_image_from_camera(), + Selector::parse(".K")?.pipe(row_major_3x3_to_column_major()), + )? + .component( + Pinhole::descriptor_parent_frame(), + Selector::parse(".frame_id")?, + ) + })? 
+ .build()) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/foxglove/compressed_image.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/compressed_image.rs new file mode 100644 index 000000000000..65f74a4cf137 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/compressed_image.rs @@ -0,0 +1,35 @@ +use re_lenses::{Lens, LensBuilderError, op}; +use re_lenses_core::Selector; +use re_log_types::TimeType; +use re_sdk_types::archetypes::{CoordinateFrame, EncodedImage}; + +use super::{FOXGLOVE_TIMESTAMP, IMAGE_PLANE_SUFFIX}; + +/// Creates a lens for [`foxglove.CompressedImage`] messages. +/// +/// [`foxglove.CompressedImage`]: https://docs.foxglove.dev/docs/sdk/schemas/compressed-image +pub fn compressed_image(time_type: TimeType) -> Result { + Ok(Lens::for_input_column("foxglove.CompressedImage:message") + .output_columns(|out| { + out.time( + FOXGLOVE_TIMESTAMP, + time_type, + Selector::parse(".timestamp")?.pipe(op::timespec_to_nanos()), + )? + .component( + CoordinateFrame::descriptor_frame(), + Selector::parse(".frame_id")?.pipe(op::string_suffix_nonempty(IMAGE_PLANE_SUFFIX)), + )? + // The format field can be "jpeg", "png", "webp" or "avif" in the Foxglove schema. + // We prefix with "image/" to get valid MIME types for Rerun. + .component( + EncodedImage::descriptor_media_type(), + Selector::parse(".format")?.pipe(op::string_prefix("image/")), + )? + .component( + EncodedImage::descriptor_blob(), + Selector::parse(".data")?.pipe(op::binary_to_list_uint8()), + ) + })? 
+ .build()) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/foxglove/compressed_video.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/compressed_video.rs new file mode 100644 index 000000000000..cb1f3fd2b3e7 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/compressed_video.rs @@ -0,0 +1,33 @@ +use re_lenses::{Lens, LensBuilderError, op}; +use re_lenses_core::Selector; +use re_log_types::TimeType; +use re_sdk_types::archetypes::{CoordinateFrame, VideoStream}; + +use super::{FOXGLOVE_TIMESTAMP, IMAGE_PLANE_SUFFIX}; + +/// Creates a lens for [`foxglove.CompressedVideo`] messages. +/// +/// [`foxglove.CompressedVideo`]: https://docs.foxglove.dev/docs/sdk/schemas/compressed-video +pub fn compressed_video(time_type: TimeType) -> Result { + Ok(Lens::for_input_column("foxglove.CompressedVideo:message") + .output_columns(|out| { + out.time( + FOXGLOVE_TIMESTAMP, + time_type, + Selector::parse(".timestamp")?.pipe(op::timespec_to_nanos()), + )? + .component( + CoordinateFrame::descriptor_frame(), + Selector::parse(".frame_id")?.pipe(op::string_suffix_nonempty(IMAGE_PLANE_SUFFIX)), + )? + .component( + VideoStream::descriptor_codec(), + Selector::parse(".format")?.pipe(op::string_to_video_codec()), + )? + .component( + VideoStream::descriptor_sample(), + Selector::parse(".data")?.pipe(op::binary_to_list_uint8()), + ) + })? + .build()) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/foxglove/frame_transform.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/frame_transform.rs new file mode 100644 index 000000000000..4e9fdca463db --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/frame_transform.rs @@ -0,0 +1,40 @@ +use re_lenses::{Lens, LensBuilderError, op}; +use re_lenses_core::Selector; +use re_log_types::TimeType; +use re_sdk_types::archetypes::Transform3D; + +use super::FOXGLOVE_TIMESTAMP; + +/// Creates a lens for [`foxglove.FrameTransform`] messages. 
+/// +/// [`foxglove.FrameTransform`]: https://docs.foxglove.dev/docs/sdk/schemas/frame-transform +pub fn frame_transform(time_type: TimeType) -> Result { + Ok(Lens::for_input_column("foxglove.FrameTransform:message") + .scatter() + .output_columns(|out| { + out.time( + FOXGLOVE_TIMESTAMP, + time_type, + Selector::parse(".timestamp")?.pipe(op::timespec_to_nanos()), + )? + .component( + Transform3D::descriptor_parent_frame(), + Selector::parse(".parent_frame_id")?, + )? + .component( + Transform3D::descriptor_child_frame(), + Selector::parse(".child_frame_id")?, + )? + .component( + Transform3D::descriptor_translation(), + Selector::parse(".translation")? + .pipe(op::struct_to_fixed_size_list_f32(["x", "y", "z"])), + )? + .component( + Transform3D::descriptor_quaternion(), + Selector::parse(".rotation")? + .pipe(op::struct_to_fixed_size_list_f32(["x", "y", "z", "w"])), + ) + })? + .build()) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/foxglove/frame_transforms.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/frame_transforms.rs new file mode 100644 index 000000000000..b7f54bb1bee9 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/frame_transforms.rs @@ -0,0 +1,40 @@ +use re_lenses::{Lens, LensBuilderError, op}; +use re_lenses_core::Selector; +use re_log_types::TimeType; +use re_sdk_types::archetypes::Transform3D; + +use super::FOXGLOVE_TIMESTAMP; + +/// Creates a lens for [`foxglove.FrameTransforms`] messages. +/// +/// [`foxglove.FrameTransforms`]: https://docs.foxglove.dev/docs/sdk/schemas/frame-transforms +pub fn frame_transforms(time_type: TimeType) -> Result { + Ok(Lens::for_input_column("foxglove.FrameTransforms:message") + .scatter() + .output_columns(|out| { + out.time( + FOXGLOVE_TIMESTAMP, + time_type, + Selector::parse(".transforms[].timestamp")?.pipe(op::timespec_to_nanos()), + )? 
+ .component( + Transform3D::descriptor_parent_frame(), + Selector::parse(".transforms[].parent_frame_id")?, + )? + .component( + Transform3D::descriptor_child_frame(), + Selector::parse(".transforms[].child_frame_id")?, + )? + .component( + Transform3D::descriptor_translation(), + Selector::parse(".transforms[].translation")? + .pipe(op::struct_to_fixed_size_list_f32(["x", "y", "z"])), + )? + .component( + Transform3D::descriptor_quaternion(), + Selector::parse(".transforms[].rotation")? + .pipe(op::struct_to_fixed_size_list_f32(["x", "y", "z", "w"])), + ) + })? + .build()) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/foxglove/location_fix.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/location_fix.rs new file mode 100644 index 000000000000..ff812695f3bb --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/location_fix.rs @@ -0,0 +1,39 @@ +use re_lenses::{Lens, LensBuilderError, op}; +use re_lenses_core::Selector; +use re_log_types::TimeType; +use re_sdk_types::archetypes::{CoordinateFrame, GeoPoints}; + +use crate::importer_mcap::lenses::helpers::lat_lon_struct_to_fixed; + +use super::FOXGLOVE_TIMESTAMP; + +/// Creates a lens for [`foxglove.LocationFix`] messages. +/// +/// Converts a single GNSS fix to a [`GeoPoints`] archetype with one position and optional color. +/// +/// [`foxglove.LocationFix`]: https://docs.foxglove.dev/docs/sdk/schemas/location-fix +pub fn location_fix(time_type: TimeType) -> Result { + Ok(Lens::for_input_column("foxglove.LocationFix:message") + .output_columns(|out| { + out.time( + FOXGLOVE_TIMESTAMP, + time_type, + Selector::parse(".timestamp")?.pipe(op::timespec_to_nanos()), + )? + // `frame_id` can be missing in old versions of the schema. + .component( + CoordinateFrame::descriptor_frame(), + Selector::parse(".frame_id!")?, + )? + .component( + GeoPoints::descriptor_positions(), + Selector::parse(".")?.pipe(lat_lon_struct_to_fixed()), + )? + // `color` field is optional. 
+ .component( + GeoPoints::descriptor_colors(), + Selector::parse(".color!")?.pipe(op::rgba_struct_to_uint32()), + ) + })? + .build()) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/foxglove/location_fixes.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/location_fixes.rs new file mode 100644 index 000000000000..f3a73c944061 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/location_fixes.rs @@ -0,0 +1,40 @@ +use re_lenses::{Lens, LensBuilderError, op}; +use re_lenses_core::Selector; +use re_log_types::TimeType; +use re_sdk_types::archetypes::{CoordinateFrame, GeoPoints}; + +use crate::importer_mcap::lenses::helpers::lat_lon_struct_to_fixed; + +use super::FOXGLOVE_TIMESTAMP; + +/// Creates a lens for [`foxglove.LocationFixes`] messages. +/// +/// Each fix in the batch gets its own timestamp, position, color, and coordinate frame. +/// +/// [`foxglove.LocationFixes`]: https://docs.foxglove.dev/docs/sdk/schemas/location-fixes +pub fn location_fixes(time_type: TimeType) -> Result { + Ok(Lens::for_input_column("foxglove.LocationFixes:message") + .scatter() + .output_columns(|out| { + out.time( + FOXGLOVE_TIMESTAMP, + time_type, + Selector::parse(".fixes[].timestamp")?.pipe(op::timespec_to_nanos()), + )? + // `frame_id` can be missing in old versions of the schema. + .component( + CoordinateFrame::descriptor_frame(), + Selector::parse(".fixes[].frame_id!")?, + )? + .component( + GeoPoints::descriptor_positions(), + Selector::parse(".fixes[]")?.pipe(lat_lon_struct_to_fixed()), + )? + // `color` field is optional. + .component( + GeoPoints::descriptor_colors(), + Selector::parse(".fixes[].color!")?.pipe(op::rgba_struct_to_uint32()), + ) + })? 
+ .build()) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/foxglove/log.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/log.rs new file mode 100644 index 000000000000..9708add631d0 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/log.rs @@ -0,0 +1,57 @@ +use arrow::array::{Array as _, ArrayRef, StringArray}; +use re_lenses::{Lens, LensBuilderError, op}; +use re_lenses_core::Selector; +use re_lenses_core::combinators::Error; +use re_log_types::TimeType; +use re_sdk_types::archetypes::TextLog; + +use super::FOXGLOVE_TIMESTAMP; + +/// Creates a lens for converting [`foxglove.Log`] messages to Rerun's [`TextLog`] archetype. +/// +/// [`foxglove.Log`]: https://docs.foxglove.dev/docs/sdk/schemas/log +pub fn log(time_type: TimeType) -> Result { + Ok(Lens::for_input_column("foxglove.Log:message") + .output_columns(|out| { + out.time( + FOXGLOVE_TIMESTAMP, + time_type, + Selector::parse(".timestamp")?.pipe(op::timespec_to_nanos()), + )? + .component(TextLog::descriptor_text(), Selector::parse(".message")?)? + .component( + TextLog::descriptor_level(), + Selector::parse(".level.name")?.pipe(foxglove_to_rerun_log_level()), + ) + })? + .build()) +} + +/// Returns a pipe-compatible function that maps Foxglove log level strings to Rerun +/// [`re_sdk_types::components::TextLogLevel`] strings. +fn foxglove_to_rerun_log_level() -> impl Fn(&ArrayRef) -> Result, Error> { + move |source: &ArrayRef| { + let source = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StringArray".to_owned(), + actual: source.data_type().clone(), + context: "foxglove_to_rerun_log_level input".to_owned(), + })?; + + let result: StringArray = source + .iter() + .map(|level| match level { + Some("WARNING") => Some("WARN"), + Some("FATAL") => Some("CRITICAL"), + // Rerun has no UNKNOWN level. + Some("UNKNOWN") | None => None, + // DEBUG, INFO, ERROR can be passed through as-is. 
+ other => other, + }) + .collect(); + + Ok(Some(std::sync::Arc::new(result) as ArrayRef)) + } +} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/mod.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/mod.rs similarity index 53% rename from crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/mod.rs rename to crates/store/re_importer/src/importer_mcap/lenses/foxglove/mod.rs index 947e89640fc6..089e115b6b74 100644 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/foxglove/mod.rs +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/mod.rs @@ -5,6 +5,8 @@ mod compressed_image; mod compressed_video; mod frame_transform; mod frame_transforms; +mod location_fix; +mod location_fixes; mod log; mod packed_element_field; mod point_cloud; @@ -12,13 +14,16 @@ mod pose_in_frame; mod poses_in_frame; mod raw_image; -use re_lenses::{LensError, Lenses, OutputMode}; +use re_lenses::{LensBuilderError, Lenses, OutputMode}; +use re_log_types::TimeType; pub use camera_calibration::camera_calibration; pub use compressed_image::compressed_image; pub use compressed_video::compressed_video; pub use frame_transform::frame_transform; pub use frame_transforms::frame_transforms; +pub use location_fix::location_fix; +pub use location_fixes::location_fixes; pub use log::log; pub use point_cloud::point_cloud; pub use pose_in_frame::pose_in_frame; @@ -34,18 +39,23 @@ const IMAGE_PLANE_SUFFIX: &str = "_image_plane"; /// Name of the timestamp field in Foxglove messages and name of the corresponding Rerun timeline. const FOXGLOVE_TIMESTAMP: &str = "timestamp"; -/// Creates a collection of all Foxglove lenses. 
-pub fn foxglove_lenses() -> Result { - let mut lenses = Lenses::new(OutputMode::ForwardUnmatched); - lenses.add_lens(camera_calibration()?); - lenses.add_lens(compressed_image()?); - lenses.add_lens(compressed_video()?); - lenses.add_lens(frame_transform()?); - lenses.add_lens(frame_transforms()?); - lenses.add_lens(log()?); - lenses.add_lens(point_cloud()?); - lenses.add_lens(pose_in_frame()?); - lenses.add_lens(poses_in_frame()?); - lenses.add_lens(raw_image()?); - Ok(lenses) +/// Adds all Foxglove lenses to an existing collection. +pub fn add_foxglove_lenses( + lenses: &mut Lenses, + time_type: TimeType, +) -> Result<(), LensBuilderError> { + *lenses = std::mem::replace(lenses, Lenses::new(OutputMode::ForwardUnmatched)) + .add_lens(camera_calibration(time_type)?) + .add_lens(compressed_image(time_type)?) + .add_lens(compressed_video(time_type)?) + .add_lens(frame_transform(time_type)?) + .add_lens(frame_transforms(time_type)?) + .add_lens(location_fix(time_type)?) + .add_lens(location_fixes(time_type)?) + .add_lens(log(time_type)?) + .add_lens(point_cloud(time_type)?) + .add_lens(pose_in_frame(time_type)?) + .add_lens(poses_in_frame(time_type)?) + .add_lens(raw_image(time_type)?); + Ok(()) } diff --git a/crates/store/re_importer/src/importer_mcap/lenses/foxglove/packed_element_field.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/packed_element_field.rs new file mode 100644 index 000000000000..cfd86dce11c8 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/packed_element_field.rs @@ -0,0 +1,313 @@ +//! Helper functions for decoding byte arrays of [`PackedElementField`] data, +//! e.g. for extracting positions and colors from [`foxglove.PointCloud`] messages. +//! +//! [`PackedElementField`]: https://docs.foxglove.dev/docs/sdk/schemas/packed-element-field +//! 
[`foxglove.PointCloud`]: https://docs.foxglove.dev/docs/sdk/schemas/point-cloud + +use std::sync::Arc; + +use crate::importer_mcap::lenses::helpers::get_field_as; +use arrow::array::builder::{FixedSizeListBuilder, Float32Builder, ListBuilder, UInt32Builder}; +use arrow::array::{ + Array as _, ArrayRef, BinaryArray, Int32Array, ListArray, StringArray, StructArray, UInt32Array, +}; +use arrow::datatypes::{DataType, Field}; + +/// Foxglove [`NumericType`] enum. +/// +/// [`NumericType`]: https://docs.foxglove.dev/docs/sdk/schemas/numeric-type +#[derive(Clone, Copy)] +#[repr(i32)] +enum NumericType { + Uint8 = 1, + Int8 = 2, + Uint16 = 3, + Int16 = 4, + Uint32 = 5, + Int32 = 6, + Float32 = 7, + Float64 = 8, +} + +impl TryFrom for NumericType { + type Error = re_lenses_core::combinators::Error; + + fn try_from(value: i32) -> Result { + match value { + 1 => Ok(Self::Uint8), + 2 => Ok(Self::Int8), + 3 => Ok(Self::Uint16), + 4 => Ok(Self::Int16), + 5 => Ok(Self::Uint32), + 6 => Ok(Self::Int32), + 7 => Ok(Self::Float32), + 8 => Ok(Self::Float64), + _ => Err(re_lenses_core::combinators::Error::Other(format!( + "unknown NumericType value: {value}" + ))), + } + } +} + +impl NumericType { + fn byte_size(self) -> usize { + match self { + Self::Uint8 | Self::Int8 => 1, + Self::Uint16 | Self::Int16 => 2, + Self::Uint32 | Self::Int32 | Self::Float32 => 4, + Self::Float64 => 8, + } + } + + /// Reads a value from packed data at the given byte offset and converts it to `f32`. 
+ #[expect(clippy::cast_possible_wrap)] + fn read_as_f32(self, data: &[u8], byte_offset: usize) -> f32 { + if byte_offset + self.byte_size() > data.len() { + return 0.0; + } + let bytes = &data[byte_offset..]; + match self { + Self::Uint8 => bytes[0] as f32, + Self::Int8 => (bytes[0] as i8) as f32, + Self::Uint16 => u16::from_le_bytes([bytes[0], bytes[1]]) as f32, + Self::Int16 => i16::from_le_bytes([bytes[0], bytes[1]]) as f32, + Self::Uint32 => u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as f32, + Self::Int32 => i32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as f32, + Self::Float32 => f32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]), + Self::Float64 => f64::from_le_bytes([ + bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], + ]) as f32, + } + } + + /// Reads a numeric value from packed data at the given byte offset and clamps/converts it to `u8`. + fn read_as_u8(self, data: &[u8], byte_offset: usize) -> u8 { + if byte_offset + self.byte_size() > data.len() { + return 0; + } + let bytes = &data[byte_offset..]; + match self { + Self::Uint8 => bytes[0], + // intentional reinterpretation of raw byte as signed + #[expect(clippy::cast_possible_wrap)] + Self::Int8 => (bytes[0] as i8).clamp(0, i8::MAX) as u8, + Self::Uint16 => u16::from_le_bytes([bytes[0], bytes[1]]).min(255) as u8, + Self::Int16 => i16::from_le_bytes([bytes[0], bytes[1]]).clamp(0, 255) as u8, + Self::Uint32 => { + u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]).min(255) as u8 + } + Self::Int32 => { + i32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]).clamp(0, 255) as u8 + } + Self::Float32 => (f32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) * 255.0) + .clamp(0.0, 255.0) as u8, + Self::Float64 => (f64::from_le_bytes([ + bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], + ]) * 255.0) + .clamp(0.0, 255.0) as u8, + } + } +} + +/// Byte offset and numeric type of a packed 
field within a point. +struct FieldDescriptor { + byte_offset: usize, + numeric_type: NumericType, +} + +/// Searches the `fields` struct array for entries matching the given names and returns +/// their byte offsets and numeric types. +fn find_field_descriptors( + fields_struct: &StructArray, + names: &[&str], +) -> Result>, re_lenses_core::combinators::Error> { + let name_array = fields_struct + .column_by_name("name") + .and_then(|a| a.as_any().downcast_ref::().cloned()); + let offset_array = fields_struct + .column_by_name("offset") + .and_then(|a| a.as_any().downcast_ref::().cloned()); + // Protobuf enums are stored as Struct{name: Utf8, value: Int32}; extract the `value` field. + let type_array = fields_struct + .column_by_name("type") + .and_then(|a| a.as_any().downcast_ref::()) + .and_then(|s| s.column_by_name("value")) + .and_then(|a| a.as_any().downcast_ref::().cloned()); + + let (Some(name_array), Some(offset_array), Some(type_array)) = + (name_array, offset_array, type_array) + else { + return Ok(names.iter().map(|_| None).collect()); + }; + + names + .iter() + .map(|target_name| { + for i in 0..name_array.len() { + if !name_array.is_null(i) && name_array.value(i) == *target_name { + return Ok(Some(FieldDescriptor { + byte_offset: offset_array.value(i) as usize, + numeric_type: NumericType::try_from(type_array.value(i))?, + })); + } + } + Ok(None) + }) + .collect() +} + +/// Extracts position data from point cloud messages as a `List>`. 
+pub(crate) fn extract_positions( + source: &ArrayRef, +) -> Result, re_lenses_core::combinators::Error> { + re_tracing::profile_function!(); + + let source = source + .as_any() + .downcast_ref::() + .ok_or_else(|| re_lenses_core::combinators::Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "extract_positions input".to_owned(), + })?; + + let point_stride_array = get_field_as::(source, "point_stride")?; + let fields_array = get_field_as::(source, "fields")?; + let data_array = get_field_as::(source, "data")?; + + let mut builder = ListBuilder::new( + FixedSizeListBuilder::new(Float32Builder::new(), 3).with_field(Field::new( + "item", + DataType::Float32, + false, + )), + ); + + for i in 0..source.len() { + if source.is_null(i) || data_array.is_null(i) || fields_array.is_null(i) { + builder.append_null(); + continue; + } + + let point_stride = point_stride_array.value(i) as usize; + let data = data_array.value(i); + let fields_value = fields_array.value(i); + let fields_struct = fields_value + .as_any() + .downcast_ref::() + .ok_or_else(|| re_lenses_core::combinators::Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: fields_value.data_type().clone(), + context: "fields element".to_owned(), + })?; + + let descriptors = find_field_descriptors(fields_struct, &["x", "y", "z"])?; + + if let [Some(x_desc), Some(y_desc), Some(z_desc)] = &descriptors[..] 
+ && point_stride > 0 + { + let num_points = data.len() / point_stride; + let points_builder = builder.values(); + for p in 0..num_points { + let base = p * point_stride; + points_builder.values().append_value( + x_desc + .numeric_type + .read_as_f32(data, base + x_desc.byte_offset), + ); + points_builder.values().append_value( + y_desc + .numeric_type + .read_as_f32(data, base + y_desc.byte_offset), + ); + points_builder.values().append_value( + z_desc + .numeric_type + .read_as_f32(data, base + z_desc.byte_offset), + ); + points_builder.append(true); + } + builder.append(true); + } else { + builder.append_null(); + } + } + + Ok(Some(Arc::new(builder.finish()) as ArrayRef)) +} + +/// Extracts RGBA color data from point cloud messages as a `List`. +pub(crate) fn extract_colors( + source: &ArrayRef, +) -> Result, re_lenses_core::combinators::Error> { + re_tracing::profile_function!(); + + let source = source + .as_any() + .downcast_ref::() + .ok_or_else(|| re_lenses_core::combinators::Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "extract_colors input".to_owned(), + })?; + + let point_stride_array = get_field_as::(source, "point_stride")?; + let fields_array = get_field_as::(source, "fields")?; + let data_array = get_field_as::(source, "data")?; + + let mut builder = ListBuilder::new(UInt32Builder::new()); + + for i in 0..source.len() { + if source.is_null(i) || data_array.is_null(i) || fields_array.is_null(i) { + builder.append_null(); + continue; + } + + let point_stride = point_stride_array.value(i) as usize; + let data = data_array.value(i); + let fields_value = fields_array.value(i); + let fields_struct = fields_value + .as_any() + .downcast_ref::() + .ok_or_else(|| re_lenses_core::combinators::Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: fields_value.data_type().clone(), + context: "fields element".to_owned(), + })?; + + let descriptors = + 
find_field_descriptors(fields_struct, &["red", "green", "blue", "alpha"])?; + + if let (Some(r_desc), Some(g_desc), Some(b_desc)) = + (&descriptors[0], &descriptors[1], &descriptors[2]) + && point_stride > 0 + { + let alpha_desc = &descriptors[3]; + let num_points = data.len() / point_stride; + for p in 0..num_points { + let base = p * point_stride; + let r = r_desc + .numeric_type + .read_as_u8(data, base + r_desc.byte_offset); + let g = g_desc + .numeric_type + .read_as_u8(data, base + g_desc.byte_offset); + let b = b_desc + .numeric_type + .read_as_u8(data, base + b_desc.byte_offset); + let a = alpha_desc.as_ref().map_or(255, |d| { + d.numeric_type.read_as_u8(data, base + d.byte_offset) + }); + // Convert to packed RGBA u32 format expected by Rerun. + builder.values().append_value( + ((r as u32) << 24) | ((g as u32) << 16) | ((b as u32) << 8) | (a as u32), + ); + } + builder.append(true); + } else { + builder.append_null(); + } + } + + Ok(Some(Arc::new(builder.finish()) as ArrayRef)) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/foxglove/point_cloud.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/point_cloud.rs new file mode 100644 index 000000000000..7062633881db --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/point_cloud.rs @@ -0,0 +1,57 @@ +use re_lenses::{Lens, LensBuilderError, op}; +use re_lenses_core::Selector; +use re_log_types::TimeType; +use re_sdk_types::archetypes::{CoordinateFrame, InstancePoses3D, Points3D}; + +use super::FOXGLOVE_TIMESTAMP; +use super::packed_element_field::{extract_colors, extract_positions}; + +/// Creates a lens for [`foxglove.PointCloud`] messages. 
+/// +/// [`foxglove.PointCloud`]: https://docs.foxglove.dev/docs/sdk/schemas/point-cloud +pub fn point_cloud(time_type: TimeType) -> Result { + let flatten = Selector::parse(".[]")?; + + Ok(Lens::for_input_column("foxglove.PointCloud:message") + .output_columns(|out| { + out.time( + FOXGLOVE_TIMESTAMP, + time_type, + Selector::parse(".timestamp")?.pipe(op::timespec_to_nanos()), + )? + .component( + CoordinateFrame::descriptor_frame(), + Selector::parse(".frame_id")?, + )? + .component( + Points3D::descriptor_positions(), + // Each message contains a variable number of packed points, so + // `extract_positions` returns a `List>`. + // The `.[]` flatten unwraps this extra list level so the component + // column contains the points directly. + Selector::parse(".")? + .pipe(extract_positions) + .pipe(flatten.clone()), + )? + .component( + Points3D::descriptor_colors(), + // Each message contains a variable number of packed colors, so + // `extract_colors` returns a `List`. The `.[]` flatten + // unwraps this extra list level so the component column contains + // the colors directly. + Selector::parse(".")?.pipe(extract_colors).pipe(flatten), + )? + // The pose field is optional. + .component( + InstancePoses3D::descriptor_translations(), + Selector::parse(".pose.position!")? + .pipe(op::struct_to_fixed_size_list_f32(["x", "y", "z"])), + )? + .component( + InstancePoses3D::descriptor_quaternions(), + Selector::parse(".pose.orientation!")? + .pipe(op::struct_to_fixed_size_list_f32(["x", "y", "z", "w"])), + ) + })? 
+ .build()) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/foxglove/pose_in_frame.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/pose_in_frame.rs new file mode 100644 index 000000000000..ef674365b7f2 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/pose_in_frame.rs @@ -0,0 +1,35 @@ +use re_lenses::{Lens, LensBuilderError, op}; +use re_lenses_core::Selector; +use re_log_types::TimeType; +use re_sdk_types::archetypes::{CoordinateFrame, InstancePoses3D}; + +use super::FOXGLOVE_TIMESTAMP; + +/// Creates a lens for [`foxglove.PoseInFrame`] messages. +/// +/// [`foxglove.PoseInFrame`]: https://docs.foxglove.dev/docs/sdk/schemas/pose-in-frame +pub fn pose_in_frame(time_type: TimeType) -> Result { + Ok(Lens::for_input_column("foxglove.PoseInFrame:message") + .output_columns(|out| { + out.time( + FOXGLOVE_TIMESTAMP, + time_type, + Selector::parse(".timestamp")?.pipe(op::timespec_to_nanos()), + )? + .component( + CoordinateFrame::descriptor_frame(), + Selector::parse(".frame_id")?, + )? + .component( + InstancePoses3D::descriptor_translations(), + Selector::parse(".pose.position")? + .pipe(op::struct_to_fixed_size_list_f32(["x", "y", "z"])), + )? + .component( + InstancePoses3D::descriptor_quaternions(), + Selector::parse(".pose.orientation")? + .pipe(op::struct_to_fixed_size_list_f32(["x", "y", "z", "w"])), + ) + })? 
+ .build()) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/foxglove/poses_in_frame.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/poses_in_frame.rs new file mode 100644 index 000000000000..05c0e12601aa --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/poses_in_frame.rs @@ -0,0 +1,35 @@ +use re_lenses::{Lens, LensBuilderError, op}; +use re_lenses_core::Selector; +use re_log_types::TimeType; +use re_sdk_types::archetypes::{CoordinateFrame, InstancePoses3D}; + +use super::FOXGLOVE_TIMESTAMP; + +/// Creates a lens for [`foxglove.PosesInFrame`] messages. +/// +/// [`foxglove.PosesInFrame`]: https://docs.foxglove.dev/docs/sdk/schemas/poses-in-frame +pub fn poses_in_frame(time_type: TimeType) -> Result { + Ok(Lens::for_input_column("foxglove.PosesInFrame:message") + .output_columns(|out| { + out.time( + FOXGLOVE_TIMESTAMP, + time_type, + Selector::parse(".timestamp")?.pipe(op::timespec_to_nanos()), + )? + .component( + CoordinateFrame::descriptor_frame(), + Selector::parse(".frame_id")?, + )? + .component( + InstancePoses3D::descriptor_translations(), + Selector::parse(".poses[].position")? + .pipe(op::struct_to_fixed_size_list_f32(["x", "y", "z"])), + )? + .component( + InstancePoses3D::descriptor_quaternions(), + Selector::parse(".poses[].orientation")? + .pipe(op::struct_to_fixed_size_list_f32(["x", "y", "z", "w"])), + ) + })? 
+ .build()) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/foxglove/raw_image.rs b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/raw_image.rs new file mode 100644 index 000000000000..d106f7521f15 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/foxglove/raw_image.rs @@ -0,0 +1,35 @@ +use re_lenses::{Lens, LensBuilderError, op}; +use re_lenses_core::Selector; +use re_log_types::TimeType; +use re_sdk_types::archetypes::{CoordinateFrame, Image}; + +use crate::importer_mcap::lenses::image_helpers::{encoding_to_image_format, extract_image_buffer}; + +use super::{FOXGLOVE_TIMESTAMP, IMAGE_PLANE_SUFFIX}; + +/// Creates a lens for [`foxglove.RawImage`] messages. +/// +/// [`foxglove.RawImage`]: https://docs.foxglove.dev/docs/sdk/schemas/raw-image +pub fn raw_image(time_type: TimeType) -> Result { + Ok(Lens::for_input_column("foxglove.RawImage:message") + .output_columns(|out| { + out.time( + FOXGLOVE_TIMESTAMP, + time_type, + Selector::parse(".timestamp")?.pipe(op::timespec_to_nanos()), + )? + .component( + CoordinateFrame::descriptor_frame(), + Selector::parse(".frame_id")?.pipe(op::string_suffix_nonempty(IMAGE_PLANE_SUFFIX)), + )? + .component( + Image::descriptor_format(), + Selector::parse(".")?.pipe(encoding_to_image_format()), + )? + .component( + Image::descriptor_buffer(), + Selector::parse(".")?.pipe(extract_image_buffer()), + ) + })? + .build()) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/helpers.rs b/crates/store/re_importer/src/importer_mcap/lenses/helpers.rs new file mode 100644 index 000000000000..ebb3a2cfb67e --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/helpers.rs @@ -0,0 +1,76 @@ +//! Common helper functions for transforming Arrow data in lenses. 
+ +use std::sync::Arc; + +use arrow::array::{Array, ArrayRef, Float32Array, Float64Array, ListArray, StructArray}; +use re_lenses_core::combinators::{ + Error, GetField, ListToFixedSizeList, MapFixedSizeList, PrimitiveCast, RowMajorToColumnMajor, + StructToFixedList, Transform as _, +}; + +/// Returns a pipe-compatible function that converts 3x3 row-major f64 matrices stored in variable-size lists to column-major f32 fixed-size lists. +pub fn row_major_3x3_to_column_major() +-> impl Fn(&ArrayRef) -> Result, Error> + Send + Sync { + move |source: &ArrayRef| { + let list_array = + source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "ListArray".to_owned(), + actual: source.data_type().clone(), + context: "row_major_3x3_to_column_major input".to_owned(), + })?; + let transform = ListToFixedSizeList::new(9) + .then(RowMajorToColumnMajor::new(3, 3)) + .then(MapFixedSizeList::new(PrimitiveCast::< + Float64Array, + Float32Array, + >::new())); + Ok(transform + .transform(list_array)? + .map(|arr| Arc::new(arr) as ArrayRef)) + } +} + +/// Extracts a struct field by name and downcasts it to the expected array type. +pub fn get_field_as( + source: &StructArray, + name: &str, +) -> Result { + let array_ref = GetField::new(name) + .transform(source)? + .ok_or_else(|| Error::FieldNotFound { + field_name: name.to_owned(), + available_fields: source.fields().iter().map(|f| f.name().clone()).collect(), + })?; + array_ref + .as_any() + .downcast_ref::() + .cloned() + .ok_or_else(|| Error::TypeMismatch { + expected: std::any::type_name::().to_owned(), + actual: array_ref.data_type().clone(), + context: name.to_owned(), + }) +} + +/// Converts a struct with `latitude`, `longitude` fields to a fixed-size list with two f64 values. 
+pub fn lat_lon_struct_to_fixed() +-> impl Fn(&ArrayRef) -> Result, Error> + Send + Sync { + move |source: &ArrayRef| { + let struct_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "lat_lon_struct_to_fixed input".to_owned(), + })?; + // [`re_sdk_types::components::LatLon`] (`DVec2D`) requires non-null f64 fields. + let transform = StructToFixedList::new(["latitude", "longitude"]).with_nullable(false); + Ok(transform + .transform(struct_array)? + .map(|arr| Arc::new(arr) as ArrayRef)) + } +} diff --git a/crates/store/re_data_loader/src/loader_mcap/lenses/image_helpers.rs b/crates/store/re_importer/src/importer_mcap/lenses/image_helpers.rs similarity index 65% rename from crates/store/re_data_loader/src/loader_mcap/lenses/image_helpers.rs rename to crates/store/re_importer/src/importer_mcap/lenses/image_helpers.rs index b596b07cefff..b8e5ba808efc 100644 --- a/crates/store/re_data_loader/src/loader_mcap/lenses/image_helpers.rs +++ b/crates/store/re_importer/src/importer_mcap/lenses/image_helpers.rs @@ -3,38 +3,30 @@ use std::sync::Arc; use arrow::array::{ - Array as _, BinaryArray, ListArray, StringArray, StructArray, UInt8Array, UInt32Array, + Array as _, ArrayRef, BinaryArray, ListArray, StringArray, StructArray, UInt8Array, UInt32Array, }; use arrow::buffer::OffsetBuffer; use arrow::datatypes::{DataType, Field}; -use re_arrow_combinators::Transform; -use re_arrow_combinators::map::MapList; -use re_lenses::OpError; +use re_lenses_core::combinators::Error; use re_sdk_types::Loggable as _; use re_sdk_types::datatypes::ImageFormat; use super::helpers::get_field_as; -/// Converts a struct with `width`, `height`, and `encoding` fields into a Rerun -/// [`ImageFormat`] struct array, using [`re_mcap::ImageEncoding`]. -pub fn encoding_to_image_format(list_array: &ListArray) -> Result { - Ok(MapList::new(EncodingToImageFormat).transform(list_array)?) 
-} - -/// Extracts image buffer data from a struct with `width`, `height`, `step`, `encoding`, -/// and `data` fields. Strips row padding when the data is larger than expected for the -/// given encoding. -pub fn extract_image_buffer(list_array: &ListArray) -> Result { - Ok(MapList::new(ExtractImageBuffer).transform(list_array)?) -} - -struct EncodingToImageFormat; - -impl Transform for EncodingToImageFormat { - type Source = StructArray; - type Target = StructArray; +/// Returns a pipe-compatible function that converts a struct with `width`, `height`, and +/// `encoding` fields into a Rerun [`ImageFormat`] struct array. +pub(crate) fn encoding_to_image_format() +-> impl Fn(&ArrayRef) -> Result, Error> + Send + Sync { + move |source: &ArrayRef| { + let source = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "encoding_to_image_format input".to_owned(), + })?; - fn transform(&self, source: &StructArray) -> Result { let width_array = get_field_as::(source, "width")?; let height_array = get_field_as::(source, "height")?; let encoding_array = get_field_as::(source, "encoding")?; @@ -50,32 +42,31 @@ impl Transform for EncodingToImageFormat { height_array.value(i), ]))) }) - .collect::>()?; + .collect::>()?; let array_ref = ImageFormat::to_arrow_opt(formats.iter().map(|f| f.as_ref())) - .map_err(|err| re_arrow_combinators::Error::Other(err.to_string()))?; + .map_err(|err| Error::Other(err.to_string()))?; - array_ref - .as_any() - .downcast_ref::() - .cloned() - .ok_or_else(|| re_arrow_combinators::Error::TypeMismatch { - expected: "StructArray".to_owned(), - actual: array_ref.data_type().clone(), - context: "ImageFormat serialization".to_owned(), - }) + Ok(Some(array_ref)) } } -struct ExtractImageBuffer; - -impl Transform for ExtractImageBuffer { - type Source = StructArray; - type Target = ListArray; - - fn transform(&self, source: &StructArray) -> Result 
{ +/// Returns a pipe-compatible function that extracts image buffer data from a struct with +/// `width`, `height`, `step`, `encoding`, and `data` fields. +pub(crate) fn extract_image_buffer() +-> impl Fn(&ArrayRef) -> Result, Error> + Send + Sync { + move |source: &ArrayRef| { re_tracing::profile_function!(); + let source = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "extract_image_buffer input".to_owned(), + })?; + let width_array = get_field_as::(source, "width")?; let height_array = get_field_as::(source, "height")?; let step_array = get_field_as::(source, "step")?; @@ -96,7 +87,7 @@ impl Transform for ExtractImageBuffer { let height = height_array.value(i) as usize; let blob = data_array.value(i); - // How many bytes Rerun expects for this encoding (e.g. 8×8 16UC1 -> 128). + // How many bytes Rerun expects for this encoding (e.g. 8x8 16UC1 -> 128). let encoding = parse_encoding(encoding_array.value(i))?; let total_num_bytes = encoding .to_image_format([width_array.value(i), height_array.value(i)]) @@ -120,7 +111,7 @@ impl Transform for ExtractImageBuffer { }; if row_stride > bytes_per_row && height > 0 { - // Row stride larger than the actual pixel data — strip per-row padding. + // Row stride larger than the actual pixel data -- strip per-row padding. for row in 0..height { let start = row * row_stride; buffer.extend_from_slice(&blob[start..start + bytes_per_row]); @@ -139,31 +130,30 @@ impl Transform for ExtractImageBuffer { let values = UInt8Array::from(buffer); let field = Arc::new(Field::new_list_field(DataType::UInt8, false)); - Ok(ListArray::new( + Ok(Some(Arc::new(ListArray::new( field, OffsetBuffer::new(offsets.into()), Arc::new(values), source.nulls().cloned(), - )) + )) as ArrayRef)) } } /// Appends the current buffer length as the next offset for building a `ListArray`. 
-fn push_offset(buffer: &[u8], offsets: &mut Vec) -> Result<(), re_arrow_combinators::Error> { - offsets.push(i32::try_from(buffer.len()).map_err(|_err| { - re_arrow_combinators::Error::OffsetOverflow { +fn push_offset(buffer: &[u8], offsets: &mut Vec) -> Result<(), Error> { + offsets.push( + i32::try_from(buffer.len()).map_err(|_err| Error::OffsetOverflow { actual: buffer.len(), expected_type: "i32", - } - })?); + })?, + ); Ok(()) } /// Parses an encoding string into an [`re_mcap::ImageEncoding`], mapping the error for use in transforms. -fn parse_encoding(s: &str) -> Result { - s.parse() - .map_err(|_err| re_arrow_combinators::Error::UnexpectedValue { - expected: re_mcap::ImageEncoding::NAMES, - actual: s.to_owned(), - }) +fn parse_encoding(s: &str) -> Result { + s.parse().map_err(|_err| Error::UnexpectedValue { + expected: re_mcap::ImageEncoding::NAMES, + actual: s.to_owned(), + }) } diff --git a/crates/store/re_importer/src/importer_mcap/lenses/mod.rs b/crates/store/re_importer/src/importer_mcap/lenses/mod.rs new file mode 100644 index 000000000000..176cbbcf5bb7 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/mod.rs @@ -0,0 +1,38 @@ +mod foxglove; +mod helpers; +mod image_helpers; +mod ros2msg; + +pub use crate::FOXGLOVE_LENSES_IDENTIFIER; + +use re_lenses::{LensBuilderError, Lenses, OutputMode}; +use re_log_types::TimeType; +use re_mcap::{DecoderIdentifier, SelectedDecoders}; + +const ROS2MSG_DECODER_IDENTIFIER: &str = "ros2msg"; + +pub fn mcap_lenses( + selected_decoders: &SelectedDecoders, + time_type: TimeType, +) -> Result, LensBuilderError> { + let mut lenses = Lenses::new(OutputMode::ForwardUnmatched); + + let has_foxglove_lenses = if selected_decoders + .contains(&DecoderIdentifier::from(crate::FOXGLOVE_LENSES_IDENTIFIER)) + { + foxglove::add_foxglove_lenses(&mut lenses, time_type)?; + true + } else { + false + }; + + let has_ros2msg_lenses = + if selected_decoders.contains(&DecoderIdentifier::from(ROS2MSG_DECODER_IDENTIFIER)) { + 
ros2msg::add_ros2msg_lenses(&mut lenses, time_type)?; + true + } else { + false + }; + + Ok((has_foxglove_lenses || has_ros2msg_lenses).then_some(lenses)) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/ros2msg/mod.rs b/crates/store/re_importer/src/importer_mcap/lenses/ros2msg/mod.rs new file mode 100644 index 000000000000..cd8b929ee9e4 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/ros2msg/mod.rs @@ -0,0 +1,22 @@ +//! Lenses for converting ROS 2 messages to Rerun components & archetypes. + +mod occupancy_grid; +mod ros_map_helpers; + +use re_lenses::{LensBuilderError, Lenses, OutputMode}; +use re_log_types::TimeType; + +pub use occupancy_grid::occupancy_grid; + +/// Name of the header-derived ROS 2 timeline. +const ROS2_TIMESTAMP: &str = "ros2_timestamp"; + +/// Adds all ROS 2 message lenses to an existing collection. +pub fn add_ros2msg_lenses( + lenses: &mut Lenses, + time_type: TimeType, +) -> Result<(), LensBuilderError> { + *lenses = std::mem::replace(lenses, Lenses::new(OutputMode::ForwardUnmatched)) + .add_lens(occupancy_grid(time_type)?); + Ok(()) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/ros2msg/occupancy_grid.rs b/crates/store/re_importer/src/importer_mcap/lenses/ros2msg/occupancy_grid.rs new file mode 100644 index 000000000000..1eb04cfa1278 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/ros2msg/occupancy_grid.rs @@ -0,0 +1,53 @@ +use re_lenses::{Lens, LensBuilderError, op}; +use re_lenses_core::Selector; +use re_log_types::TimeType; +use re_sdk_types::archetypes::{CoordinateFrame, GridMap}; +use re_sdk_types::components::Colormap; + +use super::ROS2_TIMESTAMP; +use super::ros_map_helpers::{ + default_ros_map_colormap, map_buffer_to_image_buffer, map_dimensions_to_l8_image_format, +}; + +/// Creates a lens for `nav_msgs/msg/OccupancyGrid` messages. 
+pub fn occupancy_grid(time_type: TimeType) -> Result { + Ok(Lens::for_input_column("nav_msgs.msg.OccupancyGrid:message") + .output_columns(|out| { + out.time( + ROS2_TIMESTAMP, + time_type, + Selector::parse(".header.stamp")?.pipe(op::timespec_to_nanos()), + )? + .component( + CoordinateFrame::descriptor_frame(), + Selector::parse(".header.frame_id")?, + )? + .component( + GridMap::descriptor_data(), + Selector::parse(".")?.pipe(map_buffer_to_image_buffer("info", "width", "height")), + )? + .component( + GridMap::descriptor_format(), + Selector::parse(".info")?.pipe(map_dimensions_to_l8_image_format()), + )? + .component( + GridMap::descriptor_cell_size(), + Selector::parse(".info.resolution")?, + )? + .component( + GridMap::descriptor_translation(), + Selector::parse(".info.origin.position")? + .pipe(op::struct_to_fixed_size_list_f32(["x", "y", "z"])), + )? + .component( + GridMap::descriptor_quaternion(), + Selector::parse(".info.origin.orientation")? + .pipe(op::struct_to_fixed_size_list_f32(["x", "y", "z", "w"])), + )? + .component( + GridMap::descriptor_colormap(), + Selector::parse(".")?.pipe(default_ros_map_colormap(Colormap::RvizMap)), + ) + })? 
+ .build()) +} diff --git a/crates/store/re_importer/src/importer_mcap/lenses/ros2msg/ros_map_helpers.rs b/crates/store/re_importer/src/importer_mcap/lenses/ros2msg/ros_map_helpers.rs new file mode 100644 index 000000000000..93851e886dbd --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/lenses/ros2msg/ros_map_helpers.rs @@ -0,0 +1,152 @@ +use std::sync::Arc; + +use arrow::array::{ + Array as _, ArrayRef, Int8Array, ListArray, ListBuilder, StructArray, UInt8Builder, UInt32Array, +}; +use re_lenses_core::combinators::Error; +use re_sdk_types::Loggable as _; +use re_sdk_types::components::{Colormap, ImageFormat}; +use re_sdk_types::datatypes::{ChannelDatatype, ColorModel}; + +use crate::importer_mcap::lenses::helpers::get_field_as; + +/// Returns a pipe-compatible function that converts ROS maps +/// stored in i8 buffers to Rerun image buffers. +/// +/// ROS map buffers start at map cell `(0, 0)`, i.e. the bottom image row, +/// while Rerun's image buffers are consumed top row first. 
+pub(crate) fn map_buffer_to_image_buffer( + metadata_field: &'static str, + width_field: &'static str, + height_field: &'static str, +) -> impl Fn(&ArrayRef) -> Result, Error> + Send + Sync { + move |source: &ArrayRef| { + let source = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "map_buffer_to_image_buffer input".to_owned(), + })?; + + let data = get_field_as::(source, "data")?; + let metadata = get_field_as::(source, metadata_field)?; + let width = get_field_as::(&metadata, width_field)?; + let height = get_field_as::(&metadata, height_field)?; + let values = data + .values() + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "Int8Array".to_owned(), + actual: data.values().data_type().clone(), + context: "map_buffer_to_image_buffer values".to_owned(), + })?; + + ros_map_buffer_to_image_buffer(source, &data, &metadata, &width, &height, |builder, idx| { + // Preserve ROS occupancy byte conventions, in particular `-1 -> 255` for unknown cells. + builder.append_value(values.value(idx) as u8); + }) + } +} + +/// Returns a pipe-compatible function that converts a struct with `width` and `height` +/// fields into a grayscale 8-bit Rerun [`ImageFormat`] struct array. 
+pub(crate) fn map_dimensions_to_l8_image_format() +-> impl Fn(&ArrayRef) -> Result, Error> + Send + Sync { + move |source: &ArrayRef| { + let source = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "map_dimensions_to_l8_image_format input".to_owned(), + })?; + + let width_array = get_field_as::(source, "width")?; + let height_array = get_field_as::(source, "height")?; + + let formats: Vec = (0..source.len()) + .map(|i| { + ImageFormat::from_color_model( + [width_array.value(i), height_array.value(i)], + ColorModel::L, + ChannelDatatype::U8, + ) + }) + .collect(); + + ImageFormat::to_arrow_opt(formats.iter().map(Some)) + .map(Some) + .map_err(|err| Error::Other(err.to_string())) + } +} + +/// Returns a pipe-compatible function that fills a Rerun [`Colormap`] array with a +/// single repeated ROS map colormap value. +pub(crate) fn default_ros_map_colormap( + colormap: Colormap, +) -> impl Fn(&ArrayRef) -> Result, Error> + Send + Sync { + move |source: &ArrayRef| { + let len = source.len(); + Colormap::to_arrow_opt(std::iter::repeat_n(Some(colormap), len)) + .map(Some) + .map_err(|err| Error::Other(err.to_string())) + } +} + +/// Reorders a ROS map buffer into top-row-first image order for `GridMap`. 
+fn ros_map_buffer_to_image_buffer( + source: &StructArray, + data: &ListArray, + metadata: &StructArray, + width: &UInt32Array, + height: &UInt32Array, + mut append_value: impl FnMut(&mut UInt8Builder, usize), +) -> Result, Error> { + let mut builder = ListBuilder::new(UInt8Builder::with_capacity(data.values().len())); + let row_nulls = data.values().nulls(); + + for row in 0..source.len() { + if data.is_null(row) || metadata.is_null(row) || width.is_null(row) || height.is_null(row) { + builder.append(false); + continue; + } + + let row_width = width.value(row) as usize; + let row_height = height.value(row) as usize; + let row_len = row_width.checked_mul(row_height).ok_or_else(|| { + Error::Other("ros_map_buffer_to_image_buffer dimensions overflow".to_owned()) + })?; + + let start = data.value_offsets()[row] as usize; + let end = data.value_offsets()[row + 1] as usize; + if end - start != row_len { + return Err(Error::Other(format!( + "ros_map_buffer_to_image_buffer expected {} cells from {}x{} grid, got {}", + row_len, + row_width, + row_height, + end - start + ))); + } + + for image_row in (0..row_height).rev() { + let row_start = start + image_row * row_width; + let row_end = row_start + row_width; + for idx in row_start..row_end { + if row_nulls.is_some_and(|nulls| !nulls.is_valid(idx)) { + builder.values().append_null(); + } else { + append_value(builder.values(), idx); + } + } + } + + builder.append(true); + } + + Ok(Some(Arc::new(builder.finish()) as ArrayRef)) +} diff --git a/crates/store/re_data_loader/src/loader_mcap/mod.rs b/crates/store/re_importer/src/importer_mcap/mod.rs similarity index 64% rename from crates/store/re_data_loader/src/loader_mcap/mod.rs rename to crates/store/re_importer/src/importer_mcap/mod.rs index 5cf888fcc55e..0e12f5907df4 100644 --- a/crates/store/re_data_loader/src/loader_mcap/mod.rs +++ b/crates/store/re_importer/src/importer_mcap/mod.rs @@ -1,6 +1,7 @@ -//! Rerun dataloader for MCAP files. +//! Rerun importer for MCAP files. 
-mod loader; +mod importer; +mod robot_description; /// Lens implementations for transforming various third-party data formats into Rerun components. pub mod lenses; @@ -8,5 +9,5 @@ pub mod lenses; #[cfg(test)] pub mod tests; +pub use importer::McapImporter; pub use lenses::FOXGLOVE_LENSES_IDENTIFIER; -pub use loader::{McapLoader, load_mcap}; diff --git a/crates/store/re_importer/src/importer_mcap/robot_description.rs b/crates/store/re_importer/src/importer_mcap/robot_description.rs new file mode 100644 index 000000000000..46e0689ca420 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/robot_description.rs @@ -0,0 +1,79 @@ +use std::collections::HashMap; + +/// Minimal ROS2 String message for CDR deserialization. +#[derive(serde::Deserialize)] +struct RosString { + data: String, +} + +/// Scans the MCAP for `robot_description` topics carrying `std_msgs/msg/String`, +/// extracts the URDF XML, and emits 3D visualization chunks. +/// +/// Note that transforms are not extracted from the URDF in this context. +/// These are expected to be present in the MCAP as separate transform messages. +/// +/// TODO(michael): this could be implemented as an `re_mcap` decoder if the +/// core URDF parsing logic is moved to a separate crate outside of `re_importer`. 
+pub(crate) fn extract_urdf_from_robot_descriptions( + mcap_bytes: &[u8], + summary: &mcap::Summary, + topic_filter: &re_mcap::TopicFilter, + emit: &mut dyn FnMut(re_chunk::Chunk), +) -> anyhow::Result<()> { + let robot_desc_channels: Vec = summary + .channels + .values() + .filter(|channel| { + topic_filter.matches(&channel.topic) + && channel.topic.contains("robot_description") + && channel.schema.as_ref().is_some_and(|schema| { + schema.name == "std_msgs/msg/String" && schema.encoding == "ros2msg" + }) + }) + .map(|channel| channel.id) + .collect(); + + if robot_desc_channels.is_empty() { + return Ok(()); + } + + re_log::debug!( + "Found {} robot_description channel(s), scanning messages…", + robot_desc_channels.len() + ); + + let mut urdf_by_channel: HashMap = HashMap::new(); + + for msg in mcap::MessageStream::new(mcap_bytes)? { + let msg = msg?; + if robot_desc_channels.contains(&msg.channel.id) + && let Ok(decoded) = re_mcap::cdr::try_decode_message::(&msg.data) + { + urdf_by_channel.insert(msg.channel.id, decoded.data); + } + } + + for urdf_xml in urdf_by_channel.into_values() { + match crate::importer_urdf::build_urdf_chunks_from_xml( + &urdf_xml, + None, + &re_log_types::TimePoint::STATIC, + false, + ) { + Ok(chunks) => { + re_log::debug!( + "URDF extraction produced {} chunks from robot_description.", + chunks.len() + ); + for chunk in chunks { + emit(chunk); + } + } + Err(err) => { + re_log::warn_once!("Failed to parse URDF from robot_description topic: {err}"); + } + } + } + + Ok(()) +} diff --git a/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_camera_calibration.mcap b/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_camera_calibration.mcap new file mode 100644 index 000000000000..87fbbf5c152b --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_camera_calibration.mcap @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:edc5fa3ed6451c359fe63b58d9319c67404d60382020e27ec3972b24ca56d04c +size 1716 diff --git a/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_compressed_image.mcap b/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_compressed_image.mcap new file mode 100644 index 000000000000..00b83ce38bdb --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_compressed_image.mcap @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9350c70854c15c5c97b89f478c976dce72bfd3a5c26d25c732549ef31444f0fb +size 2016 diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/assets/foxglove_compressed_video.mcap b/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_compressed_video.mcap similarity index 100% rename from crates/store/re_data_loader/src/loader_mcap/tests/assets/foxglove_compressed_video.mcap rename to crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_compressed_video.mcap diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/assets/foxglove_frame_transforms.mcap b/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_frame_transforms.mcap similarity index 100% rename from crates/store/re_data_loader/src/loader_mcap/tests/assets/foxglove_frame_transforms.mcap rename to crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_frame_transforms.mcap diff --git a/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_location_fixes.mcap b/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_location_fixes.mcap new file mode 100644 index 000000000000..9f3e024c7548 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_location_fixes.mcap @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e537dae7879ba995e31cc68138b311a4d75b9507068ca2ec8470774de0ac3df +size 3434 diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/assets/foxglove_log.mcap 
b/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_log.mcap similarity index 100% rename from crates/store/re_data_loader/src/loader_mcap/tests/assets/foxglove_log.mcap rename to crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_log.mcap diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/assets/foxglove_point_cloud.mcap b/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_point_cloud.mcap similarity index 100% rename from crates/store/re_data_loader/src/loader_mcap/tests/assets/foxglove_point_cloud.mcap rename to crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_point_cloud.mcap diff --git a/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_poses_in_frame.mcap b/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_poses_in_frame.mcap new file mode 100644 index 000000000000..c3591aa76de9 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_poses_in_frame.mcap @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c61051ead91993545bfd86d80d0d0a6b180e6359979347534e443883c09cd1a +size 3170 diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/assets/foxglove_raw_image.mcap b/crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_raw_image.mcap similarity index 100% rename from crates/store/re_data_loader/src/loader_mcap/tests/assets/foxglove_raw_image.mcap rename to crates/store/re_importer/src/importer_mcap/tests/assets/foxglove_raw_image.mcap diff --git a/crates/store/re_importer/src/importer_mcap/tests/assets/ros_occupancy_grid.mcap b/crates/store/re_importer/src/importer_mcap/tests/assets/ros_occupancy_grid.mcap new file mode 100644 index 000000000000..6d70fbf815e7 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/assets/ros_occupancy_grid.mcap @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e14ad672f984fa0dc9e58fad739c7715ca6b3efbabcffca0875e249cc570530f 
+size 3582 diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/mod.rs b/crates/store/re_importer/src/importer_mcap/tests/foxglove/mod.rs similarity index 58% rename from crates/store/re_data_loader/src/loader_mcap/tests/foxglove/mod.rs rename to crates/store/re_importer/src/importer_mcap/tests/foxglove/mod.rs index ce5b9bc2ab82..9916bb1ff1ad 100644 --- a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/mod.rs +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/mod.rs @@ -1,7 +1,11 @@ //! Tests for Foxglove message lenses. +mod test_camera_calibration; +mod test_compressed_image; mod test_compressed_video; mod test_frame_transforms; +mod test_location_fixes; mod test_log; mod test_point_cloud; +mod test_poses_in_frame; mod test_raw_image; diff --git a/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_camera_calibration__foxglove_camera_calibration.snap b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_camera_calibration__foxglove_camera_calibration.snap new file mode 100644 index 000000000000..8444474f706e --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_camera_calibration__foxglove_camera_calibration.snap @@ -0,0 +1,28 @@ +--- +source: crates/store/re_importer/src/importer_mcap/tests/foxglove/test_camera_calibration.rs +expression: "format!(\"{:-240}\", chunk)" +--- +┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /camera/calibration │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ 
+├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ Pinhole:child_frame ┆ Pinhole:image_from_camera ┆ Pinhole:parent_frame ┆ Pinhole:resolution │ │ +│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Utf8) ┆ type: ┆ type: List(Utf8) ┆ type: │ │ +│ │ FixedSizeBinary(16) ┆ index_name: ┆ index_name: ┆ index_name: timestamp ┆ archetype: Pinhole ┆ List(FixedSizeList(9 x ┆ archetype: Pinhole ┆ List(FixedSizeList(2 x │ │ +│ │ ARROW:extension:metadata: ┆ message_log_time ┆ message_publish_time ┆ is_sorted: true ┆ component: ┆ non-null Float32)) ┆ component: ┆ non-null Float32)) │ │ +│ │ {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ kind: index ┆ Pinhole:child_frame ┆ archetype: Pinhole ┆ Pinhole:parent_frame ┆ archetype: Pinhole │ │ +│ │ ARROW:extension:name: ┆ kind: index ┆ kind: index ┆ ┆ component_type: ┆ component: ┆ component_type: ┆ component: │ │ +│ │ TUID ┆ ┆ ┆ ┆ TransformFrameId ┆ Pinhole:image_from_camera ┆ TransformFrameId ┆ Pinhole:resolution │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ kind: data ┆ component_type: ┆ kind: data ┆ component_type: │ │ +│ │ kind: control ┆ ┆ ┆ ┆ ┆ PinholeProjection ┆ ┆ Resolution │ │ +│ │ ┆ ┆ ┆ ┆ ┆ kind: data ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╡ │ +│ │ 
row_[**REDACTED**] ┆ 2026-04-02T13:27:59.61039 ┆ 2026-04-02T13:27:59.61039 ┆ 2026-04-02T13:27:59.61039 ┆ [camera_optical_frame_ima ┆ [[525.0, 0.0, 0.0, 0.0, ┆ [camera_optical_frame] ┆ [[640.0, 480.0]] │ │ +│ │ ┆ 1040 ┆ 1040 ┆ 1040 ┆ ge_plane] ┆ 525.0, 0.0, 319.5, 239.5, ┆ ┆ │ │ +│ │ ┆ ┆ ┆ ┆ ┆ 1.0]] ┆ ┆ │ │ +│ └───────────────────────────┴───────────────────────────┴───────────────────────────┴───────────────────────────┴───────────────────────────┴───────────────────────────┴───────────────────────────┴───────────────────────────┘ │ +└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_point_cloud__foxglove_point_cloud.snap b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_compressed_image__foxglove_compressed_image.snap similarity index 74% rename from crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_point_cloud__foxglove_point_cloud.snap rename to crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_compressed_image__foxglove_compressed_image.snap index 789a7fbbfcca..f892801e8710 100644 --- a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_point_cloud__foxglove_point_cloud.snap +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_compressed_image__foxglove_compressed_image.snap @@ -1,28 +1,27 @@ --- -source: crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_point_cloud.rs +source: 
crates/store/re_importer/src/importer_mcap/tests/foxglove/test_compressed_image.rs expression: "format!(\"{:-240}\", chunk)" --- ┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ METADATA: │ -│ * entity_path: /point_cloud │ +│ * entity_path: /camera/compressed_image │ │ * id: [**REDACTED**] │ │ * version: [**REDACTED**] │ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ ┌────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬────────────────────────────────┬────────────────────────────────┐ │ -│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ CoordinateFrame:frame ┆ Points3D:colors ┆ Points3D:positions │ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ CoordinateFrame:frame ┆ EncodedImage:blob ┆ EncodedImage:media_type │ │ │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable Timestamp(ns) ┆ type: nullable Timestamp(ns) ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable ┆ type: nullable List[nullable ┆ type: nullable List[nullable │ │ -│ │ ARROW:extension:metadata: ┆ index_name: message_log_time ┆ index_name: ┆ index_name: timestamp ┆ Utf8] ┆ u32] ┆ FixedSizeList[f32; 3]] │ │ -│ │ {"namespace":"row"} ┆ is_sorted: true ┆ message_publish_time ┆ is_sorted: true ┆ archetype: CoordinateFrame ┆ archetype: Points3D ┆ archetype: Points3D │ │ -│ │ ARROW:extension:name: TUID ┆ kind: index ┆ is_sorted: true ┆ kind: index ┆ component: ┆ component: Points3D:colors ┆ component: Points3D:positions │ │ -│ │ is_sorted: 
true ┆ ┆ kind: index ┆ ┆ CoordinateFrame:frame ┆ component_type: Color ┆ component_type: Position3D │ │ -│ │ kind: control ┆ ┆ ┆ ┆ component_type: ┆ kind: data ┆ kind: data │ │ -│ │ ┆ ┆ ┆ ┆ TransformFrameId ┆ ┆ │ │ -│ │ ┆ ┆ ┆ ┆ kind: data ┆ ┆ │ │ +│ │ type: non-null ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Utf8) ┆ type: List(List(non-null ┆ type: List(Utf8) │ │ +│ │ FixedSizeBinary(16) ┆ index_name: message_log_time ┆ index_name: ┆ index_name: timestamp ┆ archetype: CoordinateFrame ┆ UInt8)) ┆ archetype: EncodedImage │ │ +│ │ ARROW:extension:metadata: ┆ is_sorted: true ┆ message_publish_time ┆ is_sorted: true ┆ component: ┆ archetype: EncodedImage ┆ component: │ │ +│ │ {"namespace":"row"} ┆ kind: index ┆ is_sorted: true ┆ kind: index ┆ CoordinateFrame:frame ┆ component: EncodedImage:blob ┆ EncodedImage:media_type │ │ +│ │ ARROW:extension:name: TUID ┆ ┆ kind: index ┆ ┆ component_type: ┆ component_type: Blob ┆ component_type: MediaType │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ TransformFrameId ┆ kind: data ┆ kind: data │ │ +│ │ kind: control ┆ ┆ ┆ ┆ kind: data ┆ ┆ │ │ │ ╞════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪════════════════════════════════╪════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ 2026-02-17T14:13:33.852164096 ┆ 2026-02-17T14:13:33.852164096 ┆ 2026-02-17T14:13:33.852164096 ┆ [world] ┆ [4278190335, 16711935, 65535, ┆ [[0.0, 0.0, 0.0], [1.0, 0.0, │ │ -│ │ ┆ ┆ ┆ ┆ ┆ 4294902015, 4278255615, ┆ 0.0], [1.0, 1.0, 0.0], [0.0, │ │ -│ │ ┆ ┆ ┆ ┆ ┆ 16777215, 4294967295, ┆ 1.0, 0.0], [0.0, 0.0, 1.0], │ │ -│ │ ┆ ┆ ┆ ┆ ┆ 4286578943] ┆ [1.0, 0.0, 1.… │ │ +│ │ row_[**REDACTED**] ┆ 2026-04-02T13:27:59.717618944 ┆ 2026-04-02T13:27:59.717618944 ┆ 2026-04-02T13:27:59.717618944 ┆ [camera_optical_frame_image_pl ┆ [[255, 216, 255, 224, 0, 16, ┆ [image/jpeg] │ │ +│ │ ┆ ┆ ┆ ┆ ane] ┆ 74, 70, 73, 70, 0, 1, 1, 0, 0, ┆ │ │ +│ 
│ ┆ ┆ ┆ ┆ ┆ 1, 0, 1, 0, 0, 255, 219, 0, ┆ │ │ +│ │ ┆ ┆ ┆ ┆ ┆ 67, 0, 8, 6… ┆ │ │ │ └────────────────────────────────┴───────────────────────────────┴────────────────────────────────┴───────────────────────────────┴────────────────────────────────┴────────────────────────────────┴────────────────────────────────┘ │ └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_compressed_video__foxglove_compressed_video.snap b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_compressed_video__foxglove_compressed_video.snap new file mode 100644 index 000000000000..955524ce9e05 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_compressed_video__foxglove_compressed_video.snap @@ -0,0 +1,37 @@ +--- +source: crates/store/re_importer/src/importer_mcap/tests/foxglove/test_compressed_video.rs +expression: "format!(\"{:-240}\", chunk)" +--- +┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /compressed_video │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 
┌────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬──────────────────────────────┬────────────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ CoordinateFrame:frame ┆ VideoStream:codec ┆ VideoStream:sample │ │ +│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Utf8) ┆ type: List(UInt32) ┆ type: List(List(non-null │ │ +│ │ FixedSizeBinary(16) ┆ index_name: message_log_time ┆ index_name: ┆ index_name: timestamp ┆ archetype: CoordinateFrame ┆ archetype: VideoStream ┆ UInt8)) │ │ +│ │ ARROW:extension:metadata: ┆ is_sorted: true ┆ message_publish_time ┆ is_sorted: true ┆ component: ┆ component: VideoStream:codec ┆ archetype: VideoStream │ │ +│ │ {"namespace":"row"} ┆ kind: index ┆ is_sorted: true ┆ kind: index ┆ CoordinateFrame:frame ┆ component_type: VideoCodec ┆ component: VideoStream:sample │ │ +│ │ ARROW:extension:name: TUID ┆ ┆ kind: index ┆ ┆ component_type: ┆ kind: data ┆ component_type: VideoSample │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ TransformFrameId ┆ ┆ kind: data │ │ +│ │ kind: control ┆ ┆ ┆ ┆ kind: data ┆ ┆ │ │ +│ ╞════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪══════════════════════════════╪════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00 ┆ 1970-01-01T00:00:00 ┆ 1970-01-01T00:00:00 ┆ [camera_frame_image_plane] ┆ [1635148593] ┆ [[0, 0, 0, 1, 103, 100, 16, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ 10, 172, 184, 143, 66, 0, 0, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ 3, 0, 2, 0, 0, 3, 0, 121, 8, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ 0, 0, 0, 1, 1… │ │ +│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.033333333 ┆ 1970-01-01T00:00:00.033333333 ┆ 1970-01-01T00:00:00.033333333 ┆ [camera_frame_image_plane] ┆ [1635148593] ┆ [[0, 0, 0, 1, 103, 100, 16, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ 10, 172, 184, 143, 66, 0, 0, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ 3, 0, 2, 0, 0, 3, 0, 121, 8, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ 0, 0, 0, 1, 1… │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.066666666 ┆ 1970-01-01T00:00:00.066666666 ┆ 1970-01-01T00:00:00.066666666 ┆ [camera_frame_image_plane] ┆ [1635148593] ┆ [[0, 0, 0, 1, 103, 100, 16, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ 10, 172, 184, 143, 66, 0, 0, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ 3, 0, 2, 0, 0, 3, 0, 121, 8, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ 0, 0, 0, 1, 1… │ │ +│ └────────────────────────────────┴───────────────────────────────┴────────────────────────────────┴───────────────────────────────┴────────────────────────────────┴──────────────────────────────┴────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_frame_transforms__foxglove_frame_transform.snap b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_frame_transforms__foxglove_frame_transform.snap similarity index 92% rename from 
crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_frame_transforms__foxglove_frame_transform.snap rename to crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_frame_transforms__foxglove_frame_transform.snap index 1bb0009eb498..ffa69bf6f6b8 100644 --- a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_frame_transforms__foxglove_frame_transform.snap +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_frame_transforms__foxglove_frame_transform.snap @@ -1,5 +1,5 @@ --- -source: crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_frame_transforms.rs +source: crates/store/re_importer/src/importer_mcap/tests/foxglove/test_frame_transforms.rs expression: "format!(\"{:-240}\", chunk)" --- ┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ @@ -11,14 +11,14 @@ expression: "format!(\"{:-240}\", chunk)" │ ┌───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┐ │ │ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ Transform3D:child_frame ┆ Transform3D:parent_frame ┆ Transform3D:quaternion ┆ Transform3D:translation │ │ │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable │ │ -│ │ ARROW:extension:metadata: ┆ Timestamp(ns) ┆ Timestamp(ns) ┆ Timestamp(ns) ┆ List[nullable Utf8] ┆ List[nullable 
Utf8] ┆ List[nullable ┆ List[nullable │ │ -│ │ {"namespace":"row"} ┆ index_name: ┆ index_name: ┆ index_name: timestamp ┆ archetype: Transform3D ┆ archetype: Transform3D ┆ FixedSizeList[f32; 4]] ┆ FixedSizeList[f32; 3]] │ │ -│ │ ARROW:extension:name: ┆ message_log_time ┆ message_publish_time ┆ is_sorted: true ┆ component: ┆ component: ┆ archetype: Transform3D ┆ archetype: Transform3D │ │ -│ │ TUID ┆ is_sorted: true ┆ is_sorted: true ┆ kind: index ┆ Transform3D:child_frame ┆ Transform3D:parent_frame ┆ component: ┆ component: │ │ -│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ ┆ component_type: ┆ component_type: ┆ Transform3D:quaternion ┆ Transform3D:translation │ │ -│ │ kind: control ┆ ┆ ┆ ┆ TransformFrameId ┆ TransformFrameId ┆ component_type: ┆ component_type: │ │ -│ │ ┆ ┆ ┆ ┆ kind: data ┆ kind: data ┆ RotationQuat ┆ Translation3D │ │ +│ │ type: non-null ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Utf8) ┆ type: List(Utf8) ┆ type: ┆ type: │ │ +│ │ FixedSizeBinary(16) ┆ index_name: ┆ index_name: ┆ index_name: timestamp ┆ archetype: Transform3D ┆ archetype: Transform3D ┆ List(FixedSizeList(4 x ┆ List(FixedSizeList(3 x │ │ +│ │ ARROW:extension:metadata: ┆ message_log_time ┆ message_publish_time ┆ is_sorted: true ┆ component: ┆ component: ┆ non-null Float32)) ┆ non-null Float32)) │ │ +│ │ {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ kind: index ┆ Transform3D:child_frame ┆ Transform3D:parent_frame ┆ archetype: Transform3D ┆ archetype: Transform3D │ │ +│ │ ARROW:extension:name: ┆ kind: index ┆ kind: index ┆ ┆ component_type: ┆ component_type: ┆ component: ┆ component: │ │ +│ │ TUID ┆ ┆ ┆ ┆ TransformFrameId ┆ TransformFrameId ┆ Transform3D:quaternion ┆ Transform3D:translation │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ kind: data ┆ kind: data ┆ component_type: ┆ component_type: │ │ +│ │ kind: control ┆ ┆ ┆ ┆ ┆ ┆ RotationQuat ┆ Translation3D │ │ │ │ ┆ ┆ ┆ ┆ ┆ ┆ kind: data ┆ kind: data │ │ │ 
╞═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╡ │ │ │ row_[**REDACTED**] ┆ 2026-02-10T09:27:33.72327 ┆ 2026-02-10T09:27:33.72327 ┆ 2026-02-10T09:27:33.72327 ┆ [sensor] ┆ [world] ┆ [[0.0, 0.0, 0.0, 1.0]] ┆ [[1.0, 0.0, 0.0]] │ │ diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_frame_transforms__foxglove_frame_transforms.snap b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_frame_transforms__foxglove_frame_transforms.snap similarity index 92% rename from crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_frame_transforms__foxglove_frame_transforms.snap rename to crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_frame_transforms__foxglove_frame_transforms.snap index ea69ce2239c8..c3e1baab142e 100644 --- a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_frame_transforms__foxglove_frame_transforms.snap +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_frame_transforms__foxglove_frame_transforms.snap @@ -1,5 +1,5 @@ --- -source: crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_frame_transforms.rs +source: crates/store/re_importer/src/importer_mcap/tests/foxglove/test_frame_transforms.rs expression: "format!(\"{:-240}\", chunk)" --- 
┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ @@ -11,14 +11,14 @@ expression: "format!(\"{:-240}\", chunk)" │ ┌───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┐ │ │ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ Transform3D:child_frame ┆ Transform3D:parent_frame ┆ Transform3D:quaternion ┆ Transform3D:translation │ │ │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable ┆ type: nullable │ │ -│ │ ARROW:extension:metadata: ┆ Timestamp(ns) ┆ Timestamp(ns) ┆ Timestamp(ns) ┆ List[nullable Utf8] ┆ List[nullable Utf8] ┆ List[nullable ┆ List[nullable │ │ -│ │ {"namespace":"row"} ┆ index_name: ┆ index_name: ┆ index_name: timestamp ┆ archetype: Transform3D ┆ archetype: Transform3D ┆ FixedSizeList[f32; 4]] ┆ FixedSizeList[f32; 3]] │ │ -│ │ ARROW:extension:name: ┆ message_log_time ┆ message_publish_time ┆ is_sorted: true ┆ component: ┆ component: ┆ archetype: Transform3D ┆ archetype: Transform3D │ │ -│ │ TUID ┆ is_sorted: true ┆ is_sorted: true ┆ kind: index ┆ Transform3D:child_frame ┆ Transform3D:parent_frame ┆ component: ┆ component: │ │ -│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ ┆ component_type: ┆ component_type: ┆ Transform3D:quaternion ┆ Transform3D:translation │ │ -│ │ kind: control ┆ ┆ ┆ ┆ TransformFrameId ┆ TransformFrameId ┆ component_type: ┆ component_type: │ │ -│ │ ┆ ┆ ┆ ┆ kind: data ┆ kind: data ┆ RotationQuat ┆ Translation3D │ │ +│ │ type: non-null ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Utf8) ┆ type: List(Utf8) ┆ 
type: ┆ type: │ │ +│ │ FixedSizeBinary(16) ┆ index_name: ┆ index_name: ┆ index_name: timestamp ┆ archetype: Transform3D ┆ archetype: Transform3D ┆ List(FixedSizeList(4 x ┆ List(FixedSizeList(3 x │ │ +│ │ ARROW:extension:metadata: ┆ message_log_time ┆ message_publish_time ┆ is_sorted: true ┆ component: ┆ component: ┆ non-null Float32)) ┆ non-null Float32)) │ │ +│ │ {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ kind: index ┆ Transform3D:child_frame ┆ Transform3D:parent_frame ┆ archetype: Transform3D ┆ archetype: Transform3D │ │ +│ │ ARROW:extension:name: ┆ kind: index ┆ kind: index ┆ ┆ component_type: ┆ component_type: ┆ component: ┆ component: │ │ +│ │ TUID ┆ ┆ ┆ ┆ TransformFrameId ┆ TransformFrameId ┆ Transform3D:quaternion ┆ Transform3D:translation │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ kind: data ┆ kind: data ┆ component_type: ┆ component_type: │ │ +│ │ kind: control ┆ ┆ ┆ ┆ ┆ ┆ RotationQuat ┆ Translation3D │ │ │ │ ┆ ┆ ┆ ┆ ┆ ┆ kind: data ┆ kind: data │ │ │ ╞═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╡ │ │ │ row_[**REDACTED**] ┆ 2026-02-10T09:27:33.72327 ┆ 2026-02-10T09:27:33.72327 ┆ 2026-02-10T09:27:33.72327 ┆ [sensor] ┆ [world] ┆ [[0.0, 0.0, 0.0, 1.0]] ┆ [[1.0, 0.0, 0.0]] │ │ diff --git a/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_location_fixes__foxglove_location_fix.snap b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_location_fixes__foxglove_location_fix.snap new file mode 100644 index 000000000000..2a9d038457e6 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_location_fixes__foxglove_location_fix.snap @@ -0,0 +1,32 @@ +--- +source: 
crates/store/re_importer/src/importer_mcap/tests/foxglove/test_location_fixes.rs +expression: "format!(\"{:-240}\", chunk)" +--- +┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /gps_fix │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬─────────────────────────────┬────────────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ CoordinateFrame:frame ┆ GeoPoints:colors ┆ GeoPoints:positions │ │ +│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Utf8) ┆ type: List(UInt32) ┆ type: List(FixedSizeList(2 x │ │ +│ │ FixedSizeBinary(16) ┆ index_name: message_log_time ┆ index_name: ┆ index_name: timestamp ┆ archetype: CoordinateFrame ┆ archetype: GeoPoints ┆ non-null Float64)) │ │ +│ │ ARROW:extension:metadata: ┆ is_sorted: true ┆ message_publish_time ┆ is_sorted: true ┆ component: ┆ component: GeoPoints:colors ┆ archetype: GeoPoints │ │ +│ │ {"namespace":"row"} ┆ kind: index ┆ is_sorted: true ┆ kind: index ┆ CoordinateFrame:frame ┆ component_type: Color ┆ component: GeoPoints:positions │ │ +│ │ ARROW:extension:name: TUID ┆ ┆ kind: index ┆ ┆ component_type: ┆ kind: data ┆ component_type: LatLon │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ TransformFrameId ┆ ┆ kind: data │ │ +│ │ kind: control ┆ ┆ ┆ ┆ kind: data ┆ ┆ │ │ +│ 
╞════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪═════════════════════════════╪════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 2026-02-16T14:15:00.174330112 ┆ 2026-02-16T14:15:00.174330112 ┆ 2026-02-16T14:15:00.174330112 ┆ [gps] ┆ [4278190335] ┆ [[59.3192, 18.0738]] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2026-02-16T14:15:01.174330112 ┆ 2026-02-16T14:15:01.174330112 ┆ 2026-02-16T14:15:01.174330112 ┆ [gps] ┆ [16711935] ┆ [[59.3191, 18.0746]] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2026-02-16T14:15:02.174330112 ┆ 2026-02-16T14:15:02.174330112 ┆ 2026-02-16T14:15:02.174330112 ┆ [gps] ┆ [65535] ┆ [[59.3191, 18.0754]] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2026-02-16T14:15:03.174330112 ┆ 2026-02-16T14:15:03.174330112 ┆ 2026-02-16T14:15:03.174330112 ┆ [gps] ┆ [4294967295] ┆ [[59.319, 18.0762]] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2026-02-16T14:15:04.174330112 ┆ 2026-02-16T14:15:04.174330112 ┆ 2026-02-16T14:15:04.174330112 ┆ [gps] ┆ [255] ┆ [[59.3189, 18.077]] │ │ +│ 
└────────────────────────────────┴───────────────────────────────┴────────────────────────────────┴───────────────────────────────┴────────────────────────────────┴─────────────────────────────┴────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_location_fixes__foxglove_location_fixes.snap b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_location_fixes__foxglove_location_fixes.snap new file mode 100644 index 000000000000..ef6a518a2960 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_location_fixes__foxglove_location_fixes.snap @@ -0,0 +1,32 @@ +--- +source: crates/store/re_importer/src/importer_mcap/tests/foxglove/test_location_fixes.rs +expression: "format!(\"{:-240}\", chunk)" +--- +┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /gps_fixes │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 
┌────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬─────────────────────────────┬────────────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ CoordinateFrame:frame ┆ GeoPoints:colors ┆ GeoPoints:positions │ │ +│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Utf8) ┆ type: List(UInt32) ┆ type: List(FixedSizeList(2 x │ │ +│ │ FixedSizeBinary(16) ┆ index_name: message_log_time ┆ index_name: ┆ index_name: timestamp ┆ archetype: CoordinateFrame ┆ archetype: GeoPoints ┆ non-null Float64)) │ │ +│ │ ARROW:extension:metadata: ┆ is_sorted: true ┆ message_publish_time ┆ is_sorted: true ┆ component: ┆ component: GeoPoints:colors ┆ archetype: GeoPoints │ │ +│ │ {"namespace":"row"} ┆ kind: index ┆ is_sorted: true ┆ kind: index ┆ CoordinateFrame:frame ┆ component_type: Color ┆ component: GeoPoints:positions │ │ +│ │ ARROW:extension:name: TUID ┆ ┆ kind: index ┆ ┆ component_type: ┆ kind: data ┆ component_type: LatLon │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ TransformFrameId ┆ ┆ kind: data │ │ +│ │ kind: control ┆ ┆ ┆ ┆ kind: data ┆ ┆ │ │ +│ ╞════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪═════════════════════════════╪════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 2026-02-16T14:15:00.174330112 ┆ 2026-02-16T14:15:00.174330112 ┆ 2026-02-16T14:15:00.174330112 ┆ [gps] ┆ [4278190335] ┆ [[59.3192, 18.0738]] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2026-02-16T14:15:00.174330112 ┆ 
2026-02-16T14:15:00.174330112 ┆ 2026-02-16T14:15:01.174330112 ┆ [gps] ┆ [16711935] ┆ [[59.3191, 18.0746]] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2026-02-16T14:15:00.174330112 ┆ 2026-02-16T14:15:00.174330112 ┆ 2026-02-16T14:15:02.174330112 ┆ [gps] ┆ [65535] ┆ [[59.3191, 18.0754]] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2026-02-16T14:15:00.174330112 ┆ 2026-02-16T14:15:00.174330112 ┆ 2026-02-16T14:15:03.174330112 ┆ [gps] ┆ [4294967295] ┆ [[59.319, 18.0762]] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2026-02-16T14:15:00.174330112 ┆ 2026-02-16T14:15:00.174330112 ┆ 2026-02-16T14:15:04.174330112 ┆ [gps] ┆ [255] ┆ [[59.3189, 18.077]] │ │ +│ └────────────────────────────────┴───────────────────────────────┴────────────────────────────────┴───────────────────────────────┴────────────────────────────────┴─────────────────────────────┴────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_log__foxglove_log.snap 
b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_log__foxglove_log.snap similarity index 67% rename from crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_log__foxglove_log.snap rename to crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_log__foxglove_log.snap index 683594a0d169..0481f302652a 100644 --- a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_log__foxglove_log.snap +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_log__foxglove_log.snap @@ -1,35 +1,35 @@ --- -source: crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_log.rs +source: crates/store/re_importer/src/importer_mcap/tests/foxglove/test_log.rs expression: "format!(\"{:-240}\", chunk)" --- -┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /text_log │ -│ * id: [**REDACTED**] │ -│ * version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌─────────────────────────────────────┬───────────────────────────────┬──────────────────────────────────┬───────────────────────────────┬────────────────────────────────────┬─────────────────────────────────────┐ │ -│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ TextLog:level ┆ TextLog:text │ │ -│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable Timestamp(ns) ┆ 
type: nullable Timestamp(ns) ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable Utf8] │ │ -│ │ ARROW:extension:metadata: ┆ index_name: message_log_time ┆ index_name: message_publish_time ┆ index_name: timestamp ┆ archetype: TextLog ┆ archetype: TextLog │ │ -│ │ {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ is_sorted: true ┆ component: TextLog:level ┆ component: TextLog:text │ │ -│ │ ARROW:extension:name: TUID ┆ kind: index ┆ kind: index ┆ kind: index ┆ component_type: TextLogLevel ┆ component_type: Text │ │ -│ │ is_sorted: true ┆ ┆ ┆ ┆ kind: data ┆ kind: data │ │ -│ │ kind: control ┆ ┆ ┆ ┆ ┆ │ │ -│ ╞═════════════════════════════════════╪═══════════════════════════════╪══════════════════════════════════╪═══════════════════════════════╪════════════════════════════════════╪═════════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ 2026-02-05T09:40:29.675813120 ┆ 2026-02-05T09:40:29.675813120 ┆ 2026-02-05T09:40:29.675813120 ┆ [null] ┆ [This message has log level │ │ -│ │ ┆ ┆ ┆ ┆ ┆ UNKNOWN] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 2026-02-05T09:40:30.175813120 ┆ 2026-02-05T09:40:30.175813120 ┆ 2026-02-05T09:40:30.175813120 ┆ [DEBUG] ┆ [This message has log level DEBUG] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 2026-02-05T09:40:30.675813120 ┆ 2026-02-05T09:40:30.675813120 ┆ 2026-02-05T09:40:30.675813120 ┆ [INFO] ┆ [This message has log level INFO] │ │ -│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 2026-02-05T09:40:31.175813120 ┆ 2026-02-05T09:40:31.175813120 ┆ 2026-02-05T09:40:31.175813120 ┆ [WARN] ┆ [This message has log level │ │ -│ │ ┆ ┆ ┆ ┆ ┆ WARNING] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 2026-02-05T09:40:31.675813120 ┆ 2026-02-05T09:40:31.675813120 ┆ 2026-02-05T09:40:31.675813120 ┆ [ERROR] ┆ [This message has log level ERROR] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 2026-02-05T09:40:32.175813120 ┆ 2026-02-05T09:40:32.175813120 ┆ 2026-02-05T09:40:32.175813120 ┆ [CRITICAL] ┆ [This message has log level FATAL] │ │ -│ └─────────────────────────────────────┴───────────────────────────────┴──────────────────────────────────┴───────────────────────────────┴────────────────────────────────────┴─────────────────────────────────────┘ │ -└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /text_log │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ 
+├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌─────────────────────────────────────┬───────────────────────────────┬──────────────────────────────────┬───────────────────────────────┬──────────────────────────────┬─────────────────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ TextLog:level ┆ TextLog:text │ │ +│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Utf8) ┆ type: List(Utf8) │ │ +│ │ ARROW:extension:metadata: ┆ index_name: message_log_time ┆ index_name: message_publish_time ┆ index_name: timestamp ┆ archetype: TextLog ┆ archetype: TextLog │ │ +│ │ {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ is_sorted: true ┆ component: TextLog:level ┆ component: TextLog:text │ │ +│ │ ARROW:extension:name: TUID ┆ kind: index ┆ kind: index ┆ kind: index ┆ component_type: TextLogLevel ┆ component_type: Text │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ kind: data ┆ kind: data │ │ +│ │ kind: control ┆ ┆ ┆ ┆ ┆ │ │ +│ ╞═════════════════════════════════════╪═══════════════════════════════╪══════════════════════════════════╪═══════════════════════════════╪══════════════════════════════╪═════════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 2026-02-05T09:40:29.675813120 ┆ 2026-02-05T09:40:29.675813120 ┆ 2026-02-05T09:40:29.675813120 ┆ [null] ┆ [This message has log level │ │ +│ │ ┆ ┆ ┆ ┆ ┆ UNKNOWN] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2026-02-05T09:40:30.175813120 ┆ 2026-02-05T09:40:30.175813120 ┆ 2026-02-05T09:40:30.175813120 ┆ [DEBUG] ┆ [This 
message has log level DEBUG] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2026-02-05T09:40:30.675813120 ┆ 2026-02-05T09:40:30.675813120 ┆ 2026-02-05T09:40:30.675813120 ┆ [INFO] ┆ [This message has log level INFO] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2026-02-05T09:40:31.175813120 ┆ 2026-02-05T09:40:31.175813120 ┆ 2026-02-05T09:40:31.175813120 ┆ [WARN] ┆ [This message has log level │ │ +│ │ ┆ ┆ ┆ ┆ ┆ WARNING] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2026-02-05T09:40:31.675813120 ┆ 2026-02-05T09:40:31.675813120 ┆ 2026-02-05T09:40:31.675813120 ┆ [ERROR] ┆ [This message has log level ERROR] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2026-02-05T09:40:32.175813120 ┆ 2026-02-05T09:40:32.175813120 ┆ 2026-02-05T09:40:32.175813120 ┆ [CRITICAL] ┆ [This message has log level FATAL] │ │ +│ └─────────────────────────────────────┴───────────────────────────────┴──────────────────────────────────┴───────────────────────────────┴──────────────────────────────┴─────────────────────────────────────┘ │ 
+└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_point_cloud__foxglove_point_cloud.snap b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_point_cloud__foxglove_point_cloud.snap new file mode 100644 index 000000000000..39db2e5dadbc --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_point_cloud__foxglove_point_cloud.snap @@ -0,0 +1,33 @@ +--- +source: crates/store/re_importer/src/importer_mcap/tests/foxglove/test_point_cloud.rs +assertion_line: 10 +expression: "format!(\"{:-240}\", chunk)" +--- +┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /point_cloud │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌─────────────────────────┬─────────────────────────┬─────────────────────────┬─────────────────────────┬─────────────────────────┬─────────────────────────┬─────────────────────────┬─────────────────────────┬─────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ CoordinateFrame:frame ┆ InstancePoses3D:quatern ┆ InstancePoses3D:transla ┆ Points3D:colors ┆ Points3D:positions │ │ +│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ 
ions ┆ tions ┆ --- ┆ --- │ │ +│ │ type: non-null ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Utf8) ┆ --- ┆ --- ┆ type: List(UInt32) ┆ type: │ │ +│ │ FixedSizeBinary(16) ┆ index_name: ┆ index_name: ┆ index_name: timestamp ┆ archetype: ┆ type: ┆ type: ┆ archetype: Points3D ┆ List(FixedSizeList(3 x │ │ +│ │ ARROW:extension:metadat ┆ message_log_time ┆ message_publish_time ┆ is_sorted: true ┆ CoordinateFrame ┆ List(FixedSizeList(4 x ┆ List(FixedSizeList(3 x ┆ component: ┆ non-null Float32)) │ │ +│ │ a: {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ kind: index ┆ component: ┆ Float32)) ┆ Float32)) ┆ Points3D:colors ┆ archetype: Points3D │ │ +│ │ ARROW:extension:name: ┆ kind: index ┆ kind: index ┆ ┆ CoordinateFrame:frame ┆ archetype: ┆ archetype: ┆ component_type: Color ┆ component: │ │ +│ │ TUID ┆ ┆ ┆ ┆ component_type: ┆ InstancePoses3D ┆ InstancePoses3D ┆ kind: data ┆ Points3D:positions │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ TransformFrameId ┆ component: InstancePose ┆ component: InstancePose ┆ ┆ component_type: │ │ +│ │ kind: control ┆ ┆ ┆ ┆ kind: data ┆ s3D:quaternions ┆ s3D:translations ┆ ┆ Position3D │ │ +│ │ ┆ ┆ ┆ ┆ ┆ component_type: ┆ component_type: ┆ ┆ kind: data │ │ +│ │ ┆ ┆ ┆ ┆ ┆ RotationQuat ┆ Translation3D ┆ ┆ │ │ +│ │ ┆ ┆ ┆ ┆ ┆ kind: data ┆ kind: data ┆ ┆ │ │ +│ ╞═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 2026-02-17T14:13:33.852 ┆ 2026-02-17T14:13:33.852 ┆ 2026-02-17T14:13:33.852 ┆ [world] ┆ null ┆ null ┆ [4278190335, 16711935, ┆ [[0.0, 0.0, 0.0], [1.0, │ │ +│ │ ┆ 164096 ┆ 164096 ┆ 164096 ┆ ┆ ┆ ┆ 65535, 4294902015, ┆ 0.0, 0.0], [1.0, 1.0, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 4278255615, 16777215, ┆ 0.0], [0.0, 1.0, 0.0], │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 4294967295, 4286578943] ┆ [0.0, 0.0, 1.0], [1.0, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 
┆ 0.0, 1.… │ │ +│ └─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_point_cloud__foxglove_point_cloud_with_pose.snap b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_point_cloud__foxglove_point_cloud_with_pose.snap new file mode 100644 index 000000000000..dee5141ef08c --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_point_cloud__foxglove_point_cloud_with_pose.snap @@ -0,0 +1,33 @@ +--- +source: crates/store/re_importer/src/importer_mcap/tests/foxglove/test_point_cloud.rs +assertion_line: 18 +expression: "format!(\"{:-240}\", chunk)" +--- +┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /point_cloud_with_pose │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 
┌─────────────────────────┬─────────────────────────┬─────────────────────────┬─────────────────────────┬─────────────────────────┬─────────────────────────┬─────────────────────────┬─────────────────────────┬─────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ CoordinateFrame:frame ┆ InstancePoses3D:quatern ┆ InstancePoses3D:transla ┆ Points3D:colors ┆ Points3D:positions │ │ +│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ions ┆ tions ┆ --- ┆ --- │ │ +│ │ type: non-null ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Utf8) ┆ --- ┆ --- ┆ type: List(UInt32) ┆ type: │ │ +│ │ FixedSizeBinary(16) ┆ index_name: ┆ index_name: ┆ index_name: timestamp ┆ archetype: ┆ type: ┆ type: ┆ archetype: Points3D ┆ List(FixedSizeList(3 x │ │ +│ │ ARROW:extension:metadat ┆ message_log_time ┆ message_publish_time ┆ is_sorted: true ┆ CoordinateFrame ┆ List(FixedSizeList(4 x ┆ List(FixedSizeList(3 x ┆ component: ┆ non-null Float32)) │ │ +│ │ a: {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ kind: index ┆ component: ┆ non-null Float32)) ┆ non-null Float32)) ┆ Points3D:colors ┆ archetype: Points3D │ │ +│ │ ARROW:extension:name: ┆ kind: index ┆ kind: index ┆ ┆ CoordinateFrame:frame ┆ archetype: ┆ archetype: ┆ component_type: Color ┆ component: │ │ +│ │ TUID ┆ ┆ ┆ ┆ component_type: ┆ InstancePoses3D ┆ InstancePoses3D ┆ kind: data ┆ Points3D:positions │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ TransformFrameId ┆ component: InstancePose ┆ component: InstancePose ┆ ┆ component_type: │ │ +│ │ kind: control ┆ ┆ ┆ ┆ kind: data ┆ s3D:quaternions ┆ s3D:translations ┆ ┆ Position3D │ │ +│ │ ┆ ┆ ┆ ┆ ┆ component_type: ┆ component_type: ┆ ┆ kind: data │ │ +│ │ ┆ ┆ ┆ ┆ ┆ RotationQuat ┆ Translation3D ┆ ┆ │ │ +│ │ ┆ ┆ ┆ ┆ ┆ kind: data ┆ kind: data ┆ ┆ │ │ +│ 
╞═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 2026-02-17T14:13:33.852 ┆ 2026-02-17T14:13:33.852 ┆ 2026-02-17T14:13:33.852 ┆ [world] ┆ [[0.0, 0.0, 0.38268343, ┆ [[4.0, 0.0, 0.0]] ┆ [4278190335, 16711935, ┆ [[0.0, 0.0, 0.0], [1.0, │ │ +│ │ ┆ 164096 ┆ 164096 ┆ 164096 ┆ ┆ 0.9238795]] ┆ ┆ 65535, 4294902015, ┆ 0.0, 0.0], [1.0, 1.0, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 4278255615, 16777215, ┆ 0.0], [0.0, 1.0, 0.0], │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 4294967295, 4286578943] ┆ [0.0, 0.0, 1.0], [1.0, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.0, 1.… │ │ +│ └─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_poses_in_frame__foxglove_pose_in_frame.snap b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_poses_in_frame__foxglove_pose_in_frame.snap new file mode 100644 index 000000000000..f9d55c9e17ca --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_poses_in_frame__foxglove_pose_in_frame.snap @@ -0,0 +1,25 @@ +--- +source: crates/store/re_importer/src/importer_mcap/tests/foxglove/test_poses_in_frame.rs +expression: "format!(\"{:-240}\", chunk)" +--- 
+┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /pose_in_frame │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬────────────────────────────────┬────────────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ CoordinateFrame:frame ┆ InstancePoses3D:quaternions ┆ InstancePoses3D:translations │ │ +│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Utf8) ┆ type: List(FixedSizeList(4 x ┆ type: List(FixedSizeList(3 x │ │ +│ │ FixedSizeBinary(16) ┆ index_name: message_log_time ┆ index_name: ┆ index_name: timestamp ┆ archetype: CoordinateFrame ┆ non-null Float32)) ┆ non-null Float32)) │ │ +│ │ ARROW:extension:metadata: ┆ is_sorted: true ┆ message_publish_time ┆ is_sorted: true ┆ component: ┆ archetype: InstancePoses3D ┆ archetype: InstancePoses3D │ │ +│ │ {"namespace":"row"} ┆ kind: index ┆ is_sorted: true ┆ kind: index ┆ CoordinateFrame:frame ┆ component: ┆ component: │ │ +│ │ ARROW:extension:name: TUID ┆ ┆ kind: index ┆ ┆ component_type: ┆ InstancePoses3D:quaternions ┆ InstancePoses3D:translations │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ TransformFrameId ┆ component_type: RotationQuat ┆ component_type: Translation3D │ │ +│ │ kind: control ┆ ┆ ┆ ┆ kind: data ┆ kind: data ┆ kind: data │ │ +│ 
╞════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪════════════════════════════════╪════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 2026-04-02T13:40:27.787987968 ┆ 2026-04-02T13:40:27.787987968 ┆ 2026-04-02T13:40:27.787987968 ┆ [map] ┆ [[0.0, 0.0, 0.25881904, ┆ [[1.0, 2.0, 0.5]] │ │ +│ │ ┆ ┆ ┆ ┆ ┆ 0.9659258]] ┆ │ │ +│ └────────────────────────────────┴───────────────────────────────┴────────────────────────────────┴───────────────────────────────┴────────────────────────────────┴────────────────────────────────┴────────────────────────────────┘ │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_poses_in_frame__foxglove_poses_in_frame.snap b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_poses_in_frame__foxglove_poses_in_frame.snap new file mode 100644 index 000000000000..ba6c13fab9be --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_poses_in_frame__foxglove_poses_in_frame.snap @@ -0,0 +1,27 @@ +--- +source: crates/store/re_importer/src/importer_mcap/tests/foxglove/test_poses_in_frame.rs +expression: "format!(\"{:-240}\", chunk)" +--- +┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /poses_in_frame │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ 
+├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬────────────────────────────────┬────────────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ CoordinateFrame:frame ┆ InstancePoses3D:quaternions ┆ InstancePoses3D:translations │ │ +│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Utf8) ┆ type: List(FixedSizeList(4 x ┆ type: List(FixedSizeList(3 x │ │ +│ │ FixedSizeBinary(16) ┆ index_name: message_log_time ┆ index_name: ┆ index_name: timestamp ┆ archetype: CoordinateFrame ┆ non-null Float32)) ┆ non-null Float32)) │ │ +│ │ ARROW:extension:metadata: ┆ is_sorted: true ┆ message_publish_time ┆ is_sorted: true ┆ component: ┆ archetype: InstancePoses3D ┆ archetype: InstancePoses3D │ │ +│ │ {"namespace":"row"} ┆ kind: index ┆ is_sorted: true ┆ kind: index ┆ CoordinateFrame:frame ┆ component: ┆ component: │ │ +│ │ ARROW:extension:name: TUID ┆ ┆ kind: index ┆ ┆ component_type: ┆ InstancePoses3D:quaternions ┆ InstancePoses3D:translations │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ TransformFrameId ┆ component_type: RotationQuat ┆ component_type: Translation3D │ │ +│ │ kind: control ┆ ┆ ┆ ┆ kind: data ┆ kind: data ┆ kind: data │ │ +│ ╞════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪════════════════════════════════╪════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 2026-04-02T13:40:27.888009728 ┆ 2026-04-02T13:40:27.888009728 ┆ 2026-04-02T13:40:27.888009728 ┆ [map] ┆ [[0.0, 0.0, 
0.0, 1.0], [0.0, ┆ [[0.0, 0.0, 0.0], [1.0, 0.5, │ │ +│ │ ┆ ┆ ┆ ┆ ┆ 0.0, 0.38268343, 0.9238795], ┆ 0.0], [2.0, 1.0, 0.0]] │ │ +│ │ ┆ ┆ ┆ ┆ ┆ [0.0, 0.0, 0.70710677, ┆ │ │ +│ │ ┆ ┆ ┆ ┆ ┆ 0.70710677]] ┆ │ │ +│ └────────────────────────────────┴───────────────────────────────┴────────────────────────────────┴───────────────────────────────┴────────────────────────────────┴────────────────────────────────┴────────────────────────────────┘ │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_raw_image__foxglove_raw_image.snap b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_raw_image__foxglove_raw_image.snap similarity index 80% rename from crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_raw_image__foxglove_raw_image.snap rename to crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_raw_image__foxglove_raw_image.snap index 05c22eeb6f5d..bd7ec111d8ef 100644 --- a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/snapshots/re_data_loader__loader_mcap__tests__foxglove__test_raw_image__foxglove_raw_image.snap +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/snapshots/re_importer__importer_mcap__tests__foxglove__test_raw_image__foxglove_raw_image.snap @@ -1,5 +1,5 @@ --- -source: crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_raw_image.rs +source: crates/store/re_importer/src/importer_mcap/tests/foxglove/test_raw_image.rs expression: "format!(\"{:-240}\", chunk)" --- 
┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ @@ -11,14 +11,16 @@ expression: "format!(\"{:-240}\", chunk)" │ ┌────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬───────────────────────────────┬────────────────────────────────┬────────────────────────────────┬────────────────────────────────┐ │ │ │ RowId ┆ message_log_time ┆ message_publish_time ┆ timestamp ┆ CoordinateFrame:frame ┆ Image:buffer ┆ Image:format │ │ │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable Timestamp(ns) ┆ type: nullable Timestamp(ns) ┆ type: nullable Timestamp(ns) ┆ type: nullable List[nullable ┆ type: nullable List[nullable ┆ type: nullable List[nullable │ │ -│ │ ARROW:extension:metadata: ┆ index_name: message_log_time ┆ index_name: ┆ index_name: timestamp ┆ Utf8] ┆ List[u8]] ┆ Struct[5]] │ │ -│ │ {"namespace":"row"} ┆ is_sorted: true ┆ message_publish_time ┆ is_sorted: true ┆ archetype: CoordinateFrame ┆ archetype: Image ┆ archetype: Image │ │ -│ │ ARROW:extension:name: TUID ┆ kind: index ┆ is_sorted: true ┆ kind: index ┆ component: ┆ component: Image:buffer ┆ component: Image:format │ │ -│ │ is_sorted: true ┆ ┆ kind: index ┆ ┆ CoordinateFrame:frame ┆ component_type: ImageBuffer ┆ component_type: ImageFormat │ │ -│ │ kind: control ┆ ┆ ┆ ┆ component_type: ┆ kind: data ┆ kind: data │ │ -│ │ ┆ ┆ ┆ ┆ TransformFrameId ┆ ┆ │ │ -│ │ ┆ ┆ ┆ ┆ kind: data ┆ ┆ │ │ +│ │ type: non-null ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Utf8) ┆ type: List(List(non-null ┆ type: List(Struct("width": │ │ +│ │ FixedSizeBinary(16) ┆ index_name: message_log_time ┆ index_name: ┆ index_name: timestamp ┆ archetype: CoordinateFrame ┆ UInt8)) ┆ non-null UInt32, "height": │ │ +│ │ ARROW:extension:metadata: ┆ 
is_sorted: true ┆ message_publish_time ┆ is_sorted: true ┆ component: ┆ archetype: Image ┆ non-null UInt32, │ │ +│ │ {"namespace":"row"} ┆ kind: index ┆ is_sorted: true ┆ kind: index ┆ CoordinateFrame:frame ┆ component: Image:buffer ┆ "pixel_format": UInt8, │ │ +│ │ ARROW:extension:name: TUID ┆ ┆ kind: index ┆ ┆ component_type: ┆ component_type: ImageBuffer ┆ "color_model": UInt8, │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ TransformFrameId ┆ kind: data ┆ "channel_datatype": UInt8)) │ │ +│ │ kind: control ┆ ┆ ┆ ┆ kind: data ┆ ┆ archetype: Image │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ component: Image:format │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ component_type: ImageFormat │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ kind: data │ │ │ ╞════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪═══════════════════════════════╪════════════════════════════════╪════════════════════════════════╪════════════════════════════════╡ │ │ │ row_[**REDACTED**] ┆ 2026-02-11T11:30:54.931614976 ┆ 2026-02-11T11:30:54.931614976 ┆ 2026-02-11T11:30:54.931614976 ┆ [camera_frame_image_plane] ┆ [[255, 0, 0, 255, 127, 0, 255, ┆ [{width: 10, height: 10, │ │ │ │ ┆ ┆ ┆ ┆ ┆ 255, 0, 0, 255, 0, 0, 0, 255, ┆ pixel_format: null, │ │ diff --git a/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_camera_calibration.rs b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_camera_calibration.rs new file mode 100644 index 000000000000..d8f90c875d19 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_camera_calibration.rs @@ -0,0 +1,11 @@ +//! Snapshot test loading an MCAP file containing [`foxglove.CameraCalibration`] messages. + +use crate::importer_mcap::tests::util; + +#[test] +fn test_foxglove_camera_calibration() { + let loaded_mcap = util::load_mcap(util::test_asset("foxglove_camera_calibration.mcap")); + // Only snapshot the chunk with the payload, not the metadata chunk. 
+ let chunk = loaded_mcap.chunks_for_entity("/camera/calibration")[1]; + insta::assert_snapshot!(format!("{:-240}", chunk)); +} diff --git a/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_compressed_image.rs b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_compressed_image.rs new file mode 100644 index 000000000000..11deef3172bc --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_compressed_image.rs @@ -0,0 +1,11 @@ +//! Snapshot test loading an MCAP file containing [`foxglove.CompressedImage`] messages. + +use crate::importer_mcap::tests::util; + +#[test] +fn test_foxglove_compressed_image() { + let loaded_mcap = util::load_mcap(util::test_asset("foxglove_compressed_image.mcap")); + // Only snapshot the chunk with the payload, not the metadata chunk. + let chunk = loaded_mcap.chunks_for_entity("/camera/compressed_image")[1]; + insta::assert_snapshot!(format!("{:-240}", chunk)); +} diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_compressed_video.rs b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_compressed_video.rs similarity index 91% rename from crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_compressed_video.rs rename to crates/store/re_importer/src/importer_mcap/tests/foxglove/test_compressed_video.rs index b65d342f72d1..19f3e6ac2351 100644 --- a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_compressed_video.rs +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_compressed_video.rs @@ -1,4 +1,4 @@ -use crate::loader_mcap::tests::util; +use crate::importer_mcap::tests::util; /// Snapshot test loading an MCAP file containing [`foxglove.CompressedVideo`] messages. 
#[test] diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_frame_transforms.rs b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_frame_transforms.rs similarity index 95% rename from crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_frame_transforms.rs rename to crates/store/re_importer/src/importer_mcap/tests/foxglove/test_frame_transforms.rs index 132c9ed1913c..6a4a34acddbb 100644 --- a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_frame_transforms.rs +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_frame_transforms.rs @@ -1,6 +1,6 @@ //! Snapshot test loading an MCAP file containing [`foxglove.FrameTransform`] and [`foxglove.FrameTransforms`] messages. -use crate::loader_mcap::tests::util; +use crate::importer_mcap::tests::util; #[test] fn test_foxglove_frame_transform() { diff --git a/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_location_fixes.rs b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_location_fixes.rs new file mode 100644 index 000000000000..70440144fdc2 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_location_fixes.rs @@ -0,0 +1,19 @@ +//! Snapshot test loading an MCAP file containing [`foxglove.LocationFix`] and [`foxglove.LocationFixes`] messages. + +use crate::importer_mcap::tests::util; + +#[test] +fn test_foxglove_location_fix() { + let loaded_mcap = util::load_mcap(util::test_asset("foxglove_location_fixes.mcap")); + // Only snapshot the chunk with the payload, not the metadata chunk. + let chunk = loaded_mcap.chunks_for_entity("/gps_fix")[1]; + insta::assert_snapshot!(format!("{:-240}", chunk)); +} + +#[test] +fn test_foxglove_location_fixes() { + let loaded_mcap = util::load_mcap(util::test_asset("foxglove_location_fixes.mcap")); + // Only snapshot the chunk with the payload, not the metadata chunk. 
+ let chunk = loaded_mcap.chunks_for_entity("/gps_fixes")[1]; + insta::assert_snapshot!(format!("{:-240}", chunk)); +} diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_log.rs b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_log.rs similarity index 90% rename from crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_log.rs rename to crates/store/re_importer/src/importer_mcap/tests/foxglove/test_log.rs index fb15d4ecf10a..0594e1c163a1 100644 --- a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_log.rs +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_log.rs @@ -1,4 +1,4 @@ -use crate::loader_mcap::tests::util; +use crate::importer_mcap::tests::util; /// Snapshot test loading an MCAP file containing [`foxglove.TextLog`] messages. #[test] diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_point_cloud.rs b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_point_cloud.rs similarity index 51% rename from crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_point_cloud.rs rename to crates/store/re_importer/src/importer_mcap/tests/foxglove/test_point_cloud.rs index 429c4d258824..b128217919f7 100644 --- a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_point_cloud.rs +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_point_cloud.rs @@ -1,6 +1,6 @@ //! Snapshot test loading an MCAP file containing [`foxglove.PointCloud`] messages. -use crate::loader_mcap::tests::util; +use crate::importer_mcap::tests::util; #[test] fn test_foxglove_point_cloud() { @@ -10,4 +10,10 @@ fn test_foxglove_point_cloud() { insta::assert_snapshot!(format!("{:-240}", chunk)); } -// TODO(michael): add also a test for the /point_cloud_with_pose channel (relates to RR-3766). 
+#[test] +fn test_foxglove_point_cloud_with_pose() { + let loaded_mcap = util::load_mcap(util::test_asset("foxglove_point_cloud.mcap")); + // Only snapshot the chunk with the payload, not the metadata chunk. + let chunk = loaded_mcap.chunks_for_entity("/point_cloud_with_pose")[1]; + insta::assert_snapshot!(format!("{:-240}", chunk)); +} diff --git a/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_poses_in_frame.rs b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_poses_in_frame.rs new file mode 100644 index 000000000000..e3dd23076749 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_poses_in_frame.rs @@ -0,0 +1,19 @@ +//! Snapshot test loading an MCAP file containing [`foxglove.PoseInFrame`] and [`foxglove.PosesInFrame`] messages. + +use crate::importer_mcap::tests::util; + +#[test] +fn test_foxglove_pose_in_frame() { + let loaded_mcap = util::load_mcap(util::test_asset("foxglove_poses_in_frame.mcap")); + // Only snapshot the chunk with the payload, not the metadata chunk. + let chunk = loaded_mcap.chunks_for_entity("/pose_in_frame")[1]; + insta::assert_snapshot!(format!("{:-240}", chunk)); +} + +#[test] +fn test_foxglove_poses_in_frame() { + let loaded_mcap = util::load_mcap(util::test_asset("foxglove_poses_in_frame.mcap")); + // Only snapshot the chunk with the payload, not the metadata chunk. 
+ let chunk = loaded_mcap.chunks_for_entity("/poses_in_frame")[1]; + insta::assert_snapshot!(format!("{:-240}", chunk)); +} diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_raw_image.rs b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_raw_image.rs similarity index 91% rename from crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_raw_image.rs rename to crates/store/re_importer/src/importer_mcap/tests/foxglove/test_raw_image.rs index f67879d4415f..dd336784d30e 100644 --- a/crates/store/re_data_loader/src/loader_mcap/tests/foxglove/test_raw_image.rs +++ b/crates/store/re_importer/src/importer_mcap/tests/foxglove/test_raw_image.rs @@ -1,4 +1,4 @@ -use crate::loader_mcap::tests::util; +use crate::importer_mcap::tests::util; /// Snapshot test loading an MCAP file containing [`foxglove.RawImage`] messages. #[test] diff --git a/crates/store/re_importer/src/importer_mcap/tests/mod.rs b/crates/store/re_importer/src/importer_mcap/tests/mod.rs new file mode 100644 index 000000000000..8e56cd2ea27b --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/mod.rs @@ -0,0 +1,6 @@ +//! Test utilities for MCAP importer. + +mod foxglove; +mod ros2msg; + +pub mod util; diff --git a/crates/store/re_importer/src/importer_mcap/tests/ros2msg/mod.rs b/crates/store/re_importer/src/importer_mcap/tests/ros2msg/mod.rs new file mode 100644 index 000000000000..2641256dce55 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/ros2msg/mod.rs @@ -0,0 +1,3 @@ +//! Tests for ROS 2 message lenses. 
+ +mod test_occupancy_grid; diff --git a/crates/store/re_importer/src/importer_mcap/tests/ros2msg/snapshots/re_importer__importer_mcap__tests__ros2msg__test_occupancy_grid__occupancy_grid.snap b/crates/store/re_importer/src/importer_mcap/tests/ros2msg/snapshots/re_importer__importer_mcap__tests__ros2msg__test_occupancy_grid__occupancy_grid.snap new file mode 100644 index 000000000000..0caa59e7b1e9 --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/ros2msg/snapshots/re_importer__importer_mcap__tests__ros2msg__test_occupancy_grid__occupancy_grid.snap @@ -0,0 +1,38 @@ +--- +source: crates/store/re_importer/src/importer_mcap/tests/ros2msg/test_occupancy_grid.rs +expression: "format!(\"{:-240}\", chunk)" +--- +┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /map │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_ti ┆ ros2_timestamp ┆ CoordinateFrame:fr ┆ GridMap:cell_size ┆ GridMap:colormap ┆ GridMap:data ┆ GridMap:format ┆ GridMap:quaternion ┆ GridMap:translatio │ │ +│ │ --- ┆ --- ┆ me ┆ --- ┆ ame ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ n │ │ +│ │ type: non-null Fix ┆ type: ┆ --- ┆ type: ┆ --- ┆ type: ┆ type: List(UInt8) ┆ type: ┆ type: List(Struct( ┆ type: List(FixedSi ┆ --- │ │ +│ │ edSizeBinary(16) ┆ Timestamp(ns) ┆ 
type: ┆ Timestamp(ns) ┆ type: List(Utf8) ┆ List(Float32) ┆ archetype: GridMap ┆ List(List(UInt8)) ┆ "width": non-null ┆ zeList(4 x ┆ type: List(FixedSi │ │ +│ │ ARROW:extension:me ┆ index_name: ┆ Timestamp(ns) ┆ index_name: ┆ archetype: ┆ archetype: GridMap ┆ component: ┆ archetype: GridMap ┆ UInt32, "height": ┆ non-null Float32)) ┆ zeList(3 x │ │ +│ │ tadata: {"namespac ┆ message_log_time ┆ index_name: messag ┆ ros2_timestamp ┆ CoordinateFrame ┆ component: ┆ GridMap:colormap ┆ component: ┆ non-null UInt32, ┆ archetype: GridMap ┆ non-null Float32)) │ │ +│ │ e":"row"} ┆ is_sorted: true ┆ e_publish_time ┆ is_sorted: true ┆ component: Coordin ┆ GridMap:cell_size ┆ component_type: ┆ GridMap:data ┆ "pixel_format": ┆ component: ┆ archetype: GridMap │ │ +│ │ ARROW:extension:na ┆ kind: index ┆ is_sorted: true ┆ kind: index ┆ ateFrame:frame ┆ component_type: ┆ Colormap ┆ component_type: ┆ UInt8, ┆ GridMap:quaternion ┆ component: GridMap │ │ +│ │ me: TUID ┆ ┆ kind: index ┆ ┆ component_type: ┆ CellSize ┆ kind: data ┆ ImageBuffer ┆ "color_model": ┆ component_type: ┆ :translation │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ TransformFrameId ┆ kind: data ┆ ┆ kind: data ┆ UInt8, "channel_da ┆ RotationQuat ┆ component_type: │ │ +│ │ kind: control ┆ ┆ ┆ ┆ kind: data ┆ ┆ ┆ ┆ tatype": UInt8)) ┆ kind: data ┆ Translation3D │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ archetype: GridMap ┆ ┆ kind: data │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ component: ┆ ┆ │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ GridMap:format ┆ ┆ │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ component_type: ┆ ┆ │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ImageFormat ┆ ┆ │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ kind: data ┆ ┆ │ │ +│ ╞════════════════════╪════════════════════╪════════════════════╪════════════════════╪════════════════════╪════════════════════╪════════════════════╪════════════════════╪════════════════════╪════════════════════╪════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:0 ┆ 1970-01-01T00:00:0 ┆ 1970-01-01T00:00:0 ┆ [map] ┆ [0.5] ┆ [10] ┆ [[255, 0, 100, 0, ┆ [{width: 5, ┆ [[0.0, 0.0, 0.0, ┆ [[-1.0, 
-1.0, │ │ +│ │ ┆ 0 ┆ 0 ┆ 0 ┆ ┆ ┆ ┆ 255, 0, 100, 255, ┆ height: 5, ┆ 1.0]] ┆ 0.0]] │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 100, 0, 100, 255, ┆ pixel_format: ┆ ┆ │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0, 255, 100, 0, ┆ null, color_model: ┆ ┆ │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 100, 255, 100, 0, ┆ 1, ┆ ┆ │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 127, 0, 100… ┆ channel_datatype: ┆ ┆ │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 6}] ┆ ┆ │ │ +│ └────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┘ │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_importer/src/importer_mcap/tests/ros2msg/test_occupancy_grid.rs b/crates/store/re_importer/src/importer_mcap/tests/ros2msg/test_occupancy_grid.rs new file mode 100644 index 000000000000..601778199f8d --- /dev/null +++ b/crates/store/re_importer/src/importer_mcap/tests/ros2msg/test_occupancy_grid.rs @@ -0,0 +1,11 @@ +use crate::importer_mcap::tests::util; + +/// Snapshot test loading an MCAP file containing [`nav_msgs/OccupancyGrid`] messages. +#[test] +fn test_occupancy_grid() { + let loaded_mcap = util::load_mcap(util::test_asset("ros_occupancy_grid.mcap")); + + // Only snapshot the chunk with the payload, not the metadata chunk. 
+ let chunk = loaded_mcap.chunks_for_entity("/map")[1]; + insta::assert_snapshot!(format!("{:-240}", chunk)); +} diff --git a/crates/store/re_data_loader/src/loader_mcap/tests/util.rs b/crates/store/re_importer/src/importer_mcap/tests/util.rs similarity index 55% rename from crates/store/re_data_loader/src/loader_mcap/tests/util.rs rename to crates/store/re_importer/src/importer_mcap/tests/util.rs index b7638c8d00e9..fbdcbbf6e60d 100644 --- a/crates/store/re_data_loader/src/loader_mcap/tests/util.rs +++ b/crates/store/re_importer/src/importer_mcap/tests/util.rs @@ -1,44 +1,42 @@ -//! Test utilities for MCAP data loader snapshot testing. +//! Test utilities for MCAP importer snapshot testing. use std::path::{Path, PathBuf}; use re_chunk::{Chunk, EntityPath}; -use crate::loader_mcap::McapLoader; -use crate::{DataLoader as _, DataLoaderSettings, LoadedData}; +use crate::importer_mcap::McapImporter; +use crate::{ImportedData, Importer as _, ImporterSettings}; // Helper function to get the path to a test asset file. pub fn test_asset(name: &str) -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("src/loader_mcap/tests/assets") + .join("src/importer_mcap/tests/assets") .join(name) } -/// Loads an MCAP file using the default loader configuration for testing purposes. +/// Loads an MCAP file using the default importer configuration for testing purposes. 
pub fn load_mcap(path: impl AsRef) -> LoadedMcap { let path = path.as_ref(); - let loader = McapLoader::default(); + let importer = McapImporter::default(); let (tx, rx) = crossbeam::channel::bounded(1024); - let settings = DataLoaderSettings::recommended("test"); + let settings = ImporterSettings::recommended("test"); - loader - .load_from_path(&settings, path.to_path_buf(), tx) + importer + .import_from_path(&settings, path.to_path_buf(), tx) .unwrap_or_else(|err| { panic!("Failed to load MCAP file at {}: {err}", path.display()); }); - let chunks = rx - .iter() - .filter_map(|res| { - if let LoadedData::Chunk(_, _, chunk) = res { - Some(chunk) - } else { - None - } - }) - .collect(); + let chunks: Vec = rx.iter().filter_map(ImportedData::into_chunk).collect(); + + if 25_000 < chunks.len() { + re_log::warn!( + "MCAP file contained {} chunks. Consider running `rerun rrd optimize` on the output.", + re_format::format_uint(chunks.len()), + ); + } LoadedMcap { chunks } } diff --git a/crates/store/re_importer/src/importer_parquet.rs b/crates/store/re_importer/src/importer_parquet.rs new file mode 100644 index 000000000000..39755cd8301c --- /dev/null +++ b/crates/store/re_importer/src/importer_parquet.rs @@ -0,0 +1,203 @@ +//! Thin adapter that wraps [`re_parquet`] as an [`Importer`]. + +use crossbeam::channel::Sender; +use re_log_types::StoreId; +use re_quota_channel::send_crossbeam; + +use crate::{ImportedData, Importer, ImporterError, ImporterSettings}; + +const PARQUET_IMPORTER_NAME: &str = "rerun.importers.Parquet"; + +/// An [`Importer`] for generic Parquet files. +/// +/// Delegates to [`re_parquet`] for the actual loading logic. 
+#[derive(Default)] +pub struct ParquetImporter { + pub config: re_parquet::ParquetConfig, +} + +impl Importer for ParquetImporter { + fn name(&self) -> crate::ImporterName { + PARQUET_IMPORTER_NAME.into() + } + + fn import_from_path( + &self, + settings: &ImporterSettings, + path: std::path::PathBuf, + tx: Sender, + ) -> Result<(), ImporterError> { + if !path.is_file() || !has_parquet_extension(&path) { + return Err(ImporterError::Incompatible(path)); + } + + re_tracing::profile_function!(); + + let config = self.config.clone(); + let prefix = settings + .entity_path_prefix + .clone() + .unwrap_or_else(re_parquet::ParquetConfig::default_entity_path_prefix); + let store_id = settings.opened_store_id_or_recommended(); + + std::thread::Builder::new() + .name(format!("load_parquet({path:?})")) + .spawn( + move || match re_parquet::load_parquet(&path, &config, &prefix) { + Ok(chunks) => forward_chunks(chunks, &tx, &store_id), + Err(err) => re_log::error!("Failed to load Parquet: {err}"), + }, + ) + .map_err(|err| ImporterError::Other(err.into()))?; + + Ok(()) + } + + fn import_from_file_contents( + &self, + settings: &ImporterSettings, + filepath: std::path::PathBuf, + contents: std::borrow::Cow<'_, [u8]>, + tx: Sender, + ) -> Result<(), ImporterError> { + if !has_parquet_extension(&filepath) { + return Err(ImporterError::Incompatible(filepath)); + } + + re_tracing::profile_function!(); + + let contents = contents.into_owned(); + let config = self.config.clone(); + let prefix = settings + .entity_path_prefix + .clone() + .unwrap_or_else(re_parquet::ParquetConfig::default_entity_path_prefix); + let store_id = settings.opened_store_id_or_recommended(); + + std::thread::Builder::new() + .name(format!("load_parquet({filepath:?})")) + .spawn( + move || match re_parquet::load_parquet_from_bytes(&contents, &config, &prefix) { + Ok(chunks) => forward_chunks(chunks, &tx, &store_id), + Err(err) => re_log::error!("Failed to load Parquet: {err}"), + }, + ) + .map_err(|err| 
ImporterError::Other(err.into()))?; + + Ok(()) + } +} + +/// Forward chunks from a [`re_parquet`] iterator to the [`Importer`] channel. +/// +/// Sends a `SetStoreInfo` message first (consistent with other importers), +/// then wraps each chunk in [`ImportedData::Chunk`] and sends via `send_crossbeam`. +fn forward_chunks( + chunks: impl Iterator>, + tx: &Sender, + store_id: &StoreId, +) { + let store_info_msg = crate::prepare_store_info(store_id, re_log_types::FileSource::Sdk); + if send_crossbeam( + tx, + ImportedData::LogMsg(PARQUET_IMPORTER_NAME.to_owned(), store_info_msg), + ) + .is_err() + { + return; + } + + for chunk_result in chunks { + match chunk_result { + Ok(chunk) => { + if send_crossbeam( + tx, + ImportedData::Chunk(PARQUET_IMPORTER_NAME.to_owned(), store_id.clone(), chunk), + ) + .is_err() + { + break; + } + } + Err(err) => { + re_log::error!("Parquet error: {err}"); + } + } + } +} + +fn has_parquet_extension(path: &std::path::Path) -> bool { + path.extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("parquet")) +} + +#[cfg(test)] +#[expect(clippy::disallowed_methods)] +mod tests { + use std::sync::Arc; + + use arrow::array::{Float64Array, RecordBatch}; + use arrow::datatypes::{DataType, Field, Schema}; + use re_chunk::EntityPath; + + use crate::{ImportedData, Importer as _, ImporterSettings}; + + use super::*; + + fn write_parquet_tmp(batch: &RecordBatch) -> std::path::PathBuf { + use parquet::arrow::ArrowWriter; + + let dir = std::env::temp_dir().join("rerun_parquet_tests"); + std::fs::create_dir_all(&dir).unwrap(); + + let path = dir.join(format!("{}.parquet", re_chunk::ChunkId::new())); + let file = std::fs::File::create(&path).unwrap(); + let mut writer = ArrowWriter::try_new(file, batch.schema(), None).unwrap(); + writer.write(batch).unwrap(); + writer.close().unwrap(); + + path + } + + #[test] + fn incompatible_extension_rejected() { + let loader = ParquetImporter::default(); + let (tx, _rx) = crossbeam::channel::bounded(1024); + let 
settings = ImporterSettings::recommended("test"); + + let result = loader.import_from_path(&settings, "data.csv".into(), tx); + assert!(matches!(result, Err(crate::ImporterError::Incompatible(_)))); + } + + #[test] + fn parquet_loader_smoke_test() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + ])), + vec![ + Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])), + Arc::new(Float64Array::from(vec![4.0, 5.0, 6.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let loader = ParquetImporter::default(); + let (tx, rx) = crossbeam::channel::bounded(1024); + let settings = ImporterSettings::recommended("test"); + + loader + .import_from_path(&settings, path, tx) + .expect("load should succeed"); + + let chunks: Vec<_> = rx + .iter() + .filter_map(ImportedData::into_chunk) + .filter(|c| c.entity_path() != &EntityPath::properties()) + .collect(); + + assert!(!chunks.is_empty(), "should produce at least one data chunk"); + } +} diff --git a/crates/store/re_data_loader/src/loader_rrd.rs b/crates/store/re_importer/src/importer_rrd.rs similarity index 94% rename from crates/store/re_data_loader/src/loader_rrd.rs rename to crates/store/re_importer/src/importer_rrd.rs index a082e7f7c188..8cf34f790565 100644 --- a/crates/store/re_data_loader/src/loader_rrd.rs +++ b/crates/store/re_importer/src/importer_rrd.rs @@ -3,26 +3,26 @@ use crossbeam::channel::Receiver; use re_log_encoding::Decoder; use re_log_types::ApplicationId; -use crate::{DataLoader as _, LoadedData}; +use crate::{ImportedData, Importer as _}; // --- -/// Loads data from any `rrd` file or in-memory contents. -pub struct RrdLoader; +/// Imports data from any `rrd` file or in-memory contents. 
+pub struct RrdImporter; -impl crate::DataLoader for RrdLoader { +impl crate::Importer for RrdImporter { #[inline] fn name(&self) -> String { - "rerun.data_loaders.Rrd".into() + "rerun.importers.Rrd".into() } #[cfg(not(target_arch = "wasm32"))] - fn load_from_path( + fn import_from_path( &self, - settings: &crate::DataLoaderSettings, + settings: &crate::ImporterSettings, filepath: std::path::PathBuf, - tx: crossbeam::channel::Sender, - ) -> Result<(), crate::DataLoaderError> { + tx: crossbeam::channel::Sender, + ) -> Result<(), crate::ImporterError> { use anyhow::Context as _; re_tracing::profile_function!(filepath.display().to_string()); @@ -31,7 +31,7 @@ impl crate::DataLoader for RrdLoader { if !matches!(extension.as_str(), "rbl" | "rrd") { if filepath.is_file() || filepath.is_dir() { // NOTE: blueprints and recordings have the same file format - return Err(crate::DataLoaderError::Incompatible(filepath.clone())); + return Err(crate::ImporterError::Incompatible(filepath.clone())); } else { // NOTE(1): If this is some kind of virtual file (fifo, socket, pipe, etc), then we // always assume it's an RRD stream by default. 
@@ -45,7 +45,7 @@ impl crate::DataLoader for RrdLoader { re_log::debug!( ?filepath, - loader = self.name(), + importer = self.name(), "Loading rrd data from filesystem…", ); @@ -138,19 +138,19 @@ impl crate::DataLoader for RrdLoader { Ok(()) } - fn load_from_file_contents( + fn import_from_file_contents( &self, - settings: &crate::DataLoaderSettings, + settings: &crate::ImporterSettings, filepath: std::path::PathBuf, contents: std::borrow::Cow<'_, [u8]>, - tx: crossbeam::channel::Sender, - ) -> Result<(), crate::DataLoaderError> { + tx: crossbeam::channel::Sender, + ) -> Result<(), crate::ImporterError> { re_tracing::profile_function!(filepath.display().to_string()); let extension = crate::extension(&filepath); if !matches!(extension.as_str(), "rbl" | "rrd") { // NOTE: blueprints and recordings has the same file format - return Err(crate::DataLoaderError::Incompatible(filepath)); + return Err(crate::ImporterError::Incompatible(filepath)); } let contents = std::io::Cursor::new(contents); @@ -192,7 +192,7 @@ impl crate::DataLoader for RrdLoader { fn decode_and_stream( filepath: &std::path::Path, - tx: &crossbeam::channel::Sender, + tx: &crossbeam::channel::Sender, msgs: impl Iterator>, forced_application_id: Option<&ApplicationId>, forced_recording_id: Option<&String>, @@ -257,7 +257,7 @@ fn decode_and_stream( msg }; - let data = LoadedData::LogMsg(RrdLoader::name(&RrdLoader), msg); + let data = ImportedData::LogMsg(RrdImporter::name(&RrdImporter), msg); if re_quota_channel::send_crossbeam(tx, data).is_err() { break; // The other end has decided to hang up, not our problem. 
} @@ -278,7 +278,7 @@ struct RetryableFileReader { #[cfg(not(target_arch = "wasm32"))] impl RetryableFileReader { - fn new(filepath: &std::path::Path) -> Result { + fn new(filepath: &std::path::Path) -> Result { use anyhow::Context as _; use notify::{RecursiveMode, Watcher as _}; diff --git a/crates/store/re_data_loader/src/loader_urdf/joint_transform.rs b/crates/store/re_importer/src/importer_urdf/joint_transform.rs similarity index 100% rename from crates/store/re_data_loader/src/loader_urdf/joint_transform.rs rename to crates/store/re_importer/src/importer_urdf/joint_transform.rs diff --git a/crates/store/re_data_loader/src/loader_urdf/mod.rs b/crates/store/re_importer/src/importer_urdf/mod.rs similarity index 66% rename from crates/store/re_data_loader/src/loader_urdf/mod.rs rename to crates/store/re_importer/src/importer_urdf/mod.rs index 5d493d55b7e5..cba38911442f 100644 --- a/crates/store/re_data_loader/src/loader_urdf/mod.rs +++ b/crates/store/re_importer/src/importer_urdf/mod.rs @@ -1,22 +1,24 @@ -//! Rerun data loader and utilities for URDF files. +//! Rerun importer and utilities for URDF files. 
pub mod joint_transform; +mod robot_description_parser; mod urdf_tree; +pub(crate) use robot_description_parser::build_urdf_chunks_from_xml; pub use urdf_tree::UrdfTree; use std::path::{Path, PathBuf}; use anyhow::{Context as _, bail}; use crossbeam::channel::Sender; -use re_chunk::{ChunkBuilder, ChunkId, EntityPath, RowId, TimePoint}; -use re_log_types::StoreId; +use re_chunk::{Chunk, ChunkBuilder, ChunkId, EntityPath, RowId, TimePoint}; use re_sdk_types::archetypes::{Asset3D, CoordinateFrame, InstancePoses3D, Transform3D}; -use re_sdk_types::datatypes::Vec3D; +use re_sdk_types::components::Color; +use re_sdk_types::datatypes::{Rgba32, Vec3D}; use re_sdk_types::external::glam; use re_sdk_types::{AsComponents, Component as _, ComponentDescriptor}; use urdf_rs::{Geometry, Joint, Vec3, Vec4}; -use crate::{DataLoader, DataLoaderError, LoadedData}; +use crate::{ImportedData, Importer, ImporterError}; fn is_urdf_file(path: impl AsRef) -> bool { path.as_ref() @@ -24,28 +26,19 @@ fn is_urdf_file(path: impl AsRef) -> bool { .is_some_and(|ext| ext.eq_ignore_ascii_case("urdf")) } -fn send_chunk_builder( - tx: &Sender, - store_id: &StoreId, - chunk: ChunkBuilder, -) -> anyhow::Result<()> { - re_quota_channel::send_crossbeam( - tx, - LoadedData::Chunk(UrdfDataLoader.name(), store_id.clone(), chunk.build()?), - )?; +fn emit_chunk_builder(emit: &mut dyn FnMut(Chunk), chunk: ChunkBuilder) -> anyhow::Result<()> { + emit(chunk.build()?); Ok(()) } -fn send_archetype( - tx: &Sender, - store_id: &StoreId, +fn emit_archetype( + emit: &mut dyn FnMut(Chunk), entity_path: EntityPath, timepoint: &TimePoint, archetype: &impl AsComponents, ) -> anyhow::Result<()> { - send_chunk_builder( - tx, - store_id, + emit_chunk_builder( + emit, ChunkBuilder::new(ChunkId::new(), entity_path).with_archetype( RowId::new(), timepoint.clone(), @@ -54,24 +47,24 @@ fn send_archetype( ) } -/// A [`DataLoader`] for [URDF](https://en.wikipedia.org/wiki/URDF) (Unified Robot Description Format), +/// An 
[`Importer`] for [URDF](https://en.wikipedia.org/wiki/URDF) (Unified Robot Description Format), /// common in ROS. -pub struct UrdfDataLoader; +pub struct UrdfImporter; -impl DataLoader for UrdfDataLoader { - fn name(&self) -> crate::DataLoaderName { - "URDF Loader".to_owned() +impl Importer for UrdfImporter { + fn name(&self) -> crate::ImporterName { + "rerun.importers.Urdf".to_owned() } #[cfg(not(target_arch = "wasm32"))] - fn load_from_path( + fn import_from_path( &self, - settings: &crate::DataLoaderSettings, + settings: &crate::ImporterSettings, filepath: std::path::PathBuf, - tx: Sender, - ) -> Result<(), crate::DataLoaderError> { + tx: Sender, + ) -> Result<(), crate::ImporterError> { if !is_urdf_file(&filepath) { - return Err(DataLoaderError::Incompatible(filepath)); + return Err(ImporterError::Incompatible(filepath)); } re_tracing::profile_function!(filepath.display().to_string()); @@ -79,28 +72,44 @@ impl DataLoader for UrdfDataLoader { let robot = urdf_rs::read_file(&filepath) .with_context(|| format!("Path: {}", filepath.display()))?; - log_robot( + let store_id = settings.opened_store_id_or_recommended(); + let mut send_error = None; + let mut emit = |chunk| { + if send_error.is_none() { + send_error = re_quota_channel::send_crossbeam( + &tx, + ImportedData::Chunk(Self.name(), store_id.clone(), chunk), + ) + .err(); + } + }; + + emit_robot( + &mut emit, robot, - &filepath, - &tx, - &settings.opened_store_id_or_recommended(), - &settings.entity_path_prefix, + filepath.parent().map(|path| path.to_path_buf()), + settings.entity_path_prefix.as_ref(), &settings.timepoint.clone().unwrap_or_default(), + true, ) .with_context(|| "Failed to load URDF file!")?; + if let Some(err) = send_error { + return Err(anyhow::anyhow!(err.to_string()).into()); + } + Ok(()) } - fn load_from_file_contents( + fn import_from_file_contents( &self, - settings: &crate::DataLoaderSettings, + settings: &crate::ImporterSettings, filepath: std::path::PathBuf, contents: 
std::borrow::Cow<'_, [u8]>, - tx: Sender, - ) -> Result<(), crate::DataLoaderError> { + tx: Sender, + ) -> Result<(), crate::ImporterError> { if !is_urdf_file(&filepath) { - return Err(DataLoaderError::Incompatible(filepath)); + return Err(ImporterError::Incompatible(filepath)); } re_tracing::profile_function!(filepath.display().to_string()); @@ -108,56 +117,93 @@ impl DataLoader for UrdfDataLoader { let robot = urdf_rs::read_from_string(&String::from_utf8_lossy(&contents)) .with_context(|| format!("Path: {}", filepath.display()))?; - log_robot( + let store_id = settings.opened_store_id_or_recommended(); + let mut send_error = None; + let mut emit = |chunk| { + if send_error.is_none() { + send_error = re_quota_channel::send_crossbeam( + &tx, + ImportedData::Chunk(Self.name(), store_id.clone(), chunk), + ) + .err(); + } + }; + + emit_robot( + &mut emit, robot, - &filepath, - &tx, - &settings.opened_store_id_or_recommended(), - &settings.entity_path_prefix, + filepath.parent().map(|path| path.to_path_buf()), + settings.entity_path_prefix.as_ref(), &settings.timepoint.clone().unwrap_or_default(), + true, ) .with_context(|| "Failed to load URDF file!")?; + if let Some(err) = send_error { + return Err(anyhow::anyhow!(err.to_string()).into()); + } + Ok(()) } } -fn log_robot( +pub(crate) fn emit_robot( + emit: &mut dyn FnMut(Chunk), robot: urdf_rs::Robot, - filepath: &Path, - tx: &Sender, - store_id: &StoreId, - entity_path_prefix: &Option, + urdf_dir: Option, + entity_path_prefix: Option<&EntityPath>, timepoint: &TimePoint, + include_joint_transforms: bool, ) -> anyhow::Result<()> { - let urdf_dir = filepath.parent().map(|path| path.to_path_buf()); - - let urdf_tree = UrdfTree::new(robot, urdf_dir, entity_path_prefix.clone()) + let urdf_tree = UrdfTree::new(robot, urdf_dir, entity_path_prefix.cloned()) .with_context(|| "Failed to build URDF tree!")?; - // The robot's root coordinate frame_id. 
- send_archetype( - tx, - store_id, - urdf_tree.log_paths.root.clone(), - timepoint, - &CoordinateFrame::update_fields().with_frame(urdf_tree.root().name.clone()), - )?; + urdf_tree.emit(emit, timepoint, include_joint_transforms) +} - let transforms = walk_tree(&urdf_tree, tx, store_id, timepoint, &urdf_tree.root().name)?; +impl UrdfTree { + /// Emit the full robot model (geometry + transforms) as [`Chunk`]s. + pub fn emit( + &self, + emit: &mut dyn FnMut(Chunk), + timepoint: &TimePoint, + include_joint_transforms: bool, + ) -> anyhow::Result<()> { + // The robot's root coordinate frame_id. + emit_archetype( + emit, + self.log_paths.root.clone(), + timepoint, + &CoordinateFrame::update_fields() + .with_frame(self.apply_frame_prefix(&self.root().name)), + )?; - // Send all transforms as rows in a single chunk. - if !transforms.is_empty() { - send_static_transforms_batch(tx, store_id, &urdf_tree.log_paths.transforms, &transforms)?; - } + // Bridge the prefixed root frame to the entity hierarchy: a Transform3D with only + // child_frame defaults its parent to the entity's implicit parent frame (tf#/…). + if self.frame_prefix().is_some() { + emit_archetype( + emit, + self.log_paths.root.clone(), + timepoint, + &Transform3D::update_fields() + .with_child_frame(self.apply_frame_prefix(&self.root().name)), + )?; + } - Ok(()) + let transforms = walk_tree(emit, self, timepoint, &self.root().name)?; + + // Emit all transforms as rows in a single chunk. 
+ if include_joint_transforms && !transforms.is_empty() { + emit_static_transforms_batch(emit, &self.log_paths.transforms, &transforms)?; + } + + Ok(()) + } } fn walk_tree( + emit: &mut dyn FnMut(Chunk), urdf_tree: &UrdfTree, - tx: &Sender, - store_id: &StoreId, timepoint: &TimePoint, link_name: &str, ) -> anyhow::Result> { @@ -166,7 +212,7 @@ fn walk_tree( .with_context(|| format!("Link {link_name:?} missing from map"))?; re_log::debug_assert_eq!(link_name, link.name); - log_link(urdf_tree, tx, store_id, timepoint, link)?; + emit_link(urdf_tree, timepoint, link, emit)?; let Some(joints) = urdf_tree.get_children(link_name) else { // if there's no more joints connecting this link to anything else we've reached the end of this branch. @@ -175,18 +221,17 @@ fn walk_tree( let mut joint_transforms_for_link = Vec::new(); for joint in joints { - joint_transforms_for_link.push(get_joint_transform(joint)); + joint_transforms_for_link.push(get_joint_transform(urdf_tree, joint)); // Recurse - let mut child_transforms = - walk_tree(urdf_tree, tx, store_id, timepoint, &joint.child.link)?; + let mut child_transforms = walk_tree(emit, urdf_tree, timepoint, &joint.child.link)?; joint_transforms_for_link.append(&mut child_transforms); } Ok(joint_transforms_for_link) } -fn get_joint_transform(joint: &Joint) -> Transform3D { +fn get_joint_transform(urdf_tree: &UrdfTree, joint: &Joint) -> Transform3D { let Joint { name: _, joint_type: _, @@ -201,16 +246,19 @@ fn get_joint_transform(joint: &Joint) -> Transform3D { safety_controller: _, } = joint; - transform_from_pose(origin, parent.link.clone(), child.link.clone()) + transform_from_pose( + origin, + urdf_tree.apply_frame_prefix(&parent.link), + urdf_tree.apply_frame_prefix(&child.link), + ) } -/// Send a batch of static transforms as a single chunk. +/// Emit a batch of static transforms as a single chunk. 
/// /// We always do this statically for URDF, because this allows users to override them later /// on any other transform entity of their choice. -fn send_static_transforms_batch( - tx: &Sender, - store_id: &StoreId, +fn emit_static_transforms_batch( + emit: &mut dyn FnMut(Chunk), transforms_path: &EntityPath, transforms: &[Transform3D], ) -> anyhow::Result<()> { @@ -220,7 +268,7 @@ fn send_static_transforms_batch( chunk = chunk.with_archetype(RowId::new(), TimePoint::STATIC, transform); } - send_chunk_builder(tx, store_id, chunk) + emit_chunk_builder(emit, chunk) } fn transform_from_pose( @@ -253,25 +301,22 @@ fn instance_poses_from_pose(origin: &urdf_rs::Pose, scale: Option) -> Ins poses } -fn send_instance_pose_with_frame( - tx: &Sender, - store_id: &StoreId, +fn emit_instance_pose_with_frame( + emit: &mut dyn FnMut(Chunk), entity_path: EntityPath, timepoint: &TimePoint, origin: &urdf_rs::Pose, parent_frame: String, scale: Option, ) -> anyhow::Result<()> { - send_archetype( - tx, - store_id, + emit_archetype( + emit, entity_path.clone(), timepoint, &instance_poses_from_pose(origin, scale), )?; - send_archetype( - tx, - store_id, + emit_archetype( + emit, entity_path, timepoint, &CoordinateFrame::update_fields().with_frame(parent_frame), @@ -288,12 +333,11 @@ fn extract_instance_scale(geometry: &Geometry) -> Option { } } -fn log_link( +fn emit_link( urdf_tree: &UrdfTree, - tx: &Sender, - store_id: &StoreId, timepoint: &TimePoint, link: &urdf_rs::Link, + emit: &mut dyn FnMut(Chunk), ) -> anyhow::Result<()> { let urdf_rs::Link { name: link_name, @@ -302,6 +346,8 @@ fn log_link( collision: _, } = link; + let frame_id = urdf_tree.apply_frame_prefix(link_name); + for (visual_entity_path, visual) in urdf_tree.get_visual_geometries(link).unwrap_or_default() { let urdf_rs::Visual { name: _, @@ -322,20 +368,18 @@ fn log_link( // A visual geometry has no frame ID of its own and has a constant pose, // so we attach it to the link using an instance pose. 
- send_instance_pose_with_frame( - tx, - store_id, + emit_instance_pose_with_frame( + emit, visual_entity_path.clone(), timepoint, origin, - link_name.clone(), + frame_id.clone(), instance_scale, )?; - log_geometry( + emit_geometry( + emit, urdf_tree, - tx, - store_id, visual_entity_path, geometry, material, @@ -355,20 +399,18 @@ fn log_link( // A collision geometry has no frame ID of its own and has a constant pose, // so we attach it to the link using an instance pose. - send_instance_pose_with_frame( - tx, - store_id, + emit_instance_pose_with_frame( + emit, collision_entity_path.clone(), timepoint, origin, - link_name.clone(), + frame_id.clone(), instance_scale, )?; - log_geometry( + emit_geometry( + emit, urdf_tree, - tx, - store_id, collision_entity_path.clone(), geometry, None, @@ -378,9 +420,8 @@ fn log_link( if false { // TODO(michael): consider hiding collision geometries by default. // TODO(#6541): the viewer should respect the `Visible` component. - send_chunk_builder( - tx, - store_id, + emit_chunk_builder( + emit, ChunkBuilder::new(ChunkId::new(), collision_entity_path).with_component_batch( RowId::new(), timepoint.clone(), @@ -400,8 +441,8 @@ fn log_link( Ok(()) } -/// TODO(emilk): create a trait for this, so that one can use this URDF loader -/// from e.g. a ROS-bag loader. +/// TODO(emilk): create a trait for this, so that one can use this URDF importer +/// from e.g. a ROS-bag importer. 
#[cfg(target_arch = "wasm32")] fn load_ros_resource(_root_dir: Option<&PathBuf>, resource_path: &str) -> anyhow::Result> { bail!("Loading ROS resources is not supported in WebAssembly: {resource_path}"); @@ -433,10 +474,9 @@ fn load_ros_resource( } } -fn log_geometry( +fn emit_geometry( + emit: &mut dyn FnMut(Chunk), urdf_tree: &UrdfTree, - tx: &Sender, - store_id: &StoreId, entity_path: EntityPath, geometry: &Geometry, material: Option<&urdf_rs::Material>, @@ -450,82 +490,95 @@ fn log_geometry( let mut asset3d = Asset3D::from_file_contents(mesh_bytes, MediaType::guess_from_path(filename)); + if let Some(albedo_factor) = material_albedo_factor(material) { + asset3d = asset3d.with_albedo_factor(albedo_factor); + } + if let Some(material) = material { let urdf_rs::Material { name: _, - color, + color: _, texture, } = material; - if let Some(color) = color { - let urdf_rs::Color { - rgba: Vec4([r, g, b, a]), - } = color; - asset3d = asset3d.with_albedo_factor( - // TODO(emilk): is this linear or sRGB? 
- re_sdk_types::datatypes::Rgba32::from_linear_unmultiplied_rgba_f32( - *r as f32, *g as f32, *b as f32, *a as f32, - ), - ); - } + if texture.is_some() { re_log::warn_once!("Material texture not supported"); // TODO(emilk): support textures } } - send_archetype(tx, store_id, entity_path, timepoint, &asset3d)?; + emit_archetype(emit, entity_path, timepoint, &asset3d)?; } Geometry::Box { size: Vec3([x, y, z]), } => { - send_archetype( - tx, - store_id, + emit_archetype( + emit, entity_path, timepoint, &re_sdk_types::archetypes::Boxes3D::from_sizes([Vec3D::new( *x as _, *y as _, *z as _, - )]), + )]) + .with_colors([material_color(material)]), )?; } Geometry::Cylinder { radius, length } => { // URDF and Rerun both use Z as the main axis - send_archetype( - tx, - store_id, + emit_archetype( + emit, entity_path, timepoint, &re_sdk_types::archetypes::Cylinders3D::from_lengths_and_radii( [*length as f32], [*radius as f32], - ), + ) + .with_colors([material_color(material)]), )?; } Geometry::Capsule { radius, length } => { // URDF and Rerun both use Z as the main axis - send_archetype( - tx, - store_id, + emit_archetype( + emit, entity_path, timepoint, &re_sdk_types::archetypes::Capsules3D::from_lengths_and_radii( [*length as f32], [*radius as f32], - ), + ) + .with_colors([material_color(material)]), )?; } Geometry::Sphere { radius } => { - send_archetype( - tx, - store_id, + emit_archetype( + emit, entity_path, timepoint, - &re_sdk_types::archetypes::Ellipsoids3D::from_radii([*radius as f32]), + &re_sdk_types::archetypes::Ellipsoids3D::from_radii([*radius as f32]) + .with_colors([material_color(material)]), )?; } } Ok(()) } +/// Extracts the RGBA color from a URDF material. Falls back to white if no color is specified. +fn material_color(material: Option<&urdf_rs::Material>) -> Color { + Color::new(material_albedo_factor(material).unwrap_or(Rgba32::WHITE)) +} + +/// Extracts the URDF material color for mesh albedo, if one is explicitly specified. 
+fn material_albedo_factor(material: Option<&urdf_rs::Material>) -> Option { + material + .and_then(|material| material.color.as_ref()) + .map(|color| { + let urdf_rs::Color { + rgba: Vec4([r, g, b, a]), + } = color; + + // TODO(emilk): is this linear or sRGB? + Rgba32::from_linear_unmultiplied_rgba_f32(*r as f32, *g as f32, *b as f32, *a as f32) + }) +} + fn quat_from_rpy(rpy: &[f64; 3]) -> glam::Quat { glam::Quat::from_euler( glam::EulerRot::ZYX, diff --git a/crates/store/re_importer/src/importer_urdf/robot_description_parser.rs b/crates/store/re_importer/src/importer_urdf/robot_description_parser.rs new file mode 100644 index 000000000000..d3123fdac37b --- /dev/null +++ b/crates/store/re_importer/src/importer_urdf/robot_description_parser.rs @@ -0,0 +1,30 @@ +//! Utilities for parsing URDF XML from strings, like e.g. a ROS `robot_description` topic. + +use re_chunk::EntityPath; +use re_log_types::TimePoint; + +/// Parses URDF XML and returns the chunks emitted by Rerun's built-in URDF importer. +/// +/// `include_joint_transforms` controls whether static joint transforms from the URDF +/// are emitted in addition to the robot geometry. 
+pub(crate) fn build_urdf_chunks_from_xml( + urdf_xml: &str, + entity_path_prefix: Option<&EntityPath>, + timepoint: &TimePoint, + include_joint_transforms: bool, +) -> anyhow::Result> { + let robot = urdf_rs::read_from_string(urdf_xml)?; + + let mut chunks = Vec::new(); + + super::emit_robot( + &mut |chunk| chunks.push(chunk), + robot, + None, + entity_path_prefix, + timepoint, + include_joint_transforms, + )?; + + Ok(chunks) +} diff --git a/crates/store/re_data_loader/src/loader_urdf/urdf_tree.rs b/crates/store/re_importer/src/importer_urdf/urdf_tree.rs similarity index 57% rename from crates/store/re_data_loader/src/loader_urdf/urdf_tree.rs rename to crates/store/re_importer/src/importer_urdf/urdf_tree.rs index 1eddb2b39b9e..69a0abf49c4f 100644 --- a/crates/store/re_data_loader/src/loader_urdf/urdf_tree.rs +++ b/crates/store/re_importer/src/importer_urdf/urdf_tree.rs @@ -5,7 +5,23 @@ use anyhow::bail; use itertools::Itertools as _; use re_chunk::EntityPath; use re_log_types::EntityPathPart; -use urdf_rs::{Joint, Link, Material, Robot}; +use re_sdk_types::archetypes::Transform3D; +use urdf_rs::{Geometry, Joint, Link, Material, Robot}; + +use super::joint_transform; + +const DEFAULT_TF_STATIC_ENTITY_PATH: &str = "tf_static"; + +/// Returns a short name for the geometry type, used as a path segment. +fn geometry_type_name(geometry: &Geometry) -> &'static str { + match geometry { + Geometry::Mesh { .. } => "mesh", + Geometry::Box { .. } => "box", + Geometry::Cylinder { .. } => "cylinder", + Geometry::Capsule { .. } => "capsule", + Geometry::Sphere { .. } => "sphere", + } +} /// Helper struct containing the (root) entity paths where the different parts of the URDF model are logged. 
pub(crate) struct UrdfLogPaths { @@ -23,12 +39,13 @@ pub(crate) struct UrdfLogPaths { impl UrdfLogPaths { pub fn new(robot_name: &str, entity_path_prefix: Option) -> Self { - let root = entity_path_prefix - .map(|prefix| prefix / EntityPath::from_single_string(robot_name)) - .unwrap_or_else(|| EntityPath::from_single_string(robot_name)); + let root = match entity_path_prefix { + Some(prefix) => prefix / EntityPath::from_single_string(robot_name), + None => EntityPath::from_single_string(robot_name), + }; let visual_root = root.clone() / EntityPathPart::new("visual_geometries"); let collision_root = root.clone() / EntityPathPart::new("collision_geometries"); - let transforms = root.clone() / EntityPathPart::new("joint_transforms"); + let transforms = EntityPath::from_single_string(DEFAULT_TF_STATIC_ENTITY_PATH); Self { root, @@ -55,6 +72,7 @@ pub struct UrdfTree { links: HashMap, children: HashMap>, materials: HashMap, + frame_prefix: Option, } impl UrdfTree { @@ -149,14 +167,64 @@ impl UrdfTree { children, materials, log_paths, + frame_prefix: None, }) } + /// Set the frame prefix applied to all frame IDs. + pub fn with_frame_prefix(mut self, prefix: impl Into) -> Self { + self.frame_prefix = Some(prefix.into()); + self + } + + /// Set the entity path used for static transforms emitted by this URDF tree. + /// + /// This path is not affected by the tree's entity path prefix. + pub fn with_static_transform_entity(mut self, entity_path: impl Into) -> Self { + self.log_paths.transforms = entity_path.into(); + self + } + /// Name of the robot defined in the URDF. pub fn name(&self) -> &str { &self.name } + /// The frame prefix, if set. + pub fn frame_prefix(&self) -> Option<&str> { + self.frame_prefix.as_deref() + } + + /// Applies [`Self::frame_prefix`] to the given name, if set. 
+ pub fn apply_frame_prefix(&self, name: &str) -> String { + match &self.frame_prefix { + Some(prefix) => format!("{prefix}{name}"), + None => name.to_owned(), + } + } + + /// Computes a [`Transform3D`] for a joint at the given value. + /// + /// If [`Self::frame_prefix`] is set, the frame IDs of the transform are prefixed with it. + pub fn compute_joint_transform( + &self, + joint: &Joint, + value: f64, + clamp: bool, + ) -> Result { + let result = joint_transform::internal::compute_joint_transform(joint, value, clamp)?; + + if let Some(warning) = &result.warning { + re_log::warn!("{warning}"); + } + + Ok(Transform3D::update_fields() + .with_translation(result.translation.to_array()) + .with_quaternion(result.quaternion.to_array()) + .with_parent_frame(self.apply_frame_prefix(&result.parent_frame)) + .with_child_frame(self.apply_frame_prefix(&result.child_frame))) + } + /// The root [`Link`] in the URDF hierarchy. pub fn root(&self) -> &Link { &self.root @@ -211,6 +279,10 @@ impl UrdfTree { } /// Get the collision geometries of a link and their entity paths, if any. + /// + /// Collision geometries are organized by geometry type under `collision_root`: + /// `collision_geometries/{geometry_type}/{link_name}/{collision_name}`. + /// This makes it easy to toggle visibility per geometry type (e.g. hide meshes but keep primitives). pub fn get_collision_geometries( &self, link: &Link, @@ -220,24 +292,22 @@ impl UrdfTree { return None; } - // The base path for all collision geometries of this link. - // We use flat paths under `collision_root` since link names have to be unique and to avoid deep nesting. - let collision_base_path_for_link = - self.log_paths.collision_root.clone() / EntityPathPart::new(&link.name); - - // Collect all the link's collision geometries and build their entity paths. + // Collect all the link's collision geometries and build their entity paths, + // organized by geometry type for easy per-type visibility toggling. 
link.collision .iter() .enumerate() .map(|(i, collision)| { + let geometry_type = geometry_type_name(&collision.geometry); let collision_name = collision .name .clone() .unwrap_or_else(|| format!("collision_{i}")); - ( - collision_base_path_for_link.clone() / EntityPathPart::new(collision_name), - collision, - ) + let path = self.log_paths.collision_root.clone() + / EntityPathPart::new(geometry_type) + / EntityPathPart::new(&link.name) + / EntityPathPart::new(collision_name); + (path, collision) }) .collect::>() .into() @@ -252,3 +322,74 @@ impl UrdfTree { self.materials.get(name) } } + +#[cfg(test)] +mod tests { + use super::*; + + fn make_minimal_link(name: &str) -> urdf_rs::Link { + urdf_rs::Link { + name: name.to_owned(), + inertial: Default::default(), + visual: vec![], + collision: vec![], + } + } + + #[test] + fn test_apply_frame_prefix_without_prefix() { + let robot = urdf_rs::Robot { + name: "test".to_owned(), + links: vec![make_minimal_link("base")], + joints: vec![], + materials: vec![], + }; + let tree = UrdfTree::new(robot, None, None).unwrap(); + assert_eq!(tree.apply_frame_prefix("base"), "base"); + assert!(tree.frame_prefix().is_none()); + } + + #[test] + fn test_apply_frame_prefix_with_prefix() { + let robot = urdf_rs::Robot { + name: "test".to_owned(), + links: vec![make_minimal_link("base")], + joints: vec![], + materials: vec![], + }; + let tree = UrdfTree::new(robot, None, None) + .unwrap() + .with_frame_prefix("left_arm/"); + assert_eq!(tree.apply_frame_prefix("base"), "left_arm/base"); + assert_eq!(tree.frame_prefix(), Some("left_arm/")); + } + + #[test] + fn test_with_static_transform_entity_overrides_default_path() { + let robot = urdf_rs::Robot { + name: "test".to_owned(), + links: vec![make_minimal_link("base")], + joints: vec![], + materials: vec![], + }; + let tree = UrdfTree::new(robot, None, None) + .unwrap() + .with_static_transform_entity("robot/tf"); + + assert_eq!(tree.log_paths.transforms.to_string(), "/robot/tf"); + } + + 
#[test] + fn test_static_transforms_path_defaults_to_tf_static() { + let paths = UrdfLogPaths::new("test", None); + + assert_eq!(paths.transforms.to_string(), "/tf_static"); + } + + #[test] + fn test_static_transforms_path_is_unaffected_by_prefix() { + let paths = UrdfLogPaths::new("test", Some(EntityPath::parse_forgiving("robots/left_arm"))); + + assert_eq!(paths.transforms.to_string(), "/tf_static"); + } +} diff --git a/crates/store/re_data_loader/src/lerobot/common.rs b/crates/store/re_importer/src/lerobot/common.rs similarity index 82% rename from crates/store/re_data_loader/src/lerobot/common.rs rename to crates/store/re_importer/src/lerobot/common.rs index 2afd00a578f7..81e6060ddec0 100644 --- a/crates/store/re_data_loader/src/lerobot/common.rs +++ b/crates/store/re_importer/src/lerobot/common.rs @@ -18,7 +18,7 @@ use re_sdk_types::archetypes; use re_sdk_types::archetypes::EncodedImage; use crate::lerobot::{EpisodeIndex, Feature}; -use crate::{DataLoaderError, LoadedData, load_file::prepare_store_info}; +use crate::{ImportedData, ImporterError, import_file::prepare_store_info}; /// Shared interface for all `LeRobot` dataset versions. pub trait LeRobotDataset { @@ -26,7 +26,7 @@ pub trait LeRobotDataset { fn iter_episode_indices(&self) -> impl Iterator; /// Loads a specific episode and returns its chunks. - fn load_episode_chunks(&self, episode: EpisodeIndex) -> Result, DataLoaderError>; + fn load_episode_chunks(&self, episode: EpisodeIndex) -> Result, ImporterError>; } /// Columns in the `LeRobot` dataset schema that we do not visualize in the viewer, and thus ignore. 
@@ -37,14 +37,14 @@ pub const LEROBOT_DATASET_IGNORED_COLUMNS: &[&str] = pub fn prepare_episode_chunks( episodes: impl IntoIterator, application_id: &ApplicationId, - tx: &Sender, + tx: &Sender, loader_name: &str, ) -> Vec<(EpisodeIndex, StoreId)> { let mut store_ids = vec![]; for episode in episodes { let store_id = StoreId::recording(application_id.clone(), format!("episode_{}", episode.0)); - let set_store_info = LoadedData::LogMsg( + let set_store_info = ImportedData::LogMsg( loader_name.to_owned(), prepare_store_info(&store_id, re_log_types::FileSource::Sdk), ); @@ -63,9 +63,9 @@ pub fn prepare_episode_chunks( pub fn load_and_stream_common( dataset: &Dataset, store_ids: &[(EpisodeIndex, StoreId)], - tx: &Sender, + tx: &Sender, loader_name: &str, - load_episode: impl Fn(&Dataset, EpisodeIndex) -> Result, DataLoaderError>, + load_episode: impl Fn(&Dataset, EpisodeIndex) -> Result, ImporterError>, ) { for (episode, store_id) in store_ids { // log episode data to its respective recording @@ -85,8 +85,8 @@ pub fn load_and_stream_common( return; }; - for chunk in std::iter::once(initial).chain(chunks.into_iter()) { - let data = LoadedData::Chunk(loader_name.to_owned(), store_id.clone(), chunk); + for chunk in std::iter::once(initial).chain(chunks) { + let data = ImportedData::Chunk(loader_name.to_owned(), store_id.clone(), chunk); if send_crossbeam(tx, data).is_err() { break; // The other end has decided to hang up, not our problem. 
@@ -107,7 +107,7 @@ pub fn load_and_stream_common( pub fn load_and_stream_versioned( dataset: &D, application_id: &ApplicationId, - tx: &Sender, + tx: &Sender, loader_name: &str, ) { let store_ids = prepare_episode_chunks( @@ -125,7 +125,7 @@ pub fn load_episode_images( observation: &str, timeline: &re_chunk::Timeline, data: &RecordBatch, -) -> Result + use<>, DataLoaderError> { +) -> Result + use<>, ImporterError> { let image_bytes = data .column_by_name(observation) .and_then(|c| c.downcast_array_ref::()) @@ -157,7 +157,7 @@ pub fn load_episode_depth_images( observation: &str, timeline: &re_chunk::Timeline, data: &RecordBatch, -) -> Result + use<>, DataLoaderError> { +) -> Result + use<>, ImporterError> { let image_bytes = data .column_by_name(observation) .and_then(|c| c.downcast_array_ref::()) @@ -215,7 +215,7 @@ pub fn load_scalar( feature: &Feature, timelines: &IntMap, data: &RecordBatch, -) -> Result { +) -> Result { let field = data .schema_ref() .field_with_name(feature_key) @@ -231,13 +231,17 @@ pub fn load_scalar( .column_by_name(feature_key) .and_then(|col| col.downcast_array_ref::()) .ok_or_else(|| { - DataLoaderError::Other(anyhow!( + ImporterError::Other(anyhow!( "Failed to downcast feature to FixedSizeListArray" )) })?; - let batch_chunks = - make_scalar_batch_entity_chunks(entity_path, feature, timelines, fixed_size_array)?; + let batch_chunks = make_scalar_batch_entity_chunks( + &entity_path, + feature, + timelines, + fixed_size_array, + )?; Ok(ScalarChunkIterator::Batch(Box::new(batch_chunks))) } DataType::List(_field) => { @@ -245,20 +249,28 @@ pub fn load_scalar( .column_by_name(feature_key) .and_then(|col| col.downcast_array_ref::()) .ok_or_else(|| { - DataLoaderError::Other(anyhow!("Failed to downcast feature to ListArray")) + ImporterError::Other(anyhow!("Failed to downcast feature to ListArray")) })?; let sliced = extract_list_array_elements_as_f64(list_array).with_context(|| { format!("Failed to cast scalar feature {entity_path} to 
Float64") })?; - Ok(ScalarChunkIterator::Single(Box::new(std::iter::once( - make_scalar_entity_chunk(entity_path, timelines, &sliced)?, - )))) + let mut chunks = vec![make_scalar_entity_chunk( + entity_path.clone(), + timelines, + &sliced, + )?]; + + if let Some(names_chunk) = make_names_chunk(&entity_path, feature, sliced.len())? { + chunks.push(names_chunk); + } + + Ok(ScalarChunkIterator::Batch(Box::new(chunks.into_iter()))) } DataType::Float32 | DataType::Float64 => { let feature_data = data.column_by_name(feature_key).ok_or_else(|| { - DataLoaderError::Other(anyhow!( + ImporterError::Other(anyhow!( "Failed to get LeRobot dataset column data for: {:?}", field.name() )) @@ -284,11 +296,11 @@ pub fn load_scalar( } fn make_scalar_batch_entity_chunks( - entity_path: EntityPath, + entity_path: &EntityPath, feature: &Feature, timelines: &IntMap, data: &FixedSizeListArray, -) -> Result + use<>, DataLoaderError> { +) -> Result + use<>, ImporterError> { let num_elements = data.value_length() as usize; let mut chunks = Vec::with_capacity(num_elements); @@ -302,34 +314,47 @@ fn make_scalar_batch_entity_chunks( &sliced, )?); - // If we have names for this feature, we insert a single static chunk containing the names. - if let Some(names) = feature.names.clone() { - let names: Vec<_> = (0..data.value_length() as usize) - .map(|idx| names.name_for_index(idx)) - .collect(); - - chunks.push( - Chunk::builder(entity_path) - .with_row( - RowId::new(), - TimePoint::default(), - std::iter::once(( - archetypes::SeriesLines::descriptor_names(), - Arc::new(StringArray::from_iter(names)) as Arc, - )), - ) - .build()?, - ); + if let Some(names_chunk) = make_names_chunk(entity_path, feature, data.value_length() as usize)? + { + chunks.push(names_chunk); } Ok(chunks.into_iter()) } +/// If the feature has names, create a static chunk containing them. 
+fn make_names_chunk( + entity_path: &EntityPath, + feature: &Feature, + num_elements: usize, +) -> Result, ImporterError> { + let Some(names) = feature.names.clone() else { + return Ok(None); + }; + + let names: Vec<_> = (0..num_elements) + .map(|idx| names.name_for_index(idx)) + .collect(); + + Ok(Some( + Chunk::builder(entity_path.clone()) + .with_row( + RowId::new(), + TimePoint::default(), + std::iter::once(( + archetypes::SeriesLines::descriptor_names(), + Arc::new(StringArray::from_iter(names)) as Arc, + )), + ) + .build()?, + )) +} + fn make_scalar_entity_chunk( entity_path: EntityPath, timelines: &IntMap, sliced_data: &[ArrayRef], -) -> Result { +) -> Result { let data_arrays = sliced_data .iter() .map(|e| Some(e.as_ref())) diff --git a/crates/store/re_data_loader/src/lerobot/datasetv2.rs b/crates/store/re_importer/src/lerobot/datasetv2.rs similarity index 92% rename from crates/store/re_data_loader/src/lerobot/datasetv2.rs rename to crates/store/re_importer/src/lerobot/datasetv2.rs index 5c1400957688..472b33b5ffa9 100644 --- a/crates/store/re_data_loader/src/lerobot/datasetv2.rs +++ b/crates/store/re_importer/src/lerobot/datasetv2.rs @@ -12,7 +12,7 @@ use std::path::{Path, PathBuf}; use ahash::HashMap; use anyhow::{Context as _, anyhow}; -use arrow::array::{Int64Array, RecordBatch}; +use arrow::array::{Float64Array, Int64Array, RecordBatch}; use crossbeam::channel::Sender; use itertools::Either; use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; @@ -27,7 +27,7 @@ use re_sdk_types::{ components::VideoTimestamp, }; -use crate::{DataLoaderError, LoadedData}; +use crate::{ImportedData, ImporterError}; /// A `LeRobot` dataset consists of structured metadata and recorded episode data stored in /// Parquet files. @@ -233,7 +233,7 @@ pub struct LeRobotDatasetInfo { pub image_path: Option, /// The frame rate of the recorded episode data. - pub fps: usize, + pub fps: f32, /// A mapping of feature names to their respective [`Feature`] definitions. 
pub features: HashMap, @@ -349,7 +349,7 @@ pub struct LeRobotDatasetEpisode { pub fn load_and_stream( dataset: &LeRobotDatasetV2, application_id: &ApplicationId, - tx: &Sender, + tx: &Sender, loader_name: &str, ) { load_and_stream_versioned(dataset, application_id, tx, loader_name); @@ -363,23 +363,34 @@ pub fn load_and_stream( fn load_episode( dataset: &LeRobotDatasetV2, episode: EpisodeIndex, -) -> Result, DataLoaderError> { +) -> Result, ImporterError> { let data = dataset .read_episode_data(episode) .map_err(|err| anyhow!("Reading data for episode {} failed: {err}", episode.0))?; - let frame_indices = data - .column_by_name("frame_index") - .ok_or_else(|| anyhow!("Failed to get frame index column in LeRobot dataset"))? - .clone(); - - let timeline = re_log_types::Timeline::new_sequence("frame_index"); - let times: &arrow::buffer::ScalarBuffer = frame_indices - .downcast_array_ref::() - .ok_or_else(|| anyhow!("LeRobot dataset frame indices are of an unexpected type"))? - .values(); - - let time_column = re_chunk::TimeColumn::new(None, timeline, times.clone()); + let (timeline, time_column) = if let Some(frame_indices) = data.column_by_name("frame_index") { + let timeline = re_log_types::Timeline::new_sequence("frame_index"); + let times: &arrow::buffer::ScalarBuffer = frame_indices + .downcast_array_ref::() + .ok_or_else(|| anyhow!("LeRobot dataset frame indices are of an unexpected type"))? + .values(); + ( + timeline, + re_chunk::TimeColumn::new(None, timeline, times.clone()), + ) + } else if let Some(timestamps) = data.column_by_name("timestamp") { + let timeline = re_log_types::Timeline::new_duration("timestamp"); + let times: arrow::buffer::ScalarBuffer = timestamps + .downcast_array_ref::() + .ok_or_else(|| anyhow!("LeRobot dataset timestamps are of an unexpected type"))? 
+ .values() + .iter() + .map(|t| re_log_types::Duration::from_secs(*t).as_nanos()) + .collect(); + (timeline, re_chunk::TimeColumn::new(None, timeline, times)) + } else { + return Err(anyhow!("LeRobot dataset has neither frame_index nor timestamp column").into()); + }; let timelines = std::iter::once((*timeline.name(), time_column.clone())).collect(); let mut chunks = Vec::new(); @@ -439,7 +450,7 @@ impl LeRobotDataset for LeRobotDatasetV2 { self.metadata.iter_episode_indices() } - fn load_episode_chunks(&self, episode: EpisodeIndex) -> Result, DataLoaderError> { + fn load_episode_chunks(&self, episode: EpisodeIndex) -> Result, ImporterError> { load_episode(self, episode) } } @@ -448,7 +459,7 @@ fn log_episode_task( dataset: &LeRobotDatasetV2, timeline: &Timeline, data: &RecordBatch, -) -> Result + use<>, DataLoaderError> { +) -> Result + use<>, ImporterError> { let task_indices = data .column_by_name("task_index") .and_then(|c| c.downcast_array_ref::()) @@ -485,7 +496,7 @@ fn load_episode_video( episode: EpisodeIndex, timeline: &Timeline, time_column: TimeColumn, -) -> Result + use<>, DataLoaderError> { +) -> Result + use<>, ImporterError> { let contents = dataset .read_episode_video_contents(observation, episode) .with_context(|| format!("Reading video contents for episode {episode:?} failed!"))?; diff --git a/crates/store/re_data_loader/src/lerobot/datasetv3.rs b/crates/store/re_importer/src/lerobot/datasetv3.rs similarity index 86% rename from crates/store/re_data_loader/src/lerobot/datasetv3.rs rename to crates/store/re_importer/src/lerobot/datasetv3.rs index 0be71acb6446..5cdfdfbaa473 100644 --- a/crates/store/re_data_loader/src/lerobot/datasetv3.rs +++ b/crates/store/re_importer/src/lerobot/datasetv3.rs @@ -2,7 +2,10 @@ use crate::lerobot::common::{ LEROBOT_DATASET_IGNORED_COLUMNS, LeRobotDataset, load_and_stream_versioned, load_episode_depth_images, load_episode_images, load_scalar, }; -use crate::lerobot::{DType, EpisodeIndex, Feature, LeRobotDatasetTask, 
LeRobotError, TaskIndex}; +use crate::lerobot::{ + DType, EpisodeIndex, Feature, LeRobotDatasetSubtask, LeRobotDatasetTask, LeRobotError, + SubtaskIndex, TaskIndex, +}; use std::fs::File; use std::io::BufReader; @@ -26,7 +29,7 @@ use re_chunk::{Chunk, RowId, TimeColumn, TimePoint, Timeline}; use re_log_types::ApplicationId; use re_sdk_types::archetypes::{TextDocument, VideoStream}; -use crate::{DataLoaderError, LoadedData}; +use crate::{ImportedData, ImporterError}; /// A `LeRobot` dataset consists of structured metadata and recorded episode data stored in /// Parquet files. @@ -372,28 +375,48 @@ impl LeRobotDatasetV3 { self.metadata.tasks.tasks.get(&task) } + /// Retrieve the subtask using the provided subtask index. + pub fn subtask_by_index(&self, subtask: SubtaskIndex) -> Option<&LeRobotDatasetSubtask> { + self.metadata.subtasks.as_ref()?.subtasks.get(&subtask) + } + /// Loads a single episode from a `LeRobot` dataset and converts it into a collection of Rerun chunks. /// /// This function processes an episode from the dataset by extracting the relevant data columns and /// converting them into appropriate Rerun data structures. It handles different types of data /// (videos, images, scalar values, etc.) based on their data type specifications in the dataset metadata. - fn load_episode(&self, episode: EpisodeIndex) -> Result, DataLoaderError> { + fn load_episode(&self, episode: EpisodeIndex) -> Result, ImporterError> { let data = self .read_episode_data(episode) .map_err(|err| anyhow!("Reading data for episode {} failed: {err}", episode.0))?; - let frame_indices = data - .column_by_name("frame_index") - .ok_or_else(|| anyhow!("Failed to get frame index column in LeRobot dataset"))? - .clone(); - - let timeline = re_log_types::Timeline::new_sequence("frame_index"); - let times: &arrow::buffer::ScalarBuffer = frame_indices - .downcast_array_ref::() - .ok_or_else(|| anyhow!("LeRobot dataset frame indices are of an unexpected type"))? 
- .values(); - - let time_column = re_chunk::TimeColumn::new(None, timeline, times.clone()); + let (timeline, time_column) = if let Some(frame_indices) = + data.column_by_name("frame_index") + { + let timeline = re_log_types::Timeline::new_sequence("frame_index"); + let times: &arrow::buffer::ScalarBuffer = frame_indices + .downcast_array_ref::() + .ok_or_else(|| anyhow!("LeRobot dataset frame indices are of an unexpected type"))? + .values(); + ( + timeline, + re_chunk::TimeColumn::new(None, timeline, times.clone()), + ) + } else if let Some(timestamps) = data.column_by_name("timestamp") { + let timeline = re_log_types::Timeline::new_duration("timestamp"); + let times: arrow::buffer::ScalarBuffer = timestamps + .downcast_array_ref::() + .ok_or_else(|| anyhow!("LeRobot dataset timestamps are of an unexpected type"))? + .values() + .iter() + .map(|t| re_log_types::Duration::from_secs(*t).as_nanos()) + .collect(); + (timeline, re_chunk::TimeColumn::new(None, timeline, times)) + } else { + return Err( + anyhow!("LeRobot dataset has neither frame_index nor timestamp column").into(), + ); + }; let timelines = std::iter::once((*timeline.name(), time_column.clone())).collect(); let mut chunks = Vec::new(); @@ -438,6 +461,11 @@ impl LeRobotDatasetV3 { // this always refers to the task description in the dataset metadata. chunks.extend(self.log_episode_task(&timeline, &data)?); } + DType::Int64 if feature_key == "subtask_index" => { + // special case int64 subtask_index columns + // this always refers to the subtask description in the dataset metadata. 
+ chunks.extend(self.log_episode_subtask(&timeline, &data)?); + } DType::Int16 | DType::Int64 | DType::Bool | DType::String => { re_log::warn_once!( "Loading LeRobot feature ({feature_key}) of dtype `{:?}` into Rerun is not yet implemented", @@ -457,7 +485,7 @@ impl LeRobotDatasetV3 { &self, timeline: &Timeline, data: &RecordBatch, - ) -> Result + use<>, DataLoaderError> { + ) -> Result + use<>, ImporterError> { let task_indices = data .column_by_name("task_index") .and_then(|c| c.downcast_array_ref::()) @@ -488,6 +516,41 @@ impl LeRobotDatasetV3 { Ok(std::iter::once(chunk.build()?)) } + fn log_episode_subtask( + &self, + timeline: &Timeline, + data: &RecordBatch, + ) -> Result + use<>, ImporterError> { + let subtask_indices = data + .column_by_name("subtask_index") + .and_then(|c| c.downcast_array_ref::()) + .with_context(|| "Failed to get subtask_index field from dataset!")?; + + let mut chunk = Chunk::builder("subtask"); + let mut row_id = RowId::new(); + + for (frame_idx, subtask_index_opt) in subtask_indices.iter().enumerate() { + let Some(subtask_idx) = subtask_index_opt + .and_then(|i| usize::try_from(i).ok()) + .map(SubtaskIndex) + else { + continue; + }; + + if let Some(subtask) = self.subtask_by_index(subtask_idx) { + let frame_idx = i64::try_from(frame_idx) + .map_err(|err| anyhow!("Frame index exceeds max value: {err}"))?; + + let timepoint = TimePoint::default().with(*timeline, frame_idx); + let text = TextDocument::new(subtask.subtask.clone()); + chunk = chunk.with_archetype(row_id, timepoint, &text); + row_id = row_id.next(); + } + } + + Ok(std::iter::once(chunk.build()?)) + } + /// Extract feature-specific timestamp metadata for a given episode and observation. /// /// Returns (`start_time`, `end_time`) in seconds, defaulting to (0.0, 0.0) if not found. 
@@ -510,7 +573,7 @@ impl LeRobotDatasetV3 { episode: EpisodeIndex, timeline: &Timeline, time_column: &TimeColumn, - ) -> Result + use<>, DataLoaderError> { + ) -> Result + use<>, ImporterError> { let contents = self .read_episode_video_contents(observation, episode) .with_context(|| format!("Reading video contents for episode {episode:?} failed!"))?; @@ -532,7 +595,7 @@ impl LeRobotDatasetV3 { let (start_time, end_time) = self.get_feature_timestamps(episode, observation); if video.samples.is_empty() { - return Err(DataLoaderError::Other(anyhow!( + return Err(ImporterError::Other(anyhow!( "Video feature '{observation}' for episode {episode:?} did not contain any samples" ))); } @@ -553,17 +616,17 @@ impl LeRobotDatasetV3 { let end_keyframe = video .presentation_time_keyframe_index(end_video_time) .or_else(|| video.keyframe_indices.len().checked_sub(1)) - .ok_or(DataLoaderError::Other(anyhow!("No keyframes in the video")))?; + .ok_or(ImporterError::Other(anyhow!("No keyframes in the video")))?; // Determine the sample range to extract from the video let start_sample = video .gop_sample_range_for_keyframe(start_keyframe) - .ok_or(DataLoaderError::Other(anyhow!("Bad video data")))? + .ok_or(ImporterError::Other(anyhow!("Bad video data")))? .start; let end_sample = video .gop_sample_range_for_keyframe(end_keyframe) - .ok_or(DataLoaderError::Other(anyhow!("Bad video data")))? + .ok_or(ImporterError::Other(anyhow!("Bad video data")))? 
.end; let sample_range = start_sample..end_sample; @@ -625,12 +688,13 @@ impl LeRobotDatasetV3 { ScalarBuffer::from(uniform_times), ); - let codec = re_sdk_types::components::VideoCodec::try_from(video.codec).map_err(|err| { - anyhow!( - "Unsupported video codec {:?} for feature: '{observation}': {err}", - video.codec - ) - })?; + let codec = + re_sdk_types::components::VideoCodec::try_from(video.codec.clone()).map_err(|err| { + anyhow!( + "Unsupported video codec {:?} for feature: '{observation}': {err}", + video.codec + ) + })?; let codec_chunk = Chunk::builder(entity_path) .with_archetype( @@ -656,7 +720,7 @@ impl LeRobotDataset for LeRobotDatasetV3 { self.metadata.iter_episode_indices() } - fn load_episode_chunks(&self, episode: EpisodeIndex) -> Result, DataLoaderError> { + fn load_episode_chunks(&self, episode: EpisodeIndex) -> Result, ImporterError> { let result = self.load_episode(episode); // Release video blob references for this episode regardless of success or failure to avoid leaking memory if we fail to load an episode after caching its video blobs. @@ -673,6 +737,7 @@ impl LeRobotDataset for LeRobotDatasetV3 { pub struct LeRobotDatasetMetadataV3 { pub info: LeRobotDatasetInfoV3, pub tasks: LeRobotDatasetV3Tasks, + pub subtasks: Option, pub episodes: HashMap, } @@ -703,6 +768,15 @@ impl LeRobotDatasetMetadataV3 { let info = LeRobotDatasetInfoV3::load_from_json_file(metadir.join("info.json"))?; let tasks = LeRobotDatasetV3Tasks::load_from_parquet_file(metadir.join("tasks.parquet"))?; + let subtasks_path = metadir.join("subtasks.parquet"); + let subtasks = if subtasks_path.is_file() { + Some(LeRobotDatasetV3Subtasks::load_from_parquet_file( + subtasks_path, + )?) 
+ } else { + None + }; + // Convert episode data Vec to HashMap for O(1) lookups let episodes = episode_data .into_iter() @@ -712,6 +786,7 @@ impl LeRobotDatasetMetadataV3 { Ok(Self { info, tasks, + subtasks, episodes, }) } @@ -968,7 +1043,7 @@ pub struct LeRobotDatasetInfoV3 { pub image_path: Option, /// The frame rate of the recorded episode data. - pub fps: usize, + pub fps: f32, /// A mapping of feature names to their respective [`Feature`] definitions. pub features: HashMap, @@ -1106,10 +1181,52 @@ impl LeRobotDatasetV3Tasks { } } +pub struct LeRobotDatasetV3Subtasks { + pub subtasks: HashMap, +} + +impl LeRobotDatasetV3Subtasks { + pub fn load_from_parquet_file(filepath: impl AsRef) -> Result { + let filepath = filepath.as_ref().to_owned(); + let parquet_data = + File::open(&filepath).map_err(|err| LeRobotError::io(err, filepath.clone()))?; + + let reader = ParquetRecordBatchReaderBuilder::try_new(parquet_data)?.build()?; + + let subtasks = reader + .filter_map(|record_batch| { + let b = record_batch.ok()?; + let subtask_index_col = b.column_by_name("subtask_index")?; + let subtask_col = b.column_by_name("subtask")?; + let subtask_index = subtask_index_col.as_any().downcast_ref::()?; + let subtask = subtask_col.as_any().downcast_ref::()?; + + let num_rows = b.num_rows(); + Some( + (0..num_rows) + .map(move |i| { + ( + SubtaskIndex(subtask_index.value(i) as usize), + LeRobotDatasetSubtask { + index: SubtaskIndex(subtask_index.value(i) as usize), + subtask: subtask.value(i).to_owned(), + }, + ) + }) + .collect(), + ) + }) + .flat_map(|e: Vec<(SubtaskIndex, LeRobotDatasetSubtask)>| e) + .collect::>(); + + Ok(Self { subtasks }) + } +} + pub fn load_and_stream( dataset: &LeRobotDatasetV3, application_id: &ApplicationId, - tx: &Sender, + tx: &Sender, loader_name: &str, ) { load_and_stream_versioned(dataset, application_id, tx, loader_name); diff --git a/crates/store/re_data_loader/src/lerobot/mod.rs b/crates/store/re_importer/src/lerobot/mod.rs similarity index 90% 
rename from crates/store/re_data_loader/src/lerobot/mod.rs rename to crates/store/re_importer/src/lerobot/mod.rs index 7334853b3d3e..998c60e1de49 100644 --- a/crates/store/re_data_loader/src/lerobot/mod.rs +++ b/crates/store/re_importer/src/lerobot/mod.rs @@ -158,8 +158,9 @@ pub enum DType { /// Name metadata for a feature in the `LeRobot` dataset. /// /// The name metadata can consist of +/// - A single string (e.g., `"img_state_delta"`). /// - A flat list of names for each dimension of a feature (e.g., `["height", "width", "channel"]`). -/// - A nested list of names for each dimension of a feature (e.g., `[[""kLeftShoulderPitch", "kLeftShoulderRoll"]]`) +/// - A nested list of names for each dimension of a feature (e.g., `[["kLeftShoulderPitch", "kLeftShoulderRoll"]]`) /// - A map with a string array value (e.g., `{ "motors": ["motor_0", "motor_1", …] }` or `{ "axes": ["x", "y", "z"] }`). #[derive(Debug, Clone, PartialEq, Eq, Serialize)] pub struct Names(pub(super) Vec); @@ -176,6 +177,7 @@ impl Names { /// Visitor implementation for deserializing the [`Names`] type. 
/// /// Handles multiple representation formats: +/// - Single strings: `"img_state_delta"` /// - Flat string arrays: `["x", "y", "z"]` /// - Nested string arrays: `[["motor_1", "motor_2"]]` /// - Single-entry objects: `{"motors": ["motor_1", "motor_2"]}` or `{"axes": null}` @@ -188,10 +190,18 @@ impl<'de> Visitor<'de> for NamesVisitor { fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { formatter.write_str( - "a flat string array, a nested string array, or a single-entry object with a string array or null value", + "a string, a flat string array, a nested string array, or a single-entry object with a string array or null value", ) } + /// Handle a single string: `"img_state_delta"` + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + Ok(Names(vec![v.to_owned()])) + } + /// Handle sequences: /// - Flat string arrays: `["x", "y", "z"]` /// - Nested string arrays: `[["motor_1", "motor_2"]]` @@ -296,6 +306,11 @@ pub struct EpisodeIndex(pub usize); #[serde(transparent)] pub struct TaskIndex(pub usize); +/// Newtype wrapper for subtask indices. +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[serde(transparent)] +pub struct SubtaskIndex(pub usize); + /// A task in a `LeRobot` dataset. /// /// Each task consists of its index and a task description. @@ -306,7 +321,18 @@ pub struct LeRobotDatasetTask { pub task: String, } -/// Errors that might happen when loading data through a [`crate::loader_lerobot::LeRobotDatasetLoader`]. +/// A subtask in a `LeRobot` dataset. +/// +/// Subtasks break down complex tasks into finer-grained, interpretable steps. +/// Each subtask consists of its index and a subtask description. 
+#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct LeRobotDatasetSubtask { + #[serde(rename = "subtask_index")] + pub index: SubtaskIndex, + pub subtask: String, +} + +/// Errors that might happen when importing data through a [`crate::importer_lerobot::LeRobotDatasetImporter`]. #[derive(thiserror::Error, Debug)] pub enum LeRobotError { #[error("IO error occurred on path: {path}")] @@ -364,6 +390,14 @@ mod tests { use super::*; + #[test] + fn test_deserialize_single_string() { + let json = r#""some_name""#; + let expected = Names(vec!["some_name".to_owned()]); + let names: Names = serde_json::from_str(json).unwrap(); + assert_eq!(names, expected); + } + #[test] fn test_deserialize_flat_list() { let json = r#"["a", "b", "c"]"#; diff --git a/crates/store/re_data_loader/src/lib.rs b/crates/store/re_importer/src/lib.rs similarity index 64% rename from crates/store/re_data_loader/src/lib.rs rename to crates/store/re_importer/src/lib.rs index f65976060e0c..7bd0ec30922f 100644 --- a/crates/store/re_data_loader/src/lib.rs +++ b/crates/store/re_importer/src/lib.rs @@ -1,46 +1,49 @@ -//! Handles loading of Rerun data from file using data loader plugins. +//! Handles importing of Rerun data from file using importer plugins. +use std::collections::BTreeSet; use std::sync::{Arc, LazyLock}; use re_chunk::{Chunk, ChunkResult}; use re_log_types::{ArrowMsg, EntityPath, LogMsg, RecordingId, StoreId, TimePoint}; -use re_mcap::SelectedLayers; // ---------------------------------------------------------------------------- -mod load_file; -mod loader_archetype; -mod loader_directory; -mod loader_rrd; -mod loader_urdf; +mod import_file; +mod importer_archetype; +mod importer_directory; +mod importer_rrd; +mod importer_urdf; #[cfg(not(target_arch = "wasm32"))] pub mod lerobot; -// This loader currently only works when loading the entire dataset directory, and we cannot do that on web yet. 
+// This importer currently only works when loading the entire dataset directory, and we cannot do that on web yet. #[cfg(not(target_arch = "wasm32"))] -pub mod loader_lerobot; +pub mod importer_lerobot; -// This loader currently uses native-only features under the hood, and we cannot do that on web yet. -pub mod loader_mcap; +// This importer currently uses native-only features under the hood, and we cannot do that on web yet. +pub mod importer_mcap; #[cfg(not(target_arch = "wasm32"))] -mod loader_external; - -pub use self::load_file::load_from_file_contents; -pub use self::loader_archetype::ArchetypeLoader; -pub use self::loader_directory::DirectoryLoader; -pub use self::loader_mcap::McapLoader; -pub use self::loader_rrd::RrdLoader; -pub use self::loader_urdf::{UrdfDataLoader, UrdfTree, joint_transform as urdf_joint_transform}; +mod importer_external; +#[cfg(not(target_arch = "wasm32"))] +pub mod importer_parquet; + +pub use self::import_file::{import_from_file_contents, prepare_store_info}; +pub use self::importer_archetype::ArchetypeImporter; +pub use self::importer_directory::DirectoryImporter; +pub use self::importer_mcap::McapImporter; +pub use self::importer_rrd::RrdImporter; +pub use self::importer_urdf::{UrdfImporter, UrdfTree, joint_transform as urdf_joint_transform}; #[cfg(not(target_arch = "wasm32"))] pub use self::{ - load_file::load_from_path, - loader_external::{ - EXTERNAL_DATA_LOADER_INCOMPATIBLE_EXIT_CODE, EXTERNAL_DATA_LOADER_PREFIX, ExternalLoader, - iter_external_loaders, + import_file::import_from_path, + importer_external::{ + EXTERNAL_IMPORTER_INCOMPATIBLE_EXIT_CODE, EXTERNAL_IMPORTER_PREFIX, ExternalImporter, + iter_external_importers, }, - loader_lerobot::LeRobotDatasetLoader, + importer_lerobot::LeRobotDatasetImporter, + importer_parquet::ParquetImporter, }; pub mod external { @@ -49,11 +52,39 @@ pub mod external { // ---------------------------------------------------------------------------- -/// Recommended settings for the 
[`DataLoader`]. +/// The identifier used to enable or disable Foxglove lenses when loading MCAP files. +pub const FOXGLOVE_LENSES_IDENTIFIER: &str = "foxglove"; + +/// The identifier used to enable or disable URDF extraction from MCAP `robot_description` topics. +pub const URDF_DECODER_IDENTIFIER: &str = "urdf"; + +/// All decoder-like identifiers supported by [`McapImporter`]. +/// +/// This merges the built-in MCAP decoders from [`re_mcap`] and the semantic interpretation (e.g. lenses) that are in this crate. +pub fn supported_mcap_decoder_identifiers( + raw_fallback_enabled: bool, +) -> Vec { + let mut identifiers = re_mcap::DecoderRegistry::all_builtin(raw_fallback_enabled) + .all_identifiers() + .into_iter() + .map(re_mcap::DecoderIdentifier::from) + .collect::>(); + + identifiers.extend([ + re_mcap::DecoderIdentifier::from(FOXGLOVE_LENSES_IDENTIFIER), + re_mcap::DecoderIdentifier::from(URDF_DECODER_IDENTIFIER), + ]); + + identifiers.into_iter().collect() +} + +// ---------------------------------------------------------------------------- + +/// Recommended settings for the [`Importer`]. /// -/// The loader is free to ignore some or all of these. +/// The importer is free to ignore some or all of these. /// -/// External [`DataLoader`]s will be passed the following CLI parameters: +/// External [`Importer`]s will be passed the following CLI parameters: /// * `--application-id ` /// * `--opened-application-id ` (if set) /// * `--recording-id ` @@ -65,14 +96,14 @@ pub mod external { /// * `--time_duration_nanos = = ...` (if `timepoint` contains duration data) in nanos /// * `--time_timestamp_nanos = = ...` (if `timepoint` contains timestamp data) in nanos since epoch #[derive(Debug, Clone)] -pub struct DataLoaderSettings { +pub struct ImporterSettings { /// The recommended [`re_log_types::ApplicationId`] to log the data to, based on the surrounding context. 
pub application_id: Option, /// The recommended recording id to log the data to, based on the surrounding context. /// /// Log data to this recording if you want it to appear in a new recording shared by all - /// data-loaders for the current loading session. + /// importers for the current loading session. pub recording_id: RecordingId, /// The [`re_log_types::StoreId`] that is currently opened in the viewer, if any. @@ -94,9 +125,19 @@ pub struct DataLoaderSettings { /// /// Defaults to `false`. pub follow: bool, + + /// If set, an offset in nanoseconds to add to all `TimestampNs` time columns. + pub timestamp_offset_ns: Option, + + /// The timeline type to use for timestamp timelines. + /// + /// Defaults to [`re_log_types::TimeType::TimestampNs`]. + /// When set to [`re_log_types::TimeType::DurationNs`], all timestamp timelines + /// will be created as duration timelines instead. + pub timeline_type: re_log_types::TimeType, } -impl DataLoaderSettings { +impl ImporterSettings { #[inline] pub fn recommended(recording_id: impl Into) -> Self { Self { @@ -107,6 +148,8 @@ impl DataLoaderSettings { entity_path_prefix: None, timepoint: None, follow: false, + timestamp_offset_ns: None, + timeline_type: re_log_types::TimeType::TimestampNs, } } @@ -128,7 +171,7 @@ impl DataLoaderSettings { .unwrap_or_else(|| self.recommended_store_id()) } - /// Generates CLI flags from these settings, for external data loaders. + /// Generates CLI flags from these settings, for external importers. pub fn to_cli_args(&self) -> Vec { let Self { application_id, @@ -138,6 +181,8 @@ impl DataLoaderSettings { entity_path_prefix, timepoint, follow: _, + timestamp_offset_ns: _, + timeline_type: _, } = self; let mut args = Vec::new(); @@ -215,51 +260,51 @@ impl DataLoaderSettings { } } -pub type DataLoaderName = String; +pub type ImporterName = String; -/// A [`DataLoader`] loads data from a file path and/or a file's contents. 
+/// An [`Importer`] imports data from a file path and/or a file's contents. /// -/// Files can be loaded in 3 different ways: +/// Files can be imported in 3 different ways: /// - via the Rerun CLI (`rerun myfile.jpeg`), /// - using drag-and-drop, /// - using the open dialog in the Rerun Viewer. /// -/// All these file loading methods support loading a single file, many files at once, or even +/// All these file importing methods support importing a single file, many files at once, or even /// folders. /// ⚠ Drag-and-drop of folders does not yet work on the web version of Rerun Viewer ⚠ /// -/// We only support loading files from the local filesystem at the moment, and consequently only +/// We only support importing files from the local filesystem at the moment, and consequently only /// accept filepaths as input. /// [There are plans to make this generic over any URI](https://github.com/rerun-io/rerun/issues/4525). /// -/// Rerun comes with a few [`DataLoader`]s by default: -/// - [`RrdLoader`] for [Rerun files]. -/// - [`ArchetypeLoader`] for: +/// Rerun comes with a few [`Importer`]s by default: +/// - [`RrdImporter`] for [Rerun files]. +/// - [`ArchetypeImporter`] for: /// - [3D models] /// - [Images] /// - [Point clouds] /// - [Text files] -/// - [`DirectoryLoader`] for recursively loading folders. -/// - [`ExternalLoader`], which looks for user-defined data loaders in $PATH. +/// - [`DirectoryImporter`] for recursively importing folders. +/// - [`ExternalImporter`], which looks for user-defined importers in $PATH. /// -/// ## Registering custom loaders +/// ## Registering custom importers /// -/// Checkout our [guide](https://www.rerun.io/docs/reference/data-loaders/overview). +/// Checkout our [guide](https://www.rerun.io/docs/concepts/logging-and-ingestion/importers/overview?speculative-link). /// /// ## Execution /// -/// **All** known [`DataLoader`]s get called when a user tries to open a file, unconditionally. 
-/// This gives [`DataLoader`]s maximum flexibility to decide what files they are interested in, as +/// **All** known [`Importer`]s get called when a user tries to open a file, unconditionally. +/// This gives [`Importer`]s maximum flexibility to decide what files they are interested in, as /// opposed to e.g. only being able to look at files' extensions. /// -/// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible -/// with a [`DataLoaderError::Incompatible`] error. +/// If an [`Importer`] has no interest in the given file, it should fail as soon as possible +/// with a [`ImporterError::Incompatible`] error. /// -/// Iff all [`DataLoader`]s (including custom and external ones) return with a [`DataLoaderError::Incompatible`] +/// Iff all [`Importer`]s (including custom and external ones) return with a [`ImporterError::Incompatible`] /// error, the Viewer will show an error message to the user indicating that the file type is not /// supported. /// -/// On native, [`DataLoader`]s are executed in parallel. +/// On native, [`Importer`]s are executed in parallel. /// /// [Rerun files]: crate::SUPPORTED_RERUN_EXTENSIONS /// [3D models]: crate::SUPPORTED_MESH_EXTENSIONS @@ -267,20 +312,20 @@ pub type DataLoaderName = String; /// [Point clouds]: crate::SUPPORTED_POINT_CLOUD_EXTENSIONS /// [Text files]: crate::SUPPORTED_TEXT_EXTENSIONS // -// TODO(#4525): `DataLoader`s should support arbitrary URIs -// TODO(#4527): Web Viewer `?url` parameter should accept anything our `DataLoader`s support -pub trait DataLoader: Send + Sync { - /// Name of the [`DataLoader`]. +// TODO(#4525): `Importer`s should support arbitrary URIs +// TODO(#4527): Web Viewer `?url` parameter should accept anything our `Importer`s support +pub trait Importer: Send + Sync { + /// Name of the [`Importer`]. /// /// Should be globally unique. 
- fn name(&self) -> DataLoaderName; + fn name(&self) -> ImporterName; - /// Loads data from a file on the local filesystem and sends it to `tx`. + /// Imports data from a file on the local filesystem and sends it to `tx`. /// /// This is generally called when opening files with the Rerun CLI or via the open menu in the /// Rerun Viewer on native platforms. /// - /// The passed-in `store_id` is a shared recording created by the file loading machinery: + /// The passed-in `store_id` is a shared recording created by the file importing machinery: /// implementers can decide to use it or not (e.g. it might make sense to log all images with a /// similar name in a shared recording, while an rrd file is already its own recording). /// @@ -289,27 +334,27 @@ pub trait DataLoader: Send + Sync { /// /// ## Error handling /// - /// Most implementers of `load_from_path` are expected to be asynchronous in nature. + /// Most implementers of `import_from_path` are expected to be asynchronous in nature. /// /// Asynchronous implementers should make sure to fail early (and thus synchronously) when /// possible (e.g. didn't even manage to open the file). /// Otherwise, they should log errors that happen in an asynchronous context. /// - /// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible - /// with a [`DataLoaderError::Incompatible`] error. + /// If an [`Importer`] has no interest in the given file, it should fail as soon as possible + /// with a [`ImporterError::Incompatible`] error. #[cfg(not(target_arch = "wasm32"))] - fn load_from_path( + fn import_from_path( &self, - settings: &DataLoaderSettings, + settings: &ImporterSettings, path: std::path::PathBuf, - tx: crossbeam::channel::Sender, - ) -> Result<(), DataLoaderError>; + tx: crossbeam::channel::Sender, + ) -> Result<(), ImporterError>; - /// Loads data from in-memory file contents and sends it to `tx`. + /// Imports data from in-memory file contents and sends it to `tx`. 
/// /// This is generally called when opening files via drag-and-drop or when using the web viewer. /// - /// The passed-in `store_id` is a shared recording created by the file loading machinery: + /// The passed-in `store_id` is a shared recording created by the file importing machinery: /// implementers can decide to use it or not (e.g. it might make sense to log all images with a /// similar name in a shared recording, while an rrd file is already its own recording). /// @@ -322,26 +367,26 @@ pub trait DataLoader: Send + Sync { /// /// ## Error handling /// - /// Most implementers of `load_from_file_contents` are expected to be asynchronous in nature. + /// Most implementers of `import_from_file_contents` are expected to be asynchronous in nature. /// /// Asynchronous implementers should make sure to fail early (and thus synchronously) when /// possible (e.g. didn't even manage to open the file). /// Otherwise, they should log errors that happen in an asynchronous context. /// - /// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible - /// with a [`DataLoaderError::Incompatible`] error. - fn load_from_file_contents( + /// If an [`Importer`] has no interest in the given file, it should fail as soon as possible + /// with a [`ImporterError::Incompatible`] error. + fn import_from_file_contents( &self, - settings: &DataLoaderSettings, + settings: &ImporterSettings, filepath: std::path::PathBuf, contents: std::borrow::Cow<'_, [u8]>, - tx: crossbeam::channel::Sender, - ) -> Result<(), DataLoaderError>; + tx: crossbeam::channel::Sender, + ) -> Result<(), ImporterError>; } -/// Errors that might happen when loading data through a [`DataLoader`]. +/// Errors that might happen when importing data through an [`Importer`]. 
#[derive(thiserror::Error, Debug)] -pub enum DataLoaderError { +pub enum ImporterError { #[cfg(not(target_arch = "wasm32"))] #[error(transparent)] IO(#[from] std::io::Error), @@ -355,7 +400,7 @@ pub enum DataLoaderError { #[error(transparent)] Decode(#[from] re_log_encoding::DecodeError), - #[error("No data-loader support for {0:?}")] + #[error("No importer support for {0:?}")] Incompatible(std::path::PathBuf), #[error(transparent)] @@ -365,7 +410,7 @@ pub enum DataLoaderError { Other(#[from] anyhow::Error), } -impl DataLoaderError { +impl ImporterError { #[inline] pub fn is_path_not_found(&self) -> bool { match self { @@ -381,22 +426,22 @@ impl DataLoaderError { } } -/// What [`DataLoader`]s load. +/// What [`Importer`]s produce. /// -/// This makes it trivial for [`DataLoader`]s to build the data in whatever form is +/// This makes it trivial for [`Importer`]s to build the data in whatever form is /// most convenient for them, whether it is raw components, arrow chunks or even /// full-on [`LogMsg`]s. #[derive(Debug)] -pub enum LoadedData { - Chunk(DataLoaderName, re_log_types::StoreId, Chunk), - ArrowMsg(DataLoaderName, re_log_types::StoreId, ArrowMsg), - LogMsg(DataLoaderName, LogMsg), +pub enum ImportedData { + Chunk(ImporterName, re_log_types::StoreId, Chunk), + ArrowMsg(ImporterName, re_log_types::StoreId, ArrowMsg), + LogMsg(ImporterName, LogMsg), } -impl LoadedData { - /// Returns the name of the [`DataLoader`] that generated this data. +impl ImportedData { + /// Returns the name of the [`Importer`] that generated this data. #[inline] - pub fn data_loader_name(&self) -> &DataLoaderName { + pub fn importer_name(&self) -> &ImporterName { match self { Self::Chunk(name, ..) | Self::ArrowMsg(name, ..) | Self::LogMsg(name, ..) => name, } @@ -415,49 +460,63 @@ impl LoadedData { Self::LogMsg(_name, msg) => Ok(msg), } } + + /// Convert the data into a [`Chunk`], ignoring all non-chunk-related things. 
+ pub fn into_chunk(self) -> Option { + match self { + Self::Chunk(_name, _store_id, chunk) => Some(chunk), + Self::ArrowMsg(_name, _store_id, arrow_msg) => Chunk::from_arrow_msg(&arrow_msg).ok(), + Self::LogMsg(_name, msg) => match msg { + LogMsg::ArrowMsg(_store_id, arrow_msg) => Chunk::from_arrow_msg(&arrow_msg).ok(), + LogMsg::SetStoreInfo { .. } | LogMsg::BlueprintActivationCommand { .. } => None, + }, + } + } } // ---------------------------------------------------------------------------- -/// Keeps track of all builtin [`DataLoader`]s. +/// Keeps track of all builtin [`Importer`]s. /// /// Lazy initialized the first time a file is opened. -static BUILTIN_LOADERS: LazyLock>> = LazyLock::new(|| { +static BUILTIN_IMPORTERS: LazyLock>> = LazyLock::new(|| { vec![ - Arc::new(RrdLoader) as Arc, - Arc::new(ArchetypeLoader), - Arc::new(DirectoryLoader), - Arc::new(McapLoader::new(SelectedLayers::All)), + Arc::new(RrdImporter) as Arc, + Arc::new(ArchetypeImporter), + Arc::new(DirectoryImporter), + Arc::new(McapImporter::default()), #[cfg(not(target_arch = "wasm32"))] - Arc::new(LeRobotDatasetLoader), + Arc::new(ParquetImporter::default()), #[cfg(not(target_arch = "wasm32"))] - Arc::new(ExternalLoader), - Arc::new(UrdfDataLoader), + Arc::new(LeRobotDatasetImporter), + #[cfg(not(target_arch = "wasm32"))] + Arc::new(ExternalImporter), + Arc::new(UrdfImporter), ] }); -/// Iterator over all registered [`DataLoader`]s. +/// Iterator over all registered [`Importer`]s. #[inline] -pub fn iter_loaders() -> impl Iterator> { - BUILTIN_LOADERS +pub fn iter_importers() -> impl Iterator> { + BUILTIN_IMPORTERS .clone() .into_iter() - .chain(CUSTOM_LOADERS.read().clone()) + .chain(CUSTOM_IMPORTERS.read().clone()) } -/// Keeps track of all custom [`DataLoader`]s. +/// Keeps track of all custom [`Importer`]s. /// -/// Use [`register_custom_data_loader`] to add new loaders. -static CUSTOM_LOADERS: LazyLock>>> = +/// Use [`register_custom_importer`] to add new importers. 
+static CUSTOM_IMPORTERS: LazyLock>>> = LazyLock::new(parking_lot::RwLock::default); -/// Register a custom [`DataLoader`]. +/// Register a custom [`Importer`]. /// -/// Any time the Rerun Viewer opens a file or directory, this custom loader will be notified. -/// Refer to [`DataLoader`]'s documentation for more information. +/// Any time the Rerun Viewer opens a file or directory, this custom importer will be notified. +/// Refer to [`Importer`]'s documentation for more information. #[inline] -pub fn register_custom_data_loader(loader: impl DataLoader + 'static) { - CUSTOM_LOADERS.write().push(Arc::new(loader)); +pub fn register_custom_importer(importer: impl Importer + 'static) { + CUSTOM_IMPORTERS.write().push(Arc::new(importer)); } // ---------------------------------------------------------------------------- @@ -474,7 +533,7 @@ pub(crate) fn extension(path: &std::path::Path) -> String { // ---------------------------------------------------------------------------- -// …given that all feature flags are turned on for the `image` crate. +// ...given that all feature flags are turned on for the `image` crate. pub const SUPPORTED_IMAGE_EXTENSIONS: &[&str] = &[ "avif", "bmp", "dds", "exr", "farbfeld", "ff", "gif", "hdr", "ico", "jpeg", "jpg", "pam", "pbm", "pgm", "png", "ppm", "tga", "tif", "tiff", "webp", @@ -486,7 +545,7 @@ pub const SUPPORTED_VIDEO_EXTENSIONS: &[&str] = &["mp4"]; pub const SUPPORTED_MESH_EXTENSIONS: &[&str] = &["glb", "gltf", "obj", "stl", "dae"]; -// TODO(#4532): `.ply` data loader should support 2D point cloud & meshes +// TODO(#4532): `.ply` importer should support 2D point cloud & meshes pub const SUPPORTED_POINT_CLOUD_EXTENSIONS: &[&str] = &["ply"]; pub const SUPPORTED_RERUN_EXTENSIONS: &[&str] = &["rbl", "rrd"]; @@ -494,10 +553,12 @@ pub const SUPPORTED_RERUN_EXTENSIONS: &[&str] = &["rbl", "rrd"]; /// 3rd party formats with built-in support. 
pub const SUPPORTED_THIRD_PARTY_FORMATS: &[&str] = &["mcap", "urdf"]; -// TODO(#4555): Add catch-all builtin `DataLoader` for text files +pub const SUPPORTED_PARQUET_EXTENSIONS: &[&str] = &["parquet"]; + +// TODO(#4555): Add catch-all builtin `Importer` for text files pub const SUPPORTED_TEXT_EXTENSIONS: &[&str] = &["txt", "md"]; -/// All file extension supported by our builtin [`DataLoader`]s. +/// All file extension supported by our builtin [`Importer`]s. pub fn supported_extensions() -> impl Iterator { SUPPORTED_RERUN_EXTENSIONS .iter() @@ -507,11 +568,12 @@ pub fn supported_extensions() -> impl Iterator { .chain(SUPPORTED_VIDEO_EXTENSIONS) .chain(SUPPORTED_MESH_EXTENSIONS) .chain(SUPPORTED_POINT_CLOUD_EXTENSIONS) + .chain(SUPPORTED_PARQUET_EXTENSIONS) .chain(SUPPORTED_TEXT_EXTENSIONS) .copied() } -/// Is this a supported file extension by any of our builtin [`DataLoader`]s? +/// Is this a supported file extension by any of our builtin [`Importer`]s? pub fn is_supported_file_extension(extension: &str) -> bool { re_log::debug_assert!( !extension.starts_with('.'), @@ -559,6 +621,26 @@ fn test_supported_extensions() { assert!(is_supported_file_extension("urdf")); } +#[test] +fn test_supported_mcap_decoder_identifiers() { + let identifiers = supported_mcap_decoder_identifiers(true); + let as_strings = identifiers + .iter() + .map(ToString::to_string) + .collect::>(); + + // Check that expected identifiers are present. + assert!(as_strings.contains(&FOXGLOVE_LENSES_IDENTIFIER.to_owned())); + assert!(as_strings.contains(&URDF_DECODER_IDENTIFIER.to_owned())); + assert!(as_strings.contains(&"raw".to_owned())); + assert!(as_strings.contains(&"protobuf".to_owned())); + assert!(as_strings.contains(&"ros2msg".to_owned())); + + // Check that all identifiers are unique. 
+ let unique = as_strings.iter().collect::>(); + assert_eq!(as_strings.len(), unique.len()); +} + #[test] fn test_detect_format_from_bytes() { assert_eq!( diff --git a/crates/store/re_data_loader/tests/.gitattributes b/crates/store/re_importer/tests/.gitattributes similarity index 100% rename from crates/store/re_data_loader/tests/.gitattributes rename to crates/store/re_importer/tests/.gitattributes diff --git a/crates/store/re_data_loader/tests/assets/supported_ros2_messages.mcap b/crates/store/re_importer/tests/assets/supported_ros2_messages.mcap similarity index 100% rename from crates/store/re_data_loader/tests/assets/supported_ros2_messages.mcap rename to crates/store/re_importer/tests/assets/supported_ros2_messages.mcap diff --git a/crates/store/re_importer/tests/snapshots/test_mcap_importer__tests__ros2.snap b/crates/store/re_importer/tests/snapshots/test_mcap_importer__tests__ros2.snap new file mode 100644 index 000000000000..baaa1950ba8c --- /dev/null +++ b/crates/store/re_importer/tests/snapshots/test_mcap_importer__tests__ros2.snap @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:558daa716ab1f9268a48789df5286aefad58fba2008390277ec0268d8dbb03fd +size 243162 diff --git a/crates/store/re_data_loader/tests/test_mcap_loader.rs b/crates/store/re_importer/tests/test_mcap_importer.rs similarity index 66% rename from crates/store/re_data_loader/tests/test_mcap_loader.rs rename to crates/store/re_importer/tests/test_mcap_importer.rs index 7ed70cf8ce56..8635ff296586 100644 --- a/crates/store/re_data_loader/tests/test_mcap_loader.rs +++ b/crates/store/re_importer/tests/test_mcap_importer.rs @@ -4,10 +4,9 @@ mod tests { use re_chunk::Chunk; use re_chunk_store::{ChunkStore, ChunkStoreConfig, ChunkStoreHandle}; - use re_data_loader::loader_mcap::load_mcap; - use re_data_loader::{DataLoaderSettings, LoadedData}; + use re_importer::importer_mcap::McapImporter; + use re_importer::{ImportedData, ImporterSettings}; use re_log_types::StoreId; - use 
re_mcap::layers::SelectedLayers; // Load an MCAP file into a list of chunks. fn load_mcap_chunks(path: impl AsRef) -> Vec { @@ -15,40 +14,27 @@ mod tests { println!("Loading MCAP file: {}", path.display()); let mcap_data = std::fs::read(path).unwrap(); let (tx, rx) = crossbeam::channel::bounded(1024); - let settings = DataLoaderSettings::recommended("test"); - load_mcap( - &mcap_data, - &settings, - &tx, - &SelectedLayers::All, - false, - None, - ) - .unwrap(); + let settings = ImporterSettings::recommended("test"); + McapImporter::default() + .with_raw_fallback(false) + .load_and_send(&mcap_data, &settings, &tx) + .unwrap(); drop(tx); // Collect chunks - rx.iter() - .filter_map(|res| { - if let LoadedData::Chunk(_, _, chunk) = res { - Some(chunk) - } else { - None - } - }) - .collect() + rx.iter().filter_map(ImportedData::into_chunk).collect() } // TODO(grtlr): This should be something like a snippet / backwards-compatibility test, but // we don't really have the infrastructure for this yet and we already test a different // MCAP file in snippets. 
#[test] - fn test_mcap_loader_ros2() { + fn test_mcap_importer_ros2() { let chunks = load_mcap_chunks("tests/assets/supported_ros2_messages.mcap"); // Create a ChunkStore and ChunkStoreHandle let store = ChunkStore::new( - StoreId::random(re_log_types::StoreKind::Recording, "test_mcap_loader"), + StoreId::random(re_log_types::StoreKind::Recording, "test_mcap_importer"), ChunkStoreConfig::default(), ); let store_handle = ChunkStoreHandle::new(store); @@ -62,7 +48,7 @@ mod tests { } // Extract and snapshot the schema - let schema = store_handle.read().schema(); + let schema = store_handle.read().schema().chunk_column_descriptors(); insta::assert_debug_snapshot!("ros2", schema); } } diff --git a/crates/store/re_lenses/Cargo.toml b/crates/store/re_lenses/Cargo.toml index b837c734d17b..b541f25f10ef 100644 --- a/crates/store/re_lenses/Cargo.toml +++ b/crates/store/re_lenses/Cargo.toml @@ -18,14 +18,8 @@ workspace = true all-features = true [dependencies] -re_arrow_combinators.workspace = true -re_arrow_util.workspace = true -re_chunk.workspace = true -re_log_types.workspace = true +re_lenses_core.workspace = true +re_log.workspace = true re_sdk_types.workspace = true arrow.workspace = true -itertools.workspace = true -nohash-hasher.workspace = true -thiserror.workspace = true -vec1.workspace = true diff --git a/crates/store/re_lenses/src/ast.rs b/crates/store/re_lenses/src/ast.rs deleted file mode 100644 index 328d0b92fd0d..000000000000 --- a/crates/store/re_lenses/src/ast.rs +++ /dev/null @@ -1,816 +0,0 @@ -//! Private module with the AST-like definitions of lenses. -//! -//! **Note**: Apart from high-level entry points (like [`Op`] and [`Lens`], -//! we should not leak these elements into the public API. This allows us to -//! evolve the definition of lenses over time, if requirements change. 
- -use std::str::FromStr as _; - -use arrow::array::{AsArray as _, Int64Array, ListArray}; -use arrow::compute::take; -use arrow::datatypes::DataType; -use itertools::Either; -use nohash_hasher::IntMap; -use re_arrow_combinators::{Selector, Transform as _}; -use re_arrow_combinators::{map, reshape}; -use re_chunk::{ - ArrowArray as _, Chunk, ChunkId, ComponentIdentifier, EntityPath, TimeColumn, Timeline, - TimelineName, -}; -use re_log_types::{EntityPathFilter, TimeType}; -use re_sdk_types::{ComponentDescriptor, SerializedComponentColumn}; -use vec1::Vec1; - -use crate::semantic; - -use crate::LensError; -use crate::builder::LensBuilder; -use crate::op::{self, OpError}; - -pub struct InputColumn { - pub entity_path_filter: EntityPathFilter, - pub component: ComponentIdentifier, -} - -/// Target entity path for lens outputs. -#[derive(Debug, Clone, Default)] -pub enum TargetEntity { - /// Use the matched input entity path. - #[default] - SameAsInput, - - /// Use a specific entity path. - Explicit(EntityPath), -} - -/// A component output. -/// -/// Depending on the context in which this output is used, the result from -/// applying the `ops` should be a list array (1:1) or a list array of list arrays (1:N). -#[derive(Debug)] -pub struct ComponentOutput { - pub component_descr: ComponentDescriptor, - pub ops: Vec, -} - -/// A time extraction output. -#[derive(Debug)] -pub struct TimeOutput { - pub timeline_name: TimelineName, - pub timeline_type: TimeType, - pub ops: Vec, -} - -#[derive(Debug)] -/// Each input row produces exactly one output row (1:1 mapping). -/// -/// Outputs inherit times from the input chunk. -pub struct OneToOne { - pub target_entity: TargetEntity, - - /// Component columns that will be created. - pub components: Vec1, - - /// Time columns that will be created. - pub times: Vec, -} - -#[derive(Debug)] -/// Each input row produces multiple output rows (1:N flat-map). -/// -/// Outputs inherit times from the input chunk. 
-pub struct OneToMany { - pub target_entity: TargetEntity, - - /// Component columns that will be created. - pub components: Vec1, - - /// Time columns that will be created. - pub times: Vec, -} - -#[derive(Debug)] -/// Static lens: outputs have no timelines (timeless data). -/// -/// In many cases, static lenses will omit the input column entirely. -pub struct Static { - pub target_entity: TargetEntity, - - /// Component columns that will be created. - pub components: Vec1, -} - -/// Determines how a lens transforms input rows to output rows. -#[derive(Debug)] -pub enum LensKind { - Columns(OneToOne), - ScatterColumns(OneToMany), - StaticColumns(Static), -} - -type CustomFn = Box Result + Sync + Send>; - -/// Provides commonly used transformations of component columns. -/// -/// Individual operations are wrapped to hide their implementation details. -#[non_exhaustive] -pub enum Op { - /// Selector operation using jq-like syntax for navigating and transforming Arrow data. - /// - /// The selector query string is parsed at execution time. - Selector(String), - - /// Converts binary arrays to list arrays of `u8`. - BinaryToListUInt8, - - /// Efficiently casts a component to a new `DataType`. - Cast(op::Cast), - - /// Converts video codec strings to Rerun `VideoCodec` enum values (as `u32`). - StringToVideoCodecUInt32, - - /// Prepends a prefix to each string value, including empty strings. - StringPrefix(String), - - /// Prepends a prefix to each non-empty string value, leaving empty strings unchanged. - StringPrefixNonEmpty(String), - - /// Appends a suffix to each string value, including empty strings. - StringSuffix(String), - - /// Appends a suffix to each non-empty string value, leaving empty strings unchanged. - StringSuffixNonEmpty(String), - - /// Converts timestamp structs with `seconds` and `nanos` fields to total nanoseconds. - TimeSpecToNanos, - - /// A user-defined arbitrary function to convert a component column. 
- Func(CustomFn), -} - -impl std::fmt::Debug for Op { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Selector(query) => f.debug_tuple("Selector").field(query).finish(), - Self::BinaryToListUInt8 => f.debug_struct("BinaryToListUInt8").finish(), - Self::Cast(inner) => f.debug_tuple("Cast").field(inner).finish(), - Self::StringToVideoCodecUInt32 => f.debug_struct("StringToVideoCodecUInt32").finish(), - Self::StringPrefix(prefix) => f.debug_tuple("StringPrefix").field(prefix).finish(), - Self::StringPrefixNonEmpty(prefix) => { - f.debug_tuple("StringPrefixNonEmpty").field(prefix).finish() - } - Self::StringSuffix(suffix) => f.debug_tuple("StringSuffix").field(suffix).finish(), - Self::StringSuffixNonEmpty(suffix) => { - f.debug_tuple("StringSuffixNonEmpty").field(suffix).finish() - } - Self::TimeSpecToNanos => f.debug_struct("TimeSpecToNanos").finish(), - Self::Func(_) => f.debug_tuple("Func").field(&"").finish(), - } - } -} - -impl From<&str> for Op { - fn from(value: &str) -> Self { - Self::Selector(value.to_owned()) - } -} - -impl Op { - /// Creates a selector operation from a query string. - /// - /// The selector uses jq-like syntax for navigating and transforming Arrow data. - /// The query string is parsed at execution time. - /// - /// # Examples - /// - /// - `.field` - Access a field in a struct - /// - `.parent.child` - Access nested fields - /// - `.array[]` - Explode/flatten an array into multiple rows - /// - `.array[].field` - Explode array and access a field in each element - pub fn selector(query: impl Into) -> Self { - Self::Selector(query.into()) - } - - /// Converts binary arrays to list arrays of `u8`. - pub fn binary_to_list_uint8() -> Self { - Self::BinaryToListUInt8 - } - - /// Efficiently casts a component to a new `DataType`. - pub fn cast(data_type: DataType) -> Self { - Self::Cast(op::Cast { - to_inner_type: data_type, - }) - } - - /// Ignores any input and returns a constant `ListArray`. 
- /// - /// Commonly used with [`LensBuilder::output_static_columns`]. - /// When used in non-static columns this function will _not_ guarantee the correct amount of rows. - pub fn constant(value: ListArray) -> Self { - Self::func(move |_| Ok(value.clone())) - } - - /// Converts video codec strings to Rerun `VideoCodec` enum values (as `u32`). - pub fn string_to_video_codec() -> Self { - Self::StringToVideoCodecUInt32 - } - - /// Prepends a prefix to each string value, including empty strings. - pub fn string_prefix(prefix: impl Into) -> Self { - Self::StringPrefix(prefix.into()) - } - - /// Prepends a prefix to each non-empty string value, leaving empty strings unchanged. - pub fn string_prefix_nonempty(prefix: impl Into) -> Self { - Self::StringPrefixNonEmpty(prefix.into()) - } - - /// Appends a suffix to each string value, including empty strings. - pub fn string_suffix(suffix: impl Into) -> Self { - Self::StringSuffix(suffix.into()) - } - - /// Appends a suffix to each non-empty string value, leaving empty strings unchanged. - pub fn string_suffix_nonempty(suffix: impl Into) -> Self { - Self::StringSuffixNonEmpty(suffix.into()) - } - - /// Converts timestamp structs with `seconds` and `nanos` fields to total nanoseconds. - pub fn time_spec_to_nanos() -> Self { - Self::TimeSpecToNanos - } - - /// A user-defined arbitrary function to convert a component column. 
- pub fn func(func: F) -> Self - where - F: for<'a> Fn(&'a ListArray) -> Result + Send + Sync + 'static, - { - Self::Func(Box::new(func)) - } -} - -impl Op { - fn call(&self, list_array: &ListArray) -> Result { - match self { - Self::Selector(query) => { - let selector = Selector::from_str(query)?; - selector.transform(list_array).map_err(Into::into) - } - Self::Cast(op) => op.call(list_array), - Self::BinaryToListUInt8 => map::MapList::new(semantic::BinaryToListUInt8::::new()) - .transform(list_array) - .map_err(Into::into), - Self::StringToVideoCodecUInt32 => { - map::MapList::new(semantic::StringToVideoCodecUInt32::default()) - .transform(list_array) - .map_err(Into::into) - } - Self::StringPrefix(prefix) => map::MapList::new(map::StringPrefix::new(prefix.clone())) - .transform(list_array) - .map_err(Into::into), - Self::StringPrefixNonEmpty(prefix) => map::MapList::new( - map::StringPrefix::new(prefix.clone()).with_prefix_empty_string(false), - ) - .transform(list_array) - .map_err(Into::into), - Self::StringSuffix(suffix) => map::MapList::new(map::StringSuffix::new(suffix.clone())) - .transform(list_array) - .map_err(Into::into), - Self::StringSuffixNonEmpty(suffix) => map::MapList::new( - map::StringSuffix::new(suffix.clone()).with_suffix_empty_string(false), - ) - .transform(list_array) - .map_err(Into::into), - Self::TimeSpecToNanos => map::MapList::new(semantic::TimeSpecToNanos::default()) - .transform(list_array) - .map_err(Into::into), - Self::Func(func) => func(list_array), - } - } -} - -/// A lens that transforms component data from one form to another. -/// -/// Lenses allow you to extract, transform, and restructure component data. They -/// are applied to chunks that match the specified entity path filter and contain -/// the target component. -/// -/// # Assumptions -/// -/// Works on component columns within a chunk. 
Because what goes into a chunk -/// is non-deterministic, and dependent on the batcher, no assumptions should be -/// made for values across rows. -pub struct Lens { - pub(crate) input: InputColumn, - pub(crate) outputs: Vec, -} - -impl Lens { - /// Returns a new [`LensBuilder`] with the given input column. - /// - /// By default, creates a one-to-one (temporal) lens. Call `.with_static()` or `.with_to_many()` - /// on the builder to switch to a different mode. - pub fn for_input_column( - entity_path_filter: EntityPathFilter, - component: impl Into, - ) -> LensBuilder { - LensBuilder::new(entity_path_filter, component) - } - - /// Applies this lens and creates one or more chunks. - fn apply(&self, chunk: &Chunk) -> impl Iterator> { - let found = chunk.components().get(self.input.component); - - // This means we drop chunks that belong to the same entity but don't have the component. - let Some(column) = found else { - return Either::Left(std::iter::empty()); - }; - - Either::Right(self.outputs.iter().map(|output| match output { - LensKind::Columns(one_to_one) => one_to_one.apply(chunk, column), - LensKind::StaticColumns(static_columns) => static_columns.apply(chunk, column), - LensKind::ScatterColumns(one_to_many) => one_to_many.apply(chunk, column), - })) - } -} - -/// An optional [`Chunk`] that only contains the component and time columns that we were able to compute. -/// -/// Also contains a list of contextualized errors that describe which columns failed. -#[derive(Debug)] -pub struct PartialChunk { - /// [`Self`] is only used in an [`Result::Err`] variant. - /// - /// We therefore box the actual payload to keep the happy path optimized. - inner: Box, -} - -#[derive(Debug)] -struct PartialChunkInner { - /// In some cases we might not be able to produce a chunk at all. - chunk: Option, - - /// Collection of errors encountered while executing the Lens. - errors: Vec, -} - -impl PartialChunk { - /// Returns the partial chunk if any and consumes `self`. 
- pub fn take(self) -> Option { - self.inner.chunk - } - - pub fn errors(&self) -> impl Iterator { - self.inner.errors.iter() - } -} - -fn apply_ops(initial: ListArray, ops: &[Op]) -> Result { - ops.iter().try_fold(initial, |array, op| op.call(&array)) -} - -fn collect_output_components_iter<'a>( - input: &'a SerializedComponentColumn, - components: &'a [ComponentOutput], -) -> impl Iterator> + 'a { - components.iter().map( - |output| match apply_ops(input.list_array.clone(), &output.ops) { - Ok(list_array) => Ok((output.component_descr.clone(), list_array)), - Err(source) => Err(LensError::ComponentOperationFailed { - component: output.component_descr.component, - source: Box::new(source), - }), - }, - ) -} - -fn collect_output_times_iter<'a>( - input: &'a SerializedComponentColumn, - timelines: &'a [TimeOutput], -) -> impl Iterator> + 'a { - timelines.iter().map( - |time| match apply_ops(input.list_array.clone(), &time.ops) { - Ok(list_array) => Ok((time.timeline_name, time.timeline_type, list_array)), - Err(source) => Err(LensError::TimeOperationFailed { - timeline_name: time.timeline_name, - source: Box::new(source), - }), - }, - ) -} - -/// Converts a time array to a time column. -/// -/// Checks if the `list_array` values are [`arrow::array::Int64Array`] and if so, creates a [`re_chunk::TimeColumn`]. 
-fn try_convert_time_column( - timeline_name: TimelineName, - timeline_type: TimeType, - list_array: &ListArray, -) -> Result<(TimelineName, TimeColumn), LensError> { - if let Some(time_vals) = list_array.values().as_any().downcast_ref::() { - let time_column = re_chunk::TimeColumn::new( - None, - Timeline::new(timeline_name, timeline_type), - time_vals.values().clone(), - ); - Ok((timeline_name, time_column)) - } else { - Err(LensError::InvalidTimeColumn { - timeline_name, - actual_type: list_array.values().data_type().clone().into(), - }) - } -} - -fn resolve_entity_path<'a>(chunk: &'a Chunk, target_entity: &'a TargetEntity) -> &'a EntityPath { - match target_entity { - TargetEntity::SameAsInput => chunk.entity_path(), - TargetEntity::Explicit(path) => path, - } -} - -/// Creates a chunk from the given components and timelines, handling errors appropriately. -/// -/// Returns `Ok(chunk)` if successful with no errors, or `Err(PartialChunk)` if there were -/// errors during processing (with an optional chunk if creation succeeded despite errors). -fn finalize_chunk( - entity_path: EntityPath, - chunk_times: IntMap, - component_results: re_chunk::ChunkComponents, - mut errors: Vec, -) -> Result { - match Chunk::from_auto_row_ids(ChunkId::new(), entity_path, chunk_times, component_results) { - Ok(chunk) => { - if errors.is_empty() { - Ok(chunk) - } else { - Err(PartialChunk { - inner: Box::new(PartialChunkInner { - chunk: Some(chunk), - errors, - }), - }) - } - } - Err(err) => { - errors.push(err.into()); - Err(PartialChunk { - inner: Box::new(PartialChunkInner { - chunk: None, - errors, - }), - }) - } - } -} - -impl OneToOne { - /// Applies a one-to-one lens transformation where each input row produces exactly one output row. - /// - /// The output chunk inherits all timelines from the input chunk, with additional timelines - /// extracted from the component data if specified. Component columns are transformed according - /// to the provided operations. 
- fn apply( - &self, - chunk: &Chunk, - input: &SerializedComponentColumn, - ) -> Result { - let entity_path = resolve_entity_path(chunk, &self.target_entity); - - let mut errors = Vec::new(); - - // Collect successful components directly into ChunkComponents, accumulate errors - let component_results: re_chunk::ChunkComponents = - collect_output_components_iter(input, &self.components) - .filter_map(|result| match result { - Ok(component) => Some(component), - Err(err) => { - errors.push(err); - None - } - }) - .collect(); - - // Inherit all existing time columns as-is (since row count doesn't change) - let mut chunk_times = chunk.timelines().clone(); - - // Collect successful time columns, accumulate errors - chunk_times.extend( - collect_output_times_iter(input, &self.times).filter_map(|result| match result { - Ok((timeline_name, timeline_type, list_array)) => { - match try_convert_time_column(timeline_name, timeline_type, &list_array) { - Ok(time_col) => Some(time_col), - Err(err) => { - errors.push(err); - None - } - } - } - Err(err) => { - errors.push(err); - None - } - }), - ); - - finalize_chunk(entity_path.clone(), chunk_times, component_results, errors) - } -} - -impl Static { - /// Applies a static lens transformation that produces timeless output data. - /// - /// The output chunk contains no time columns, only the transformed component columns. - /// This is useful for metadata or other data that should not be associated with any timeline. 
- fn apply( - &self, - chunk: &Chunk, - input: &SerializedComponentColumn, - ) -> Result { - let entity_path = resolve_entity_path(chunk, &self.target_entity); - - let mut errors = Vec::new(); - - // Collect successful components directly into ChunkComponents, accumulate errors - let component_results: re_chunk::ChunkComponents = - collect_output_components_iter(input, &self.components) - .filter_map(|result| match result { - Ok(component) => Some(component), - Err(err) => { - errors.push(err); - None - } - }) - .collect(); - - // TODO(grtlr): In case of static, should we enforce single rows (i.e. unit chunks)? - finalize_chunk( - entity_path.clone(), - Default::default(), - component_results, - errors, - ) - } -} - -impl OneToMany { - /// Applies a one-to-many lens transformation where each input row potentially produces multiple output rows. - /// - /// The output chunk inherits all time columns from the input chunk, with additional time columns - /// extracted from the component data if specified. Component columns are transformed according - /// to the provided operations. - fn apply( - &self, - chunk: &Chunk, - input: &SerializedComponentColumn, - ) -> Result { - use arrow::array::UInt32Array; - - let entity_path = resolve_entity_path(chunk, &self.target_entity); - - let mut errors = Vec::new(); - - let mut output_components = - collect_output_components_iter(input, &self.components).peekable(); - - // Peek at the first component to establish the scatter pattern (how many output rows - // each input row produces). All components must have the same outer list structure. - // We use .peek() instead of consuming the iterator so we can still process all - // components (including this first one) later. 
- let reference_array = match output_components.peek() { - Some(Ok((_descr, reference_array))) => reference_array, - Some(Err(_)) => { - // If the first component failed, collect all errors and return - errors.extend(output_components.filter_map(|r| r.err())); - return Err(PartialChunk { - inner: Box::new(PartialChunkInner { - chunk: None, - errors, - }), - }); - } - None => { - return Err(PartialChunk { - inner: Box::new(PartialChunkInner { - chunk: None, - errors: vec![LensError::NoOutputColumnsProduced { - input_entity: chunk.entity_path().clone(), - input_component: input.descriptor.component, - target_entity: entity_path.clone(), - }], - }), - }); - } - }; - - // Build scatter indices: tracks which input row each output row came from - // Example: [0, 0, 0, 1, 2] means rows 0-2 from input 0, row 3 from input 1, row 4 from input 2 - let mut scatter_indices = Vec::new(); - let offsets = reference_array.value_offsets(); - - for (row_idx, window) in offsets.windows(2).enumerate() { - let start = window[0]; - let end = window[1]; - let count = end - start; - - if reference_array.is_null(row_idx) || count == 0 { - // Null or empty list produces one output row - scatter_indices.push(row_idx as u32); - } else { - // Each element produces one output row - for _ in 0..count { - scatter_indices.push(row_idx as u32); - } - } - } - - let scatter_indices_array = UInt32Array::from(scatter_indices); - - // Replicate all existing time values using scatter indices. - let mut chunk_times: IntMap = Default::default(); - for (timeline_name, time_column) in chunk.timelines() { - let time_values = time_column.times_raw(); - let time_values_array = Int64Array::from(time_values.to_vec()); - - // `arrow::compute::take` is fine to use in this context, because we want to allow nullability. 
- #[expect(clippy::disallowed_methods)] - match take(&time_values_array, &scatter_indices_array, None) { - Ok(scattered) => { - let scattered_i64 = scattered.as_primitive::(); - let new_time_column = re_chunk::TimeColumn::new( - None, - *time_column.timeline(), - scattered_i64.values().clone(), - ); - chunk_times.insert(*timeline_name, new_time_column); - } - Err(source) => { - errors.push(LensError::ScatterExistingTimeFailed { - timeline_name: *timeline_name, - source, - }); - } - } - } - - // Explode all output time columns and collect errors - chunk_times.extend( - collect_output_times_iter(input, &self.times).filter_map(|result| match result { - Ok((timeline_name, timeline_type, list_array)) => { - match reshape::Explode.transform(&list_array) { - Ok(exploded) => { - match try_convert_time_column(timeline_name, timeline_type, &exploded) { - Ok(time_col) => Some(time_col), - Err(err) => { - errors.push(err); - None - } - } - } - Err(err) => { - errors.push(LensError::TimeOperationFailed { - timeline_name, - source: Box::new(err.into()), - }); - None - } - } - } - Err(err) => { - errors.push(err); - None - } - }), - ); - - // Explode all component outputs and collect errors - let chunk_components: re_chunk::ChunkComponents = output_components - .filter_map(|result| match result { - Ok((component_descr, list_array)) => { - match reshape::Explode.transform(&list_array) { - Ok(exploded) => { - Some(SerializedComponentColumn::new(exploded, component_descr)) - } - Err(err) => { - errors.push(LensError::ComponentOperationFailed { - component: component_descr.component, - source: Box::new(err.into()), - }); - None - } - } - } - Err(err) => { - errors.push(err); - None - } - }) - .collect(); - - // Verify that all columns have the same length happens during chunk creation. - finalize_chunk(entity_path.clone(), chunk_times, chunk_components, errors) - } -} - -/// Controls how data is processed when applying lenses. 
-/// -/// This determines what happens to logged data when lenses are applied, particularly -/// how unmatched original data is handled. -#[derive(Copy, Clone)] -pub enum OutputMode { - /// Forward both the transformed data from matching lenses and the original data. - /// - /// Use this when you want to preserve all original data alongside transformations. - ForwardAll, - - /// Forward transformed data if lenses match, otherwise forward the original data unchanged. - /// - /// Use this when you want to transform matching data but ensure unmatched data isn't dropped. - ForwardUnmatched, - - /// Only forward transformed data, drop data that doesn't match any lens. - /// - /// Use this when you want a pure transformation pipeline where only explicitly transformed - /// data should be output. - DropUnmatched, -} - -/// A collection that holds multiple lenses and applies them to chunks. -/// -/// This can hold multiple lenses that match different entity paths and components. -/// When a chunk is processed, all relevant lenses (those whose entity path filters match -/// the chunk's entity path) are applied. -pub struct Lenses { - lenses: Vec, - mode: OutputMode, -} - -impl Lenses { - /// Creates a new lens collection with the specified mode. - pub fn new(mode: OutputMode) -> Self { - Self { - lenses: Default::default(), - mode, - } - } - - /// Adds a lens to this collection. - pub fn add_lens(&mut self, lens: Lens) { - self.lenses.push(lens); - } - - /// Adds a lens to this collection. - pub fn set_output_mode(&mut self, mode: OutputMode) { - self.mode = mode; - } - - fn relevant(&self, chunk: &Chunk) -> impl Iterator { - self.lenses.iter().filter(|lens| { - lens.input - .entity_path_filter - .clone() - .resolve_without_substitutions() - .matches(chunk.entity_path()) - && chunk.components().contains_component(lens.input.component) - }) - } - - /// Applies all relevant lenses and returns the results. 
- /// - /// The behavior depends on the configured [`OutputMode`]: - /// - [`OutputMode::ForwardAll`]: Returns both transformed and original data - /// - [`OutputMode::ForwardUnmatched`]: Returns transformed data if lenses match, otherwise original data - /// - [`OutputMode::DropUnmatched`]: Returns only transformed data, drops unmatched data - pub fn apply<'a>( - &'a self, - chunk: &'a Chunk, - ) -> impl Iterator> + 'a { - match self.mode { - OutputMode::ForwardAll => { - // Apply all relevant lenses and also forward the original chunk - let chunk_clone = chunk.clone(); - Either::Left( - self.relevant(chunk) - .flat_map(|lens| lens.apply(chunk)) - .chain(std::iter::once(Ok(chunk_clone))), - ) - } - OutputMode::ForwardUnmatched => { - // Apply relevant lenses if any exist, otherwise forward the original chunk - let chunk_clone = chunk.clone(); - let mut relevant_lenses = self.relevant(chunk).peekable(); - let has_relevant = relevant_lenses.peek().is_some(); - - Either::Right(Either::Left( - relevant_lenses - .flat_map(|lens| lens.apply(chunk)) - .chain((!has_relevant).then_some(Ok(chunk_clone))), - )) - } - OutputMode::DropUnmatched => Either::Right(Either::Right( - self.relevant(chunk).flat_map(|lens| lens.apply(chunk)), - )), - } - } -} diff --git a/crates/store/re_lenses/src/builder.rs b/crates/store/re_lenses/src/builder.rs deleted file mode 100644 index 4cdccace1fc0..000000000000 --- a/crates/store/re_lenses/src/builder.rs +++ /dev/null @@ -1,332 +0,0 @@ -//! Builder API for constructing lenses. - -use re_chunk::{ComponentIdentifier, EntityPath, TimelineName}; -use re_log_types::{EntityPathFilter, TimeType}; -use re_sdk_types::ComponentDescriptor; - -use crate::ast::{OneToMany, OneToOne, Static}; -use crate::{LensError, Op, ast}; - -/// Builder for lenses with support for multiple output modes. 
-#[must_use] -pub struct LensBuilder { - input: ast::InputColumn, - outputs: Vec, -} - -impl LensBuilder { - pub(crate) fn new( - entity_path_filter: EntityPathFilter, - component: impl Into, - ) -> Self { - Self { - input: ast::InputColumn { - entity_path_filter, - component: component.into(), - }, - outputs: vec![], - } - } - - /// Adds a temporal output with 1:1 row mapping. - /// - /// Each input row produces exactly one output row. Outputs inherit time columns from - /// the input, plus any additional time columns specified via `.time()`. - /// - /// The output will use the same entity path as the input. - pub fn output_columns( - mut self, - builder: impl FnOnce(ColumnsBuilder) -> ColumnsBuilder, - ) -> Result { - let output_builder = ColumnsBuilder::new(ast::TargetEntity::SameAsInput); - let output = builder(output_builder).build(&self.input)?; - self.outputs.push(output); - Ok(self) - } - - /// Adds a temporal output with 1:1 row mapping at a specific entity path. - /// - /// Each input row produces exactly one output row. Outputs inherit time columns from - /// the input, plus any additional time columns specified via `.time()`. - pub fn output_columns_at( - mut self, - entity_path: impl Into, - builder: impl FnOnce(ColumnsBuilder) -> ColumnsBuilder, - ) -> Result { - let output_builder = ColumnsBuilder::new(ast::TargetEntity::Explicit(entity_path.into())); - let output = builder(output_builder).build(&self.input)?; - self.outputs.push(output); - Ok(self) - } - - /// Adds a static output (timeless data). - /// - /// Creates data that does not change over time and has no associated time columns. - /// - /// The output will use the same entity path as the input. 
- pub fn output_static_columns( - mut self, - builder: impl FnOnce(StaticColumnsBuilder) -> StaticColumnsBuilder, - ) -> Result { - let output_builder = StaticColumnsBuilder::new(ast::TargetEntity::SameAsInput); - let output = builder(output_builder).build(&self.input)?; - self.outputs.push(output); - Ok(self) - } - - /// Adds a static output (timeless data) at a specific entity path. - /// - /// Creates data that does not change over time and has no associated time columns. - pub fn output_static_columns_at( - mut self, - entity_path: impl Into, - builder: impl FnOnce(StaticColumnsBuilder) -> StaticColumnsBuilder, - ) -> Result { - let output_builder = - StaticColumnsBuilder::new(ast::TargetEntity::Explicit(entity_path.into())); - let output = builder(output_builder).build(&self.input)?; - self.outputs.push(output); - Ok(self) - } - - /// Adds a temporal output with 1:N row mapping (scatter). - /// - /// Each input row produces multiple output rows at the same timepoint. The timepoint - /// is replicated/scattered across the output rows. Useful for flattening lists or - /// exploding batches. - /// - /// The output will use the same entity path as the input. - pub fn output_scatter_columns( - mut self, - builder: impl FnOnce(ScatterColumnsBuilder) -> ScatterColumnsBuilder, - ) -> Result { - let output_builder = ScatterColumnsBuilder::new(ast::TargetEntity::SameAsInput); - let output = builder(output_builder).build(&self.input)?; - self.outputs.push(output); - Ok(self) - } - - /// Adds a temporal output with 1:N row mapping (scatter) at a specific entity path. - /// - /// Each input row produces multiple output rows at the same timepoint. The timepoint - /// is replicated/scattered across the output rows. Useful for flattening lists or - /// exploding batches. 
- pub fn output_scatter_columns_at( - mut self, - entity_path: impl Into, - builder: impl FnOnce(ScatterColumnsBuilder) -> ScatterColumnsBuilder, - ) -> Result { - let output_builder = - ScatterColumnsBuilder::new(ast::TargetEntity::Explicit(entity_path.into())); - let output = builder(output_builder).build(&self.input)?; - self.outputs.push(output); - Ok(self) - } - - /// Finalizes this builder and returns the corresponding lens. - pub fn build(self) -> ast::Lens { - ast::Lens { - input: self.input, - outputs: self.outputs, - } - } -} - -// ==================== Output Builders ==================== - -/// Builder for temporal outputs with 1:1 row mapping. -/// -/// Each input row produces exactly one output row. Outputs inherit time columns -/// from the input, plus any additional time columns specified. -#[must_use] -pub struct ColumnsBuilder { - target_entity: ast::TargetEntity, - components: Vec, - time_outputs: Vec, -} - -impl ColumnsBuilder { - fn new(target_entity: ast::TargetEntity) -> Self { - Self { - target_entity, - components: vec![], - time_outputs: vec![], - } - } - - /// Adds a component output column. - /// - /// # Arguments - /// * `component_descr` - The descriptor for the output component - /// * `ops` - Sequence of operations to apply to transform the input column - pub fn component( - mut self, - component_descr: ComponentDescriptor, - ops: impl IntoIterator, - ) -> Self { - self.components.push(ast::ComponentOutput { - component_descr, - ops: ops.into_iter().collect(), - }); - self - } - - /// Adds a time extraction. - /// - /// Extracts data from the input column to create a new time column for the output rows. 
- /// - /// # Arguments - /// * `timeline_name` - Name of the timeline to create - /// * `timeline_type` - Type of timeline (Sequence or Time) - /// * `ops` - Sequence of operations to extract time values (must produce [`arrow::array::Int64Array`]) - pub fn time( - mut self, - timeline_name: impl Into, - timeline_type: TimeType, - ops: impl IntoIterator, - ) -> Self { - self.time_outputs.push(ast::TimeOutput { - timeline_name: timeline_name.into(), - timeline_type, - ops: ops.into_iter().collect(), - }); - self - } - - /// Builds a [`ast::LensKind`], the `input` is passed for providing contextualized errors. - fn build(self, input: &ast::InputColumn) -> Result { - Ok(ast::LensKind::Columns(OneToOne { - target_entity: self.target_entity, - components: self.components.try_into().map_err(|_err| { - LensError::MissingOutputComponent { - input_filter: input.entity_path_filter.clone(), - input_component: input.component, - } - })?, - times: self.time_outputs, - })) - } -} - -/// Builder for static outputs (timeless data). -/// -/// Creates data that does not change over time. Static outputs have no associated time columns. -#[must_use] -pub struct StaticColumnsBuilder { - target_entity: ast::TargetEntity, - components: Vec, -} - -impl StaticColumnsBuilder { - fn new(target_entity: ast::TargetEntity) -> Self { - Self { - target_entity, - components: vec![], - } - } - - /// Adds a component output column. - /// - /// # Arguments - /// * `component_descr` - The descriptor for the output component - /// * `ops` - Sequence of operations to apply to transform the input column - pub fn component( - mut self, - component_descr: ComponentDescriptor, - ops: impl IntoIterator, - ) -> Self { - self.components.push(ast::ComponentOutput { - component_descr, - ops: ops.into_iter().collect(), - }); - self - } - - /// Builds a [`ast::LensKind`], the `input` is passed for providing contextualized errors. 
- fn build(self, input: &ast::InputColumn) -> Result { - Ok(ast::LensKind::StaticColumns(Static { - target_entity: self.target_entity, - components: self.components.try_into().map_err(|_err| { - LensError::MissingOutputComponent { - input_filter: input.entity_path_filter.clone(), - input_component: input.component, - } - })?, - })) - } -} - -/// Builder for temporal outputs with 1:N row mapping (scatter). -/// -/// Each input row produces multiple output rows at the same timepoint. The timepoint -/// is replicated/scattered across all output rows. This is useful for flattening lists -/// or exploding batches while maintaining temporal alignment. -#[must_use] -pub struct ScatterColumnsBuilder { - target_entity: ast::TargetEntity, - components: Vec, - time_outputs: Vec, -} - -impl ScatterColumnsBuilder { - fn new(target_entity: ast::TargetEntity) -> Self { - Self { - target_entity, - components: vec![], - time_outputs: vec![], - } - } - - /// Adds a component output column. - /// - /// # Arguments - /// * `component_descr` - The descriptor for the output component - /// * `ops` - Sequence of operations to apply to transform the input column - pub fn component( - mut self, - component_descr: ComponentDescriptor, - ops: impl IntoIterator, - ) -> Self { - self.components.push(ast::ComponentOutput { - component_descr, - ops: ops.into_iter().collect(), - }); - self - } - - /// Adds a time extraction. - /// - /// Extracts data from the input column to create a new time column for the output rows. 
- /// - /// # Arguments - /// * `timeline_name` - Name of the timeline to create - /// * `timeline_type` - Type of timeline (Sequence or Time) - /// * `ops` - Sequence of operations to extract time values (must produce [`arrow::array::Int64Array`]) - pub fn time( - mut self, - timeline_name: impl Into, - timeline_type: TimeType, - ops: impl IntoIterator, - ) -> Self { - self.time_outputs.push(ast::TimeOutput { - timeline_name: timeline_name.into(), - timeline_type, - ops: ops.into_iter().collect(), - }); - self - } - - /// Builds a [`ast::LensKind`], the `input` is passed for providing contextualized errors. - fn build(self, input: &ast::InputColumn) -> Result { - Ok(ast::LensKind::ScatterColumns(OneToMany { - target_entity: self.target_entity, - components: self.components.try_into().map_err(|_err| { - LensError::MissingOutputComponent { - input_filter: input.entity_path_filter.clone(), - input_component: input.component, - } - })?, - times: self.time_outputs, - })) - } -} diff --git a/crates/store/re_lenses/src/error.rs b/crates/store/re_lenses/src/error.rs deleted file mode 100644 index fa32ddc20f24..000000000000 --- a/crates/store/re_lenses/src/error.rs +++ /dev/null @@ -1,59 +0,0 @@ -use re_arrow_util::DisplayDataType; -use re_chunk::{ComponentIdentifier, EntityPath, TimelineName}; -use re_log_types::EntityPathFilter; - -use crate::op::OpError; - -/// Different variants of errors that can happen when executing lenses. -#[expect(missing_docs)] -#[derive(Debug, thiserror::Error)] -pub enum LensError { - #[error( - "Lens for input component `{input_component} with entity path filter `{input_filter:?}` is missing output components" - )] - MissingOutputComponent { - input_filter: EntityPathFilter, - input_component: ComponentIdentifier, - }, - - // TODO(grtlr): This is very similar to the error above (just at a later stage). Can we combine those? - // We probably want to split builder errors from computational errors once the API stabilizes. 
- #[error("No component outputs were produced for target entity `{target_entity}`")] - NoOutputColumnsProduced { - input_entity: EntityPath, - input_component: ComponentIdentifier, - target_entity: EntityPath, - }, - - #[error("Chunk validation failed: {0}")] - ChunkValidationFailed(#[from] re_chunk::ChunkError), - - #[error("Failed to apply operations to component '{component}': {source}")] - ComponentOperationFailed { - component: ComponentIdentifier, - #[source] - source: Box, // Box because of size. - }, - - #[error("Failed to apply operations to timeline '{timeline_name}': {source}")] - TimeOperationFailed { - timeline_name: TimelineName, - #[source] - source: Box, // Box because of size. - }, - - #[error( - "Invalid time column type for timeline '{timeline_name}': expected List, got {actual_type}" - )] - InvalidTimeColumn { - timeline_name: TimelineName, - actual_type: DisplayDataType, - }, - - #[error("Failed to scatter existing timeline '{timeline_name}' across output rows")] - ScatterExistingTimeFailed { - timeline_name: TimelineName, - #[source] - source: arrow::error::ArrowError, - }, -} diff --git a/crates/store/re_lenses/src/lib.rs b/crates/store/re_lenses/src/lib.rs index 8ae3c07f2133..4e768aa3e58b 100644 --- a/crates/store/re_lenses/src/lib.rs +++ b/crates/store/re_lenses/src/lib.rs @@ -1,18 +1,12 @@ //! Lenses allow you to extract, transform, and restructure component data. They -//! are applied to chunks that match the specified entity path filter and contain -//! the target component. +//! are applied to chunks that contain the target component. //! //! See [`Lens`] for more details and assumptions. -mod ast; -mod builder; -mod error; -mod op; -mod semantic; +pub mod op; -pub use self::{ - ast::{Lens, Lenses, Op, OutputMode, PartialChunk}, - builder::{ColumnsBuilder, LensBuilder, ScatterColumnsBuilder, StaticColumnsBuilder}, - error::LensError, - op::OpError, +// Re-export core types for backward compatibility. 
+pub use re_lenses_core::{ + ChunkExt, Lens, LensBuilder, LensBuilderError, LensRuntimeError, Lenses, OutputBuilder, + OutputMode, PartialChunk, }; diff --git a/crates/store/re_lenses/src/op.rs b/crates/store/re_lenses/src/op.rs deleted file mode 100644 index 7cbfc5ff80fa..000000000000 --- a/crates/store/re_lenses/src/op.rs +++ /dev/null @@ -1,56 +0,0 @@ -//! Provides commonly used transformations of Arrow arrays. -//! -//! These operations should not be exposed publicly, but instead be wrapped by the [`crate::Op`] abstraction. - -// TODO(grtlr): Eventually we will want to make the types in here compatible with Datafusion UDFs. - -use std::sync::Arc; - -use arrow::array::{Array as _, ListArray}; -use arrow::compute; -use arrow::datatypes::{DataType, Field}; - -/// Errors that occur during low-level operation execution on columns. -#[derive(Debug, thiserror::Error)] -pub enum OpError { - /// Error from Arrow combinator transformations. - #[error(transparent)] - Transform(#[from] re_arrow_combinators::Error), - - /// Error from selector parsing. - #[error(transparent)] - Selector(Box), // Box because of size. - - /// Error from Arrow operations. - #[error(transparent)] - Arrow(#[from] arrow::error::ArrowError), - - /// Other custom errors. - #[error(transparent)] - Other(Box), -} - -impl From for OpError { - fn from(value: re_arrow_combinators::SelectorError) -> Self { - Self::Selector(value.into()) - } -} - -/// Casts the `value_type` (inner array) of a `ListArray` to a different data type. 
-#[derive(Debug)] -pub struct Cast { - pub(crate) to_inner_type: DataType, -} - -impl Cast { - pub fn call(&self, list_array: &ListArray) -> Result { - let (_field, offsets, ref array, nulls) = list_array.clone().into_parts(); - let res = compute::cast(array, &self.to_inner_type)?; - Ok(ListArray::new( - Arc::new(Field::new_list_field(res.data_type().clone(), true)), - offsets, - res, - nulls, - )) - } -} diff --git a/crates/store/re_lenses/src/op/basic.rs b/crates/store/re_lenses/src/op/basic.rs new file mode 100644 index 000000000000..b1222e5b638d --- /dev/null +++ b/crates/store/re_lenses/src/op/basic.rs @@ -0,0 +1,53 @@ +//! Basic transforms for common operations. + +use std::sync::Arc; + +use arrow::array::{ArrayRef, FixedSizeListArray, StructArray}; +use arrow::datatypes::{DataType, Field}; + +use re_lenses_core::combinators::{Error, StructToFixedList, Transform as _}; + +/// Extracts named fields from a struct, packs them into a [`FixedSizeListArray`], +/// and casts the element type to `Float32`. 
+pub fn struct_to_fixed_size_list_f32( + field_names: impl IntoIterator>, +) -> impl Fn(&ArrayRef) -> Result, Error> + Send + Sync { + let field_names: Vec = field_names.into_iter().map(Into::into).collect(); + move |source: &ArrayRef| { + let struct_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "struct_to_fixed_size_list_f32 input".to_owned(), + })?; + let fixed = StructToFixedList::new(field_names.iter().map(String::as_str)) + .transform(struct_array)?; + match fixed { + Some(arr) => { + let (_field, size, values, nulls) = arr.into_parts(); + let cast_values = arrow::compute::cast(&values, &DataType::Float32)?; + let new_field = Arc::new(Field::new_list_field( + DataType::Float32, + cast_values.is_nullable(), + )); + Ok(Some( + Arc::new(FixedSizeListArray::new(new_field, size, cast_values, nulls)) + as ArrayRef, + )) + } + None => Ok(None), + } + } +} + +/// Creates a transform that casts the input array to a new [`DataType`]. +pub fn cast( + to_type: DataType, +) -> impl Fn(&ArrayRef) -> Result, Error> + Send + Sync { + move |source: &ArrayRef| { + let cast_values = arrow::compute::cast(source, &to_type)?; + Ok(Some(cast_values)) + } +} diff --git a/crates/store/re_lenses/src/op/mod.rs b/crates/store/re_lenses/src/op/mod.rs new file mode 100644 index 000000000000..0f38da55c85a --- /dev/null +++ b/crates/store/re_lenses/src/op/mod.rs @@ -0,0 +1,13 @@ +//! Re-exports of all available element-level transform ops. 
+ +pub mod basic; +pub mod semantic; +pub mod string; + +pub use self::{ + basic::{cast, struct_to_fixed_size_list_f32}, + semantic::{ + binary_to_list_uint8, rgba_struct_to_uint32, string_to_video_codec, timespec_to_nanos, + }, + string::{string_prefix, string_prefix_nonempty, string_suffix, string_suffix_nonempty}, +}; diff --git a/crates/store/re_lenses/src/op/semantic.rs b/crates/store/re_lenses/src/op/semantic.rs new file mode 100644 index 000000000000..6edda067d2f6 --- /dev/null +++ b/crates/store/re_lenses/src/op/semantic.rs @@ -0,0 +1,655 @@ +//! Semantic array transforms for concrete applications. + +use std::marker::PhantomData; +use std::sync::Arc; + +use arrow::array::{ + Array as _, ArrowNativeTypeOp as _, AsArray as _, GenericBinaryArray, GenericListArray, + Int64Array, OffsetSizeTrait, StringArray, StructArray, UInt32Array, UInt32Builder, +}; +use arrow::datatypes::{DataType, Field, Float64Type, Int32Type, Int64Type}; +use arrow::error::ArrowError; +use re_sdk_types::components::VideoCodec; + +use re_lenses_core::combinators::{DowncastRef, Error, GetField, Transform}; + +/// Converts binary arrays to list arrays where each binary element becomes a list of `u8`. +/// +/// The underlying bytes buffer is reused, making this transformation almost zero-copy. +#[derive(Clone, Debug, Default)] +pub struct BinaryToListUInt8 { + _from_offset: PhantomData, + _to_offset: PhantomData, + + /// This transform is specifically intended for contiguous byte data, + /// so we default to non-nullable lists. + nullable: bool, +} + +impl BinaryToListUInt8 { + /// Create a new transformation to convert a binary array to a list array of `u8` arrays. 
+ pub fn new() -> Self { + Default::default() + } +} + +impl Transform for BinaryToListUInt8 { + type Source = GenericBinaryArray; + type Target = GenericListArray; + + fn transform(&self, source: &GenericBinaryArray) -> Result, Error> { + use arrow::array::UInt8Array; + use arrow::buffer::ScalarBuffer; + + let scalar_buffer: ScalarBuffer = ScalarBuffer::from(source.values().clone()); + let uint8_array = UInt8Array::new(scalar_buffer, None); + + // Convert from O1 to O2. Most offset buffers will be small in real-world + // examples, so we're fine copying them. + // + // This could be true zero copy if Rust had specialization. + // More info: https://std-dev-guide.rust-lang.org/policy/specialization.html + let old_offsets = source.offsets().iter(); + let new_offsets: Result, Error> = old_offsets + .map(|&offset| { + let offset_usize = offset.as_usize(); + O2::from_usize(offset_usize).ok_or_else(|| Error::OffsetOverflow { + actual: offset_usize, + expected_type: std::any::type_name::(), + }) + }) + .collect(); + let offsets = arrow::buffer::OffsetBuffer::new(new_offsets?.into()); + + let list = Self::Target::new( + Arc::new(Field::new_list_field(DataType::UInt8, self.nullable)), + offsets, + Arc::new(uint8_array), + source.nulls().cloned(), + ); + + Ok(Some(list)) + } +} + +/// Converts `StructArray` of timestamps with `seconds`/`nanos` or `sec`/`nsec` fields (i64/i32) +/// to `Int64Array` containing the corresponding total nanoseconds timestamps. +#[derive(Default)] +pub struct TimeSpecToNanos {} + +impl TimeSpecToNanos { + /// Extracts a struct field from different possible field name variants, + /// by trying each name in order. Casts to the target primitive type. 
+ fn get_field_from_variants( + source: &StructArray, + field_names: &[&str], + ) -> Result>, Error> { + for &name in field_names { + if let Ok(Some(array_ref)) = GetField::new(name).transform(source) { + let casted = arrow::compute::cast(&array_ref, &TargetType::DATA_TYPE)?; + let downcasted = DowncastRef::::new().transform(&casted)?; + + re_log::debug_assert!( + downcasted.is_some(), + "downcasting directly after casting should not fail" + ); + + return Ok(downcasted); + } + } + Err(Error::FieldNotFound { + field_name: field_names.join(" | "), + available_fields: source.fields().iter().map(|f| f.name().clone()).collect(), + }) + } +} + +impl Transform for TimeSpecToNanos { + type Source = StructArray; + type Target = Int64Array; + + fn transform(&self, source: &StructArray) -> Result, Error> { + let (Some(seconds_array), Some(nanos_array)) = ( + Self::get_field_from_variants::(source, &["seconds", "sec"])?, + Self::get_field_from_variants::(source, &["nanos", "nanosec", "nsec"])?, + ) else { + return Ok(None); + }; + + Ok(Some(arrow::compute::try_binary( + &seconds_array, + &nanos_array, + |seconds: i64, nanos: i32| -> Result { + seconds + .mul_checked(1_000_000_000)? + .add_checked(nanos as i64) + }, + )?)) + } +} + +/// Transforms a `StringArray` of video codec names to a `UInt32Array`, +/// where each u32 corresponds to a Rerun `VideoCodec` enum value. 
+#[derive(Default)] +pub struct StringToVideoCodecUInt32 {} + +impl Transform for StringToVideoCodecUInt32 { + type Source = StringArray; + type Target = UInt32Array; + + fn transform(&self, source: &StringArray) -> Result, Error> { + Ok(Some( + source + .iter() + .try_fold( + UInt32Builder::with_capacity(source.len()), + |mut builder, maybe_str| { + if let Some(codec_str) = maybe_str { + let codec = match codec_str.to_lowercase().as_str() { + "h264" => VideoCodec::H264, + "h265" => VideoCodec::H265, + "av1" => VideoCodec::AV1, + _ => { + return Err(Error::UnexpectedValue { + expected: &["h264", "h265", "av1"], + actual: codec_str.to_owned(), + }); + } + }; + builder.append_value(codec as u32); + } else { + builder.append_null(); + } + Ok(builder) + }, + )? + .finish(), + )) + } +} + +/// Converts RGBA structs (r, g, b, a as f32 or f64 in 0..1) to packed RGBA u32 values. +#[derive(Default)] +pub struct RgbaStructToUInt32 {} + +impl Transform for RgbaStructToUInt32 { + type Source = StructArray; + type Target = UInt32Array; + + fn transform(&self, source: &StructArray) -> Result, Error> { + // Helper to extract a color channel field, supporting both f32 and f64. + let get_channel = |name: &str| -> Result<_, Error> { + let field = + GetField::new(name) + .transform(source)? + .ok_or_else(|| Error::FieldNotFound { + field_name: name.to_owned(), + available_fields: source + .fields() + .iter() + .map(|f| f.name().to_owned()) + .collect(), + })?; + Ok(arrow::compute::cast(&field, &DataType::Float64)? 
+ .as_primitive::() + .clone()) + }; + + let r = get_channel("r")?; + let g = get_channel("g")?; + let b = get_channel("b")?; + let a = get_channel("a")?; + + let result: UInt32Array = (0..source.len()) + .map(|i| { + if source.is_null(i) { + None + } else { + let rv = (r.value(i).clamp(0.0, 1.0) * 255.0).round() as u32; + let gv = (g.value(i).clamp(0.0, 1.0) * 255.0).round() as u32; + let bv = (b.value(i).clamp(0.0, 1.0) * 255.0).round() as u32; + let av = (a.value(i).clamp(0.0, 1.0) * 255.0).round() as u32; + Some((rv << 24) | (gv << 16) | (bv << 8) | av) + } + }) + .collect(); + + Ok(Some(result)) + } +} + +/// Converts binary data (i32 offsets) to a list of `u8` values. +pub fn binary_to_list_uint8() +-> impl Fn(&arrow::array::ArrayRef) -> Result, Error> + Send + Sync { + move |source: &arrow::array::ArrayRef| { + let binary = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "BinaryArray".to_owned(), + actual: source.data_type().clone(), + context: "binary_to_list_uint8 input".to_owned(), + })?; + Ok(BinaryToListUInt8::::new() + .transform(binary)? + .map(|arr| Arc::new(arr) as arrow::array::ArrayRef)) + } +} + +/// Converts a timestamp struct (`seconds`/`nanos`) to nanoseconds. +pub fn timespec_to_nanos() +-> impl Fn(&arrow::array::ArrayRef) -> Result, Error> + Send + Sync { + move |source: &arrow::array::ArrayRef| { + let struct_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "timespec_to_nanos input".to_owned(), + })?; + Ok(TimeSpecToNanos::default() + .transform(struct_array)? + .map(|arr| Arc::new(arr) as arrow::array::ArrayRef)) + } +} + +/// Converts video codec name strings to `VideoCodec` enum values. 
+pub fn string_to_video_codec() +-> impl Fn(&arrow::array::ArrayRef) -> Result, Error> + Send + Sync { + move |source: &arrow::array::ArrayRef| { + let string_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StringArray".to_owned(), + actual: source.data_type().clone(), + context: "string_to_video_codec input".to_owned(), + })?; + Ok(StringToVideoCodecUInt32::default() + .transform(string_array)? + .map(|arr| Arc::new(arr) as arrow::array::ArrayRef)) + } +} + +/// Converts structs with r, g, b, a fields to packed RGBA u32 values. +/// +/// Supports both f32 and f64 field types, and handles clamping and nulls. +pub fn rgba_struct_to_uint32() +-> impl Fn(&arrow::array::ArrayRef) -> Result, Error> + Send + Sync { + move |source: &arrow::array::ArrayRef| { + let struct_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "rgba_struct_to_uint32 input".to_owned(), + })?; + Ok(RgbaStructToUInt32::default() + .transform(struct_array)? + .map(|arr| Arc::new(arr) as arrow::array::ArrayRef)) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use arrow::array::{ + Array as _, Float32Array, Float64Array, GenericByteBuilder, Int32Array, Int64Array, + StringArray, StructArray, UInt32Array, + }; + use arrow::datatypes::{DataType, Field, GenericBinaryType}; + use re_lenses_core::combinators::{Error, Transform as _}; + use re_sdk_types::components::VideoCodec; + use re_sdk_types::reflection::Enum as _; + + use super::*; + + // Generic test for binary arrays where the offset is the same. 
+ fn impl_binary_test() -> Result<(), Error> { + let mut builder = GenericByteBuilder::>::new(); + builder.append_value(b"hello"); + builder.append_value(b"world"); + builder.append_null(); + builder.append_value(b""); + builder.append_value([0x00, 0xFF, 0x42]); + let binary_array = builder.finish(); + + let result = BinaryToListUInt8::::new() + .transform(&binary_array)? + .unwrap(); + + // Verify structure + assert_eq!(result.len(), 5); + assert!(!result.is_null(0)); + assert!(!result.is_null(1)); + assert!(result.is_null(2)); + assert!(!result.is_null(3)); + assert!(!result.is_null(4)); + + { + let list = result.value(0); + let uint8 = list + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(uint8.len(), 5); + assert_eq!(uint8.value(0) as char, 'h'); + assert_eq!(uint8.value(1) as char, 'e'); + assert_eq!(uint8.value(2) as char, 'l'); + assert_eq!(uint8.value(3) as char, 'l'); + assert_eq!(uint8.value(4) as char, 'o'); + } + + { + let list = result.value(1); + let uint8 = list + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(list.len(), 5); + assert_eq!(uint8.value(0) as char, 'w'); + assert_eq!(uint8.value(1) as char, 'o'); + assert_eq!(uint8.value(2) as char, 'r'); + assert_eq!(uint8.value(3) as char, 'l'); + assert_eq!(uint8.value(4) as char, 'd'); + } + + assert!(result.is_null(2)); + + { + let list = result.value(3); + let uint8 = list + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(uint8.len(), 0); + } + + { + let list = result.value(4); + let uint8 = list + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(uint8.len(), 3); + assert_eq!(uint8.value(0), 0x00); + assert_eq!(uint8.value(1), 0xFF); + assert_eq!(uint8.value(2), 0x42); + } + + Ok(()) + } + + #[test] + fn test_binary_to_list_uint8() -> Result<(), Error> { + // We test the different offset combinations. 
+ impl_binary_test::()?; + impl_binary_test::()?; + impl_binary_test::()?; + impl_binary_test::()?; + + Ok(()) + } + + #[test] + fn test_binary_offset_overflow() { + use arrow::array::LargeBinaryArray; + use arrow::buffer::OffsetBuffer; + + // Create a LargeBinaryArray with an offset that exceeds i32::MAX + let large_offset = i32::MAX as i64 + 1; + + let offsets = vec![0i64, large_offset]; + let offsets_buffer = OffsetBuffer::new(offsets.into()); + + let values = vec![0u8; large_offset as usize]; + + let large_binary = LargeBinaryArray::new(offsets_buffer, values.into(), None); + + // Try to convert from LargeBinaryArray (i64 offsets) to ListArray (i32 offsets) + let transform = BinaryToListUInt8::::new(); + let result = transform.transform(&large_binary); + + // Should fail with OffsetOverflow + assert!(result.is_err()); + match result.unwrap_err() { + Error::OffsetOverflow { + actual, + expected_type, + } => { + assert_eq!(actual, large_offset as usize); + assert_eq!(expected_type, "i32"); + } + other => panic!("Expected OffsetOverflow error, got: {other:?}"), + } + } + + /// Tests that timespec structs are correctly converted to nanoseconds, including (mixed) null handling. + #[test] + fn test_timespec_to_nanos() -> Result<(), Error> { + let seconds_field = Arc::new(Field::new("seconds", DataType::Int64, true)); + let nanos_field = Arc::new(Field::new("nanos", DataType::Int32, true)); + + let seconds_array = Arc::new(Int64Array::from(vec![ + Some(1), + Some(2), + None, + Some(3), + None, + ])); + let nanos_array = Arc::new(Int32Array::from(vec![ + Some(500_000_000), + None, + Some(0), + Some(250_000_000), + None, + ])); + + let struct_array = StructArray::new( + vec![seconds_field, nanos_field].into(), + vec![seconds_array, nanos_array], + None, + ); + let output_array = TimeSpecToNanos::default() + .transform(&struct_array)? 
+ .unwrap(); + let expected_array = Int64Array::from(vec![ + Some(1_500_000_000), + None, + None, + Some(3_250_000_000), + None, + ]); + assert_eq!(output_array, expected_array); + + Ok(()) + } + + /// Tests that timespec structs with `sec`/`nsec` field names work too. + #[test] + fn test_timespec_to_nanos_sec_nsec() -> Result<(), Error> { + let seconds_field = Arc::new(Field::new("sec", DataType::Int64, true)); + let nanos_field = Arc::new(Field::new("nsec", DataType::Int32, true)); + + let seconds_array = Arc::new(Int64Array::from(vec![Some(1), Some(2)])); + let nanos_array = Arc::new(Int32Array::from(vec![Some(500_000_000), Some(0)])); + + let struct_array = StructArray::new( + vec![seconds_field, nanos_field].into(), + vec![seconds_array, nanos_array], + None, + ); + let output_array = TimeSpecToNanos::default() + .transform(&struct_array)? + .unwrap(); + let expected_array = Int64Array::from(vec![Some(1_500_000_000), Some(2_000_000_000)]); + assert_eq!(output_array, expected_array); + + Ok(()) + } + + /// Tests that timespec with uint32 seconds and nanos fields are cast correctly. + #[test] + fn test_timespec_to_nanos_uint32() -> Result<(), Error> { + let seconds_field = Arc::new(Field::new("sec", DataType::UInt32, false)); + let nanos_field = Arc::new(Field::new("nsec", DataType::UInt32, false)); + + let seconds_array = Arc::new(UInt32Array::from(vec![1u32, 2])); + let nanos_array = Arc::new(UInt32Array::from(vec![500_000_000u32, 0])); + + let struct_array = StructArray::new( + vec![seconds_field, nanos_field].into(), + vec![seconds_array, nanos_array], + None, + ); + let output_array = TimeSpecToNanos::default() + .transform(&struct_array)? + .unwrap(); + let expected_array = Int64Array::from(vec![1_500_000_000i64, 2_000_000_000]); + assert_eq!(output_array, expected_array); + + Ok(()) + } + + /// Tests that supported codecs are correctly converted, and checks case-insensitivity and null handling. 
+ #[test] + fn test_string_to_codec_uint32() -> Result<(), Error> { + // Note: mixed codecs normally don't make sense, but should be fine from a pure conversion perspective. + let input_array = StringArray::from(vec![ + Some("H264"), + None, + Some("h264"), + Some("H265"), + Some("aV1"), + ]); + assert_eq!(input_array.null_count(), 1); + let output_array = StringToVideoCodecUInt32::default() + .transform(&input_array)? + .unwrap(); + assert_eq!(output_array.null_count(), 1); + let expected_array = UInt32Array::from(vec![ + Some(VideoCodec::H264 as u32), + None, + Some(VideoCodec::H264 as u32), + Some(VideoCodec::H265 as u32), + Some(VideoCodec::AV1 as u32), + ]); + assert_eq!(output_array, expected_array); + + Ok(()) + } + + /// Tests that we return the correct error when an unsupported codec is in the data. + #[test] + fn test_string_to_codec_uint32_unsupported() { + let unsupported_codecs = ["vp9"]; + for &bad_codec in &unsupported_codecs { + let input_array = StringArray::from(vec![Some("h264"), Some(bad_codec)]); + let result = StringToVideoCodecUInt32::default().transform(&input_array); + assert!(result.is_err()); + let Err(Error::UnexpectedValue { actual, .. }) = result else { + panic!("wrong error type"); + }; + assert_eq!(actual, bad_codec); + } + } + + /// Tests that all codecs defined in `VideoCodec` are accepted. + #[test] + fn test_string_to_codec_uint32_all_supported() -> Result<(), Error> { + let variants = VideoCodec::variants(); + let variant_names = variants + .iter() + .map(|v| format!("{v:?}").to_lowercase()) + .collect::>(); + let input_array = StringArray::from( + variant_names + .iter() + .map(|name| Some(name.as_str())) + .collect::>>(), + ); + let output_array = StringToVideoCodecUInt32::default() + .transform(&input_array)? 
+ .unwrap(); + let expected_array = UInt32Array::from( + variants + .iter() + .map(|v| Some(*v as u32)) + .collect::>>(), + ); + assert_eq!(output_array, expected_array); + + Ok(()) + } + + /// Helper to build an RGBA struct array from channel arrays of a given type. + fn make_rgba_struct( + r: T, + g: T, + b: T, + a: T, + nulls: Option, + ) -> StructArray { + let dt = r.data_type().clone(); + StructArray::new( + vec![ + Arc::new(Field::new("r", dt.clone(), true)), + Arc::new(Field::new("g", dt.clone(), true)), + Arc::new(Field::new("b", dt.clone(), true)), + Arc::new(Field::new("a", dt, true)), + ] + .into(), + vec![Arc::new(r), Arc::new(g), Arc::new(b), Arc::new(a)], + nulls, + ) + } + + /// Tests RGBA conversion with f64 fields, including clamping and null handling. + #[test] + fn test_rgba_struct_to_uint32_f64() { + let nulls = arrow::buffer::NullBuffer::from(vec![true, true, false, true]); + let struct_array = make_rgba_struct( + Float64Array::from(vec![1.0, 0.0, 0.0, 1.5]), + Float64Array::from(vec![0.5, 0.0, 0.0, -0.1]), + Float64Array::from(vec![0.0, 1.0, 0.0, 0.0]), + Float64Array::from(vec![1.0, 0.5, 0.0, 1.0]), + Some(nulls), + ); + let output = RgbaStructToUInt32::default() + .transform(&struct_array) + .expect("transformation failed"); + let expected = UInt32Array::from(vec![ + Some(0xFF80_00FF), // r=255, g=128, b=0, a=255 + Some(0x0000_FF80), // r=0, g=0, b=255, a=128 + None, // struct-level null + Some(0xFF00_00FF), // clamped: r=255, g=0, b=0, a=255 + ]); + assert_eq!(output, Some(expected)); + } + + /// Tests RGBA conversion with f32 fields, including clamping and null handling. 
+ #[test] + fn test_rgba_struct_to_uint32_f32() { + let nulls = arrow::buffer::NullBuffer::from(vec![true, false, true, true]); + let struct_array = make_rgba_struct( + Float32Array::from(vec![1.0f32, 0.0, 0.0, 1.5]), + Float32Array::from(vec![0.0f32, 0.0, 1.0, -0.1]), + Float32Array::from(vec![0.0f32, 0.0, 0.0, 0.0]), + Float32Array::from(vec![1.0f32, 0.0, 1.0, 1.0]), + Some(nulls), + ); + let output = RgbaStructToUInt32::default() + .transform(&struct_array) + .expect("transformation failed with f32 input"); + let expected = UInt32Array::from(vec![ + Some(0xFF00_00FF), // red + None, // struct-level null + Some(0x00FF_00FF), // green + Some(0xFF00_00FF), // clamped: r=255, g=0, b=0, a=255 + ]); + assert_eq!(output, Some(expected)); + } +} diff --git a/crates/store/re_lenses/src/op/string.rs b/crates/store/re_lenses/src/op/string.rs new file mode 100644 index 000000000000..9458551fda48 --- /dev/null +++ b/crates/store/re_lenses/src/op/string.rs @@ -0,0 +1,86 @@ +//! String transforms that operate on flat `ArrayRef` values. + +use std::sync::Arc; + +use arrow::array::{ArrayRef, StringArray}; +use re_lenses_core::combinators::{Error, StringPrefix, StringSuffix, Transform as _}; + +/// Prepends a prefix to each string value. +pub fn string_prefix( + prefix: impl Into, +) -> impl Fn(&ArrayRef) -> Result, Error> + Send + Sync { + let transform = StringPrefix::new(prefix); + move |source: &ArrayRef| { + let string_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StringArray".to_owned(), + actual: source.data_type().clone(), + context: "string_prefix input".to_owned(), + })?; + Ok(transform + .transform(string_array)? + .map(|arr| Arc::new(arr) as ArrayRef)) + } +} + +/// Prepends a prefix to each non-empty string value. 
+pub fn string_prefix_nonempty( + prefix: impl Into, +) -> impl Fn(&ArrayRef) -> Result, Error> + Send + Sync { + let transform = StringPrefix::new(prefix).with_prefix_empty_string(false); + move |source: &ArrayRef| { + let string_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StringArray".to_owned(), + actual: source.data_type().clone(), + context: "string_prefix_nonempty input".to_owned(), + })?; + Ok(transform + .transform(string_array)? + .map(|arr| Arc::new(arr) as ArrayRef)) + } +} + +/// Appends a suffix to each string value. +pub fn string_suffix( + suffix: impl Into, +) -> impl Fn(&ArrayRef) -> Result, Error> + Send + Sync { + let transform = StringSuffix::new(suffix); + move |source: &ArrayRef| { + let string_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StringArray".to_owned(), + actual: source.data_type().clone(), + context: "string_suffix input".to_owned(), + })?; + Ok(transform + .transform(string_array)? + .map(|arr| Arc::new(arr) as ArrayRef)) + } +} + +/// Appends a suffix to each non-empty string value. +pub fn string_suffix_nonempty( + suffix: impl Into, +) -> impl Fn(&ArrayRef) -> Result, Error> + Send + Sync { + let transform = StringSuffix::new(suffix).with_suffix_empty_string(false); + move |source: &ArrayRef| { + let string_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "StringArray".to_owned(), + actual: source.data_type().clone(), + context: "string_suffix_nonempty input".to_owned(), + })?; + Ok(transform + .transform(string_array)? + .map(|arr| Arc::new(arr) as ArrayRef)) + } +} diff --git a/crates/store/re_lenses/src/semantic.rs b/crates/store/re_lenses/src/semantic.rs deleted file mode 100644 index 62ebd0c67317..000000000000 --- a/crates/store/re_lenses/src/semantic.rs +++ /dev/null @@ -1,381 +0,0 @@ -//! Semantic array transforms for concrete applications. -//! -//! 
Note: These should not be exposed as part of the public API, but rather wrapped in [`crate::Op`]. - -use std::marker::PhantomData; -use std::sync::Arc; - -use arrow::array::{ - Array as _, ArrowNativeTypeOp as _, GenericBinaryArray, GenericListArray, Int64Array, - OffsetSizeTrait, StringArray, StructArray, UInt32Array, UInt32Builder, -}; -use arrow::datatypes::{DataType, Field, Int32Type, Int64Type}; -use arrow::error::ArrowError; -use re_sdk_types::components::VideoCodec; - -use re_arrow_combinators::cast::DowncastRef; -use re_arrow_combinators::{Error, Transform, reshape}; - -/// Converts binary arrays to list arrays where each binary element becomes a list of `u8`. -/// -/// The underlying bytes buffer is reused, making this transformation almost zero-copy. -#[derive(Clone, Debug, Default)] -pub struct BinaryToListUInt8 { - _from_offset: PhantomData, - _to_offset: PhantomData, - - /// This transform is specifically intended for contiguous byte data, - /// so we default to non-nullable lists. - nullable: bool, -} - -impl BinaryToListUInt8 { - /// Create a new transformation to convert a binary array to a list array of `u8` arrays. - pub fn new() -> Self { - Default::default() - } -} - -impl Transform for BinaryToListUInt8 { - type Source = GenericBinaryArray; - type Target = GenericListArray; - - fn transform(&self, source: &GenericBinaryArray) -> Result { - use arrow::array::UInt8Array; - use arrow::buffer::ScalarBuffer; - - let scalar_buffer: ScalarBuffer = ScalarBuffer::from(source.values().clone()); - let uint8_array = UInt8Array::new(scalar_buffer, None); - - // Convert from O1 to O2. Most offset buffers will be small in real-world - // examples, so we're fine copying them. - // - // This could be true zero copy if Rust had specialization. 
- // More info: https://std-dev-guide.rust-lang.org/policy/specialization.html - let old_offsets = source.offsets().iter(); - let new_offsets: Result, Error> = old_offsets - .map(|&offset| { - let offset_usize = offset.as_usize(); - O2::from_usize(offset_usize).ok_or_else(|| Error::OffsetOverflow { - actual: offset_usize, - expected_type: std::any::type_name::(), - }) - }) - .collect(); - let offsets = arrow::buffer::OffsetBuffer::new(new_offsets?.into()); - - let list = Self::Target::new( - Arc::new(Field::new_list_field(DataType::UInt8, self.nullable)), - offsets, - Arc::new(uint8_array), - source.nulls().cloned(), - ); - - Ok(list) - } -} - -/// Converts `StructArray` of timestamps with `seconds` (i64) and `nanos` (i32) fields -/// to `Int64Array` containing the corresponding total nanoseconds timestamps. -#[derive(Default)] -pub struct TimeSpecToNanos {} - -impl Transform for TimeSpecToNanos { - type Source = StructArray; - type Target = Int64Array; - - fn transform(&self, source: &StructArray) -> Result { - let seconds_array = reshape::GetField::new("seconds") - .then(DowncastRef::::new()) - .transform(source)?; - let nanos_array = reshape::GetField::new("nanos") - .then(DowncastRef::::new()) - .transform(source)?; - - Ok(arrow::compute::try_binary( - &seconds_array, - &nanos_array, - |seconds: i64, nanos: i32| -> Result { - seconds - .mul_checked(1_000_000_000)? - .add_checked(nanos as i64) - }, - )?) - } -} - -/// Transforms a `StringArray` of video codec names to a `UInt32Array`, -/// where each u32 corresponds to a Rerun `VideoCodec` enum value. 
-#[derive(Default)] -pub struct StringToVideoCodecUInt32 {} - -impl Transform for StringToVideoCodecUInt32 { - type Source = StringArray; - type Target = UInt32Array; - - fn transform(&self, source: &StringArray) -> Result { - Ok(source - .iter() - .try_fold( - UInt32Builder::with_capacity(source.len()), - |mut builder, maybe_str| { - if let Some(codec_str) = maybe_str { - let codec = match codec_str.to_lowercase().as_str() { - "h264" => VideoCodec::H264, - "h265" => VideoCodec::H265, - "av1" => VideoCodec::AV1, - _ => { - return Err(Error::UnexpectedValue { - expected: &["h264", "h265", "av1"], - actual: codec_str.to_owned(), - }); - } - }; - builder.append_value(codec as u32); - } else { - builder.append_null(); - } - Ok(builder) - }, - )? - .finish()) - } -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use arrow::array::{ - Array as _, GenericByteBuilder, Int32Array, Int64Array, StringArray, StructArray, - UInt32Array, - }; - use arrow::datatypes::{DataType, Field, GenericBinaryType}; - use re_arrow_combinators::{Error, Transform as _}; - use re_sdk_types::components::VideoCodec; - use re_sdk_types::reflection::Enum as _; - - use super::*; - - // Generic test for binary arrays where the offset is the same. 
- fn impl_binary_test() { - let mut builder = GenericByteBuilder::>::new(); - builder.append_value(b"hello"); - builder.append_value(b"world"); - builder.append_null(); - builder.append_value(b""); - builder.append_value([0x00, 0xFF, 0x42]); - let binary_array = builder.finish(); - - let result = BinaryToListUInt8::::new() - .transform(&binary_array) - .unwrap(); - - // Verify structure - assert_eq!(result.len(), 5); - assert!(!result.is_null(0)); - assert!(!result.is_null(1)); - assert!(result.is_null(2)); - assert!(!result.is_null(3)); - assert!(!result.is_null(4)); - - { - let list = result.value(0); - let uint8 = list - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(uint8.len(), 5); - assert_eq!(uint8.value(0) as char, 'h'); - assert_eq!(uint8.value(1) as char, 'e'); - assert_eq!(uint8.value(2) as char, 'l'); - assert_eq!(uint8.value(3) as char, 'l'); - assert_eq!(uint8.value(4) as char, 'o'); - } - - { - let list = result.value(1); - let uint8 = list - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(list.len(), 5); - assert_eq!(uint8.value(0) as char, 'w'); - assert_eq!(uint8.value(1) as char, 'o'); - assert_eq!(uint8.value(2) as char, 'r'); - assert_eq!(uint8.value(3) as char, 'l'); - assert_eq!(uint8.value(4) as char, 'd'); - } - - assert!(result.is_null(2)); - - { - let list = result.value(3); - let uint8 = list - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(uint8.len(), 0); - } - - { - let list = result.value(4); - let uint8 = list - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(uint8.len(), 3); - assert_eq!(uint8.value(0), 0x00); - assert_eq!(uint8.value(1), 0xFF); - assert_eq!(uint8.value(2), 0x42); - } - } - - #[test] - fn test_binary_to_list_uint8() { - // We test the different offset combinations. 
- impl_binary_test::(); - impl_binary_test::(); - impl_binary_test::(); - impl_binary_test::(); - } - - #[test] - fn test_binary_offset_overflow() { - use arrow::array::LargeBinaryArray; - use arrow::buffer::OffsetBuffer; - - // Create a LargeBinaryArray with an offset that exceeds i32::MAX - let large_offset = i32::MAX as i64 + 1; - - let offsets = vec![0i64, large_offset]; - let offsets_buffer = OffsetBuffer::new(offsets.into()); - - let values = vec![0u8; large_offset as usize]; - - let large_binary = LargeBinaryArray::new(offsets_buffer, values.into(), None); - - // Try to convert from LargeBinaryArray (i64 offsets) to ListArray (i32 offsets) - let transform = BinaryToListUInt8::::new(); - let result = transform.transform(&large_binary); - - // Should fail with OffsetOverflow - assert!(result.is_err()); - match result.unwrap_err() { - Error::OffsetOverflow { - actual, - expected_type, - } => { - assert_eq!(actual, large_offset as usize); - assert_eq!(expected_type, "i32"); - } - other => panic!("Expected OffsetOverflow error, got: {other:?}"), - } - } - - /// Tests that timespec structs are correctly converted to nanoseconds, including (mixed) null handling. 
- #[test] - fn test_timespec_to_nanos() { - let seconds_field = Arc::new(Field::new("seconds", DataType::Int64, true)); - let nanos_field = Arc::new(Field::new("nanos", DataType::Int32, true)); - - let seconds_array = Arc::new(Int64Array::from(vec![ - Some(1), - Some(2), - None, - Some(3), - None, - ])); - let nanos_array = Arc::new(Int32Array::from(vec![ - Some(500_000_000), - None, - Some(0), - Some(250_000_000), - None, - ])); - - let struct_array = StructArray::new( - vec![seconds_field, nanos_field].into(), - vec![seconds_array, nanos_array], - None, - ); - let output_array = TimeSpecToNanos::default() - .transform(&struct_array) - .expect("transformation failed"); - let expected_array = Int64Array::from(vec![ - Some(1_500_000_000), - None, - None, - Some(3_250_000_000), - None, - ]); - assert_eq!(output_array, expected_array); - } - - /// Tests that supported codecs are correctly converted, and checks case-insensitivity and null handling. - #[test] - fn test_string_to_codec_uint32() { - // Note: mixed codecs normally don't make sense, but should be fine from a pure conversion perspective. - let input_array = StringArray::from(vec![ - Some("H264"), - None, - Some("h264"), - Some("H265"), - Some("aV1"), - ]); - assert_eq!(input_array.null_count(), 1); - let output_array = StringToVideoCodecUInt32::default() - .transform(&input_array) - .expect("transformation failed"); - assert_eq!(output_array.null_count(), 1); - let expected_array = UInt32Array::from(vec![ - Some(VideoCodec::H264 as u32), - None, - Some(VideoCodec::H264 as u32), - Some(VideoCodec::H265 as u32), - Some(VideoCodec::AV1 as u32), - ]); - assert_eq!(output_array, expected_array); - } - - /// Tests that we return the correct error when an unsupported codec is in the data. 
- #[test] - fn test_string_to_codec_uint32_unsupported() { - let unsupported_codecs = ["vp9"]; - for &bad_codec in &unsupported_codecs { - let input_array = StringArray::from(vec![Some("h264"), Some(bad_codec)]); - let result = StringToVideoCodecUInt32::default().transform(&input_array); - assert!(result.is_err()); - let Err(Error::UnexpectedValue { actual, .. }) = result else { - panic!("wrong error type"); - }; - assert_eq!(actual, bad_codec); - } - } - - /// Tests that all codecs defined in `VideoCodec` are accepted. - #[test] - fn test_string_to_codec_uint32_all_supported() { - let variants = VideoCodec::variants(); - let variant_names = variants - .iter() - .map(|v| format!("{v:?}").to_lowercase()) - .collect::>(); - let input_array = StringArray::from( - variant_names - .iter() - .map(|name| Some(name.as_str())) - .collect::>>(), - ); - let output_array = StringToVideoCodecUInt32::default() - .transform(&input_array) - .expect("transformation failed - are all variants of VideoCodec supported?"); - let expected_array = UInt32Array::from( - variants - .iter() - .map(|v| Some(*v as u32)) - .collect::>>(), - ); - assert_eq!(output_array, expected_array); - } -} diff --git a/crates/store/re_arrow_combinators/Cargo.toml b/crates/store/re_lenses_core/Cargo.toml similarity index 63% rename from crates/store/re_arrow_combinators/Cargo.toml rename to crates/store/re_lenses_core/Cargo.toml index fa335423363c..2291cb1e3524 100644 --- a/crates/store/re_arrow_combinators/Cargo.toml +++ b/crates/store/re_lenses_core/Cargo.toml @@ -1,7 +1,7 @@ [package] -name = "re_arrow_combinators" +name = "re_lenses_core" authors.workspace = true -description = "Type-safe, composable transformations for Arrow arrays." +description = "Core lens types and composable Arrow array transformations." 
edition.workspace = true homepage.workspace = true include.workspace = true @@ -21,9 +21,16 @@ all-features = true [dependencies] re_arrow_util.workspace = true +re_byte_size.workspace = true +re_chunk.workspace = true +re_log_types.workspace = true re_log.workspace = true +re_sdk_types.workspace = true +ahash.workspace = true arrow.workspace = true +itertools.workspace = true +nohash-hasher.workspace = true thiserror.workspace = true vec1.workspace = true diff --git a/crates/store/re_lenses_core/README.md b/crates/store/re_lenses_core/README.md new file mode 100644 index 000000000000..2af757d60bd5 --- /dev/null +++ b/crates/store/re_lenses_core/README.md @@ -0,0 +1,15 @@ +# re_lenses_core + +Part of the [`rerun`](https://github.com/rerun-io/rerun) family of crates. + +[![Latest version](https://img.shields.io/crates/v/re_lenses_core.svg)](https://crates.io/crates/re_lenses_core) +[![Documentation](https://docs.rs/re_lenses_core/badge.svg)](https://docs.rs/re_lenses_core) +![MIT](https://img.shields.io/badge/license-MIT-blue.svg) +![Apache](https://img.shields.io/badge/license-Apache-blue.svg) + +Core lens types and composable Arrow array transformations. + +This crate provides the Lenses definitions and builders, and composable +transformations for Arrow arrays. Transformations are composable operations +that convert one array type to another, preserving structural properties +like row counts and null handling. diff --git a/crates/store/re_lenses_core/src/ast.rs b/crates/store/re_lenses_core/src/ast.rs new file mode 100644 index 000000000000..6ebe5a7dfd26 --- /dev/null +++ b/crates/store/re_lenses_core/src/ast.rs @@ -0,0 +1,634 @@ +//! Private module with the AST-like definitions of lenses. +//! +//! **Note**: Apart from high-level entry points (like [`Lens`]), +//! we should not leak these elements into the public API. This allows us to +//! evolve the definition of lenses over time, if requirements change. 
+ +use std::collections::BTreeMap; + +use crate::combinators::{Explode, Transform as _}; +use crate::{DynExpr, LensRuntimeError, Selector}; +use arrow::array::{AsArray as _, Int64Array, ListArray}; +use arrow::compute::take; +use itertools::Itertools as _; +use nohash_hasher::IntMap; +use re_chunk::{ + ArrowArray as _, Chunk, ChunkId, ComponentIdentifier, EntityPath, TimeColumn, Timeline, + TimelineName, +}; +use re_log_types::{ResolvedEntityPathFilter, TimeType}; +use re_sdk_types::{ComponentDescriptor, SerializedComponentColumn}; +use vec1::Vec1; + +use crate::builder::LensBuilder; + +type ChunkTimelines = IntMap; + +/// A component output. +/// +/// Depending on the context in which this output is used, the result from +/// applying the transform should be a list array (1:1) or a list array of list arrays (1:N). +#[derive(Clone, Debug)] +pub struct ComponentOutput { + pub component_descr: ComponentDescriptor, + pub selector: Selector, +} + +/// A time extraction output. +#[derive(Clone, Debug)] +pub struct TimeOutput { + pub timeline_name: TimelineName, + pub timeline_type: TimeType, + pub selector: Selector, +} + +#[derive(Clone)] +pub struct LensOutput { + /// Can be used for better errors/warnings. + pub input_id: ComponentIdentifier, + + /// Component columns that will be created. + pub output_components: Vec1, + + /// Time columns that will be created. + pub output_timelines: Vec, +} + +impl LensOutput { + fn apply( + &self, + scatter: bool, + target_entity: &EntityPath, + timelines: &ChunkTimelines, + input_data: &SerializedComponentColumn, + ) -> Result { + let Self { + input_id, + output_components, + output_timelines, + } = self; + + let chunk = if scatter { + apply_one_to_many( + target_entity, + timelines, + output_timelines, + output_components, + input_data, + )? + } else { + apply_one_to_one( + target_entity, + timelines, + output_timelines, + output_components, + input_data, + )? 
+ }; + + assert!( + chunk.is_sorted(), + "Lens produced unsorted chunk. This is a bug. Fix it." + ); + + for (name, times) in chunk.timelines() { + if !times.is_sorted() { + re_log::debug_warn_once!( + "Lens for component `{input_id}` produced unsorted time column `{name}` for entity `{target_entity}`" + ); + } + } + + Ok(chunk) + } +} + +/// A lens that transforms component data from one form to another. +/// +/// Lenses allow you to extract, transform, and restructure component data. They +/// are applied to chunks that match the specified entity path filter and contain +/// the target component. +/// +/// # Assumptions +/// +/// There can be at most one set of output columns per target entity within a lens. +/// +/// Works on component columns within a chunk. Because what goes into a chunk +/// is non-deterministic, and dependent on the batcher, no assumptions should be +/// made for values across rows. +#[derive(Clone)] +pub struct Lens { + pub(crate) input: ComponentIdentifier, + + /// When `true`, use 1:N row mapping (scatter/explode lists). + /// When `false`, use 1:1 row mapping. + pub(crate) scatter: bool, + + /// Output for the same entity as the input. + pub(crate) same_entity_output: Option, + + /// Outputs keyed by explicit target entity path. + pub(crate) entity_outputs: BTreeMap, +} + +impl Lens { + /// Returns a new [`LensBuilder`] for the given input component column. + /// + /// By default, creates a one-to-one (temporal) lens. Call `.with_static()` or `.with_to_many()` + /// on the builder to switch to a different mode. + pub fn for_input_column(component: impl Into) -> LensBuilder { + LensBuilder::new(component) + } + + /// Applies this lens and creates one or more chunks. 
+ fn apply<'a>( + &'a self, + original_entity: &'a EntityPath, + timelines: &'a ChunkTimelines, + input: &'a SerializedComponentColumn, + ) -> impl Iterator> + 'a { + let scatter = self.scatter; + self.same_entity_output + .iter() + .map(move |output| output.apply(scatter, original_entity, timelines, input)) + .chain( + self.entity_outputs + .iter() + .map(move |(path, output)| output.apply(scatter, path, timelines, input)), + ) + } +} + +/// An optional [`Chunk`] that only contains the component and time columns that we were able to compute. +/// +/// Also contains a list of contextualized errors that describe which columns failed. +#[derive(Debug)] +pub struct PartialChunk { + /// [`Self`] is only used in an [`Result::Err`] variant. + /// + /// We therefore box the actual payload to keep the happy path optimized. + inner: Box, +} + +#[derive(Debug)] +struct PartialChunkInner { + /// In some cases we might not be able to produce a chunk at all. + chunk: Option, + + /// Collection of errors encountered while executing the Lens. + errors: Vec, +} + +impl PartialChunk { + /// Returns the partial chunk if any and consumes `self`. 
+ pub fn take(self) -> Option { + self.inner.chunk + } + + pub fn errors(&self) -> impl Iterator { + self.inner.errors.iter() + } +} + +fn collect_components_iter<'a>( + input: &'a SerializedComponentColumn, + components: &'a [ComponentOutput], + target_entity: &'a EntityPath, +) -> impl Iterator> + 'a { + components.iter().filter_map(move |output| { + match output.selector.execute_per_row(&input.list_array) { + Ok(Some(list_array)) => Some(Ok((output.component_descr.clone(), list_array))), + Ok(None) => { + re_log::debug_once!( + "Lens suppressed for `{target_entity}` component `{}`", + output.component_descr.component + ); + None + } + Err(source) => Some(Err(LensRuntimeError::ComponentOperationFailed { + target_entity: target_entity.clone(), + input_component: input.descriptor.component, + component: output.component_descr.component, + source: Box::new(source), + })), + } + }) +} + +fn collect_output_times_iter<'a>( + input: &'a SerializedComponentColumn, + timelines: &'a [TimeOutput], + target_entity: &'a EntityPath, +) -> impl Iterator> + 'a { + timelines.iter().filter_map(move |time| { + match time.selector.execute_per_row(&input.list_array) { + Ok(Some(list_array)) => Some(Ok((time.timeline_name, time.timeline_type, list_array))), + Ok(None) => { + re_log::debug_once!( + "Lens suppressed for `{target_entity}` timeline `{}`", + time.timeline_name, + ); + None + } + Err(source) => Some(Err(LensRuntimeError::TimeOperationFailed { + target_entity: target_entity.clone(), + input_component: input.descriptor.component, + timeline_name: time.timeline_name, + source: Box::new(source), + })), + } + }) +} + +/// Converts a time array to a time column. +/// +/// Checks if the `list_array` values are [`arrow::array::Int64Array`] and if so, creates a [`re_chunk::TimeColumn`]. 
+fn try_convert_time_column( + timeline_name: TimelineName, + timeline_type: TimeType, + list_array: &ListArray, +) -> Result<(TimelineName, TimeColumn), LensRuntimeError> { + if let Some(time_vals) = list_array.values().as_any().downcast_ref::() { + let time_column = re_chunk::TimeColumn::new( + None, + Timeline::new(timeline_name, timeline_type), + time_vals.values().clone(), + ); + Ok((timeline_name, time_column)) + } else { + Err(LensRuntimeError::InvalidTimeColumn { + timeline_name, + actual_type: list_array.values().data_type().clone(), + }) + } +} + +/// Creates a chunk from the given components and timelines, handling errors appropriately. +/// +/// Returns `Ok(chunk)` if successful with no errors, or `Err(PartialChunk)` if there were +/// errors during processing (with an optional chunk if creation succeeded despite errors). +fn finalize_chunk( + entity_path: EntityPath, + chunk_times: IntMap, + component_results: re_chunk::ChunkComponents, + mut errors: Vec, +) -> Result { + match Chunk::from_auto_row_ids(ChunkId::new(), entity_path, chunk_times, component_results) { + Ok(chunk) => { + if errors.is_empty() { + Ok(chunk) + } else { + Err(PartialChunk { + inner: Box::new(PartialChunkInner { + chunk: Some(chunk), + errors, + }), + }) + } + } + Err(err) => { + errors.push(err.into()); + Err(PartialChunk { + inner: Box::new(PartialChunkInner { + chunk: None, + errors, + }), + }) + } + } +} + +/// Applies a one-to-one lens transformation where each input row produces exactly one output row. +/// +/// The output chunk inherits all timelines from the input chunk, with additional timelines +/// extracted from the component data if specified. Component columns are transformed according +/// to the provided operations. 
+fn apply_one_to_one( + target_entity: &EntityPath, + original_timelines: &ChunkTimelines, + timelines: &[TimeOutput], + components: &[ComponentOutput], + input: &SerializedComponentColumn, +) -> Result { + let mut errors = Vec::new(); + + let mut component_results = re_chunk::ChunkComponents::default(); + + // Collect successful components directly into ChunkComponents, accumulate errors. + for result in collect_components_iter(input, components, target_entity) { + match result { + Ok((component_descr, list_array)) => { + component_results + .insert(SerializedComponentColumn::new(list_array, component_descr)); + } + Err(err) => errors.push(err), + } + } + + // Inherit all existing time columns as-is (since row count doesn't change) + let mut chunk_times = original_timelines.clone(); + + // Collect successful time columns, accumulate errors + chunk_times.extend( + collect_output_times_iter(input, timelines, target_entity).filter_map( + |result| match result { + Ok((timeline_name, timeline_type, list_array)) => { + match try_convert_time_column(timeline_name, timeline_type, &list_array) { + Ok(time_col) => Some(time_col), + Err(err) => { + errors.push(err); + None + } + } + } + Err(err) => { + errors.push(err); + None + } + }, + ), + ); + + finalize_chunk( + target_entity.clone(), + chunk_times, + component_results, + errors, + ) +} + +/// Applies a one-to-many lens transformation where each input row potentially produces multiple output rows. +/// +/// The output chunk inherits all time columns from the input chunk, with additional time columns +/// extracted from the component data if specified. Component columns are transformed according +/// to the provided operations. 
+fn apply_one_to_many( + target_entity: &EntityPath, + original_timelines: &ChunkTimelines, + timelines: &[TimeOutput], + components: &[ComponentOutput], + input: &SerializedComponentColumn, +) -> Result { + use arrow::array::UInt32Array; + + let mut errors = Vec::new(); + + let mut components = collect_components_iter(input, components, target_entity).peekable(); + + // Peek at the first component to establish the scatter pattern (how many output rows + // each input row produces). All components must have the same outer list structure. + // We use .peek() instead of consuming the iterator so we can still process all + // components (including this first one) later. + let reference_array = match components.peek() { + Some(Ok((_descr, reference_array))) => reference_array, + Some(Err(_)) => { + // If the first component failed, collect all errors and return + errors.extend(components.filter_map(|r| r.err())); + return Err(PartialChunk { + inner: Box::new(PartialChunkInner { + chunk: None, + errors, + }), + }); + } + None => { + return Err(PartialChunk { + inner: Box::new(PartialChunkInner { + chunk: None, + errors: vec![LensRuntimeError::NoOutputColumnsProduced { + input_component: input.descriptor.component, + target_entity: target_entity.clone(), + }], + }), + }); + } + }; + + // Build scatter indices: tracks which input row each output row came from + // Example: [0, 0, 0, 1, 2] means rows 0-2 from input 0, row 3 from input 1, row 4 from input 2 + let mut scatter_indices = Vec::new(); + let offsets = reference_array.value_offsets(); + + for (row_idx, window) in offsets.windows(2).enumerate() { + let start = window[0]; + let end = window[1]; + let count = end - start; + + if reference_array.is_null(row_idx) || count == 0 { + // Null or empty list produces one output row + scatter_indices.push(row_idx as u32); + } else { + // Each element produces one output row + for _ in 0..count { + scatter_indices.push(row_idx as u32); + } + } + } + + let scatter_indices_array 
= UInt32Array::from(scatter_indices); + + // Replicate all existing time values using scatter indices. + let mut chunk_times: IntMap = Default::default(); + for (timeline_name, time_column) in original_timelines { + let time_values = time_column.times_raw(); + let time_values_array = Int64Array::from(time_values.to_vec()); + + // `arrow::compute::take` is fine to use in this context, because we want to allow nullability. + #[expect(clippy::disallowed_methods)] + match take(&time_values_array, &scatter_indices_array, None) { + Ok(scattered) => { + let scattered_i64 = scattered.as_primitive::(); + let new_time_column = re_chunk::TimeColumn::new( + None, + *time_column.timeline(), + scattered_i64.values().clone(), + ); + chunk_times.insert(*timeline_name, new_time_column); + } + Err(source) => { + errors.push(LensRuntimeError::ScatterExistingTimeFailed { + timeline_name: *timeline_name, + source, + }); + } + } + } + + // Explode all output time columns and collect errors + chunk_times.extend( + collect_output_times_iter(input, timelines, target_entity).filter_map( + |result| match result { + Ok((timeline_name, timeline_type, list_array)) => { + match Explode.transform(&list_array) { + Ok(Some(exploded)) => { + match try_convert_time_column(timeline_name, timeline_type, &exploded) { + Ok(time_col) => Some(time_col), + Err(err) => { + errors.push(err); + None + } + } + } + Ok(None) => None, + Err(err) => { + errors.push(LensRuntimeError::TimeOperationFailed { + target_entity: target_entity.clone(), + input_component: input.descriptor.component, + timeline_name, + source: Box::new(err.into()), + }); + None + } + } + } + Err(err) => { + errors.push(err); + None + } + }, + ), + ); + + let mut chunk_components = re_chunk::ChunkComponents::default(); + + for result in components { + match result { + Ok((component_descr, list_array)) => match Explode.transform(&list_array) { + Ok(Some(exploded)) => { + chunk_components + .insert(SerializedComponentColumn::new(exploded, 
component_descr)); + } + Ok(None) => {} + Err(err) => { + errors.push(LensRuntimeError::ComponentOperationFailed { + target_entity: target_entity.clone(), + input_component: input.descriptor.component, + component: component_descr.component, + source: Box::new(err.into()), + }); + } + }, + Err(err) => errors.push(err), + } + } + + // Verify that all columns have the same length happens during chunk creation. + finalize_chunk(target_entity.clone(), chunk_times, chunk_components, errors) +} + +/// Controls how data is processed when applying lenses. +/// +/// This determines what happens to columns when lenses are applied, particularly +/// how unmatched original columns are handled. +#[derive(Copy, Clone)] +pub enum OutputMode { + /// Forward both the transformed data from matching lenses and the original data. + /// + /// Use this when you want to preserve all original data alongside transformations. + ForwardAll, + + /// Forward transformed data if lenses match, otherwise forward the original data unchanged. + /// + /// Use this when you want to transform matching data but ensure unmatched data isn't dropped. + ForwardUnmatched, + + /// Only forward transformed data, drop data that doesn't match any lens. + /// + /// Use this when you want a pure transformation pipeline where only explicitly transformed + /// data should be output. + DropUnmatched, +} + +/// A collection that holds multiple lenses and applies them to chunks. +/// +/// When a chunk is processed, all relevant lenses (those whose input component +/// matches a component in the chunk) are applied. +/// +/// Each lens is paired with a [`ResolvedEntityPathFilter`] to control which +/// entity paths it applies to. +#[derive(Clone)] +pub struct Lenses { + lenses: Vec<(ResolvedEntityPathFilter, Lens)>, + mode: OutputMode, +} + +impl Lenses { + /// Creates a new lens collection with the specified mode. 
+ pub fn new(mode: OutputMode) -> Self { + Self { + lenses: Default::default(), + mode, + } + } + + /// Adds a lens that applies to all entity paths. + pub fn add_lens(mut self, lens: Lens) -> Self { + self.lenses.push(( + re_log_types::EntityPathFilter::all().resolve_without_substitutions(), + lens, + )); + self + } + + /// Adds a lens with an entity path filter. + /// + /// The lens will only be applied to chunks whose entity path matches the filter. + pub fn add_lens_with_filter( + mut self, + filter: re_log_types::EntityPathFilter, + lens: Lens, + ) -> Self { + self.lenses + .push((filter.resolve_without_substitutions(), lens)); + self + } + + /// Sets the output mode for this collection. + pub fn set_output_mode(&mut self, mode: OutputMode) { + self.mode = mode; + } + + fn relevant_lenses(&self, chunk: &Chunk) -> impl Iterator { + let entity_path = chunk.entity_path(); + self.lenses + .iter() + .filter(|(filter, lens)| { + filter.matches(entity_path) && chunk.components().contains_component(lens.input) + }) + .map(|(_, lens)| lens) + } + + /// Applies all relevant lenses and returns the results. 
+ /// + /// The behavior depends on the configured [`OutputMode`]: + /// - [`OutputMode::ForwardAll`]: Returns both transformed and original data + /// - [`OutputMode::ForwardUnmatched`]: Returns transformed data if lenses match, otherwise original data + /// - [`OutputMode::DropUnmatched`]: Returns only transformed data, drops unmatched data + pub fn apply<'a>( + &'a self, + chunk: &'a Chunk, + ) -> impl Iterator> + 'a { + let prefix: Option = match self.mode { + OutputMode::ForwardAll => Some(chunk.clone()), + OutputMode::ForwardUnmatched => { + let relevant_components = self + .relevant_lenses(chunk) + .map(|lens| lens.input) + .unique() + .collect::>(); + let untouched = chunk.components_dropped(&relevant_components); + (untouched.num_components() > 0).then_some(untouched) + } + OutputMode::DropUnmatched => None, + }; + + prefix.into_iter().map(Ok).chain( + self.relevant_lenses(chunk) + .filter_map(|lens| { + let component = chunk.components().get(lens.input)?; + Some(lens.apply(chunk.entity_path(), chunk.timelines(), component)) + }) + .flatten(), + ) + } +} diff --git a/crates/store/re_lenses_core/src/builder.rs b/crates/store/re_lenses_core/src/builder.rs new file mode 100644 index 000000000000..8861fa5c7c8d --- /dev/null +++ b/crates/store/re_lenses_core/src/builder.rs @@ -0,0 +1,172 @@ +//! Builder API for constructing lenses. + +use std::collections::BTreeMap; + +use re_chunk::{ComponentIdentifier, EntityPath, TimelineName}; +use re_log_types::TimeType; +use re_sdk_types::ComponentDescriptor; + +use crate::selector::DynExpr; +use crate::{LensBuilderError, Selector, ast}; + +/// Builder for lenses with support for multiple output modes. 
+#[must_use] +pub struct LensBuilder { + input: ComponentIdentifier, + scatter: bool, + same_entity_output: Option, + entity_outputs: BTreeMap, +} + +impl LensBuilder { + pub(crate) fn new(component: impl Into) -> Self { + Self { + input: component.into(), + scatter: false, + same_entity_output: None, + entity_outputs: BTreeMap::new(), + } + } + + /// Enables 1:N row mapping (scatter) for this lens. + /// + /// When scatter is enabled, each input row produces multiple output rows by + /// exploding list arrays. The timepoint is replicated/scattered across the + /// output rows. Useful for flattening lists or exploding batches. + /// + /// By default, lenses use 1:1 row mapping where each input row produces + /// exactly one output row. + pub fn scatter(mut self) -> Self { + self.scatter = true; + self + } + + /// Adds an output with the same entity as the input. + /// + /// Each input row produces exactly one output row (unless `.scatter()` is set on the + /// builder). Outputs inherit time columns from the input, plus any additional time + /// columns specified via `.time()`. + pub fn output_columns( + mut self, + builder: impl FnOnce(OutputBuilder) -> Result, + ) -> Result { + if self.same_entity_output.is_some() { + return Err(LensBuilderError::DuplicateSameEntityOutput); + } + let output_builder = OutputBuilder::new(); + let output = builder(output_builder)?.build(self.input)?; + self.same_entity_output = Some(output); + Ok(self) + } + + /// Adds an output targeting an explicit entity path. + /// + /// Each input row produces exactly one output row (unless `.scatter()` is set on the + /// builder). Outputs inherit time columns from the input, plus any additional time + /// columns specified via `.time()`. 
+ pub fn output_columns_at( + mut self, + entity_path: impl Into, + builder: impl FnOnce(OutputBuilder) -> Result, + ) -> Result { + let entity_path = entity_path.into(); + if self.entity_outputs.contains_key(&entity_path) { + return Err(LensBuilderError::DuplicateTargetEntity { + target_entity: entity_path, + }); + } + let output_builder = OutputBuilder::new(); + let output = builder(output_builder)?.build(self.input)?; + self.entity_outputs.insert(entity_path, output); + Ok(self) + } + + /// Finalizes this builder and returns the corresponding lens. + pub fn build(self) -> ast::Lens { + ast::Lens { + input: self.input, + scatter: self.scatter, + same_entity_output: self.same_entity_output, + entity_outputs: self.entity_outputs, + } + } +} + +// ==================== Output Builder ==================== + +/// Builder for lens output groups. +/// +/// Defines the component and time columns that a lens output produces. +#[must_use] +pub struct OutputBuilder { + components: Vec, + time_outputs: Vec, +} + +// TODO(RR-3962): Get rid of the `unnecessary_wraps`. +#[expect( + clippy::unnecessary_wraps, + reason = "Result return enables `?` chaining in builder closures" +)] +impl OutputBuilder { + fn new() -> Self { + Self { + components: vec![], + time_outputs: vec![], + } + } + + /// Adds a component output column. + /// + /// # Arguments + /// * `component_descr` - The descriptor for the output component + /// * `selector` - Selector to apply to the input column + pub fn component( + mut self, + component_descr: ComponentDescriptor, + selector: impl Into>, + ) -> Result { + self.components.push(ast::ComponentOutput { + component_descr, + selector: selector.into(), + }); + Ok(self) + } + + /// Adds a time extraction. + /// + /// Extracts data from the input column to create a new time column for the output rows. 
+ /// + /// # Arguments + /// * `timeline_name` - Name of the timeline to create + /// * `timeline_type` - Type of timeline (Sequence or Time) + /// * `selector` - Selector to extract time values (must produce [`arrow::array::Int64Array`]) + pub fn time( + mut self, + timeline_name: impl Into, + timeline_type: TimeType, + selector: impl Into>, + ) -> Result { + self.time_outputs.push(ast::TimeOutput { + timeline_name: timeline_name.into(), + timeline_type, + selector: selector.into(), + }); + Ok(self) + } + + /// Builds a [`ast::LensOutput`], the `input` is passed for providing contextualized errors. + fn build(self, input: ComponentIdentifier) -> Result { + let components = self.components.try_into().map_err(|_err| { + LensBuilderError::MissingOutputComponent { + input_component: input, + } + })?; + + Ok(ast::LensOutput { + input_id: input, + output_components: components, + output_timelines: self.time_outputs, + }) + } +} diff --git a/crates/store/re_lenses_core/src/chunk.rs b/crates/store/re_lenses_core/src/chunk.rs new file mode 100644 index 000000000000..ccfe3d343642 --- /dev/null +++ b/crates/store/re_lenses_core/src/chunk.rs @@ -0,0 +1,70 @@ +use re_chunk::{Chunk, ComponentIdentifier}; + +use crate::{DynExpr, Lens, LensRuntimeError, Lenses, OutputMode, PartialChunk, Selector}; + +/// Extension methods for applying lenses to a [`Chunk`]. +pub trait ChunkExt { + /// Apply one or more lenses to this chunk, returning transformed chunks. + /// + /// Each lens matches by input component. Columns not consumed by any + /// matching lens are forwarded unchanged as a separate chunk + /// ([`OutputMode::ForwardUnmatched`]). + /// + /// If no lens matches the chunk (including when an empty slice is passed), + /// the original chunk is returned unchanged. + fn apply_lenses(&self, lenses: &[Lens]) -> Result, PartialChunk>; + + /// Apply a selector to a single component, returning a new chunk with the + /// component transformed in-place. 
+ /// + /// All other columns (timelines, other components) are preserved unchanged. + /// The source component's existing descriptor is preserved. + fn apply_selector( + &self, + source: ComponentIdentifier, + selector: &Selector, + ) -> Result; +} + +impl ChunkExt for Chunk { + fn apply_lenses(&self, lenses: &[Lens]) -> Result, PartialChunk> { + let mut collection = Lenses::new(OutputMode::ForwardUnmatched); + for lens in lenses { + collection = collection.add_lens(lens.clone()); + } + + collection.apply(self).collect::, _>>() + } + + fn apply_selector( + &self, + source: ComponentIdentifier, + selector: &Selector, + ) -> Result { + if !self.components().contains_component(source) { + return Err(LensRuntimeError::ComponentNotFound { + entity_path: self.entity_path().clone(), + component: source, + }); + } + + let entity_path = self.entity_path().clone(); + let selector = selector.clone(); + + self.with_mapped_component(source, None, |list_array| { + let result = selector.execute_per_row(&list_array).map_err(|err| { + LensRuntimeError::ComponentOperationFailed { + target_entity: entity_path.clone(), + input_component: source, + component: source, + source: Box::new(err), + } + })?; + + result.ok_or_else(|| LensRuntimeError::NoOutputColumnsProduced { + input_component: source, + target_entity: entity_path.clone(), + }) + }) + } +} diff --git a/crates/store/re_arrow_combinators/src/cast.rs b/crates/store/re_lenses_core/src/combinators/cast.rs similarity index 90% rename from crates/store/re_arrow_combinators/src/cast.rs rename to crates/store/re_lenses_core/src/combinators/cast.rs index cc1a429838e9..078a0ded8e7d 100644 --- a/crates/store/re_arrow_combinators/src/cast.rs +++ b/crates/store/re_lenses_core/src/combinators/cast.rs @@ -6,7 +6,7 @@ use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray}; use arrow::compute::cast; use arrow::datatypes::Field; -use crate::{Error, Transform}; +use super::{error::Error, transform::Transform}; /// Casts a 
primitive array from one type to another using Arrow's type casting. /// @@ -43,7 +43,7 @@ where type Source = PrimitiveArray; type Target = PrimitiveArray; - fn transform(&self, source: &PrimitiveArray) -> Result, Error> { + fn transform(&self, source: &PrimitiveArray) -> Result>, Error> { let source_ref: &dyn Array = source; let target_type = T::DATA_TYPE; let casted = cast(source_ref, &target_type)?; @@ -74,7 +74,7 @@ where type Source = ArrayRef; type Target = PrimitiveArray; - fn transform(&self, source: &ArrayRef) -> Result, Error> { + fn transform(&self, source: &ArrayRef) -> Result>, Error> { source .as_any() .downcast_ref::>() @@ -84,6 +84,7 @@ where context: "downcast_ref".to_owned(), }) .cloned() + .map(Some) } } @@ -106,7 +107,7 @@ impl Transform for ListToFixedSizeList { type Source = arrow::array::ListArray; type Target = arrow::array::FixedSizeListArray; - fn transform(&self, source: &Self::Source) -> Result { + fn transform(&self, source: &Self::Source) -> Result, Error> { // Check that each list has exactly the expected length (or is null). let offsets = source.value_offsets(); let expected_length = self.value_length as usize; @@ -129,11 +130,11 @@ impl Transform for ListToFixedSizeList { source.value_type().clone(), source.is_nullable(), )); - Ok(arrow::array::FixedSizeListArray::try_new( + Ok(Some(arrow::array::FixedSizeListArray::try_new( field, self.value_length, source.values().clone(), source.nulls().cloned(), - )?) + )?)) } } diff --git a/crates/store/re_arrow_combinators/src/error.rs b/crates/store/re_lenses_core/src/combinators/error.rs similarity index 81% rename from crates/store/re_arrow_combinators/src/error.rs rename to crates/store/re_lenses_core/src/combinators/error.rs index ebb7b1deaf8f..24607c34f9a3 100644 --- a/crates/store/re_arrow_combinators/src/error.rs +++ b/crates/store/re_lenses_core/src/combinators/error.rs @@ -1,4 +1,4 @@ -//! Error types used in the `re_arrow_combinators` crate. +//! 
Error types used in the `re_lenses_core` crate. use std::num::TryFromIntError; use std::sync::Arc; @@ -6,6 +6,8 @@ use std::sync::Arc; use arrow::datatypes::DataType; use arrow::error::ArrowError; +use crate::selector::function_registry::FunctionRegistryError; + /// Errors that can occur during array transformations. #[derive(Debug, thiserror::Error, Clone)] pub enum Error { @@ -29,11 +31,8 @@ pub enum Error { context: String, }, - #[error("Struct is missing required field '{field_name}'. Available fields: [{}]", struct_fields.join(", "))] - MissingStructField { - field_name: String, - struct_fields: Vec, - }, + #[error(transparent)] + FunctionRegistry(#[from] FunctionRegistryError), #[error("Unexpected value: expected one of {expected:?}, got {actual}")] UnexpectedValue { @@ -50,9 +49,6 @@ pub enum Error { #[error("Fixed-size list contains unexpected value type: expected {expected}, got {actual}")] UnexpectedFixedSizeListValueType { expected: String, actual: DataType }, - #[error("Expected list to contain struct values, but got {actual}")] - ExpectedStructInList { actual: DataType }, - #[error( "Field '{field_name}' has type {actual_type}, but expected {expected_type} (inferred from field '{reference_field}')" )] @@ -75,9 +71,6 @@ pub enum Error { expected_type: &'static str, }, - #[error("Index {index} out of bounds for array of length {length}")] - IndexOutOfBounds { index: usize, length: usize }, - #[error(transparent)] Arrow(Arc), @@ -97,9 +90,9 @@ impl From for Error { match err { // If the selector error is already a runtime error, unwrap it crate::selector::Error::Runtime(e) => e, - // For lex/parse errors, wrap them in a generic error message - // These shouldn't typically happen at runtime since selectors are pre-parsed - other => ArrowError::InvalidArgumentError(format!("Selector error: {other}")).into(), + // For lex/parse errors, wrap them in a generic error message. + // These shouldn't typically happen at runtime since selectors are pre-parsed. 
+ other => Self::Other(format!("{other}")), } } } diff --git a/crates/store/re_arrow_combinators/src/index.rs b/crates/store/re_lenses_core/src/combinators/index.rs similarity index 87% rename from crates/store/re_arrow_combinators/src/index.rs rename to crates/store/re_lenses_core/src/combinators/index.rs index 065810e7c513..b36e4d7fbf5e 100644 --- a/crates/store/re_arrow_combinators/src/index.rs +++ b/crates/store/re_lenses_core/src/combinators/index.rs @@ -3,7 +3,7 @@ use arrow::array::{Array as _, ArrayRef, ListArray, UInt64Array}; use arrow::buffer::NullBuffer; -use crate::{Error, Transform}; +use super::{error::Error, transform::Transform}; /// Extracts a single element at a specific index from a list array. /// @@ -31,16 +31,16 @@ impl Transform for GetIndexList { type Source = ListArray; type Target = ArrayRef; - fn transform(&self, source: &ListArray) -> Result { + fn transform(&self, source: &ListArray) -> Result, Error> { let offsets = source.offsets(); let values = source.values(); // If values is empty, all lists are empty, so all results are null. 
if values.is_empty() { - return Ok(arrow::array::new_null_array( + return Ok(Some(arrow::array::new_null_array( values.data_type(), source.len(), - )); + ))); } // Collect indices to extract from the values array @@ -93,7 +93,7 @@ impl Transform for GetIndexList { let new_data = result_data.into_builder().nulls(combined_nulls).build()?; result = arrow::array::make_array(new_data); - Ok(result) + Ok(Some(result)) } } @@ -104,39 +104,43 @@ mod tests { use arrow::datatypes::Int32Type; #[test] - fn test_get_index_basic() { + fn test_get_index_basic() -> Result<(), Box> { let input = ListArray::from_iter_primitive::(vec![ Some(vec![Some(1), Some(2), Some(3)]), Some(vec![Some(4), Some(5)]), ]); - let result = GetIndexList::new(0).transform(&input).unwrap(); + let result = GetIndexList::new(0).transform(&input)?.unwrap(); let result_i32 = result.as_primitive::(); assert_eq!(result_i32.len(), 2); assert_eq!(result_i32.value(0), 1); assert_eq!(result_i32.value(1), 4); + + Ok(()) } #[test] - fn test_get_index_out_of_bounds() { + fn test_get_index_out_of_bounds() -> Result<(), Box> { let input = ListArray::from_iter_primitive::(vec![ Some(vec![Some(1), Some(2)]), Some(vec![Some(3)]), Some(vec![]), ]); - let result = GetIndexList::new(5).transform(&input).unwrap(); + let result = GetIndexList::new(5).transform(&input)?.unwrap(); let result_i32 = result.as_primitive::(); assert_eq!(result_i32.len(), 3); assert!(result_i32.is_null(0)); // Out of bounds assert!(result_i32.is_null(1)); // Out of bounds assert!(result_i32.is_null(2)); // Empty list + + Ok(()) } #[test] - fn test_get_index_with_nulls() { + fn test_get_index_with_nulls() -> Result<(), Box> { let input = ListArray::from_iter_primitive::(vec![ Some(vec![Some(1), Some(2)]), None, @@ -144,7 +148,7 @@ mod tests { Some(vec![]), ]); - let result = GetIndexList::new(1).transform(&input).unwrap(); + let result = GetIndexList::new(1).transform(&input)?.unwrap(); let result_i32 = result.as_primitive::(); 
assert_eq!(result_i32.len(), 4); @@ -152,5 +156,7 @@ mod tests { assert!(result_i32.is_null(1)); // Null row assert!(result_i32.is_null(2)); // Null element at index 1 assert!(result_i32.is_null(3)); // Out of bounds (empty list) + + Ok(()) } } diff --git a/crates/store/re_arrow_combinators/src/map.rs b/crates/store/re_lenses_core/src/combinators/map.rs similarity index 74% rename from crates/store/re_arrow_combinators/src/map.rs rename to crates/store/re_lenses_core/src/combinators/map.rs index 7d9945cf58c8..9d1cdf2de3df 100644 --- a/crates/store/re_arrow_combinators/src/map.rs +++ b/crates/store/re_lenses_core/src/combinators/map.rs @@ -7,7 +7,7 @@ use arrow::array::{ }; use arrow::datatypes::Field; -use crate::{Error, Transform}; +use super::{error::Error, transform::Transform}; /// Maps a transformation over the elements within a list array. /// @@ -34,7 +34,7 @@ where type Source = ListArray; type Target = ListArray; - fn transform(&self, source: &ListArray) -> Result { + fn transform(&self, source: &ListArray) -> Result, Error> { let (field, offsets, values, nulls) = source.clone().into_parts(); let downcast = values @@ -45,18 +45,23 @@ where actual: values.data_type().clone(), })?; - let transformed = self.transform.transform(downcast)?; - - let new_field = field - .as_ref() - .clone() - .with_data_type(transformed.data_type().clone()); - Ok(ListArray::new( - new_field.into(), - offsets, - Arc::new(transformed), - nulls, - )) + let inner = self.transform.transform(downcast)?; + + match inner { + Some(transformed) => { + let new_field = field + .as_ref() + .clone() + .with_data_type(transformed.data_type().clone()); + Ok(Some(ListArray::new( + new_field.into(), + offsets, + Arc::new(transformed), + nulls, + ))) + } + None => Ok(None), + } } } @@ -85,7 +90,7 @@ where type Source = FixedSizeListArray; type Target = FixedSizeListArray; - fn transform(&self, source: &FixedSizeListArray) -> Result { + fn transform(&self, source: &FixedSizeListArray) -> Result, 
Error> { let values = source.values(); let downcast = values.as_any().downcast_ref::().ok_or_else(|| { Error::UnexpectedFixedSizeListValueType { @@ -94,20 +99,25 @@ where } })?; - let transformed = self.transform.transform(downcast)?; - let field = Arc::new(Field::new_list_field( - transformed.data_type().clone(), - transformed.is_nullable(), - )); + let inner = self.transform.transform(downcast)?; let size = source.value_length(); let nulls = source.nulls().cloned(); - Ok(FixedSizeListArray::new( - field, - size, - Arc::new(transformed), - nulls, - )) + match inner { + Some(transformed) => { + let field = Arc::new(Field::new_list_field( + transformed.data_type().clone(), + transformed.is_nullable(), + )); + Ok(Some(FixedSizeListArray::new( + field, + size, + Arc::new(transformed), + nulls, + ))) + } + None => Ok(None), + } } } @@ -152,9 +162,9 @@ where type Source = PrimitiveArray; type Target = PrimitiveArray; - fn transform(&self, source: &PrimitiveArray) -> Result, Error> { + fn transform(&self, source: &PrimitiveArray) -> Result>, Error> { let result: PrimitiveArray = source.iter().map(|opt| opt.map(|v| (self.f)(v))).collect(); - Ok(result) + Ok(Some(result)) } } @@ -191,12 +201,12 @@ where type Source = PrimitiveArray; type Target = PrimitiveArray; - fn transform(&self, source: &PrimitiveArray) -> Result, Error> { + fn transform(&self, source: &PrimitiveArray) -> Result>, Error> { let result: PrimitiveArray = source .iter() .map(|opt| Some(opt.unwrap_or(self.default_value))) .collect(); - Ok(result) + Ok(Some(result)) } } @@ -232,7 +242,7 @@ impl Transform for StringPrefix { type Source = StringArray; type Target = StringArray; - fn transform(&self, source: &StringArray) -> Result { + fn transform(&self, source: &StringArray) -> Result, Error> { let result: StringArray = source .iter() .map(|opt| { @@ -246,7 +256,7 @@ impl Transform for StringPrefix { }) }) .collect(); - Ok(result) + Ok(Some(result)) } } @@ -282,7 +292,7 @@ impl Transform for StringSuffix { 
type Source = StringArray; type Target = StringArray; - fn transform(&self, source: &StringArray) -> Result { + fn transform(&self, source: &StringArray) -> Result, Error> { let result: StringArray = source .iter() .map(|opt| { @@ -296,6 +306,26 @@ impl Transform for StringSuffix { }) }) .collect(); - Ok(result) + Ok(Some(result)) } } + +/// Prepends a prefix to each string value, including empty strings. +pub fn string_prefix(prefix: impl Into) -> StringPrefix { + StringPrefix::new(prefix) +} + +/// Prepends a prefix to each non-empty string value, leaving empty strings unchanged. +pub fn string_prefix_nonempty(prefix: impl Into) -> StringPrefix { + StringPrefix::new(prefix).with_prefix_empty_string(false) +} + +/// Appends a suffix to each string value, including empty strings. +pub fn string_suffix(suffix: impl Into) -> StringSuffix { + StringSuffix::new(suffix) +} + +/// Appends a suffix to each non-empty string value, leaving empty strings unchanged. +pub fn string_suffix_nonempty(suffix: impl Into) -> StringSuffix { + StringSuffix::new(suffix).with_suffix_empty_string(false) +} diff --git a/crates/store/re_lenses_core/src/combinators/mod.rs b/crates/store/re_lenses_core/src/combinators/mod.rs new file mode 100644 index 000000000000..2a18d0d8a286 --- /dev/null +++ b/crates/store/re_lenses_core/src/combinators/mod.rs @@ -0,0 +1,21 @@ +//! Composable Arrow array transformations. 
+ +mod cast; +mod error; +mod index; +mod map; +mod reshape; +mod transform; + +pub use self::{ + cast::{DowncastRef, ListToFixedSizeList, PrimitiveCast}, + error::Error, + map::{ + MapFixedSizeList, MapList, MapPrimitive, ReplaceNull, StringPrefix, StringSuffix, + string_prefix, string_prefix_nonempty, string_suffix, string_suffix_nonempty, + }, + reshape::{Explode, Flatten, GetField, RowMajorToColumnMajor, StructToFixedList}, + transform::{Then, Transform}, +}; + +pub(crate) use self::index::GetIndexList; diff --git a/crates/store/re_arrow_combinators/src/reshape.rs b/crates/store/re_lenses_core/src/combinators/reshape.rs similarity index 82% rename from crates/store/re_arrow_combinators/src/reshape.rs rename to crates/store/re_lenses_core/src/combinators/reshape.rs index 6b80c51cd552..15ff6d06be97 100644 --- a/crates/store/re_arrow_combinators/src/reshape.rs +++ b/crates/store/re_lenses_core/src/combinators/reshape.rs @@ -1,20 +1,21 @@ //! Transforms that extract and reshape arrays. +use std::borrow::Cow; use std::sync::Arc; use arrow::array::{ Array, ArrayRef, FixedSizeListArray, ListArray, StructArray, UInt32Array, UInt64Array, }; -use arrow::buffer::{NullBuffer, OffsetBuffer}; +use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; use arrow::datatypes::Field; use re_log::debug_assert_eq; -use crate::{Error, Transform}; +use super::{error::Error, transform::Transform}; /// Extracts a field from a struct array. /// -/// Returns the field's array if it exists, otherwise returns an error. +/// Returns `None` if the field does not exist in the struct. 
#[derive(Clone)] pub struct GetField { field_name: String, @@ -33,17 +34,10 @@ impl Transform for GetField { type Source = StructArray; type Target = ArrayRef; - fn transform(&self, source: &StructArray) -> Result { - let field_array = source - .column_by_name(&self.field_name) - .ok_or_else(|| { - let available_fields = source.fields().iter().map(|f| f.name().clone()).collect(); - Error::FieldNotFound { - field_name: self.field_name.clone(), - available_fields, - } - })? - .clone(); + fn transform(&self, source: &StructArray) -> Result, Error> { + let Some(field_array) = source.column_by_name(&self.field_name).cloned() else { + return Ok(None); + }; // If the struct has nulls, we need to combine them with the field's nulls // because in Arrow, when a struct is null, its fields should also be null @@ -66,18 +60,34 @@ impl Transform for GetField { .into_builder() .nulls(Some(combined_nulls)) .build()?; - Ok(arrow::array::make_array(new_data)) + Ok(Some(arrow::array::make_array(new_data))) } else { // No struct nulls - just return the field as-is - Ok(field_array) + Ok(Some(field_array)) } } } +/// Converts a `FixedSizeListArray` to a `ListArray` by synthesizing offsets. +fn fixed_size_list_to_list(fixed: &FixedSizeListArray) -> Result { + let (field, size, values, nulls) = fixed.clone().into_parts(); + let len = i32::try_from(fixed.len()).map_err(|_err| Error::OffsetOverflow { + actual: fixed.len(), + expected_type: "i32", + })?; + let offsets: Vec = (0..=len).map(|i| i * size).collect(); + Ok(ListArray::new( + field, + OffsetBuffer::new(ScalarBuffer::from(offsets)), + values, + nulls, + )) +} + /// Flattens a nested list array by one level. /// -/// Takes `List>` and flattens it to `List` by concatenating all inner lists -/// within each outer list row. +/// Takes `List>` or `List>` and flattens it to `List` by +/// concatenating all inner lists within each outer list row. 
/// /// # Example /// @@ -99,19 +109,22 @@ impl Transform for Flatten { type Source = ListArray; type Target = ListArray; - fn transform(&self, source: &ListArray) -> Result { + fn transform(&self, source: &ListArray) -> Result, Error> { let values = source.values(); - // The values should be a ListArray that we want to flatten - let inner_list = - values - .as_any() - .downcast_ref::() - .ok_or_else(|| Error::TypeMismatch { - expected: "List".to_owned(), + // The values should be a ListArray (or FixedSizeListArray) that we want to flatten + let inner_list: Cow<'_, ListArray> = + if let Some(list) = values.as_any().downcast_ref::() { + Cow::Borrowed(list) + } else if let Some(fixed) = values.as_any().downcast_ref::() { + Cow::Owned(fixed_size_list_to_list(fixed)?) + } else { + return Err(Error::TypeMismatch { + expected: "List or FixedSizeList".to_owned(), actual: values.data_type().clone(), - context: "Flatten expects List>".to_owned(), - })?; + context: "Flatten expects List> or List>".to_owned(), + }); + }; let outer_offsets = source.offsets(); let inner_offsets = inner_list.offsets(); @@ -149,12 +162,12 @@ impl Transform for Flatten { )); let offsets = arrow::buffer::OffsetBuffer::new(new_offsets.into()); - return Ok(ListArray::new( + return Ok(Some(ListArray::new( field, offsets, inner_values.clone(), source.nulls().cloned(), - )); + ))); } // General case: build new offsets and collect value ranges @@ -224,12 +237,12 @@ impl Transform for Flatten { )); let offsets = arrow::buffer::OffsetBuffer::new(new_offsets.into()); - Ok(ListArray::new( + Ok(Some(ListArray::new( field, offsets, flattened_values, source.nulls().cloned(), - )) + ))) } } @@ -243,6 +256,7 @@ impl Transform for Flatten { #[derive(Clone)] pub struct StructToFixedList { field_names: Vec, + nullable: bool, } impl StructToFixedList { @@ -253,22 +267,36 @@ impl StructToFixedList { pub fn new(field_names: impl IntoIterator>) -> Self { Self { field_names: field_names.into_iter().map(|s| 
s.into()).collect(), + nullable: true, } } + + /// Sets the nullability of the resulting fixed-size list item field. + /// + /// The default is `true`. + pub fn with_nullable(mut self, nullable: bool) -> Self { + self.nullable = nullable; + self + } } impl Transform for StructToFixedList { type Source = StructArray; type Target = FixedSizeListArray; - fn transform(&self, source: &StructArray) -> Result { + fn transform(&self, source: &StructArray) -> Result, Error> { if self.field_names.is_empty() { return Err(Error::NoFieldNames); } // Get the first field to determine the element type let first_field_name = &self.field_names[0]; - let first_array = GetField::new(first_field_name).transform(source)?; + let first_array = GetField::new(first_field_name) + .transform(source)? + .ok_or_else(|| Error::FieldNotFound { + field_name: first_field_name.clone(), + available_fields: source.fields().iter().map(|f| f.name().clone()).collect(), + })?; let element_type = first_array.data_type().clone(); // Collect all field arrays, ensuring they all have the same type @@ -276,7 +304,12 @@ impl Transform for StructToFixedList { field_arrays.push(first_array); for field_name in &self.field_names[1..] { - let array = GetField::new(field_name).transform(source)?; + let array = GetField::new(field_name) + .transform(source)? 
+ .ok_or_else(|| Error::FieldNotFound { + field_name: field_name.clone(), + available_fields: source.fields().iter().map(|f| f.name().clone()).collect(), + })?; // Verify type consistency if array.data_type() != &element_type { @@ -303,16 +336,16 @@ impl Transform for StructToFixedList { let refs: Vec<&dyn Array> = concatenated_arrays.iter().map(|a| a.as_ref()).collect(); let values = re_arrow_util::concat_arrays(&refs)?; - let field = Arc::new(Field::new_list_field(element_type, true)); + let field = Arc::new(Field::new_list_field(element_type, self.nullable)); let list_size = self.field_names.len(); let list_size = i32::try_from(list_size).map_err(|err| Error::InvalidNumberOfFields { actual: list_size, err, })?; - Ok(FixedSizeListArray::new( + Ok(Some(FixedSizeListArray::new( field, list_size, values, None, // No outer nulls - )) + ))) } } @@ -332,7 +365,7 @@ impl Transform for Explode { type Source = ListArray; type Target = ListArray; - fn transform(&self, source: &Self::Source) -> Result { + fn transform(&self, source: &Self::Source) -> Result, Error> { let values_array = source.values(); let offsets = source.offsets(); @@ -407,12 +440,12 @@ impl Transform for Explode { }; let field = Arc::new(Field::new_list_field(source.value_type(), true)); - Ok(ListArray::new( + Ok(Some(ListArray::new( field, OffsetBuffer::new(new_offsets.into()), values, Some(NullBuffer::from(new_validity)), - )) + ))) } } @@ -449,7 +482,7 @@ impl Transform for RowMajorToColumnMajor { type Source = FixedSizeListArray; type Target = FixedSizeListArray; - fn transform(&self, source: &Self::Source) -> Result { + fn transform(&self, source: &Self::Source) -> Result, Error> { // First, check that the input array has the expected value length. 
let expected_list_size = self.output_rows * self.output_columns; let value_length = source.value_length() as usize; @@ -481,11 +514,11 @@ impl Transform for RowMajorToColumnMajor { source.value_type().clone(), source.is_nullable(), )); - Ok(FixedSizeListArray::new( + Ok(Some(FixedSizeListArray::new( field, source.value_length(), reordered_values, source.nulls().cloned(), - )) + ))) } } diff --git a/crates/store/re_lenses_core/src/combinators/transform.rs b/crates/store/re_lenses_core/src/combinators/transform.rs new file mode 100644 index 000000000000..0afcec18842a --- /dev/null +++ b/crates/store/re_lenses_core/src/combinators/transform.rs @@ -0,0 +1,66 @@ +use arrow::array::{Array, ListArray}; + +use super::error::Error; + +/// A fallible transformation from one Arrow array to another. +/// +/// Can be composed using the [`then`](Transform::then) method to create transformation pipelines. +/// +/// Some transformations may decide not to output a value (e.g. when a struct field is not found), +/// which is represented by returning `Ok(None)`. When composing transforms using [`Transform::then`] +/// and the first returned `Ok(None)`, then the second [`Transform`] will not be executed. +pub trait Transform { + type Source: Array; + type Target: Array; + + /// Apply the transformation to the source array. + fn transform(&self, source: &Self::Source) -> Result, Error>; + + /// Chain this transformation with another transformation. + fn then(self, next: T2) -> Then + where + Self: Sized, + T2: Transform, + { + Then { + first: self, + second: next, + } + } +} + +impl Transform for T +where + T: Fn(&ListArray) -> Result, Error>, +{ + type Source = ListArray; + type Target = ListArray; + + fn transform(&self, source: &Self::Source) -> Result, Error> { + (self)(source) + } +} + +/// Composed transformation created by calling [`.then()`](Transform::then). 
+#[derive(Clone)] +pub struct Then { + first: T1, + second: T2, +} + +impl Transform for Then +where + T1: Transform, + T2: Transform, + M: Array, +{ + type Source = T1::Source; + type Target = T2::Target; + + fn transform(&self, source: &Self::Source) -> Result, Error> { + match self.first.transform(source)? { + Some(mid) => self.second.transform(&mid), + None => Ok(None), + } + } +} diff --git a/crates/store/re_lenses_core/src/lens_error.rs b/crates/store/re_lenses_core/src/lens_error.rs new file mode 100644 index 000000000000..659a01272766 --- /dev/null +++ b/crates/store/re_lenses_core/src/lens_error.rs @@ -0,0 +1,78 @@ +use arrow::datatypes::DataType; +use re_chunk::{ComponentIdentifier, EntityPath, TimelineName}; + +/// Errors that can occur when constructing a lens via the builder API. +#[expect(missing_docs)] +#[derive(Debug, thiserror::Error)] +pub enum LensBuilderError { + #[error("Lens for input component `{input_component}` is missing output components")] + MissingOutputComponent { + input_component: ComponentIdentifier, + }, + + #[error("Duplicate output for target entity `{target_entity}`")] + DuplicateTargetEntity { target_entity: EntityPath }, + + #[error("Duplicate output for same-as-input entity")] + DuplicateSameEntityOutput, + + #[error(transparent)] + SelectorParseFailed(#[from] crate::SelectorError), +} + +/// Errors that can occur when executing lenses at runtime. 
+#[expect(missing_docs)] +#[derive(Debug, thiserror::Error)] +pub enum LensRuntimeError { + #[error("Component '{component}' not found in chunk for entity `{entity_path}`")] + ComponentNotFound { + entity_path: EntityPath, + component: ComponentIdentifier, + }, + + #[error("No component outputs were produced for target entity `{target_entity}`")] + NoOutputColumnsProduced { + input_component: ComponentIdentifier, + target_entity: EntityPath, + }, + + #[error("Chunk validation failed: {0}")] + ChunkValidationFailed(#[from] re_chunk::ChunkError), + + #[error( + "Failed to apply operations to component '{component}' (entity: `{target_entity}`, input: `{input_component}`): {source}" + )] + ComponentOperationFailed { + target_entity: EntityPath, + input_component: ComponentIdentifier, + component: ComponentIdentifier, + #[source] + source: Box, + }, + + #[error( + "Failed to apply operations to timeline '{timeline_name}' (entity: `{target_entity}`, input: `{input_component}`): {source}" + )] + TimeOperationFailed { + target_entity: EntityPath, + input_component: ComponentIdentifier, + timeline_name: TimelineName, + #[source] + source: Box, + }, + + #[error( + "Invalid time column type for timeline '{timeline_name}': expected List, got {actual_type}" + )] + InvalidTimeColumn { + timeline_name: TimelineName, + actual_type: DataType, + }, + + #[error("Failed to scatter existing timeline '{timeline_name}' across output rows")] + ScatterExistingTimeFailed { + timeline_name: TimelineName, + #[source] + source: arrow::error::ArrowError, + }, +} diff --git a/crates/store/re_lenses_core/src/lib.rs b/crates/store/re_lenses_core/src/lib.rs new file mode 100644 index 000000000000..c55e74e43cab --- /dev/null +++ b/crates/store/re_lenses_core/src/lib.rs @@ -0,0 +1,30 @@ +//! Core lens types and composable Arrow array transformations. +//! +//! This crate provides the Lenses definitions and builders, and composable +//! transformations for Arrow arrays. 
Transformations are composable operations +//! that convert one array type to another, preserving structural properties +//! like row counts and null handling. + +// Arrow `Transform` and combinators +pub mod combinators; + +// Selector +mod selector; + +pub use crate::selector::{ + DynExpr, Error as SelectorError, IntoDynExpr, Literal, Runtime, Selector, + extract_nested_fields, function_registry, +}; + +// Lenses +mod ast; +mod builder; +mod chunk; +mod lens_error; + +pub use self::{ + ast::{Lens, Lenses, OutputMode, PartialChunk}, + builder::{LensBuilder, OutputBuilder}, + chunk::ChunkExt, + lens_error::{LensBuilderError, LensRuntimeError}, +}; diff --git a/crates/store/re_lenses_core/src/selector/dyn_expr.rs b/crates/store/re_lenses_core/src/selector/dyn_expr.rs new file mode 100644 index 000000000000..0f24e53cc626 --- /dev/null +++ b/crates/store/re_lenses_core/src/selector/dyn_expr.rs @@ -0,0 +1,71 @@ +use crate::function_registry::BoxedFunction; + +use super::{Selector, parser::Expr}; + +/// A dynamic expression that extends `Expr` with support for anonymous functions. +/// +/// Unlike `Expr`, a `DynExpr` cannot implement [`Display`](std::fmt::Display) because +/// anonymous functions are not serializable. +#[derive(Clone)] +pub enum DynExpr { + /// Delegate to a static `Expr`. + Expr(Expr), + + /// Pipe two `DynExpr`s together (left then right). + Pipe { left: Box, right: Box }, + + /// An anonymous (unregistered) function. 
+ Function(BoxedFunction), +} + +impl From for DynExpr { + fn from(expr: Expr) -> Self { + Self::Expr(expr) + } +} + +impl> From> for DynExpr { + fn from(selector: Selector) -> Self { + selector.expr.into() + } +} + +impl std::fmt::Debug for DynExpr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Expr(expr) => f.debug_tuple("Expr").field(expr).finish(), + Self::Pipe { left, right } => f + .debug_struct("Pipe") + .field("left", left) + .field("right", right) + .finish(), + Self::Function(_) => f.debug_tuple("Function").field(&"").finish(), + } + } +} + +impl Selector { + /// Returns a human-readable string representation of this selector. + /// + /// Anonymous functions are not serializable, so they are represented as ``. + pub fn to_string_lossy(&self) -> String { + fn fmt(expr: &DynExpr) -> String { + match expr { + DynExpr::Expr(expr) => format!("{expr}"), + DynExpr::Pipe { left, right } => { + format!("{} | {}", fmt(left), fmt(right)) + } + DynExpr::Function(_) => "".to_owned(), + } + } + fmt(&self.expr) + } +} + +impl std::fmt::Debug for Selector { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { expr } = self; + + f.debug_struct("Selector").field("expr", expr).finish() + } +} diff --git a/crates/store/re_lenses_core/src/selector/eval.rs b/crates/store/re_lenses_core/src/selector/eval.rs new file mode 100644 index 000000000000..cdaf701d0d8e --- /dev/null +++ b/crates/store/re_lenses_core/src/selector/eval.rs @@ -0,0 +1,385 @@ +//! Evaluation of [`Expr`] and [`DynExpr`] against Arrow arrays. 
+ +use std::sync::Arc; + +use arrow::array::{ + Array as _, ArrayRef, AsArray as _, BooleanBufferBuilder, ListArray, OffsetSizeTrait, +}; +use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; +use arrow::datatypes::{DataType, Field}; +use arrow::error::ArrowError; + +use crate::combinators::{GetField, GetIndexList, Transform as _}; + +use super::DynExpr; +use super::parser::Expr; +use super::runtime::Runtime; + +/// Internal trait for expression types that can be evaluated against Arrow arrays. +pub trait Eval { + fn eval( + &self, + source: ArrayRef, + runtime: &Runtime, + ) -> Result, crate::combinators::Error>; +} + +/// Result of evaluating an expression on a flat array. +/// +/// When evaluating a selector like `.poses[].x` that contains an `Expr::Each`, +/// the expression runs on the flattened inner values. This means the result is a flat +/// array that needs to be reassembled back into a `ListArray` when collecting it, +/// for example in a `Expr::Map` with the correct row boundaries. [`EvalResult`] +/// carries the bookkeeping needed for that reassembly. +/// +/// NOTE: This is essentially a destructured [`ListArray`]. +pub struct EvalResult { + /// The transformed values array. + array: ArrayRef, + + /// Optional offsets introduced by `Each` (`[]`) operations. + /// + /// When present, these map "intermediate groups" to positions in `array`. + /// They get composed with the outer row offsets in `execute_per_row`. + /// + /// When `None`, it means that there is a 1:1 mapping between surrounding rows + /// and the current `array`. + offsets: Option>, + + /// Optional null buffer from `NonNull` (`!`) operations. + /// + /// When present, marks which intermediate groups are null. + /// Has length `offsets.len() - 1` if offsets are present, or `array.len()` otherwise. 
+ nulls: Option, +} + +impl EvalResult { + fn flat(array: ArrayRef) -> Self { + Self { + array, + offsets: None, + nulls: None, + } + } +} + +/// Compose two optional offset buffers (outer ∘ inner). +fn compose_offsets( + outer: Option<&OffsetBuffer>, + inner: Option<&OffsetBuffer>, +) -> Option> { + match (outer, inner) { + (None, None) => None, + (Some(o), None) => Some(o.clone()), + (None, Some(i)) => Some(i.clone()), + (Some(a), Some(b)) => Some(compose_offset_buffers(a, b)), + } +} + +/// Given outer offsets mapping N → M and inner offsets mapping M → K, +/// produce composed offsets mapping N → K. +fn compose_offset_buffers( + outer: &OffsetBuffer, + inner: &OffsetBuffer, +) -> OffsetBuffer { + let scalars: ScalarBuffer = outer.iter().map(|&o| inner[o.as_usize()]).collect(); + OffsetBuffer::new(scalars) +} + +/// Promote inner nulls to outer nulls on an `EvalResult`. +/// +/// For each group (defined by `offsets`, or one-element-per-group if `None`), +/// the group is marked null if ALL values within it are null. +fn promote_inner_nulls(result: EvalResult) -> EvalResult { + let Some(inner_nulls) = result.array.logical_nulls() else { + // No inner nulls at all → nothing to promote + return result; + }; + + let promoted = match &result.offsets { + Some(offsets) => aggregate_nulls(offsets, &inner_nulls), + None => inner_nulls, // 1:1: each value is its own group + }; + + EvalResult { + array: result.array, + offsets: result.offsets, + nulls: combine_null_buffers(result.nulls.as_ref(), Some(&promoted)), + } +} + +/// Aggregate per-group nulls into per-row nulls. +/// +/// For each row (defined by `outer_offsets`), the row is null if ALL +/// intermediate groups in the row are null according to `eval_nulls`. 
+fn aggregate_nulls(outer_offsets: &OffsetBuffer, inner_nulls: &NullBuffer) -> NullBuffer { + let num_rows = outer_offsets.len() - 1; + let mut buf = BooleanBufferBuilder::new(num_rows); + + for row in 0..num_rows { + let start = outer_offsets[row] as usize; + let end = outer_offsets[row + 1] as usize; + + if start == end { + // Empty row: keep as valid + buf.append(true); + } else { + // Row is valid if ANY group in it is valid + buf.append((start..end).any(|i| inner_nulls.is_valid(i))); + } + } + + NullBuffer::from(buf.finish()) +} + +/// Combine two optional null buffers with AND. +fn combine_null_buffers(a: Option<&NullBuffer>, b: Option<&NullBuffer>) -> Option { + match (a, b) { + (None, None) => None, + (Some(n), None) | (None, Some(n)) => Some(n.clone()), + (Some(a), Some(b)) => { + let combined = a.inner() & b.inner(); + Some(NullBuffer::new(combined)) + } + } +} + +/// Executes the given expression against a raw array. +/// +/// This is the `ArrayRef`-based entry point used by `Selector::execute`. +pub(super) fn execute( + expr: &E, + source: ArrayRef, + runtime: &Runtime, +) -> Result, crate::combinators::Error> { + let result = expr.eval(source, runtime)?; + Ok(result.map(|r| r.array)) +} + +/// Evaluate an expression within a [`ListArray`]. +/// +/// Decomposes the list, evaluates the expression on the inner values, +/// and reconstructs a [`ListArray`] by composing offsets and nulls. +pub(super) fn eval_map( + list: &ListArray, + body: &E, + runtime: &Runtime, +) -> Result, crate::combinators::Error> { + let (_, outer_offsets, values, outer_nulls) = list.clone().into_parts(); + + let Some(result) = body.eval(values, runtime)? 
else { + return Ok(None); + }; + + // Compose offsets: outer maps rows → intermediate, eval maps intermediate → values + let final_offsets = match &result.offsets { + Some(eval_offsets) => compose_offset_buffers(&outer_offsets, eval_offsets), + None => outer_offsets.clone(), + }; + + // Combine nulls + let final_nulls = match &result.nulls { + Some(eval_nulls) => { + let row_nulls = aggregate_nulls(&outer_offsets, eval_nulls); + combine_null_buffers(outer_nulls.as_ref(), Some(&row_nulls)) + } + None => outer_nulls, + }; + + let new_field = Arc::new(Field::new_list_field( + result.array.data_type().clone(), + true, + )); + + Ok(Some(ListArray::new( + new_field, + final_offsets, + result.array, + final_nulls, + ))) +} + +impl Eval for Expr { + fn eval( + &self, + source: ArrayRef, + runtime: &Runtime, + ) -> Result, crate::combinators::Error> { + match self { + Self::Identity => Ok(Some(EvalResult::flat(source))), + + Self::Field(field_name) => match source.data_type() { + DataType::Struct(..) => { + let struct_array = source.as_struct(); + match GetField::new(field_name.clone()).transform(struct_array)? { + Some(field_array) => Ok(Some(EvalResult::flat(field_array))), + None => Ok(None), + } + } + dt => Err(ArrowError::InvalidArgumentError(format!( + "cannot access field `.{field_name}` on unexpected type {dt}" + )))?, + }, + + Self::Index(index) => match source.data_type() { + DataType::List(_) => { + let list_array = source.as_list::(); + match GetIndexList::new(*index).transform(list_array)? { + Some(result) => Ok(Some(EvalResult::flat(result))), + None => Ok(None), + } + } + // TODO(RR-3435): Add indexing into `FixedSizeListArray`. + dt @ DataType::FixedSizeList(..) 
=> Err(ArrowError::NotYetImplemented(format!( + "index access `[{index}]` is not yet implemented for {dt}" + )))?, + dt => Err(ArrowError::InvalidArgumentError(format!( + "cannot access `[{index}]` on unexpected type {dt}" + )))?, + }, + + Self::Each => match source.data_type() { + DataType::List(_) => { + let list_array = source.as_list::().clone(); + Ok(Some({ + let (_, offsets, values, nulls) = list_array.into_parts(); + EvalResult { + array: values, + offsets: Some(offsets), + nulls, + } + })) + } + DataType::FixedSizeList(_, _) => { + let fixed = source.as_fixed_size_list().clone(); + let len = i32::try_from(fixed.len()).map_err(|_err| { + ArrowError::ArithmeticOverflow(format!( + "`.[]` can't handle fixed size list with length {}", + fixed.len() + )) + })?; + + let (_field, size, values, nulls) = fixed.into_parts(); + let offsets: Vec = (0..=len).map(|i| i * size).collect(); + let offsets = OffsetBuffer::new(ScalarBuffer::from(offsets)); + + // TODO(grtlr): Since we don't keep track that these offsets came from a fixed size + // list array we also can't restore it back up in the tree. To fix this we'd have + // to make `offsets` an enum to distinguish between containers. + Ok(Some(EvalResult { + array: values, + offsets: Some(offsets), + nulls, + })) + } + dt => Err(ArrowError::InvalidArgumentError(format!( + "`.[]` called on unexpected type {dt}" + )))?, + }, + + Self::Pipe { left, right, .. } => { + let Some(left_result) = left.eval(source, runtime)? else { + return Ok(None); + }; + let Some(right_result) = right.eval(left_result.array, runtime)? else { + return Ok(None); + }; + + Ok(Some(EvalResult { + array: right_result.array, + offsets: compose_offsets( + left_result.offsets.as_ref(), + right_result.offsets.as_ref(), + ), + nulls: combine_null_buffers( + left_result.nulls.as_ref(), + right_result.nulls.as_ref(), + ), + })) + } + + // TODO(RR-3435): FixedSizeListArray errors must be suppressed via `?`, but ListArray should not need it. 
+ Self::Try(inner) => match inner.eval(source, runtime) { + Ok(result) => Ok(result), + Err(err) => { + re_log::trace!("try expression suppressed error: {err}"); + Ok(None) + } + }, + + Self::NonNull(inner) => { + let Some(result) = inner.eval(source, runtime)? else { + return Ok(None); + }; + + Ok(Some(promote_inner_nulls(result))) + } + + Self::Function { name, arguments } => { + let function = runtime + .function_registry + .get(name, arguments.as_ref().map_or(&[], |v| v.as_slice()))?; + match function(&source)? { + Some(result) => Ok(Some(EvalResult::flat(result))), + None => Ok(None), + } + } + + Self::Map(body) => match source.data_type() { + DataType::List(_) => { + let list_array = source.as_list::(); + match eval_map(list_array, body.as_ref(), runtime)? { + Some(inner_list_array) => { + Ok(Some(EvalResult::flat(Arc::new(inner_list_array)))) + } + None => Ok(None), + } + } + dt @ DataType::FixedSizeList(..) => Err(ArrowError::NotYetImplemented(format!( + "`map()` is not yet implemented for {dt}" + )))?, + dt => Err(ArrowError::InvalidArgumentError(format!( + "cannot call `.map()` on unexpected type {dt}" + )))?, + }, + } + } +} + +impl Eval for DynExpr { + fn eval( + &self, + source: ArrayRef, + runtime: &Runtime, + ) -> Result, crate::combinators::Error> { + match self { + Self::Expr(expr) => expr.eval(source, runtime), + + Self::Pipe { left, right } => { + let Some(left_result) = left.eval(source, runtime)? else { + return Ok(None); + }; + let Some(right_result) = right.eval(left_result.array, runtime)? else { + return Ok(None); + }; + + Ok(Some(EvalResult { + array: right_result.array, + offsets: compose_offsets( + left_result.offsets.as_ref(), + right_result.offsets.as_ref(), + ), + nulls: combine_null_buffers( + left_result.nulls.as_ref(), + right_result.nulls.as_ref(), + ), + })) + } + + Self::Function(f) => match f(&source)? 
{ + Some(result) => Ok(Some(EvalResult::flat(result))), + None => Ok(None), + }, + } + } +} diff --git a/crates/store/re_lenses_core/src/selector/function_registry.rs b/crates/store/re_lenses_core/src/selector/function_registry.rs new file mode 100644 index 000000000000..1a8754ee2c93 --- /dev/null +++ b/crates/store/re_lenses_core/src/selector/function_registry.rs @@ -0,0 +1,151 @@ +use std::sync::Arc; + +use arrow::array::ArrayRef; + +use super::Literal; + +/// A shared, type-erased function operating on Arrow arrays. +/// +/// Uses `Arc` so that `DynExpr` can derive `Clone`, which is needed by language bindings like `PyO3`. +pub type BoxedFunction = + Arc Result, crate::combinators::Error> + Send + Sync>; + +/// A constructor that creates a [`BoxedFunction`] from a list of arguments. +type BoxedFunctionConstructor = Box Option + Send + Sync>; + +/// Errors that can occur when working with the function registry. +#[derive(Clone, Debug, thiserror::Error)] +pub enum FunctionRegistryError { + #[error("Duplicate function registered: `{name}`")] + DuplicateFunction { name: String }, + + #[error("Unknown function: `{name}`")] + UnknownFunction { name: String }, + + #[error("Wrong arguments for function: `{name}`")] + WrongArguments { name: String }, +} + +/// A registry of named function constructors. +/// +/// Functions are registered by name along with a constructor that takes +/// arguments and produces a concrete [`BoxedFunction`] implementation. This +/// allows referencing functions by name and instantiate them at runtime. +pub struct FunctionRegistry { + constructors: ahash::HashMap, +} + +impl re_byte_size::SizeBytes for FunctionRegistry { + fn heap_size_bytes(&self) -> u64 { + let Self { constructors } = self; + + // Can't know internal heap size of the type erased function constructor, so assume it's + // zero. 
+ constructors.capacity() as u64 + * (std::mem::size_of::() + std::mem::size_of::()) + as u64 + + constructors + .keys() + .map(|s| s.heap_size_bytes()) + .sum::() + } +} + +impl FunctionRegistry { + pub fn new() -> Self { + Self { + constructors: ahash::HashMap::default(), + } + } + + /// Register a function constructor under the given name. + #[inline] + pub fn register>( + &mut self, + name: impl Into, + f: F, + ) -> Result<(), FunctionRegistryError> { + use std::collections::hash_map::Entry; + match self.constructors.entry(name.into()) { + Entry::Occupied(entry) => Err(FunctionRegistryError::DuplicateFunction { + name: entry.key().clone(), + }), + Entry::Vacant(entry) => { + entry.insert(Box::new(move |arguments| f.constructor(arguments))); + + Ok(()) + } + } + } + + /// Instantiate a function by name with the given arguments. + pub fn get( + &self, + name: &str, + args: &[Literal], + ) -> Result { + let constructor = + self.constructors + .get(name) + .ok_or_else(|| FunctionRegistryError::UnknownFunction { + name: name.to_owned(), + })?; + constructor(args).ok_or_else(|| FunctionRegistryError::WrongArguments { + name: name.to_owned(), + }) + } +} + +impl Default for FunctionRegistry { + fn default() -> Self { + Self::new() + } +} + +trait FromLiteral: Sized { + fn from_literal(literal: &Literal) -> Option; +} + +impl FromLiteral for String { + fn from_literal(literal: &Literal) -> Option { + match literal { + Literal::String(s) => Some(s.clone()), + } + } +} + +pub trait FunctionConstructor: Send + Sync + 'static { + fn constructor(&self, arguments: &[Literal]) -> Option; +} + +macro_rules! 
impl_function_constructors { + () => { + impl_function_constructors!(impl); + }; + (impl $($ident:ident)*) => { + #[expect(clippy::allow_attributes)] + #[allow(unused_parens)] + impl< + $($ident: FromLiteral,)* + T: Fn(&ArrayRef) -> Result, crate::combinators::Error> + Send + Sync + 'static, + F: Fn($($ident),*) -> T + Send + Sync + 'static, + > FunctionConstructor<($($ident),*)> for F { + fn constructor(&self, arguments: &[Literal]) -> Option { + let mut _args = arguments.iter(); + let t = (self)( + $( + $ident::from_literal(_args.next()?)?, + )* + ); + + Some(Arc::new(t)) + } + } + }; + ($head:ident $($tail:ident)*) => { + impl_function_constructors!($($tail)*); + impl_function_constructors!(impl $head $($tail)*); + }; +} + +impl_function_constructors!(T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12); diff --git a/crates/store/re_arrow_combinators/src/selector/lexer.rs b/crates/store/re_lenses_core/src/selector/lexer.rs similarity index 55% rename from crates/store/re_arrow_combinators/src/selector/lexer.rs rename to crates/store/re_lenses_core/src/selector/lexer.rs index fd9117519f7b..eeee19539e33 100644 --- a/crates/store/re_arrow_combinators/src/selector/lexer.rs +++ b/crates/store/re_lenses_core/src/selector/lexer.rs @@ -6,17 +6,26 @@ #[derive(Clone, Debug, PartialEq, Eq)] pub enum TokenType { // Literals + /// A dot-prefixed field name, e.g. `.foo` produces `Field("foo")`. Field(String), + + /// A bare identifier, e.g. `my_func` produces `Ident("my_func")`. + Ident(String), Integer(u64), // TODO(grtlr): distinguish between float and integers. + StringLiteral(String), // Brackets LBracket, RBracket, + LParen, + RParen, // Operators Dot, Pipe, + Semicolon, QuestionMark, + ExclamationMark, } #[derive(Debug, PartialEq, Eq, thiserror::Error, Clone)] @@ -30,22 +39,38 @@ pub enum Error { // TODO(grtlr): Add location information to other variants too (tricky because of line breaks). 
#[error("failed to parse `{lexeme}` as integer: {err}")] - ParseIntError { + ParseInt { err: std::num::ParseIntError, lexeme: String, }, + + #[error("unterminated string at line {line}, column {column}")] + UnterminatedString { line: usize, column: usize }, + + #[error("invalid escape sequence `\\{ch}` at line {line}, column {column}")] + InvalidEscape { + ch: char, + line: usize, + column: usize, + }, } impl std::fmt::Display for TokenType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Field(s) => write!(f, ".{s}"), + Self::Ident(s) => write!(f, "{s}"), Self::Integer(n) => write!(f, "{n}"), + Self::StringLiteral(s) => write!(f, "{s:?}"), Self::LBracket => write!(f, "["), Self::RBracket => write!(f, "]"), + Self::LParen => write!(f, "("), + Self::RParen => write!(f, ")"), Self::Dot => write!(f, "."), Self::Pipe => write!(f, "|"), + Self::Semicolon => write!(f, ";"), Self::QuestionMark => write!(f, "?"), + Self::ExclamationMark => write!(f, "!"), } } } @@ -117,7 +142,7 @@ impl<'a> Lexer<'a> { let number = lexeme .parse::() - .map_err(|err| Error::ParseIntError { err, lexeme })?; + .map_err(|err| Error::ParseInt { err, lexeme })?; Ok(Token { typ: TokenType::Integer(number), @@ -125,6 +150,74 @@ impl<'a> Lexer<'a> { }) } + fn make_string(&mut self) -> Result { + let start_line = self.line; + let start_column = self.column; + let mut value = String::new(); + + loop { + match self.chars.next() { + None => { + return Err(Error::UnterminatedString { + line: start_line, + column: start_column, + }); + } + Some('"') => { + self.column += 1; + break; + } + Some('\\') => { + self.column += 1; + match self.chars.next() { + None => { + return Err(Error::UnterminatedString { + line: start_line, + column: start_column, + }); + } + Some('\\') => value.push('\\'), + Some('"') => value.push('"'), + Some('n') => value.push('\n'), + Some('t') => value.push('\t'), + Some(ch) => { + return Err(Error::InvalidEscape { + ch, + line: self.line, + 
column: self.column, + }); + } + } + self.column += 1; + } + Some(ch) => { + self.column += 1; + value.push(ch); + } + } + } + + Ok(Token { + typ: TokenType::StringLiteral(value), + line: start_line, + }) + } + + fn make_bare_identifier(&mut self) -> Token { + while let Some(next) = self.chars.peek().copied() + && (next.is_alphanumeric() || next == '-' || next == '_') + { + self.advance(); + } + + let text = std::mem::take(&mut self.lexeme_buffer); + + Token { + typ: TokenType::Ident(text), + line: self.line, + } + } + fn scan_token(&mut self) -> Result, Error> { let c = self.advance().ok_or(Error::UnexpectedChar { ch: '\0', @@ -144,8 +237,15 @@ impl<'a> Lexer<'a> { // Single-char tokens '|' => Ok(Some(self.make_token(TokenType::Pipe))), '?' => Ok(Some(self.make_token(TokenType::QuestionMark))), + '!' => Ok(Some(self.make_token(TokenType::ExclamationMark))), '[' => Ok(Some(self.make_token(TokenType::LBracket))), ']' => Ok(Some(self.make_token(TokenType::RBracket))), + '(' => Ok(Some(self.make_token(TokenType::LParen))), + ')' => Ok(Some(self.make_token(TokenType::RParen))), + ';' => Ok(Some(self.make_token(TokenType::Semicolon))), + + // String literals + '"' => self.make_string().map(Some), // Dot '.' 
=> { @@ -163,6 +263,9 @@ impl<'a> Lexer<'a> { // Numbers '0'..='9' => self.make_number().map(Some), + // Bare identifiers for function names + c if c.is_alphabetic() || c == '_' => Ok(Some(self.make_bare_identifier())), + unexpected => Err(Error::UnexpectedChar { ch: unexpected, line: self.line, @@ -254,6 +357,22 @@ mod test { ); } + #[test] + fn exclamation_mark() { + assert_eq!( + extract_inner(Lexer::new(".foo!").scan_tokens().unwrap()), + vec![TokenType::Field("foo".into()), TokenType::ExclamationMark,] + ); + assert_eq!( + extract_inner(Lexer::new(".foo?!").scan_tokens().unwrap()), + vec![ + TokenType::Field("foo".into()), + TokenType::QuestionMark, + TokenType::ExclamationMark, + ] + ); + } + #[test] fn numbers() { assert_eq!( @@ -282,4 +401,68 @@ mod test { ] ); } + + #[test] + fn function_tokens() { + assert_eq!( + extract_inner(Lexer::new("my_func(").scan_tokens().unwrap()), + vec![TokenType::Ident("my_func".into()), TokenType::LParen,] + ); + assert_eq!( + extract_inner(Lexer::new("my_func()").scan_tokens().unwrap()), + vec![ + TokenType::Ident("my_func".into()), + TokenType::LParen, + TokenType::RParen, + ] + ); + } + + #[test] + fn string_literals() { + assert_eq!( + extract_inner(Lexer::new(r#""hello""#).scan_tokens().unwrap()), + vec![TokenType::StringLiteral("hello".into()),] + ); + assert_eq!( + extract_inner(Lexer::new(r#""hello"; "world""#).scan_tokens().unwrap()), + vec![ + TokenType::StringLiteral("hello".into()), + TokenType::Semicolon, + TokenType::StringLiteral("world".into()), + ] + ); + } + + #[test] + fn string_escape_sequences() { + assert_eq!( + extract_inner(Lexer::new(r#""he\"llo""#).scan_tokens().unwrap()), + vec![TokenType::StringLiteral("he\"llo".into()),] + ); + assert_eq!( + extract_inner(Lexer::new(r#""a\\b""#).scan_tokens().unwrap()), + vec![TokenType::StringLiteral("a\\b".into()),] + ); + assert_eq!( + extract_inner(Lexer::new(r#""a\nb""#).scan_tokens().unwrap()), + vec![TokenType::StringLiteral("a\nb".into()),] + ); + 
assert_eq!( + extract_inner(Lexer::new(r#""a\tb""#).scan_tokens().unwrap()), + vec![TokenType::StringLiteral("a\tb".into()),] + ); + } + + #[test] + fn unterminated_string() { + let result = Lexer::new(r#""hello"#).scan_tokens(); + assert!(matches!(result, Err(Error::UnterminatedString { .. }))); + } + + #[test] + fn invalid_escape() { + let result = Lexer::new(r#""he\xllo""#).scan_tokens(); + assert!(matches!(result, Err(Error::InvalidEscape { ch: 'x', .. }))); + } } diff --git a/crates/store/re_lenses_core/src/selector/mod.rs b/crates/store/re_lenses_core/src/selector/mod.rs new file mode 100644 index 000000000000..2abc5a294dfb --- /dev/null +++ b/crates/store/re_lenses_core/src/selector/mod.rs @@ -0,0 +1,323 @@ +//! Selector API for parsing and executing [`jq`](https://github.com/jqlang/jq/)-like queries on Arrow arrays. +//! +//! This module provides a high-level path-based API, but in contrast to `jq` its semantics are **columnar**, +//! following Apache Arrow's data model rather than a row-oriented object model. +//! +//! # Syntax +//! +//! The selector syntax is a subset of `jq`: +//! +//! | Syntax | Meaning | Example | +//! |-------------|------------------------------------------------------------|----------------| +//! | `.field` | Access a named field in a struct | `.location` | +//! | `[]` | Iterate over every element of a list | `.poses[]` | +//! | `[N]` | Index into a list by position | `.[0]` | +//! | `?` | Error suppression / optional operator | `.field?` | +//! | `!` | Assert non-null (promotes all-null rows to outer nulls) | `.field!` | +//! | `\|` | Pipe the output of one expression to another | `.foo \| .bar` | +//! +//! Segments can be chained without an explicit pipe: `.poses[].x` is equivalent to `.poses[] | .x`. +//! +//! # Differences from `jq` +//! +//! * **Columnar, not row-oriented** — operations apply to entire Arrow columns rather than individual JSON values. +//! 
* **No filters, arithmetic, or built-in functions** — only path navigation and iteration are supported. +//! * **No quoted field names or string interpolation** — field names must be bare identifiers +//! (alphanumeric, `-`, `_`). +//! +//! # Protobuf and null handling +//! +//! The `?` and `!` operators exist primarily to handle Arrow columns produced from protobuf +//! messages. Proto3 `optional` fields have **presence tracking**: when a field is unset the +//! corresponding Arrow column contains `null` rather than the type's default value. Navigating +//! into a struct with optional sub-fields can therefore yield lists whose inner values are all +//! null (e.g. `[null]` instead of a top-level `null`). +//! +//! * `?` suppresses errors when a field is entirely absent from the schema, which happens +//! during schema evolution or when optional columns are omitted. +//! * `!` promotes rows where **all** inner values are null to an outer null, collapsing +//! `[null]` → `null` so downstream consumers see clean nullability. +//! +//! # Anonymous functions +//! +//! Using [`Selector::pipe`] it is possible to chain anonymous functions to selectors. The result will be a +//! [`Selector`], which can be executed just like a regular selector. + +mod dyn_expr; +mod eval; +mod lexer; +mod parser; +mod runtime; + +pub mod function_registry; + +pub use dyn_expr::DynExpr; +pub use parser::Literal; +pub use runtime::Runtime; + +use arrow::{ + array::{ArrayRef, ListArray}, + datatypes::{DataType, Fields}, +}; +use vec1::Vec1; + +use parser::Expr; + +/// A parsed selector expression that can be executed against Arrow arrays. 
+#[derive(Clone)] +pub struct Selector { + expr: E, +} + +impl re_byte_size::SizeBytes for Selector { + fn heap_size_bytes(&self) -> u64 { + let Self { expr } = self; + + expr.heap_size_bytes() + } +} + +impl std::fmt::Debug for Selector { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { expr } = self; + + f.debug_struct("Selector").field("expr", expr).finish() + } +} + +impl PartialEq for Selector { + fn eq(&self, other: &Self) -> bool { + let Self { expr } = self; + + *expr == other.expr + } +} + +impl Eq for Selector {} + +impl std::hash::Hash for Selector { + fn hash(&self, state: &mut H) { + let Self { expr } = self; + + expr.hash(state); + } +} + +impl std::fmt::Display for Selector { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.expr) + } +} + +impl Selector { + /// Create a new selector. + pub fn new(expr: Expr) -> Self { + Self { expr } + } + + /// Parse a selector from a query string. + /// + /// This is a convenience wrapper around [`FromStr`](std::str::FromStr). + pub fn parse(query: &str) -> Result { + query.parse() + } +} + +/// Implementors can be converted into [`DynExpr`] and therefore be piped into [`Selector`]s. +pub trait IntoDynExpr { + fn into_dyn_expr(self) -> DynExpr; +} + +impl< + F: Fn(&ArrayRef) -> Result, crate::combinators::Error> + Send + Sync + 'static, +> IntoDynExpr for F +{ + fn into_dyn_expr(self) -> DynExpr { + DynExpr::Function(std::sync::Arc::new(self)) + } +} + +impl IntoDynExpr for Selector { + fn into_dyn_expr(self) -> DynExpr { + let Self { expr } = self; + DynExpr::Expr(expr) + } +} + +impl IntoDynExpr for Selector { + fn into_dyn_expr(self) -> DynExpr { + let Self { expr } = self; + expr + } +} + +impl> Selector { + /// Execute this selector against a raw array using the default runtime. + /// + /// This is the `ArrayRef`-based entry point. For per-row execution + /// on a [`ListArray`], use [`execute_per_row`](Self::execute_per_row). 
+ /// + /// To execute with a custom runtime, use [`Runtime::execute`] directly. + pub fn execute(&self, source: ArrayRef) -> Result, Error> { + runtime::default_runtime().execute(self, source) + } + + /// Execute this selector against each row of a [`ListArray`] using the default runtime. + /// + /// Performs implicit iteration over the inner list array, and reconstructs the array at the end. + /// + /// `map(.poses[].x)` is the actual query, we only require writing the `.poses[].x` portion. + /// + /// The output is guaranteed to have the same number of rows as the input. + /// + /// Returns `None` if the expression's error was suppressed (e.g. `.field?`). + /// + /// To execute with a custom runtime, use [`Runtime::execute_per_row`] directly. + pub fn execute_per_row(&self, source: &ListArray) -> Result, Error> { + runtime::default_runtime().execute_per_row(self, source) + } + + /// Pipe this selector into another expression, producing a [`Selector`]. + /// + /// Accepts any type that converts into a [`DynExpr`], including [`Selector`] and + /// [`Selector`], and anonymous functions that operate on [`ArrayRef`]. + pub fn pipe(self, rhs: impl IntoDynExpr) -> Selector { + Selector { + expr: DynExpr::Pipe { + left: Box::new(self.expr.into()), + right: Box::new(rhs.into_dyn_expr()), + }, + } + } +} + +impl From for Selector { + fn from(selector: Selector) -> Self { + Self { + expr: DynExpr::from(selector.expr), + } + } +} + +impl std::str::FromStr for Selector { + type Err = Error; + + fn from_str(query: &str) -> Result { + // Lex the query string, collecting tokens and checking for lex errors + let lexer = lexer::Lexer::new(query); + let tokens = lexer.scan_tokens()?; + + let parser = parser::Parser::new(tokens.into_iter()); + let expr = parser.parse()?; + + Ok(Self::new(expr)) + } +} + +/// Errors that can occur during selector parsing or execution. +#[derive(Debug, thiserror::Error, Clone)] +pub enum Error { + /// Error during lexing. 
+ #[error(transparent)] + Lex(#[from] lexer::Error), + + /// Error during parsing. + #[error(transparent)] + Parse(#[from] parser::Error), + + /// Error during runtime execution. + #[error(transparent)] + Runtime(#[from] crate::combinators::Error), +} + +/// Fold an iterator of `Expr` into a left-associative chain of implicit pipes. +fn chain(exprs: impl IntoIterator) -> Expr { + let mut iter = exprs.into_iter(); + let Some(first) = iter.next() else { + return Expr::Identity; + }; + iter.fold(first, |left, right| Expr::Pipe { + left: Box::new(left), + right: Box::new(right), + implicit: true, + }) +} + +/// Dispatch a single datatype: enqueue structs, unwrap lists, or check the predicate. +fn process_datatype<'a, P>( + mut path: Vec, + datatype: &'a DataType, + predicate: &P, + result: &mut Vec<(Selector, DataType)>, + queue: &mut std::collections::VecDeque<(Vec, &'a Fields)>, +) where + P: Fn(&DataType) -> bool, +{ + match datatype { + dt if predicate(dt) => { + result.push((Selector::new(chain(path)), dt.clone())); + } + DataType::Struct(fields) => { + queue.push_back((path, fields)); + } + DataType::List(inner) | DataType::FixedSizeList(inner, ..) => { + path.push(Expr::Each); + match inner.data_type() { + dt if predicate(dt) => { + result.push((Selector::new(chain(path)), dt.clone())); + } + DataType::Struct(nested_fields) => { + queue.push_back((path, nested_fields)); + } + DataType::FixedSizeList(field, ..) => { + let dt = field.data_type(); + if predicate(dt) { + path.push(Expr::Each); + result.push((Selector::new(chain(path)), dt.clone())); + } + } + _ => {} + } + } + _ => {} + } +} + +/// Extract nested fields from a struct array that match a predicate. +/// +/// Returns `None` if no fields match the predicate, or if `datatype` is not a `DataType::Struct`. +pub fn extract_nested_fields

( + datatype: &DataType, + predicate: P, +) -> Option> +where + P: Fn(&DataType) -> bool, +{ + let mut result = Vec::new(); + let mut queue: std::collections::VecDeque<(Vec, &Fields)> = + std::collections::VecDeque::new(); + + match datatype { + DataType::Struct(_) | DataType::List(_) | DataType::FixedSizeList(..) => { + process_datatype(Vec::new(), datatype, &predicate, &mut result, &mut queue); + } + _ => return None, + } + + // Breadth-first traversal + while let Some((path, fields)) = queue.pop_front() { + for field in fields { + let mut field_path = path.clone(); + field_path.push(Expr::Field(field.name().clone())); + process_datatype( + field_path, + field.data_type(), + &predicate, + &mut result, + &mut queue, + ); + } + } + + Vec1::try_from_vec(result).ok() +} diff --git a/crates/store/re_lenses_core/src/selector/parser.rs b/crates/store/re_lenses_core/src/selector/parser.rs new file mode 100644 index 000000000000..8bd82865378a --- /dev/null +++ b/crates/store/re_lenses_core/src/selector/parser.rs @@ -0,0 +1,740 @@ +//! Turns a list of [`Token`]s into an executable [`Expr`]. +//! +//! The [`Parser`] should roughly follow the structure from: +//! +//! +//! # Grammar +//! +//! Simplified jq-like grammar with implicit piping: +//! +//! ```text +//! Expr → Term ( '|' Term )* +//! Term → Segment ( '?' | '!' )* ( Segment ( '?' | '!' )* )* +//! Segment → '.' FIELD +//! | '[' INTEGER ']' +//! | '[' ']' +//! | '.' (identity) +//! | 'map' '(' Expr ')' (map) +//! | IDENT ( '(' ArgList? ')' )? (function) +//! ArgList → Literal ( ';' Literal )* +//! Literal → STRING_LITERAL +//! ``` + +// NOTE: Please keep the grammar above up-to-date. 
+ +use super::lexer::{Token, TokenType}; + +pub struct Parser +where + I: Iterator, +{ + tokens: std::iter::Peekable, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Literal { + String(String), +} + +impl re_byte_size::SizeBytes for Literal { + fn heap_size_bytes(&self) -> u64 { + match self { + Self::String(s) => s.heap_size_bytes(), + } + } +} + +impl std::fmt::Display for Literal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::String(v) => write!(f, "{v:?}"), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Expr { + Identity, + Field(String), + Index(u64), + Each, + Pipe { + left: Box, + right: Box, + + // TODO(RR-4178): Right now we still assume that `Selectors` have to + // roundtrip in the UI, which is why we have to model if a pipe was + // written out by the user in the AST. Long-term, we should avoid + // coupling the Selector AST to the UI code. + /// `true` when the pipe was inferred from adjacent segments (`.foo.bar`), + /// `false` when the user wrote an explicit `|`. + implicit: bool, + }, + Try(Box), + NonNull(Box), + Function { + name: String, + + /// This is `None` if the function was written as `my_func`, and + /// is `Some([])` if it's written as `my_func()`. These should + /// semantically be the same though. + arguments: Option>, + }, + + // TODO(grtlr): For now we define `map()` as an `Expr` in the tree. The + // correct modeling would be to add the `map` function to the registry, + // and defining it in terms of collect (`[ .[] | f]`). + Map(Box), +} + +impl re_byte_size::SizeBytes for Expr { + fn heap_size_bytes(&self) -> u64 { + match self { + Self::Identity | Self::Index(_) | Self::Each => 0, + Self::Field(s) => s.heap_size_bytes(), + Self::Pipe { left, right, .. 
} => left.heap_size_bytes() + right.heap_size_bytes(), + Self::Try(inner) | Self::NonNull(inner) | Self::Map(inner) => inner.heap_size_bytes(), + Self::Function { name, arguments } => { + name.heap_size_bytes() + arguments.heap_size_bytes() + } + } + } +} + +impl std::fmt::Display for Expr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Identity => write!(f, "."), + Self::Field(name) => write!(f, ".{name}"), + Self::Index(n) => write!(f, "[{n}]"), + Self::Each => write!(f, "[]"), + Self::Pipe { + left, + right, + implicit, + } => { + if *implicit { + write!(f, "{left}{right}") + } else { + write!(f, "{left} | {right}") + } + } + Self::Try(inner) => write!(f, "{inner}?"), + Self::NonNull(inner) => write!(f, "{inner}!"), + Self::Function { name, arguments } => { + write!(f, "{name}")?; + + if let Some(arguments) = arguments { + write!(f, "(")?; + for (idx, literal) in arguments.iter().enumerate() { + if idx > 0 { + write!(f, "; ")?; + } + write!(f, "{literal}")?; + } + write!(f, ")")?; + } + + Ok(()) + } + Self::Map(body) => write!(f, "map({body})"), + } + } +} + +// TODO(RR-3438): Add error location reporting. 
+#[derive(Debug, PartialEq, Eq, thiserror::Error, Clone)] +pub enum Error { + #[error("expected `{expected}` but found `{found}`")] + ExpectedSymbol { + expected: TokenType, + found: TokenType, + }, + + #[error("unexpected symbol `{symbol}`")] + UnexpectedSymbol { symbol: TokenType }, + + #[error("unexpected end of input")] + UnexpectedEof, +} + +type Result = std::result::Result; + +impl Parser +where + I: Iterator, +{ + /// Create a parser from any iterator of tokens + pub fn new(tokens: I) -> Self { + Self { + tokens: tokens.peekable(), + } + } + + pub fn parse(mut self) -> Result { + let expr = self.expr()?; + + if let Some(token) = self.tokens.peek() { + Err(Error::UnexpectedSymbol { + symbol: token.typ.clone(), + }) + } else { + Ok(expr) + } + } + + fn expr(&mut self) -> Result { + let mut left = self.term()?; + + while let Some(token) = self.tokens.peek() { + if token.typ == TokenType::Pipe { + self.tokens.next(); // Consume explicit pipe + let right = self.term()?; + left = Expr::Pipe { + left: Box::new(left), + right: Box::new(right), + implicit: false, + }; + } else { + break; + } + } + + Ok(left) + } + + fn term(&mut self) -> Result { + // Bare identifier: `map(expr)` or a function call + if let Some(token) = self.tokens.peek() + && let TokenType::Ident(name) = &token.typ + { + let name = name.clone(); + self.tokens.next(); + + if name == "map" { + return self.map_expr(); + } + + return self.function_args(name); + } + + // Check if it starts with identity (.) 
+ if let Some(token) = self.tokens.peek() { + if token.typ == TokenType::Dot { + self.tokens.next(); + // If only `.`, return Identity + if !self.is_segment_start() { + return Ok(Expr::Identity); + } + } + } else { + return Err(Error::UnexpectedEof); + } + + // Parse first segment + let mut left = self.primary()?; + left = self.postfix(left); + + // Parse remaining segments, joining with implicit pipes + while self.is_segment_start() { + let mut right = self.primary()?; + right = self.postfix(right); + left = Expr::Pipe { + left: Box::new(left), + right: Box::new(right), + implicit: true, + }; + } + + Ok(left) + } + + /// Apply any postfix `?` or `!` operators. + fn postfix(&mut self, mut expr: Expr) -> Expr { + while let Some(token) = self.tokens.peek() { + match token.typ { + TokenType::QuestionMark => { + self.tokens.next(); + expr = Expr::Try(Box::new(expr)); + } + TokenType::ExclamationMark => { + self.tokens.next(); + expr = Expr::NonNull(Box::new(expr)); + } + _ => break, + } + } + expr + } + + /// Parse a `map(expr)` expression. + /// The `map` identifier has already been consumed. + fn map_expr(&mut self) -> Result { + self.consume(TokenType::LParen)?; + let body = self.expr()?; + self.consume(TokenType::RParen)?; + Ok(Expr::Map(Box::new(body))) + } + + /// Parse function arguments: `(arg1; arg2; …)`. + /// The `name` has already been consumed; parentheses are optional for no-arg calls. 
+ fn function_args(&mut self, name: String) -> Result { + // Allow bare function name without parentheses + if self.tokens.peek().map(|t| &t.typ) != Some(&TokenType::LParen) { + return Ok(Expr::Function { + name, + arguments: None, + }); + } + self.tokens.next(); // consume LParen + + let mut arguments = Vec::new(); + + // Check for empty argument list + if let Some(token) = self.tokens.peek() + && token.typ == TokenType::RParen + { + self.tokens.next(); + return Ok(Expr::Function { + name, + arguments: Some(arguments), + }); + } + + // Parse first argument + arguments.push(self.literal()?); + + // Parse remaining semicolon-separated arguments + while let Some(token) = self.tokens.peek() + && token.typ == TokenType::Semicolon + { + self.tokens.next(); + arguments.push(self.literal()?); + } + + self.consume(TokenType::RParen)?; + + Ok(Expr::Function { + name, + arguments: Some(arguments), + }) + } + + fn literal(&mut self) -> Result { + match self.tokens.peek() { + Some(token) => match &token.typ { + TokenType::StringLiteral(s) => { + let value = s.clone(); + self.tokens.next(); + Ok(Literal::String(value)) + } + unexpected => Err(Error::UnexpectedSymbol { + symbol: unexpected.clone(), + }), + }, + None => Err(Error::UnexpectedEof), + } + } + + fn is_segment_start(&mut self) -> bool { + matches!( + self.tokens.peek().map(|t| &t.typ), + Some(TokenType::Field(_) | TokenType::LBracket) + ) + } + + fn primary(&mut self) -> Result { + match self.tokens.peek() { + Some(token) => match &token.typ { + TokenType::Field(s) => { + let result = s.clone(); + self.tokens.next(); + Ok(Expr::Field(result)) + } + TokenType::LBracket => { + self.tokens.next(); // Consume `[` + + match self.tokens.peek() { + Some(token) => match &token.typ { + TokenType::RBracket => { + self.tokens.next(); // Consume `]` + Ok(Expr::Each) + } + TokenType::Integer(n) => { + let index = *n; + self.tokens.next(); + self.consume(TokenType::RBracket)?; + Ok(Expr::Index(index)) + } + unexpected => 
Err(Error::UnexpectedSymbol { + symbol: unexpected.clone(), + }), + }, + None => Err(Error::UnexpectedEof), + } + } + unexpected => Err(Error::UnexpectedSymbol { + symbol: unexpected.clone(), + }), + }, + None => Err(Error::UnexpectedEof), + } + } + + /// Consume the current token if it matches the expected type, otherwise return an error. + fn consume(&mut self, expected: TokenType) -> Result { + let token = self.tokens.next().ok_or(Error::UnexpectedEof)?; + if token.typ == expected { + Ok(token) + } else { + Err(Error::ExpectedSymbol { + expected, + found: token.typ.clone(), + }) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + use super::super::lexer::Lexer; + + fn parse(input: &str) -> Result { + let tokens = Lexer::new(input).scan_tokens().unwrap(); + Parser::new(tokens.into_iter()).parse() + } + + fn field(name: &str) -> Expr { + Expr::Field(name.into()) + } + + fn index(n: u64) -> Expr { + Expr::Index(n) + } + + fn each() -> Expr { + Expr::Each + } + + fn implicit_pipe(left: Expr, right: Expr) -> Expr { + Expr::Pipe { + left: Box::new(left), + right: Box::new(right), + implicit: true, + } + } + + fn try_expr(inner: Expr) -> Expr { + Expr::Try(Box::new(inner)) + } + + fn non_null(inner: Expr) -> Expr { + Expr::NonNull(Box::new(inner)) + } + + fn pipe(left: Expr, right: Expr) -> Expr { + Expr::Pipe { + left: Box::new(left), + right: Box::new(right), + implicit: false, + } + } + + #[test] + fn basic() { + assert_eq!( + parse(".a.b.c"), + Ok(implicit_pipe( + implicit_pipe(field("a"), field("b")), + field("c") + )) + ); + } + + #[test] + fn explicit_pipe() { + assert_eq!(parse(".foo | .bar"), Ok(pipe(field("foo"), field("bar")))); + } + + #[test] + fn identity() { + assert_eq!(parse("."), Ok(Expr::Identity)); + } + + #[test] + fn identity_pipe() { + assert_eq!(parse(". 
| .foo"), Ok(pipe(Expr::Identity, field("foo")))); + } + + #[test] + fn unexpected_eof() { + assert_eq!(parse(".foo |"), Err(Error::UnexpectedEof)); + } + + #[test] + fn empty_input() { + assert_eq!(parse(""), Err(Error::UnexpectedEof)); + } + + #[test] + fn array_index() { + assert_eq!(parse(".[0]"), Ok(index(0))); + assert_eq!(parse(".[42]"), Ok(index(42))); + } + + #[test] + fn array_index_with_pipe() { + assert_eq!(parse(".foo | .[0]"), Ok(pipe(field("foo"), index(0)))); + } + + #[test] + fn array_index_implicit_pipe() { + assert_eq!(parse(".foo[0]"), Ok(implicit_pipe(field("foo"), index(0)))); + assert_eq!( + parse(".foo[0][1]"), + Ok(implicit_pipe( + implicit_pipe(field("foo"), index(0)), + index(1) + )) + ); + } + + #[test] + fn array_each() { + assert_eq!(parse(".[]"), Ok(each())); + assert_eq!(parse(".foo[]"), Ok(implicit_pipe(field("foo"), each()))); + assert_eq!( + parse(".foo[] | .bar"), + Ok(pipe(implicit_pipe(field("foo"), each()), field("bar"))) + ); + } + + #[test] + fn array_each_implicit_pipe() { + assert_eq!( + parse(".foo[].bar"), + Ok(implicit_pipe( + implicit_pipe(field("foo"), each()), + field("bar") + )) + ); + assert_eq!( + parse(".foo[][0]"), + Ok(implicit_pipe(implicit_pipe(field("foo"), each()), index(0))) + ); + } + + #[test] + fn array_index_errors() { + assert_eq!(parse(".[0"), Err(Error::UnexpectedEof)); + } + + #[test] + fn test_display_chain_vs_pipe() { + let chain = parse(".location.x").unwrap(); + assert_eq!(chain.to_string(), ".location.x"); + + let piped = parse(".foo | .bar").unwrap(); + assert_eq!(piped.to_string(), ".foo | .bar"); + + let identity = parse(".").unwrap(); + assert_eq!(identity.to_string(), "."); + + let complex = parse(".a.b[] | .c[0]").unwrap(); + assert_eq!(complex.to_string(), ".a.b[] | .c[0]"); + } + + #[test] + fn optional_field() { + assert_eq!(parse(".foo?"), Ok(try_expr(field("foo")))); + assert_eq!( + parse(".foo?.bar"), + Ok(implicit_pipe(try_expr(field("foo")), field("bar"))) + ); + } + + #[test] + 
fn optional_index() { + assert_eq!(parse(".[0]?"), Ok(try_expr(index(0)))); + } + + #[test] + fn optional_each() { + assert_eq!(parse(".[]?"), Ok(try_expr(each()))); + assert_eq!( + parse(".[]?.foo"), + Ok(implicit_pipe(try_expr(each()), field("foo"))) + ); + assert_eq!( + parse(".foo[]?.bar"), + Ok(implicit_pipe( + implicit_pipe(field("foo"), try_expr(each())), + field("bar") + )) + ); + } + + #[test] + fn test_display_optional() { + let expr = parse(".foo?").unwrap(); + assert_eq!(expr.to_string(), ".foo?"); + + let expr = parse(".foo?.bar").unwrap(); + assert_eq!(expr.to_string(), ".foo?.bar"); + + // Note: leading `.` is consumed by the path parser, not stored in segments. + let expr = parse(".[0]?").unwrap(); + assert_eq!(expr.to_string(), "[0]?"); + + let expr = parse(".[]?").unwrap(); + assert_eq!(expr.to_string(), "[]?"); + } + + #[test] + fn non_null_field() { + assert_eq!(parse(".foo!"), Ok(non_null(field("foo")))); + assert_eq!( + parse(".foo!.bar"), + Ok(implicit_pipe(non_null(field("foo")), field("bar"))) + ); + } + + #[test] + fn non_null_index() { + assert_eq!(parse(".[0]!"), Ok(non_null(index(0)))); + } + + #[test] + fn non_null_combined_with_optional() { + assert_eq!(parse(".foo?!"), Ok(non_null(try_expr(field("foo"))))); + assert_eq!(parse(".foo!?"), Ok(try_expr(non_null(field("foo"))))); + } + + #[test] + fn test_display_non_null() { + let expr = parse(".foo!").unwrap(); + assert_eq!(expr.to_string(), ".foo!"); + + let expr = parse(".foo!.bar").unwrap(); + assert_eq!(expr.to_string(), ".foo!.bar"); + + let expr = parse(".[0]!").unwrap(); + assert_eq!(expr.to_string(), "[0]!"); + + let expr = parse(".foo?!").unwrap(); + assert_eq!(expr.to_string(), ".foo?!"); + } + + fn func(name: &str, args: Option>) -> Expr { + Expr::Function { + name: name.to_owned(), + arguments: args, + } + } + + #[test] + fn function_no_args() { + assert_eq!(parse("my_func()"), Ok(func("my_func", Some(vec![])))); + assert_eq!(parse("my_func"), Ok(func("my_func", None))); + } 
+ + #[test] + fn function_one_arg() { + assert_eq!( + parse(r#"my_func("hello")"#), + Ok(func("my_func", Some(vec![Literal::String("hello".into())]))) + ); + } + + #[test] + fn function_multiple_args() { + assert_eq!( + parse(r#"my_func("foo"; "bar")"#), + Ok(func( + "my_func", + Some(vec![ + Literal::String("foo".into()), + Literal::String("bar".into()) + ]) + )) + ); + } + + #[test] + fn function_no_args_in_pipe() { + assert_eq!( + parse(".path | my_func"), + Ok(pipe(field("path"), func("my_func", None))) + ); + } + + #[test] + fn function_in_pipe() { + assert_eq!( + parse(r#".path | my_func("arg")"#), + Ok(pipe( + field("path"), + func("my_func", Some(vec![Literal::String("arg".into())])) + )) + ); + } + + #[test] + fn function_display_roundtrip() { + // `my_func` & `my_func()` are functionally the same, but we want + // both to work for roundtrip. + let expr = parse("my_func").unwrap(); + assert_eq!(expr.to_string(), "my_func"); + + let expr = parse("my_func()").unwrap(); + assert_eq!(expr.to_string(), "my_func()"); + + let expr = parse(r#"my_func("hello")"#).unwrap(); + assert_eq!(expr.to_string(), r#"my_func("hello")"#); + + let expr = parse(r#"my_func("foo"; "bar")"#).unwrap(); + assert_eq!(expr.to_string(), r#"my_func("foo"; "bar")"#); + + let expr = parse(r#".path | my_func("a"; "b")"#).unwrap(); + assert_eq!(expr.to_string(), r#".path | my_func("a"; "b")"#); + } + + fn map_expr(body: Expr) -> Expr { + Expr::Map(Box::new(body)) + } + + #[test] + fn map_simple() { + assert_eq!(parse("map(.foo)"), Ok(map_expr(field("foo")))); + } + + #[test] + fn map_with_pipe() { + assert_eq!( + parse("map(.foo | .bar)"), + Ok(map_expr(pipe(field("foo"), field("bar")))) + ); + } + + #[test] + fn map_in_pipe() { + assert_eq!( + parse(".items | map(.name)"), + Ok(pipe(field("items"), map_expr(field("name")))) + ); + } + + #[test] + fn map_display_roundtrip() { + let expr = parse("map(.foo)").unwrap(); + assert_eq!(expr.to_string(), "map(.foo)"); + + let expr = parse("map(.foo 
| .bar)").unwrap(); + assert_eq!(expr.to_string(), "map(.foo | .bar)"); + + let expr = parse(".items | map(.name)").unwrap(); + assert_eq!(expr.to_string(), ".items | map(.name)"); + } +} diff --git a/crates/store/re_lenses_core/src/selector/runtime.rs b/crates/store/re_lenses_core/src/selector/runtime.rs new file mode 100644 index 000000000000..6bdf860431f1 --- /dev/null +++ b/crates/store/re_lenses_core/src/selector/runtime.rs @@ -0,0 +1,78 @@ +//! Shared runtime context for selector evaluation. + +use std::sync::{Arc, OnceLock}; + +use arrow::array::{ArrayRef, ListArray}; +use re_chunk::ArrowArray as _; + +use super::Selector; +use super::eval; +use super::function_registry::FunctionRegistry; + +pub(super) fn default_runtime() -> Arc { + static DEFAULT_RUNTIME: OnceLock> = OnceLock::new(); + + DEFAULT_RUNTIME + .get_or_init(|| { + Arc::new(Runtime { + function_registry: Arc::new(FunctionRegistry::default()), + }) + }) + .clone() +} + +/// Context passed to selector execution. +/// +/// Carries the [`FunctionRegistry`] and any future shared state +/// needed during evaluation. +#[derive(Clone)] +pub struct Runtime { + pub function_registry: Arc, +} + +impl Runtime { + /// Execute a selector against a raw array using this runtime. + /// + /// This is the `ArrayRef`-based entry point. For per-row execution + /// on a [`ListArray`], use [`execute_per_row`](Self::execute_per_row). + pub fn execute( + &self, + selector: &Selector, + source: ArrayRef, + ) -> Result, super::Error> { + eval::execute(&selector.expr, source, self).map_err(Into::into) + } + + /// Execute a selector against each row of a [`ListArray`] using this runtime. + /// + /// Performs implicit iteration over the inner list array, and reconstructs the array at the end. + /// + /// `map(.poses[].x)` is the actual query, we only require writing the `.poses[].x` portion. + /// + /// Returns `None` if the expression's error was suppressed (e.g. `.field?`). 
+ pub fn execute_per_row( + &self, + selector: &Selector, + source: &ListArray, + ) -> Result, super::Error> { + let res = eval::eval_map(source, &selector.expr, self).map_err(Into::into); + + if let Ok(Some(ref output)) = res { + re_log::debug_assert_eq!( + output.len(), + source.len(), + "selectors should never change row count" + ); + } + + res + } +} + +impl re_byte_size::SizeBytes for Runtime { + fn heap_size_bytes(&self) -> u64 { + let Self { function_registry } = self; + + function_registry.heap_size_bytes() + } +} diff --git a/crates/store/re_lenses_core/tests/test_apply_selector.rs b/crates/store/re_lenses_core/tests/test_apply_selector.rs new file mode 100644 index 000000000000..1592ac5afa8f --- /dev/null +++ b/crates/store/re_lenses_core/tests/test_apply_selector.rs @@ -0,0 +1,103 @@ +#![expect(clippy::unwrap_used)] + +use std::sync::Arc; + +use arrow::array::{ArrayRef, Float64Array}; +use re_chunk::{Chunk, RowId}; +use re_lenses_core::combinators::Error; +use re_lenses_core::{ChunkExt as _, DynExpr, Selector}; +use re_log_types::Timeline; +use re_sdk_types::ComponentDescriptor; + +/// Doubles every float64 value. 
+fn double_values(source: &ArrayRef) -> Result, Error> { + let values = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "Float64".into(), + actual: source.data_type().clone(), + context: "double_values".into(), + })?; + + let doubled: Float64Array = values.iter().map(|v| v.map(|x| x * 2.0)).collect(); + Ok(Some(Arc::new(doubled))) +} + +fn test_chunk() -> Chunk { + let timeline = Timeline::new_sequence("tick"); + + Chunk::builder("test/sensor") + .with_row( + RowId::new(), + [(timeline, 0)], + [( + ComponentDescriptor::partial("value"), + Arc::new(Float64Array::from(vec![1.0])) as ArrayRef, + )], + ) + .with_row( + RowId::new(), + [(timeline, 1)], + [( + ComponentDescriptor::partial("value"), + Arc::new(Float64Array::from(vec![2.0])) as ArrayRef, + )], + ) + .build() + .unwrap() +} + +#[test] +fn apply_selector_doubles_values() { + let chunk = test_chunk(); + insta::assert_snapshot!(format!("{:-240}", chunk), @r#" + ┌────────────────────────────────────────────────────────────────────────────────────────────┐ + │ METADATA: │ + │ * entity_path: /test/sensor │ + │ * id: [**REDACTED**] │ + │ * version: [**REDACTED**] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ ┌───────────────────────────────────────────────┬──────────────────┬─────────────────────┐ │ + │ │ RowId ┆ tick ┆ value │ │ + │ │ --- ┆ --- ┆ --- │ │ + │ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(Float64) │ │ + │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ component: value │ │ + │ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ kind: data │ │ + │ │ is_sorted: true ┆ kind: index ┆ │ │ + │ │ kind: control ┆ ┆ │ │ + │ ╞═══════════════════════════════════════════════╪══════════════════╪═════════════════════╡ │ + │ │ row_[**REDACTED**] ┆ 0 ┆ [1.0] │ │ + │ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ + │ │ row_[**REDACTED**] 
┆ 1 ┆ [2.0] │ │ + │ └───────────────────────────────────────────────┴──────────────────┴─────────────────────┘ │ + └────────────────────────────────────────────────────────────────────────────────────────────┘ + "#); + + let selector: Selector = Selector::parse(".").unwrap().pipe(double_values); + + let result = chunk.apply_selector("value".into(), &selector).unwrap(); + + insta::assert_snapshot!(format!("{:-240}", result), @r#" + ┌────────────────────────────────────────────────────────────────────────────────────────────┐ + │ METADATA: │ + │ * entity_path: /test/sensor │ + │ * id: [**REDACTED**] │ + │ * version: [**REDACTED**] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ ┌───────────────────────────────────────────────┬──────────────────┬─────────────────────┐ │ + │ │ RowId ┆ tick ┆ value │ │ + │ │ --- ┆ --- ┆ --- │ │ + │ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(Float64) │ │ + │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ component: value │ │ + │ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ kind: data │ │ + │ │ is_sorted: true ┆ kind: index ┆ │ │ + │ │ kind: control ┆ ┆ │ │ + │ ╞═══════════════════════════════════════════════╪══════════════════╪═════════════════════╡ │ + │ │ row_[**REDACTED**] ┆ 0 ┆ [2.0] │ │ + │ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ + │ │ row_[**REDACTED**] ┆ 1 ┆ [4.0] │ │ + │ └───────────────────────────────────────────────┴──────────────────┴─────────────────────┘ │ + └────────────────────────────────────────────────────────────────────────────────────────────┘ + "#); +} diff --git a/crates/store/re_lenses_core/tests/test_chunk_splitting.rs b/crates/store/re_lenses_core/tests/test_chunk_splitting.rs new file mode 100644 index 000000000000..222fb9f37869 --- /dev/null +++ b/crates/store/re_lenses_core/tests/test_chunk_splitting.rs @@ -0,0 +1,272 @@ +#![expect(clippy::unwrap_used)] + 
+use arrow::array::{Int32Builder, ListBuilder}; +use re_chunk::{Chunk, ChunkId, TimeColumn, TimelineName}; +use re_lenses_core::{Lens, Lenses, OutputMode, Selector}; +use re_sdk_types::ComponentDescriptor; + +/// Creates a chunk with three Int32 component columns (`alpha`, `beta`, `gamma`) +/// and a `tick` timeline with 2 rows. +fn three_component_chunk() -> Chunk { + let make_column = |values: &[i32]| { + let mut builder = ListBuilder::new(Int32Builder::new()); + for &v in values { + builder.values().append_value(v); + builder.append(true); + } + builder.finish() + }; + + let alpha = make_column(&[1, 2]); + let beta = make_column(&[10, 20]); + let gamma = make_column(&[100, 200]); + + let components = [ + (ComponentDescriptor::partial("alpha"), alpha), + (ComponentDescriptor::partial("beta"), beta), + (ComponentDescriptor::partial("gamma"), gamma), + ] + .into_iter(); + + let time_column = TimeColumn::new_sequence("tick", 0..2); + + Chunk::from_auto_row_ids( + ChunkId::new(), + "test/entity".into(), + std::iter::once((TimelineName::new("tick"), time_column)).collect(), + components.collect(), + ) + .unwrap() +} + +#[test] +fn three_component_chunk_identity() { + let chunk = three_component_chunk(); + insta::assert_snapshot!(format!("{:-240}", chunk), @r#" + ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ + │ METADATA: │ + │ * entity_path: /test/entity │ + │ * id: [**REDACTED**] │ + │ * version: [**REDACTED**] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ ┌──────────────────────────────────────────────┬──────────────────┬───────────────────┬───────────────────┬───────────────────┐ │ + │ │ RowId ┆ tick ┆ alpha ┆ beta ┆ gamma │ │ + │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ + │ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(Int32) ┆ type: List(Int32) ┆ type: List(Int32) │ │ + │ │ 
ARROW:extension:metadata: ┆ index_name: tick ┆ component: alpha ┆ component: beta ┆ component: gamma │ │ + │ │ {"namespace":"row"} ┆ is_sorted: true ┆ kind: data ┆ kind: data ┆ kind: data │ │ + │ │ ARROW:extension:name: TUID ┆ kind: index ┆ ┆ ┆ │ │ + │ │ is_sorted: true ┆ ┆ ┆ ┆ │ │ + │ │ kind: control ┆ ┆ ┆ ┆ │ │ + │ ╞══════════════════════════════════════════════╪══════════════════╪═══════════════════╪═══════════════════╪═══════════════════╡ │ + │ │ row_[**REDACTED**] ┆ 0 ┆ [1] ┆ [10] ┆ [100] │ │ + │ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ + │ │ row_[**REDACTED**] ┆ 1 ┆ [2] ┆ [20] ┆ [200] │ │ + │ └──────────────────────────────────────────────┴──────────────────┴───────────────────┴───────────────────┴───────────────────┘ │ + └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + "#); +} + +/// Two lenses consume `alpha` and `beta`; `gamma` is untouched. +/// +/// With [`OutputMode::ForwardUnmatched`], the result should be three separate chunks +/// with no duplicate component columns: +/// 1. A prefix chunk containing only `gamma` +/// 2. Lens output containing `alpha_out` +/// 3. 
Lens output containing `beta_out` +#[test] +fn forward_unmatched_splits_components_across_chunks() { + let chunk = three_component_chunk(); + + let lens_alpha = Lens::for_input_column("alpha") + .output_columns(|out| { + out.component( + ComponentDescriptor::partial("alpha_out"), + Selector::parse(".")?, + ) + }) + .unwrap() + .build(); + + let lens_beta = Lens::for_input_column("beta") + .output_columns(|out| { + out.component( + ComponentDescriptor::partial("beta_out"), + Selector::parse(".")?, + ) + }) + .unwrap() + .build(); + + let lenses = Lenses::new(OutputMode::ForwardUnmatched) + .add_lens(lens_alpha) + .add_lens(lens_beta); + + let results: Vec<_> = lenses.apply(&chunk).collect::>().unwrap(); + assert_eq!(results.len(), 3); + + // Chunk 0: prefix with only the untouched component. + insta::assert_snapshot!(format!("{:-240}", results[0]), @r#" + ┌──────────────────────────────────────────────────────────────────────────────────────────┐ + │ METADATA: │ + │ * entity_path: /test/entity │ + │ * id: [**REDACTED**] │ + │ * version: [**REDACTED**] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ ┌───────────────────────────────────────────────┬──────────────────┬───────────────────┐ │ + │ │ RowId ┆ tick ┆ gamma │ │ + │ │ --- ┆ --- ┆ --- │ │ + │ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(Int32) │ │ + │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ component: gamma │ │ + │ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ kind: data │ │ + │ │ is_sorted: true ┆ kind: index ┆ │ │ + │ │ kind: control ┆ ┆ │ │ + │ ╞═══════════════════════════════════════════════╪══════════════════╪═══════════════════╡ │ + │ │ row_[**REDACTED**] ┆ 0 ┆ [100] │ │ + │ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ + │ │ row_[**REDACTED**] ┆ 1 ┆ [200] │ │ + │ └───────────────────────────────────────────────┴──────────────────┴───────────────────┘ │ + 
└──────────────────────────────────────────────────────────────────────────────────────────┘ + "#); + + // Chunk 1: lens output for alpha. + insta::assert_snapshot!(format!("{:-240}", results[1]), @r#" + ┌─────────────────────────────────────────────────────────────────────────────────────────────┐ + │ METADATA: │ + │ * entity_path: /test/entity │ + │ * id: [**REDACTED**] │ + │ * version: [**REDACTED**] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ ┌───────────────────────────────────────────────┬──────────────────┬──────────────────────┐ │ + │ │ RowId ┆ tick ┆ alpha_out │ │ + │ │ --- ┆ --- ┆ --- │ │ + │ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(Int32) │ │ + │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ component: alpha_out │ │ + │ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ kind: data │ │ + │ │ is_sorted: true ┆ kind: index ┆ │ │ + │ │ kind: control ┆ ┆ │ │ + │ ╞═══════════════════════════════════════════════╪══════════════════╪══════════════════════╡ │ + │ │ row_[**REDACTED**] ┆ 0 ┆ [1] │ │ + │ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ + │ │ row_[**REDACTED**] ┆ 1 ┆ [2] │ │ + │ └───────────────────────────────────────────────┴──────────────────┴──────────────────────┘ │ + └─────────────────────────────────────────────────────────────────────────────────────────────┘ + "#); + + // Chunk 2: lens output for beta. 
+ insta::assert_snapshot!(format!("{:-240}", results[2]), @r#" + ┌────────────────────────────────────────────────────────────────────────────────────────────┐ + │ METADATA: │ + │ * entity_path: /test/entity │ + │ * id: [**REDACTED**] │ + │ * version: [**REDACTED**] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ ┌───────────────────────────────────────────────┬──────────────────┬─────────────────────┐ │ + │ │ RowId ┆ tick ┆ beta_out │ │ + │ │ --- ┆ --- ┆ --- │ │ + │ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(Int32) │ │ + │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ component: beta_out │ │ + │ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ kind: data │ │ + │ │ is_sorted: true ┆ kind: index ┆ │ │ + │ │ kind: control ┆ ┆ │ │ + │ ╞═══════════════════════════════════════════════╪══════════════════╪═════════════════════╡ │ + │ │ row_[**REDACTED**] ┆ 0 ┆ [10] │ │ + │ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ + │ │ row_[**REDACTED**] ┆ 1 ┆ [20] │ │ + │ └───────────────────────────────────────────────┴──────────────────┴─────────────────────┘ │ + └────────────────────────────────────────────────────────────────────────────────────────────┘ + "#); +} + +/// Three lenses consume all three components; nothing is untouched. +/// +/// With [`OutputMode::ForwardUnmatched`], no prefix chunk should be emitted since +/// every component is consumed by a lens. +#[test] +fn forward_unmatched_no_prefix_when_all_consumed() { + let chunk = three_component_chunk(); + + let make_lens = |input: &str, output: &str| { + Lens::for_input_column(input) + .output_columns_at(input, |out| { + out.component(ComponentDescriptor::partial(output), Selector::parse(".")?) 
+ }) + .unwrap() + .build() + }; + + let lenses = Lenses::new(OutputMode::ForwardUnmatched) + .add_lens(make_lens("alpha", "alpha_out")) + .add_lens(make_lens("beta", "beta_out")) + .add_lens(make_lens("gamma", "gamma_out")); + + let results: Vec<_> = lenses.apply(&chunk).collect::>().unwrap(); + assert_eq!(results.len(), 3); + + insta::assert_snapshot!(format!("{:-240}", results[0]), @r#" + ┌─────────────────────────────────────────────────────────────────────────────────────────────┐ + │ METADATA: │ + │ * entity_path: /alpha │ + │ * id: [**REDACTED**] │ + │ * version: [**REDACTED**] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ ┌───────────────────────────────────────────────┬──────────────────┬──────────────────────┐ │ + │ │ RowId ┆ tick ┆ alpha_out │ │ + │ │ --- ┆ --- ┆ --- │ │ + │ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(Int32) │ │ + │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ component: alpha_out │ │ + │ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ kind: data │ │ + │ │ is_sorted: true ┆ kind: index ┆ │ │ + │ │ kind: control ┆ ┆ │ │ + │ ╞═══════════════════════════════════════════════╪══════════════════╪══════════════════════╡ │ + │ │ row_[**REDACTED**] ┆ 0 ┆ [1] │ │ + │ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ + │ │ row_[**REDACTED**] ┆ 1 ┆ [2] │ │ + │ └───────────────────────────────────────────────┴──────────────────┴──────────────────────┘ │ + └─────────────────────────────────────────────────────────────────────────────────────────────┘ + "#); + insta::assert_snapshot!(format!("{:-240}", results[1]), @r#" + ┌────────────────────────────────────────────────────────────────────────────────────────────┐ + │ METADATA: │ + │ * entity_path: /beta │ + │ * id: [**REDACTED**] │ + │ * version: [**REDACTED**] │ + 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ ┌───────────────────────────────────────────────┬──────────────────┬─────────────────────┐ │ + │ │ RowId ┆ tick ┆ beta_out │ │ + │ │ --- ┆ --- ┆ --- │ │ + │ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(Int32) │ │ + │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ component: beta_out │ │ + │ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ kind: data │ │ + │ │ is_sorted: true ┆ kind: index ┆ │ │ + │ │ kind: control ┆ ┆ │ │ + │ ╞═══════════════════════════════════════════════╪══════════════════╪═════════════════════╡ │ + │ │ row_[**REDACTED**] ┆ 0 ┆ [10] │ │ + │ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ + │ │ row_[**REDACTED**] ┆ 1 ┆ [20] │ │ + │ └───────────────────────────────────────────────┴──────────────────┴─────────────────────┘ │ + └────────────────────────────────────────────────────────────────────────────────────────────┘ + "#); + insta::assert_snapshot!(format!("{:-240}", results[2]), @r#" + ┌─────────────────────────────────────────────────────────────────────────────────────────────┐ + │ METADATA: │ + │ * entity_path: /gamma │ + │ * id: [**REDACTED**] │ + │ * version: [**REDACTED**] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ ┌───────────────────────────────────────────────┬──────────────────┬──────────────────────┐ │ + │ │ RowId ┆ tick ┆ gamma_out │ │ + │ │ --- ┆ --- ┆ --- │ │ + │ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(Int32) │ │ + │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ component: gamma_out │ │ + │ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ kind: data │ │ + │ │ is_sorted: true ┆ kind: index ┆ │ │ + │ │ kind: control ┆ ┆ │ │ + │ ╞═══════════════════════════════════════════════╪══════════════════╪══════════════════════╡ │ + │ │ row_[**REDACTED**] 
┆ 0 ┆ [100] │ │ + │ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ + │ │ row_[**REDACTED**] ┆ 1 ┆ [200] │ │ + │ └───────────────────────────────────────────────┴──────────────────┴──────────────────────┘ │ + └─────────────────────────────────────────────────────────────────────────────────────────────┘ + "#); +} diff --git a/crates/store/re_lenses_core/tests/test_combinators_explode.rs b/crates/store/re_lenses_core/tests/test_combinators_explode.rs new file mode 100644 index 000000000000..bf61f3f5a0ed --- /dev/null +++ b/crates/store/re_lenses_core/tests/test_combinators_explode.rs @@ -0,0 +1,225 @@ +mod util; + +use std::sync::Arc; + +use arrow::array::{Array as _, Int32Array, ListArray}; +use arrow::buffer::OffsetBuffer; +use arrow::datatypes::{DataType, Field, Int32Type}; +use re_lenses_core::combinators::{Explode, Transform as _}; +use util::DisplayRB; + +#[test] +fn test_explode_primitives() -> Result<(), Box> { + let input = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + Some(vec![Some(4), Some(5)]), + Some(vec![Some(6)]), + ]); + + insta::assert_snapshot!(format!("{}", DisplayRB(input.clone())), @r" + ┌───────────────────┐ + │ col │ + │ --- │ + │ type: List(Int32) │ + ╞═══════════════════╡ + │ [1, 2, 3] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [4, 5] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [6] │ + └───────────────────┘ + "); + + let explode = Explode; + let result = explode.transform(&input)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r" + ┌───────────────────┐ + │ col │ + │ --- │ + │ type: List(Int32) │ + ╞═══════════════════╡ + │ [1] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [2] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [3] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [4] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [5] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [6] │ + └───────────────────┘ + "); + + Ok(()) +} + +#[test] +fn test_explode_with_nulls_and_empty() -> Result<(), Box> { + let input = 
ListArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2)]), + None, + Some(vec![]), + Some(vec![Some(3)]), + ]); + + insta::assert_snapshot!(format!("{}", DisplayRB(input.clone())), @r" + ┌───────────────────┐ + │ col │ + │ --- │ + │ type: List(Int32) │ + ╞═══════════════════╡ + │ [1, 2] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [3] │ + └───────────────────┘ + "); + + let explode = Explode; + let result = explode.transform(&input)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r" + ┌───────────────────┐ + │ col │ + │ --- │ + │ type: List(Int32) │ + ╞═══════════════════╡ + │ [1] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [2] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [3] │ + └───────────────────┘ + "); + + Ok(()) +} + +#[test] +fn test_explode_nested_lists() -> Result<(), Box> { + let inner_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6]); + let inner_offsets = OffsetBuffer::new(vec![0, 2, 3, 6].into()); + let inner_field = Arc::new(Field::new_list_field(DataType::Int32, true)); + let inner_list = ListArray::new(inner_field, inner_offsets, Arc::new(inner_values), None); + + let outer_offsets = OffsetBuffer::new(vec![0, 2, 3].into()); + let outer_field = Arc::new(Field::new_list_field(inner_list.data_type().clone(), true)); + let input = ListArray::new( + outer_field, + outer_offsets, + Arc::new(inner_list.clone()), + None, + ); + + insta::assert_snapshot!(format!("{}", DisplayRB(input.clone())), @r" + ┌─────────────────────────┐ + │ col │ + │ --- │ + │ type: List(List(Int32)) │ + ╞═════════════════════════╡ + │ [[1, 2], [3]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[4, 5, 6]] │ + └─────────────────────────┘ + "); + + let explode = Explode; + let result = explode.transform(&input)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r" + ┌─────────────────────────┐ + │ col │ + │ --- │ + │ type: List(List(Int32)) │ + 
╞═════════════════════════╡ + │ [[1, 2]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[3]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[4, 5, 6]] │ + └─────────────────────────┘ + "); + + Ok(()) +} + +#[test] +fn test_explode_empty_input() -> Result<(), Box> { + // Test exploding an empty list + let input = ListArray::from_iter_primitive::(Vec::< + Option>>, + >::new()); + + let explode = Explode; + let result = explode.transform(&input)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r" + ┌───────────────────┐ + │ col │ + │ --- │ + │ type: List(Int32) │ + ╞═══════════════════╡ + └───────────────────┘ + "); + + Ok(()) +} + +#[test] +fn test_explode_with_skips_in_offset_buffer() -> Result<(), Box> { + let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + let offsets = OffsetBuffer::new(vec![0, 2, 7, 10].into()); + let validity = arrow::buffer::NullBuffer::from(vec![true, false, true]); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); + + let input = ListArray::new(field, offsets, Arc::new(values), Some(validity)); + + insta::assert_snapshot!(format!("{}", DisplayRB(input.clone())), @r" + ┌───────────────────┐ + │ col │ + │ --- │ + │ type: List(Int32) │ + ╞═══════════════════╡ + │ [0, 1] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [7, 8, 9] │ + └───────────────────┘ + "); + + let explode = Explode; + let result = explode.transform(&input)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r" + ┌───────────────────┐ + │ col │ + │ --- │ + │ type: List(Int32) │ + ╞═══════════════════╡ + │ [0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [1] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [7] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [8] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [9] │ + └───────────────────┘ + "); + + Ok(()) +} diff --git a/crates/store/re_lenses_core/tests/test_combinators_string.rs b/crates/store/re_lenses_core/tests/test_combinators_string.rs new file mode 100644 index 
000000000000..f8b6770141f7 --- /dev/null +++ b/crates/store/re_lenses_core/tests/test_combinators_string.rs @@ -0,0 +1,181 @@ +mod util; + +use re_lenses_core::combinators::{GetField, MapList, StringPrefix, StringSuffix, Transform as _}; + +use crate::util::{DisplayRB, fixtures::nested_string_struct_column}; + +/// Tests that `StringPrefix` and `StringSuffix` work correctly when the `StringArray` +/// is extracted from a nested struct where string arrays share a common values buffer. +#[test] +fn test_string_transforms_from_nested_struct() -> Result<(), Box> { + let list_array = nested_string_struct_column(); + + let names_list = MapList::new(GetField::new("data")) + .then(MapList::new(GetField::new("names"))) + .transform(&list_array)? + .unwrap(); + insta::assert_snapshot!(DisplayRB(names_list.clone()), @r" + ┌──────────────────┐ + │ col │ + │ --- │ + │ type: List(Utf8) │ + ╞══════════════════╡ + │ [alice] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, dave] │ + └──────────────────┘ + "); + + let colors_list = MapList::new(GetField::new("data")) + .then(MapList::new(GetField::new("colors"))) + .transform(&list_array)? + .unwrap(); + insta::assert_snapshot!(DisplayRB(colors_list.clone()), @r" + ┌──────────────────┐ + │ col │ + │ --- │ + │ type: List(Utf8) │ + ╞══════════════════╡ + │ [red] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, yellow] │ + └──────────────────┘ + "); + + // Test prefix on names array using MapList. + let prefix_names = MapList::new(StringPrefix::new("user:")) + .transform(&names_list)? 
+ .unwrap(); + insta::assert_snapshot!(DisplayRB(prefix_names.clone()), @r" + ┌───────────────────┐ + │ col │ + │ --- │ + │ type: List(Utf8) │ + ╞═══════════════════╡ + │ [user:alice] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, user:dave] │ + └───────────────────┘ + "); + + // Test suffix on colors array using MapList. + let suffix_colors = MapList::new(StringSuffix::new("_color")) + .transform(&colors_list)? + .unwrap(); + insta::assert_snapshot!(DisplayRB(suffix_colors.clone()), @r" + ┌──────────────────────┐ + │ col │ + │ --- │ + │ type: List(Utf8) │ + ╞══════════════════════╡ + │ [red_color] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, yellow_color] │ + └──────────────────────┘ + "); + + // Test chaining on names array using MapList and Then (via .then()). + let chained_names = MapList::new(StringPrefix::new("<").then(StringSuffix::new(">"))) + .transform(&names_list)? + .unwrap(); + insta::assert_snapshot!(DisplayRB(chained_names.clone()), @r" + ┌──────────────────┐ + │ col │ + │ --- │ + │ type: List(Utf8) │ + ╞══════════════════╡ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, ] │ + └──────────────────┘ + "); + + // Verify original nested list structure is unaffected by the transformations. 
+ insta::assert_snapshot!(DisplayRB(list_array.clone()), @r#" + ┌───────────────────────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Struct("data": Struct("names": Utf8, "colors": Utf8))) │ + ╞═══════════════════════════════════════════════════════════════════╡ + │ [{data: {names: alice, colors: red}}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{data: null}, {data: {names: dave, colors: yellow}}] │ + └───────────────────────────────────────────────────────────────────┘ + "#); + + Ok(()) +} + +/// Tests that `StringPrefix` and `StringSuffix` preserve empty strings as-is when configured to do so. +#[test] +fn test_string_transforms_preserve_empty_strings() -> Result<(), Box> { + use arrow::array::StringArray; + + let input = StringArray::from(vec![Some("hello"), Some(""), None, Some("world")]); + + let prefixed = StringPrefix::new("prefix_") + .with_prefix_empty_string(false) + .transform(&input)? + .unwrap(); + insta::assert_snapshot!(DisplayRB(prefixed), @r" + ┌──────────────┐ + │ col │ + │ --- │ + │ type: Utf8 │ + ╞══════════════╡ + │ prefix_hello │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ prefix_world │ + └──────────────┘ + "); + + let suffixed = StringSuffix::new("_suffix") + .with_suffix_empty_string(false) + .transform(&input)? 
+ .unwrap(); + insta::assert_snapshot!(DisplayRB(suffixed), @r" + ┌──────────────┐ + │ col │ + │ --- │ + │ type: Utf8 │ + ╞══════════════╡ + │ hello_suffix │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ world_suffix │ + └──────────────┘ + "); + + Ok(()) +} diff --git a/crates/store/re_lenses_core/tests/test_combinators_transform.rs b/crates/store/re_lenses_core/tests/test_combinators_transform.rs new file mode 100644 index 000000000000..fcfbda3df282 --- /dev/null +++ b/crates/store/re_lenses_core/tests/test_combinators_transform.rs @@ -0,0 +1,648 @@ +mod util; + +use std::str::FromStr as _; +use std::sync::Arc; + +use arrow::array::{ + Array as _, ArrayRef, Float32Array, Float64Array, Int32Builder, ListArray, ListBuilder, + StructArray, UInt8Array, +}; +use arrow::buffer::OffsetBuffer; +use arrow::datatypes::{DataType, Field, Fields}; +use re_lenses_core::Selector; +use re_lenses_core::combinators::{ + Flatten, ListToFixedSizeList, MapFixedSizeList, MapList, MapPrimitive, PrimitiveCast, + ReplaceNull, RowMajorToColumnMajor, StructToFixedList, Transform as _, +}; +use util::DisplayRB; + +use crate::util::fixtures; + +#[test] +fn simple() -> Result<(), Box> { + let array = fixtures::nested_list_struct_column(); + println!("{}", DisplayRB(array.clone())); + + let pipeline = Selector::from_str(".poses[]")?.pipe( + |source: &ArrayRef| -> Result, re_lenses_core::combinators::Error> { + let struct_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| re_lenses_core::combinators::Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "struct_to_fixed_list pipe".to_owned(), + })?; + Ok(StructToFixedList::new(["x", "y"]) + .transform(struct_array)? 
+ .map(|arr| Arc::new(arr) as ArrayRef)) + }, + ); + + let result: ListArray = pipeline.execute_per_row(&array)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result.clone())), @r" + ┌────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(FixedSizeList(2 x Float64)) │ + ╞════════════════════════════════════════╡ + │ [[1.0, 2.0], [3.0, 4.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[5.0, 6.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[7.0, null], [9.0, 10.0]] │ + └────────────────────────────────────────┘ + "); + + Ok(()) +} + +#[test] +fn add_one_to_leaves() -> Result<(), Box> { + let array = fixtures::nested_list_struct_column(); + println!("{}", DisplayRB(array.clone())); + + let pipeline = Selector::from_str(".poses[]")?.pipe( + |source: &ArrayRef| -> Result, re_lenses_core::combinators::Error> { + let struct_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| re_lenses_core::combinators::Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "struct_to_fixed_list pipe".to_owned(), + })?; + let fixed = StructToFixedList::new(["x", "y"]).transform(struct_array)?; + let Some(fixed) = fixed else { + return Ok(None); + }; + let mapped = + MapFixedSizeList::new(MapPrimitive::::new(|x| { + x + 1.0 + })) + .transform(&fixed)?; + Ok(mapped.map(|arr| Arc::new(arr) as ArrayRef)) + }, + ); + + let result = pipeline.execute_per_row(&array)?.unwrap(); + + insta::assert_snapshot!( + format!("{}", DisplayRB(result.clone())) + , @r" + ┌────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(FixedSizeList(2 x Float64)) │ + ╞════════════════════════════════════════╡ + │ [[2.0, 3.0], [4.0, 5.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[6.0, 7.0]] │ + 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[8.0, null], [10.0, 11.0]] │ + └────────────────────────────────────────┘ + " + ); + + Ok(()) +} + +#[test] +fn convert_to_f32() -> Result<(), Box> { + let array = fixtures::nested_list_struct_column(); + println!("{}", DisplayRB(array.clone())); + + let pipeline = Selector::from_str(".poses[]")?.pipe( + |source: &ArrayRef| -> Result, re_lenses_core::combinators::Error> { + let struct_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| re_lenses_core::combinators::Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "struct_to_fixed_list pipe".to_owned(), + })?; + let fixed = StructToFixedList::new(["x", "y"]).transform(struct_array)?; + let Some(fixed) = fixed else { + return Ok(None); + }; + let casted = MapFixedSizeList::new(PrimitiveCast::::new()) + .transform(&fixed)?; + Ok(casted.map(|arr| Arc::new(arr) as ArrayRef)) + }, + ); + + let result = pipeline.execute_per_row(&array)?.unwrap(); + + insta::assert_snapshot!(DisplayRB(result.clone()), @r" + ┌────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(FixedSizeList(2 x Float32)) │ + ╞════════════════════════════════════════╡ + │ [[1.0, 2.0], [3.0, 4.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[5.0, 6.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[7.0, null], [9.0, 10.0]] │ + └────────────────────────────────────────┘ + "); + + Ok(()) +} + +#[test] +fn replace_nulls() -> Result<(), Box> { + let array = fixtures::nested_list_struct_column(); + println!("{}", DisplayRB(array.clone())); + + let pipeline = 
Selector::from_str(".poses[]")?.pipe( + |source: &ArrayRef| -> Result, re_lenses_core::combinators::Error> { + let struct_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| re_lenses_core::combinators::Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "struct_to_fixed_list pipe".to_owned(), + })?; + let fixed = StructToFixedList::new(["x", "y"]).transform(struct_array)?; + let Some(fixed) = fixed else { + return Ok(None); + }; + let replaced = + MapFixedSizeList::new(ReplaceNull::::new(1337.0)) + .transform(&fixed)?; + Ok(replaced.map(|arr| Arc::new(arr) as ArrayRef)) + }, + ); + + let result = pipeline.execute_per_row(&array)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result.clone())), @r" + ┌─────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(FixedSizeList(2 x non-null Float64)) │ + ╞═════════════════════════════════════════════════╡ + │ [[1.0, 2.0], [3.0, 4.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[5.0, 6.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[7.0, 1337.0], [9.0, 10.0]] │ + └─────────────────────────────────────────────────┘ + "); + + Ok(()) +} + +#[test] +fn test_flatten_single_element() -> Result<(), Box> { + let array = fixtures::nested_list_struct_column(); + println!("{}", DisplayRB(array.clone())); + + let pipeline = Selector::from_str(".poses[]")?; + + let result = pipeline.execute_per_row(&array)?.unwrap(); + + insta::assert_snapshot!( + format!("{}", DisplayRB(result.clone())), @r#" + ┌────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Struct("x": Float64, "y": Float64)) │ + ╞════════════════════════════════════════════════╡ + │ [{x: 1.0, y: 2.0}, {x: 3.0, y: 
4.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{x: 5.0, y: 6.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{x: 7.0, y: null}, {x: 9.0, y: 10.0}] │ + └────────────────────────────────────────────────┘ + "# + ); + + Ok(()) +} + +#[test] +fn test_flatten_multiple_elements() -> Result<(), Box> { + let inner_builder = ListBuilder::new(arrow::array::Int32Builder::new()); + let mut outer_builder = ListBuilder::new(inner_builder); + + // Row 0: [[1, 2], [3, 4]] -> should flatten to [1, 2, 3, 4] + outer_builder.values().values().append_value(1); + outer_builder.values().values().append_value(2); + outer_builder.values().append(true); + outer_builder.values().values().append_value(3); + outer_builder.values().values().append_value(4); + outer_builder.values().append(true); + outer_builder.append(true); + + // Row 1: [[5, null], [6, 7, 8]] -> should flatten to [5, null, 6, 7, 8] + outer_builder.values().values().append_value(5); + outer_builder.values().values().append_null(); + outer_builder.values().append(true); + outer_builder.values().values().append_value(6); + outer_builder.values().values().append_value(7); + outer_builder.values().values().append_value(8); + outer_builder.values().append(true); + outer_builder.append(true); + + // Row 2: [[]] -> should flatten to [] + outer_builder.values().append(true); + outer_builder.append(true); + + // Row 3: [[], [9]] -> should flatten to [9] + outer_builder.values().append(true); + outer_builder.values().values().append_value(9); + outer_builder.values().append(true); + outer_builder.append(true); + + // Row 4: null -> should remain null + outer_builder.append(false); + + // Row 5: [[10, 11]] -> should flatten to [10, 11] + outer_builder.values().values().append_value(10); + 
outer_builder.values().values().append_value(11); + outer_builder.values().append(true); + outer_builder.append(true); + + // Row 6: [[32], [33, 34], [], null] -> should flatten to [32, 33, 34] + outer_builder.values().values().append_value(32); + outer_builder.values().append(true); + outer_builder.values().values().append_value(33); + outer_builder.values().values().append_value(34); + outer_builder.values().append(true); + outer_builder.values().append(true); + outer_builder.values().append(false); + outer_builder.append(true); + + let list_of_lists = outer_builder.finish(); + + println!("{}", DisplayRB(list_of_lists.clone())); + + let result = Selector::from_str(".[]")? + .execute_per_row(&list_of_lists)? + .unwrap(); + + insta::assert_snapshot!( + format!("{}", DisplayRB(result.clone())), @r" + ┌────────────────────┐ + │ col │ + │ --- │ + │ type: List(Int32) │ + ╞════════════════════╡ + │ [1, 2, 3, 4] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [5, null, 6, 7, 8] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [9] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [10, 11] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [32, 33, 34] │ + └────────────────────┘ + " + ); + + Ok(()) +} + +#[test] +fn test_row_major_to_col_major() -> Result<(), Box> { + let inner_builder = Int32Builder::new(); + let mut outer_builder = ListBuilder::new(inner_builder); + + // First list represents a 4x3 matrix in row-major order with some null elements. 
+ // Row 0 + outer_builder.values().append_value(1); + outer_builder.values().append_null(); + outer_builder.values().append_value(3); + // Row 1 + outer_builder.values().append_value(4); + outer_builder.values().append_value(5); + outer_builder.values().append_value(6); + // Row 2 + outer_builder.values().append_value(7); + outer_builder.values().append_value(8); + outer_builder.values().append_null(); + // Row 3 + outer_builder.values().append_value(10); + outer_builder.values().append_value(11); + outer_builder.values().append_value(12); + outer_builder.append(true); + + // Second list is invalid / null. + for _ in 0..12 { + // Add dummy values for Arrow's fixed-size requirements. + // See: https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeListArray.html#representation + outer_builder.values().append_value(0); + } + outer_builder.append(false); + + // Third list represents a 4x3 matrix in row-major order without null elements. + // Row 0 + outer_builder.values().append_value(13); + outer_builder.values().append_value(14); + outer_builder.values().append_value(15); + // Row 1 + outer_builder.values().append_value(16); + outer_builder.values().append_value(17); + outer_builder.values().append_value(18); + // Row 2 + outer_builder.values().append_value(19); + outer_builder.values().append_value(20); + outer_builder.values().append_value(21); + // Row 3 + outer_builder.values().append_value(22); + outer_builder.values().append_value(23); + outer_builder.values().append_value(24); + outer_builder.append(true); + + let input_array = outer_builder.finish(); + + // Cast to `FixedSizeListArray` and convert to column-major order. + let fixed_size_list_array = ListToFixedSizeList::new(12) + .transform(&input_array)? + .unwrap(); + let result = RowMajorToColumnMajor::new(4, 3) + .transform(&fixed_size_list_array)? 
+ .unwrap(); + + insta::assert_snapshot!( + format!("{}", DisplayRB(result.clone())), @r" + ┌──────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: FixedSizeList(12 x Int32) │ + ╞══════════════════════════════════════════════════╡ + │ [1, 4, 7, 10, null, 5, 8, 11, 3, 6, null, 12] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [13, 16, 19, 22, 14, 17, 20, 23, 15, 18, 21, 24] │ + └──────────────────────────────────────────────────┘ + " + ); + + Ok(()) +} + +#[test] +fn test_map_list_nullability() -> Result<(), Box> { + let array = fixtures::nested_list_struct_column(); + println!("{}", DisplayRB(array.clone())); + + let pipeline = Selector::from_str(".poses[]")?.pipe( + |source: &ArrayRef| -> Result, re_lenses_core::combinators::Error> { + let struct_array = source + .as_any() + .downcast_ref::() + .ok_or_else(|| re_lenses_core::combinators::Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "struct_to_fixed_list pipe".to_owned(), + })?; + Ok(StructToFixedList::new(["x", "y"]) + .transform(struct_array)? 
+ .map(|arr| Arc::new(arr) as ArrayRef)) + }, + ); + + let result: ListArray = pipeline.execute_per_row(&array)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result.clone())), @r" + ┌────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(FixedSizeList(2 x Float64)) │ + ╞════════════════════════════════════════╡ + │ [[1.0, 2.0], [3.0, 4.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[5.0, 6.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[7.0, null], [9.0, 10.0]] │ + └────────────────────────────────────────┘ + "); + + Ok(()) +} + +/// Tests that `StructToFixedList` can override the nullability of the output list. +#[test] +fn test_struct_to_fixed_list_nullability_override() -> Result<(), Box> { + let fields = Fields::from(vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + ]); + let values = StructArray::new( + fields.clone(), + vec![ + Arc::new(Float64Array::from(vec![1.0, 3.0, 5.0])), + Arc::new(Float64Array::from(vec![2.0, 4.0, 6.0])), + ], + None, + ); + let array = ListArray::new( + Arc::new(Field::new_list_field(DataType::Struct(fields), false)), + OffsetBuffer::from_lengths([2, 1, 0]), + Arc::new(values), + None, + ); + println!("{}", DisplayRB(array.clone())); + + let pipeline = MapList::new(StructToFixedList::new(["x", "y"]).with_nullable(false)); + + let result: ListArray = pipeline.transform(&array)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result.clone())), @r" + ┌──────────────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(non-null FixedSizeList(2 x non-null Float64)) │ + ╞══════════════════════════════════════════════════════════╡ + │ [[1.0, 2.0], [3.0, 4.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[5.0, 
6.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + └──────────────────────────────────────────────────────────┘ + "); + + Ok(()) +} + +#[test] +fn test_map_list_outer_nullability() -> Result<(), Box> { + let array = fixtures::list_not_nullable(); + println!("{}", DisplayRB(array.clone())); + + let pipeline = MapList::new(PrimitiveCast::::new()); + + let result: ListArray = pipeline.transform(&array)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result.clone())), @r" + ┌──────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(non-null Float32) │ + ╞══════════════════════════════╡ + │ [1.0, 2.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [3.0, 4.0, 5.0] │ + └──────────────────────────────┘ + "); + + let array = fixtures::list_with_nulls(); + println!("{}", DisplayRB(array.clone())); + + let result: ListArray = pipeline.transform(&array)?.unwrap(); + insta::assert_snapshot!(format!("{}", DisplayRB(result.clone())), @r" + ┌─────────────────────┐ + │ col │ + │ --- │ + │ type: List(Float32) │ + ╞═════════════════════╡ + │ [1.0, 2.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + └─────────────────────┘ + "); + + Ok(()) +} + +#[test] +fn test_map_list_outer_nullability_identity() -> Result<(), Box> { + let array = fixtures::list_not_nullable(); + println!("{}", DisplayRB(array.clone())); + + let pipeline = MapList::new(MapPrimitive::::new(|x| x)); + + let result: ListArray = pipeline.transform(&array)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result.clone())), @r" + ┌────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(non-null UInt8) │ + ╞════════════════════════════╡ + │ [1, 2] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [3, 4, 5] │ + └────────────────────────────┘ + "); + + Ok(()) +} + +#[test] +fn test_flatten_fixed_size_list() -> Result<(), Box> { + let array = fixtures::nested_list_struct_column(); + + // Produces List(FixedSizeList(2 x Float64)) instead of creating a new test case from 
scratch. + let source: ListArray = Selector::from_str(".poses[]")? + .pipe( + |source: &ArrayRef| -> Result, re_lenses_core::combinators::Error> { + let struct_array = + source + .as_any() + .downcast_ref::() + .ok_or_else(|| re_lenses_core::combinators::Error::TypeMismatch { + expected: "StructArray".to_owned(), + actual: source.data_type().clone(), + context: "struct_to_fixed_list pipe".to_owned(), + })?; + Ok(StructToFixedList::new(["x", "y"]) + .transform(struct_array)? + .map(|arr| Arc::new(arr) as ArrayRef)) + }, + ) + .execute_per_row(&array)? + .unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(source.clone())), @" + ┌────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(FixedSizeList(2 x Float64)) │ + ╞════════════════════════════════════════╡ + │ [[1.0, 2.0], [3.0, 4.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[5.0, 6.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[7.0, null], [9.0, 10.0]] │ + └────────────────────────────────────────┘ + "); + + let result = Flatten::new().transform(&source)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" + ┌────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Float64) │ + ╞════════════════════════╡ + │ [1.0, 2.0, 3.0, 4.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [5.0, 6.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [7.0, null, 9.0, 10.0] │ + └────────────────────────┘ + "); + + Ok(()) +} diff --git a/crates/store/re_lenses_core/tests/test_selector_array.rs b/crates/store/re_lenses_core/tests/test_selector_array.rs new file mode 100644 index 000000000000..f5a4c5072430 --- /dev/null +++ b/crates/store/re_lenses_core/tests/test_selector_array.rs @@ -0,0 +1,86 @@ +mod 
util; + +use std::sync::Arc; + +use re_chunk::ArrowArray as _; +use re_lenses_core::{Selector, SelectorError as Error}; + +use crate::util::fixtures; + +#[test] +fn execute_struct_field() -> Result<(), Error> { + let array = fixtures::struct_column(); + + let result = ".location" + .parse::()? + .execute(Arc::new(array))? + .unwrap(); + + insta::assert_snapshot!(util::DisplayRB(result), @r#" + ┌──────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: Struct("x": Float64, "y": Float64) │ + ╞══════════════════════════════════════════╡ + │ {x: 1.0, y: 2.0} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {x: 3.0, y: 4.0} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {x: 5.0, y: null} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {x: 7.0, y: 8.0} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + └──────────────────────────────────────────┘ + "#); + + Ok(()) +} + +#[test] +fn execute_each_struct_field() -> Result<(), Error> { + let array = fixtures::nested_struct_column(); + + let result = ".[].location" + .parse::()? + .execute(Arc::new(array.clone()))? + .unwrap(); + + // NOTE: When calling `execute` without a surrounding `map()` + // statement it is possible to change the row count. This mimics + // the behavior of `jq`. 
+ assert_eq!(array.len(), 7); + assert_eq!(result.len(), 8); + + insta::assert_snapshot!(util::DisplayRB(result), @r#" + ┌──────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: Struct("x": Float64, "y": Float64) │ + ╞══════════════════════════════════════════╡ + │ {x: 1.0, y: 2.0} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {x: 3.0, y: 4.0} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {x: 5.0, y: 6.0} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {x: 7.0, y: 8.0} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + └──────────────────────────────────────────┘ + "#); + + Ok(()) +} diff --git a/crates/store/re_lenses_core/tests/test_selector_extraction.rs b/crates/store/re_lenses_core/tests/test_selector_extraction.rs new file mode 100644 index 000000000000..b7cc7915a884 --- /dev/null +++ b/crates/store/re_lenses_core/tests/test_selector_extraction.rs @@ -0,0 +1,118 @@ +mod util; + +use arrow::datatypes::{DataType, Field, Fields}; +use re_lenses_core::Selector; + +use crate::util::fixtures; + +fn formatted(pair: impl IntoIterator) -> String { + pair.into_iter() + .map(|(sel, dt)| format!("{sel} ({dt})")) + .collect::>() + .join("\n") +} + +#[test] +fn extract_scalar_fields_from_nested_struct() { + // Schema: + // ┌─ a (struct) + // │ ├─ b: Float64 + // │ └─ c: Int32 + // └─ d: Int32 + + let bc_fields = Fields::from(vec![ + Field::new("b", DataType::Float64, true), + Field::new("c", DataType::Int32, true), + ]); + + let e_field = Field::new_list_field(DataType::Float32, false); + + let root_fields = Fields::from(vec![ + Field::new("a", DataType::Struct(bc_fields), true), + Field::new("d", DataType::Int32, true), + Field::new("e", DataType::FixedSizeList(e_field.into(), 3), true), + ]); + + let datatype = DataType::Struct(root_fields); + + let 
result = re_lenses_core::extract_nested_fields(&datatype, |dt| { + matches!(dt, DataType::Float64 | DataType::Float32 | DataType::Int32) + }) + .expect("Should find nested fields"); + + insta::assert_snapshot!(formatted(result), @" + .d (Int32) + .e[] (Float32) + .a.b (Float64) + .a.c (Int32) + "); +} + +#[test] +fn extract_scalar_fields_from_nested_list_struct() { + // Schema: + // ┌─ a (struct) + // │ ├─ b: [Float64] + // │ └─ c: [Int32] + // └─ d: [Float64] + + let b_list = DataType::List(Field::new_list_field(DataType::Float64, true).into()); + let c_list = DataType::List(Field::new_list_field(DataType::Int32, true).into()); + let bc_fields = Fields::from(vec![ + Field::new("b", b_list, true), + Field::new("c", c_list, true), + ]); + + let d_list = DataType::List(Field::new_list_field(DataType::Float64, true).into()); + let e_list = DataType::List( + Field::new_list_field( + DataType::FixedSizeList(Field::new_list_field(DataType::Float32, false).into(), 3), + true, + ) + .into(), + ); + let root_fields = Fields::from(vec![ + Field::new("a", DataType::Struct(bc_fields), true), + Field::new("d", d_list, true), + Field::new("e", e_list, true), + ]); + + let datatype = DataType::Struct(root_fields); + + let result = re_lenses_core::extract_nested_fields(&datatype, |dt| { + matches!(dt, DataType::Float64 | DataType::Float32 | DataType::Int32) + }) + .expect("Should find nested fields"); + + insta::assert_snapshot!(formatted(result), @" + .d[] (Float64) + .e[][] (Float32) + .a.b[] (Float64) + .a.c[] (Int32) + "); +} + +#[test] +fn extract_nested_fields_fixtures() { + let array = fixtures::nested_struct_column(); + let result = re_lenses_core::extract_nested_fields(&array.value_type(), |dt| { + matches!(dt, DataType::Float64) + }) + .expect("Should find nested fields"); + + insta::assert_snapshot!(formatted(result), @" + .location.x (Float64) + .location.y (Float64) + "); + + let array = fixtures::nested_list_struct_column(); + let result = 
re_lenses_core::extract_nested_fields(&array.value_type(), |dt| { + matches!(dt, DataType::Float64) + }) + .expect("Should find nested fields"); + + insta::assert_snapshot!(formatted(result), @" + .poses[].x (Float64) + .poses[].y (Float64) + "); +} diff --git a/crates/store/re_lenses_core/tests/test_selector_functions.rs b/crates/store/re_lenses_core/tests/test_selector_functions.rs new file mode 100644 index 000000000000..60fb0084cb65 --- /dev/null +++ b/crates/store/re_lenses_core/tests/test_selector_functions.rs @@ -0,0 +1,656 @@ +#![expect(clippy::unwrap_used)] // Okay to use unwrap in tests + +mod util; + +use std::sync::Arc; + +use arrow::array::{Array as _, ArrayRef, Float64Array, ListArray, StringArray}; +use arrow::buffer::{NullBuffer, OffsetBuffer}; +use arrow::datatypes::{DataType, Field, Float64Type}; + +use re_lenses_core::combinators::Error; +use re_lenses_core::function_registry::{FunctionRegistry, FunctionRegistryError}; +use re_lenses_core::{Literal, Runtime, Selector, SelectorError}; +use util::DisplayRB; + +// -- Transform functions ----------------------------------------------------- + +/// Doubles every float64 value. +fn double_values(source: &ArrayRef) -> Result, Error> { + let values = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "Float64".into(), + actual: source.data_type().clone(), + context: "double_values".into(), + })?; + + let doubled: Float64Array = values.iter().map(|v| v.map(|x| x * 2.0)).collect(); + Ok(Some(Arc::new(doubled))) +} + +/// Repeats every float64 value 3 times, producing a list array. 
+fn repeat3(source: &ArrayRef) -> Result, Error> { + let values = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "Float64".into(), + actual: source.data_type().clone(), + context: "repeat3".into(), + })?; + + let repeated: ListArray = ListArray::from_iter_primitive::( + values + .iter() + .map(|v| Some(std::iter::repeat_n(v, 3).collect::>())), + ); + Ok(Some(Arc::new(repeated))) +} + +/// Prepends a prefix to every string value. +fn prepend(prefix: String) -> impl Fn(&ArrayRef) -> Result, Error> { + move |source: &ArrayRef| { + let values = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "Utf8".into(), + actual: source.data_type().clone(), + context: "prepend".into(), + })?; + + let prefixed: StringArray = values + .iter() + .map(|v| v.map(|s| format!("{prefix}{s}"))) + .collect(); + Ok(Some(Arc::new(prefixed) as ArrayRef)) + } +} + +/// Replaces Float64 values > 4.0 with `null`, passes others through. +fn nullify_gt4(source: &ArrayRef) -> Result, Error> { + let values = source + .as_any() + .downcast_ref::() + .ok_or_else(|| Error::TypeMismatch { + expected: "Float64".into(), + actual: source.data_type().clone(), + context: "nullify_gt4".into(), + })?; + + let result: Float64Array = values + .iter() + .map(|v| { + let x = v?; + if x <= 4.0 { Some(x) } else { None } + }) + .collect(); + Ok(Some(Arc::new(result))) +} + +// -- Helpers ----------------------------------------------------------------- + +fn make_float_list(rows: &[Option<&[f64]>]) -> ListArray { + ListArray::from_iter_primitive::( + rows.iter() + .map(|row| row.map(|vals| vals.iter().map(|&v| Some(v)))), + ) +} + +fn make_string_list(rows: &[Option<&[Option<&str>]>]) -> ListArray { + let mut values: Vec> = Vec::new(); + let mut offsets = vec![0i32]; + let mut nulls = Vec::new(); + + for row in rows { + if let Some(vals) = row { + values.extend_from_slice(vals); + offsets.push(values.len().try_into().unwrap()); + 
nulls.push(true); + } else { + offsets.push(*offsets.last().unwrap()); + nulls.push(false); + } + } + + let string_array = StringArray::from(values); + ListArray::new( + Arc::new(Field::new_list_field(DataType::Utf8, true)), + OffsetBuffer::new(offsets.into()), + Arc::new(string_array), + Some(NullBuffer::from(nulls)), + ) +} + +fn test_runtime() -> Runtime { + let mut registry = FunctionRegistry::new(); + registry.register("double", || double_values).unwrap(); + registry.register("repeat3", || repeat3).unwrap(); + registry.register("prepend", prepend).unwrap(); + registry.register("nullify_gt4", || nullify_gt4).unwrap(); + + Runtime { + function_registry: Arc::new(registry), + } +} + +// -- Registry unit tests ----------------------------------------------------- + +#[test] +fn register_and_get_no_args() { + let rt = test_runtime(); + + assert!(rt.function_registry.get("double", &[]).is_ok()); +} + +#[test] +fn register_and_get_with_args() { + let rt = test_runtime(); + + assert!( + rt.function_registry + .get("prepend", &[Literal::String("hello_".into())]) + .is_ok() + ); +} + +#[test] +fn get_unknown_function() { + let registry = FunctionRegistry::new(); + let result = registry.get("nonexistent", &[]); + assert!(matches!( + result, + Err(FunctionRegistryError::UnknownFunction { .. }) + )); +} + +#[test] +fn get_no_arg_function_with_extra_args() { + let rt = test_runtime(); + + // The zero-arg constructor ignores extra arguments, so this still succeeds. + // This test documents the current behavior. + let result = rt + .function_registry + .get("double", &[Literal::String("unexpected".into())]); + assert!(result.is_ok()); +} + +#[test] +fn get_one_arg_function_with_no_args() { + let rt = test_runtime(); + + let result = rt.function_registry.get("prepend", &[]); + assert!(matches!( + result, + Err(FunctionRegistryError::WrongArguments { .. 
}) + )); +} + +#[test] +fn register_multiple_functions() { + let rt = test_runtime(); + + assert!(rt.function_registry.get("double", &[]).is_ok()); + assert!( + rt.function_registry + .get("prepend", &[Literal::String("x_".into())]) + .is_ok() + ); +} + +// -- Selector + function integration tests ----------------------------------- + +#[test] +fn selector_calls_no_arg_function() -> Result<(), SelectorError> { + let rt = test_runtime(); + + let array = make_float_list(&[Some(&[1.0, 2.0]), Some(&[3.0]), None]); + + let selector = Selector::parse("double()")?; + let via_registry = rt.execute_per_row(&selector, &array)?.unwrap(); + + let via_pipe = Selector::parse(".")? + .pipe(double_values) + .execute_per_row(&array)? + .unwrap(); + + assert_eq!(via_registry, via_pipe); + + insta::assert_snapshot!(DisplayRB(via_registry), @r" + ┌─────────────────────┐ + │ col │ + │ --- │ + │ type: List(Float64) │ + ╞═════════════════════╡ + │ [2.0, 4.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [6.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + └─────────────────────┘ + "); + Ok(()) +} + +#[test] +fn selector_calls_function_with_string_arg() -> Result<(), SelectorError> { + let rt = test_runtime(); + + let array = make_string_list(&[ + Some(&[Some("alice"), Some("bob")]), + Some(&[Some("carol"), None]), + None, + ]); + + let selector = Selector::parse(r#"prepend("hello_")"#)?; + let result = rt.execute_per_row(&selector, &array)?.unwrap(); + + insta::assert_snapshot!(DisplayRB(result), @r" + ┌──────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Utf8) │ + ╞══════════════════════════╡ + │ [hello_alice, hello_bob] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [hello_carol, null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + └──────────────────────────┘ + "); + Ok(()) +} + +#[test] +fn selector_pipes_path_into_function() -> Result<(), SelectorError> { + let rt = test_runtime(); + + let array = util::fixtures::nested_struct_column(); + + let selector = ".location.x | double()".parse::()?; + let 
via_registry = rt.execute_per_row(&selector, &array)?.unwrap(); + + let via_pipe = Selector::parse(".location.x")? + .pipe(double_values) + .execute_per_row(&array)? + .unwrap(); + + assert_eq!(via_registry, via_pipe); + + insta::assert_snapshot!(DisplayRB(via_registry), @r" + ┌─────────────────────┐ + │ col │ + │ --- │ + │ type: List(Float64) │ + ╞═════════════════════╡ + │ [2.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [6.0, 10.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, 14.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, null] │ + └─────────────────────┘ + "); + + // NOTE: We also test functions that insert null values. + let selector = ".location.x | nullify_gt4() | double()".parse::()?; + let via_registry = rt.execute_per_row(&selector, &array)?.unwrap(); + + let via_pipe = Selector::parse(".location.x")? + .pipe(nullify_gt4) + .pipe(double_values) + .execute_per_row(&array)? + .unwrap(); + + assert_eq!(via_registry, via_pipe); + + insta::assert_snapshot!(DisplayRB(via_registry), @r" + ┌─────────────────────┐ + │ col │ + │ --- │ + │ type: List(Float64) │ + ╞═════════════════════╡ + │ [2.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [6.0, null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, null] │ + └─────────────────────┘ + "); + + Ok(()) +} + +#[test] +fn selector_pipes_nested_list_path_into_function() -> Result<(), SelectorError> { + let rt = test_runtime(); + + let array = util::fixtures::nested_list_struct_column(); + + let selector = ".poses[].y | double()".parse::()?; + let via_registry = rt.execute_per_row(&selector, &array)?.unwrap(); + + let via_pipe = Selector::parse(".poses[].y")? + .pipe(double_values) + .execute_per_row(&array)? 
+ .unwrap(); + + assert_eq!(via_registry, via_pipe); + + insta::assert_snapshot!(DisplayRB(via_registry), @" + ┌─────────────────────┐ + │ col │ + │ --- │ + │ type: List(Float64) │ + ╞═════════════════════╡ + │ [4.0, 8.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [12.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, 20.0] │ + └─────────────────────┘ + "); + + let selector = ".poses[].y | double | repeat3".parse::()?; + let result = rt.execute_per_row(&selector, &array)?.unwrap(); + + insta::assert_snapshot!(DisplayRB(result), @" + ┌──────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(List(Float64)) │ + ╞══════════════════════════════════════════╡ + │ [[4.0, 4.0, 4.0], [8.0, 8.0, 8.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[12.0, 12.0, 12.0]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[null, null, null], [20.0, 20.0, 20.0]] │ + └──────────────────────────────────────────┘ + "); + + let selector = ".poses[].y | double | repeat3 | .[]".parse::()?; + let result = rt.execute_per_row(&selector, &array)?.unwrap(); + + insta::assert_snapshot!(DisplayRB(result), @" + ┌──────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Float64) │ + ╞══════════════════════════════════════╡ + │ [4.0, 4.0, 4.0, 8.0, 8.0, 8.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [12.0, 12.0, 12.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, null, null, 20.0, 20.0, 20.0] │ + └──────────────────────────────────────┘ + "); + + let selector = "map(.poses[].y | double | repeat3 | .[])".parse::()?; + let 
result = rt.execute(&selector, Arc::new(array.clone()))?.unwrap(); + + insta::assert_snapshot!(DisplayRB(result), @" + ┌──────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Float64) │ + ╞══════════════════════════════════════╡ + │ [4.0, 4.0, 4.0, 8.0, 8.0, 8.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [12.0, 12.0, 12.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, null, null, 20.0, 20.0, 20.0] │ + └──────────────────────────────────────┘ + "); + + Ok(()) +} + +#[test] +fn selector_pipes_path_into_string_function() -> Result<(), SelectorError> { + let rt = test_runtime(); + + let array = util::fixtures::nested_string_struct_column(); + + let selector = r#".data.names | prepend("user_")"#.parse::()?; + let result = rt.execute_per_row(&selector, &array)?.unwrap(); + + insta::assert_snapshot!(DisplayRB(result), @r" + ┌───────────────────┐ + │ col │ + │ --- │ + │ type: List(Utf8) │ + ╞═══════════════════╡ + │ [user_alice] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, user_dave] │ + └───────────────────┘ + "); + Ok(()) +} + +#[test] +fn selector_chains_two_functions() -> Result<(), SelectorError> { + let rt = test_runtime(); + + let array = make_string_list(&[Some(&[Some("world"), Some("there")]), Some(&[Some("x")])]); + + let selector = r#"prepend("hello_") | prepend("say_")"#.parse::()?; + let via_registry = rt.execute_per_row(&selector, &array)?.unwrap(); + + let via_pipe = Selector::parse(".")? + .pipe(prepend("hello_".into())) + .pipe(prepend("say_".into())) + .execute_per_row(&array)? 
+ .unwrap(); + + assert_eq!(via_registry, via_pipe); + + insta::assert_snapshot!(DisplayRB(via_registry), @r" + ┌────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Utf8) │ + ╞════════════════════════════════════╡ + │ [say_hello_world, say_hello_there] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [say_hello_x] │ + └────────────────────────────────────┘ + "); + Ok(()) +} + +#[test] +fn selector_unknown_function_errors() { + let array = make_float_list(&[Some(&[1.0])]); + + let result = "missing_func()" + .parse::() + .unwrap() + .execute_per_row(&array); + + assert!(result.is_err()); +} + +#[test] +fn selector_pipes_struct_field_into_function() -> Result<(), SelectorError> { + let rt = test_runtime(); + + let array = util::fixtures::struct_column(); + + let selector = ".location.y | double()".parse::()?; + let via_registry = rt.execute(&selector, Arc::new(array.clone()))?.unwrap(); + + let via_pipe = Selector::parse(".location.y")? + .pipe(double_values) + .execute(Arc::new(array))? 
+ .unwrap(); + + assert_eq!(via_registry.as_ref(), via_pipe.as_ref()); + + insta::assert_snapshot!(util::DisplayRB(via_registry), @" + ┌───────────────┐ + │ col │ + │ --- │ + │ type: Float64 │ + ╞═══════════════╡ + │ 4.0 │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 8.0 │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 16.0 │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + └───────────────┘ + "); + Ok(()) +} + +#[test] +fn selector_function_with_missing_args_errors() { + let rt = test_runtime(); + + let array = make_string_list(&[Some(&[Some("hello")])]); + + let selector = "prepend()".parse::().unwrap(); + let result = rt.execute_per_row(&selector, &array); + + assert!(result.is_err()); +} + +#[test] +fn selector_deep_nested_list_double() -> Result<(), SelectorError> { + let rt = test_runtime(); + let array = util::fixtures::deep_nested_list_column(); + + let selector = ".[] | .[] | double()".parse::()?; + let result = rt.execute_per_row(&selector, &array)?.unwrap(); + + insta::assert_snapshot!(DisplayRB(result), @r" + ┌────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Float64) │ + ╞════════════════════════╡ + │ [2.0, 6.0, 10.0, 14.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [18.0] │ + └────────────────────────┘ + "); + + let selector = "map(.[] | .[] | double())".parse::()?; + let via_registry = rt.execute(&selector, Arc::new(array))?.unwrap(); + + insta::assert_snapshot!(DisplayRB(via_registry), @r" + ┌────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Float64) │ + ╞════════════════════════╡ + │ [2.0, 6.0, 10.0, 14.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [18.0] │ + 
└────────────────────────┘ + "); + + Ok(()) +} + +#[test] +fn selector_deep_nested_flatten_all() -> Result<(), SelectorError> { + let array = util::fixtures::deep_nested_list_column(); + + let result = ".[] | .[] | .[]" + .parse::()? + .execute(Arc::new(array))? + .unwrap(); + + insta::assert_snapshot!(DisplayRB(result), @r" + ┌───────────────┐ + │ col │ + │ --- │ + │ type: Float64 │ + ╞═══════════════╡ + │ 1.0 │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 3.0 │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 5.0 │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 7.0 │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 9.0 │ + └───────────────┘ + "); + + Ok(()) +} diff --git a/crates/store/re_lenses_core/tests/test_selector_list_array.rs b/crates/store/re_lenses_core/tests/test_selector_list_array.rs new file mode 100644 index 000000000000..b2564b4dbcc7 --- /dev/null +++ b/crates/store/re_lenses_core/tests/test_selector_list_array.rs @@ -0,0 +1,541 @@ +mod util; + +use std::sync::Arc; + +use arrow::{ + array::{Array as _, FixedSizeListArray, Int32Array, ListArray}, + buffer::OffsetBuffer, + datatypes::{DataType, Field}, + error::ArrowError, +}; +use re_lenses_core::{Selector, SelectorError as Error}; +use util::DisplayRB; + +use crate::util::fixtures; + +#[test] +fn execute_nested_struct() -> Result<(), Error> { + let array = fixtures::nested_struct_column(); + + let result = ".location.x" + .parse::()? + .execute_per_row(&array)? 
+ .unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r" + ┌─────────────────────┐ + │ col │ + │ --- │ + │ type: List(Float64) │ + ╞═════════════════════╡ + │ [1.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [3.0, 5.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, 7.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, null] │ + └─────────────────────┘ + "); + + Ok(()) +} + +#[test] +fn execute_identity() -> Result<(), Error> { + let array = fixtures::nested_list_struct_column(); + + let result = ".".parse::()?.execute_per_row(&array)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r#" + ┌─────────────────────────────────────────────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Struct("poses": non-null List(non-null Struct("x": Float64, "y": Float64)))) │ + ╞═════════════════════════════════════════════════════════════════════════════════════════╡ + │ [{poses: [{x: 1.0, y: 2.0}, {x: 3.0, y: 4.0}]}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{poses: [{x: 5.0, y: 6.0}]}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{poses: []}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{poses: [{x: 7.0, y: null}, {x: 9.0, y: 10.0}]}] │ + └─────────────────────────────────────────────────────────────────────────────────────────┘ + "#); + Ok(()) +} + +#[test] +fn execute_simple_field() -> Result<(), Error> { + let array = fixtures::nested_list_struct_column(); + + let result = ".poses" + .parse::()? + .execute_per_row(&array)? 
+ .unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r#" + ┌───────────────────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(List(non-null Struct("x": Float64, "y": Float64))) │ + ╞═══════════════════════════════════════════════════════════════╡ + │ [[{x: 1.0, y: 2.0}, {x: 3.0, y: 4.0}]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[{x: 5.0, y: 6.0}]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[{x: 7.0, y: null}, {x: 9.0, y: 10.0}]] │ + └───────────────────────────────────────────────────────────────┘ + "#); + + let result = "map(.poses)" + .parse::()? + .execute(Arc::new(array.clone()))? + .unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r#" + ┌───────────────────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(List(non-null Struct("x": Float64, "y": Float64))) │ + ╞═══════════════════════════════════════════════════════════════╡ + │ [[{x: 1.0, y: 2.0}, {x: 3.0, y: 4.0}]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[{x: 5.0, y: 6.0}]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[{x: 7.0, y: null}, {x: 9.0, y: 10.0}]] │ + └───────────────────────────────────────────────────────────────┘ + "#); + + Ok(()) +} + +#[test] +fn execute_index() -> Result<(), Error> { + let array = fixtures::nested_list_struct_column(); + + let result = ".poses[0]" + .parse::()? 
+ .execute_per_row(&array)? + .unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r#" + ┌────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Struct("x": Float64, "y": Float64)) │ + ╞════════════════════════════════════════════════╡ + │ [{x: 1.0, y: 2.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{x: 5.0, y: 6.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{x: 7.0, y: null}] │ + └────────────────────────────────────────────────┘ + "#); + Ok(()) +} + +#[test] +fn execute_index_chained() -> Result<(), Error> { + let array = fixtures::nested_list_struct_column(); + + let result = ".poses[0].x" + .parse::()? + .execute_per_row(&array)? + .unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r" + ┌─────────────────────┐ + │ col │ + │ --- │ + │ type: List(Float64) │ + ╞═════════════════════╡ + │ [1.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [5.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [7.0] │ + └─────────────────────┘ + "); + Ok(()) +} + +#[test] +fn execute_index_to_extract_second_element() -> Result<(), Error> { + let array = fixtures::nested_list_struct_column(); + + let result = ".poses[1]" + .parse::()? + .execute_per_row(&array)? 
+ .unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r#" + ┌────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Struct("x": Float64, "y": Float64)) │ + ╞════════════════════════════════════════════════╡ + │ [{x: 3.0, y: 4.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{x: 9.0, y: 10.0}] │ + └────────────────────────────────────────────────┘ + "#); + Ok(()) +} + +#[test] +fn execute_array_each() -> Result<(), Error> { + let array = fixtures::nested_list_struct_column(); + + let result = ".poses[]" + .parse::()? + .execute_per_row(&array)? + .unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r#" + ┌────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Struct("x": Float64, "y": Float64)) │ + ╞════════════════════════════════════════════════╡ + │ [{x: 1.0, y: 2.0}, {x: 3.0, y: 4.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{x: 5.0, y: 6.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{x: 7.0, y: null}, {x: 9.0, y: 10.0}] │ + └────────────────────────────────────────────────┘ + "#); + Ok(()) +} + +#[test] +fn execute_parse_error() { + let result = ".poses[".parse::(); + + assert!(matches!(result, Err(Error::Parse(_)))); +} + +#[test] +fn execute_missing_field() { + let array = fixtures::nested_list_struct_column(); + + let result = ".nonexistent" + .parse::() + .unwrap() + .execute_per_row(&array) + .expect("should not error"); + + assert!(result.is_none(), "missing field 
should return None"); +} + +#[test] +fn execute_index_out_of_bounds() -> Result<(), Error> { + let array = fixtures::nested_list_struct_column(); + + let result = ".poses[10]" + .parse::()? + .execute_per_row(&array)? + .unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r#" + ┌────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Struct("x": Float64, "y": Float64)) │ + ╞════════════════════════════════════════════════╡ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + └────────────────────────────────────────────────┘ + "#); + Ok(()) +} + +// TODO(RR-3435): Implement indexing into `FixedSizeListArray`. +#[test] +fn execute_index_on_fixed_size_list() -> Result<(), Error> { + let values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]); + let fixed_field = Arc::new(Field::new("item", DataType::Int32, true)); + let fixed_list = FixedSizeListArray::new(fixed_field, 3, Arc::new(values), None); + + let offsets = OffsetBuffer::new(vec![0, 2, 3].into()); + let list_field = Arc::new(Field::new_list_field(fixed_list.data_type().clone(), true)); + let array = ListArray::new(list_field, offsets, Arc::new(fixed_list), None); + + insta::assert_snapshot!(format!("{}", DisplayRB(array.clone())), @r" + ┌──────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(FixedSizeList(3 x Int32)) │ + ╞══════════════════════════════════════╡ + │ [[1, 2, 3], [4, 5, 6]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[7, 8, 9]] │ + └──────────────────────────────────────┘ + "); + + let result = ".[0][1]".parse::()?.execute_per_row(&array); + + assert!(matches!(result, Err(Error::Runtime(..)))); + + Ok(()) +} + +#[test] +fn 
execute_each_on_fixed_size_list() -> Result<(), Error> { + // Build List> + // Row 0: [[1,2,3], [4,5,6]] -> flatten to [1,2,3,4,5,6] + // Row 1: [[7,8,9]] -> flatten to [7,8,9] + + let values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]); + let fixed_field = Arc::new(Field::new("item", DataType::Int32, true)); + let fixed_list = FixedSizeListArray::new(fixed_field, 3, Arc::new(values), None); + + let offsets = OffsetBuffer::new(vec![0, 2, 3].into()); + let list_field = Arc::new(Field::new_list_field(fixed_list.data_type().clone(), true)); + let array = ListArray::new(list_field, offsets, Arc::new(fixed_list), None); + + let result = ".[]".parse::()?.execute_per_row(&array)?.unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @" + ┌────────────────────┐ + │ col │ + │ --- │ + │ type: List(Int32) │ + ╞════════════════════╡ + │ [1, 2, 3, 4, 5, 6] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [7, 8, 9] │ + └────────────────────┘ + "); + + Ok(()) +} + +#[test] +fn execute_optional_field() -> Result<(), Error> { + let array = fixtures::nested_struct_column(); + + // Accessing a field that doesn't exist returns `None`. + let result = ".location.z".parse::()?.execute_per_row(&array)?; + assert!(result.is_none(), "missing field should return None"); + + let result = ".foo.x".parse::()?.execute_per_row(&array)?; + assert!(result.is_none(), "missing field should return None"); + + // With `?`, the missing field is suppressed and we get `None` instead. + let result = ".location.z?" + .parse::()? + .execute_per_row(&array)?; + + assert!(result.is_none(), "optional segment should return None"); + let result = ".foo?.x".parse::()?.execute_per_row(&array)?; + + assert!(result.is_none(), "optional segment should return None"); + + Ok(()) +} + +#[test] +fn execute_optional_each_suppressed() -> Result<(), Error> { + let array = fixtures::nested_struct_column(); + + // Without `?`, `[]` on a struct (non-list) inner type errors. 
+ let err = ".[]".parse::()?.execute_per_row(&array); + assert!(matches!( + err, + Err(Error::Runtime(re_lenses_core::combinators::Error::Arrow(ref e))) + if matches!(e.as_ref(), ArrowError::InvalidArgumentError(..)) + )); + + // With `?`, the error is suppressed and we get `None`. + let result = ".[]?".parse::()?.execute_per_row(&array)?; + assert!( + result.is_none(), + "optional each should return None on non-list inner type" + ); + + Ok(()) +} + +#[test] +fn execute_non_null_field() -> Result<(), Error> { + let array = fixtures::nested_struct_column(); + + // Without `!`, row 1 is `[null]` (inner null within a list) + let without = ".location" + .parse::()? + .execute_per_row(&array)? + .unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(without)), @r#" + ┌────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Struct("x": Float64, "y": Float64)) │ + ╞════════════════════════════════════════════════╡ + │ [{x: 1.0, y: 2.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{x: 3.0, y: 4.0}, {x: 5.0, y: 6.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, {x: 7.0, y: 8.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, null] │ + └────────────────────────────────────────────────┘ + "#); + + // With `!`, all-null rows ([null] and [null, null]) are promoted to outer nulls + let result = ".location!" + .parse::()? + .execute_per_row(&array)? 
+ .unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r#" + ┌────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Struct("x": Float64, "y": Float64)) │ + ╞════════════════════════════════════════════════╡ + │ [{x: 1.0, y: 2.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{x: 3.0, y: 4.0}, {x: 5.0, y: 6.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, {x: 7.0, y: 8.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + └────────────────────────────────────────────────┘ + "#); + + Ok(()) +} + +#[test] +fn execute_non_null_nested() -> Result<(), Error> { + let array = fixtures::nested_struct_column(); + + // Without `!`, row 1 is `[null]` (inner null within a list) + let without = ".location" + .parse::()? + .execute_per_row(&array)? + .unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(without)), @r#" + ┌────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Struct("x": Float64, "y": Float64)) │ + ╞════════════════════════════════════════════════╡ + │ [{x: 1.0, y: 2.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{x: 3.0, y: 4.0}, {x: 5.0, y: 6.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, {x: 7.0, y: 8.0}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, null] │ + └────────────────────────────────────────────────┘ + "#); + + // With `!` on the intermediate field, null locations are promoted before accessing `.x` + let result = ".location!.x" + .parse::()? + .execute_per_row(&array)? 
+ .unwrap(); + + insta::assert_snapshot!(format!("{}", DisplayRB(result)), @r" + ┌─────────────────────┐ + │ col │ + │ --- │ + │ type: List(Float64) │ + ╞═════════════════════╡ + │ [1.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [3.0, 5.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, 7.0] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + └─────────────────────┘ + "); + + Ok(()) +} diff --git a/crates/store/re_arrow_combinators/tests/util.rs b/crates/store/re_lenses_core/tests/util.rs similarity index 55% rename from crates/store/re_arrow_combinators/tests/util.rs rename to crates/store/re_lenses_core/tests/util.rs index eb3ad56b90dc..d1cd47ca64c1 100644 --- a/crates/store/re_arrow_combinators/tests/util.rs +++ b/crates/store/re_lenses_core/tests/util.rs @@ -45,25 +45,27 @@ pub mod fixtures { #[test] fn example_nested_struct_column() { let array = nested_struct_column(); - insta::assert_snapshot!(format!("{}", super::DisplayRB(array)), @" - ┌──────────────────────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Struct[1]] │ - ╞══════════════════════════════════════════════════════════════╡ - │ [{location: {x: 1.0, y: 2.0}}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{location: null}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{location: {x: 3.0, y: 4.0}}, {location: {x: 5.0, y: 6.0}}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null, {location: {x: 7.0, y: 8.0}}] │ - └──────────────────────────────────────────────────────────────┘ - "); + insta::assert_snapshot!(format!("{}", super::DisplayRB(array)), @r#" + ┌────────────────────────────────────────────────────────────────────┐ + │ col │ + │ --- │ 
+ │ type: List(Struct("location": Struct("x": Float64, "y": Float64))) │ + ╞════════════════════════════════════════════════════════════════════╡ + │ [{location: {x: 1.0, y: 2.0}}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{location: null}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{location: {x: 3.0, y: 4.0}}, {location: {x: 5.0, y: 6.0}}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null, {location: {x: 7.0, y: 8.0}}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{location: null}, {location: null}] │ + └────────────────────────────────────────────────────────────────────┘ + "#); } pub fn nested_struct_column() -> ListArray { @@ -75,27 +77,28 @@ pub mod fixtures { Field::new("y", DataType::Float64, true), ]); - let x_nulls = NullBuffer::from(vec![true, false, true, true, false, true]); - let y_nulls = NullBuffer::from(vec![true, false, true, true, false, true]); + let x_nulls = NullBuffer::from(vec![true, false, true, true, false, true, false, false]); + let y_nulls = NullBuffer::from(vec![true, false, true, true, false, true, false, false]); let x_data = ArrayData::builder(DataType::Float64) - .len(6) + .len(8) .null_bit_buffer(Some(x_nulls.buffer().clone())) - .add_buffer(values_buffer.slice_with_length(0, 48)) + .add_buffer(values_buffer.slice_with_length(0, 64)) .build() .unwrap(); let y_data = ArrayData::builder(DataType::Float64) - .len(6) + .len(8) .null_bit_buffer(Some(y_nulls.buffer().clone())) - .add_buffer(values_buffer.slice_with_length(48, 48)) + .add_buffer(values_buffer.slice_with_length(48, 64)) .build() .unwrap(); let x_array = Float64Array::from(x_data); let y_array = Float64Array::from(y_data); - let inner_struct_nulls = 
NullBuffer::from(vec![true, false, true, true, false, true]); + let inner_struct_nulls = + NullBuffer::from(vec![true, false, true, true, false, true, false, false]); let inner_struct = StructArray::new( inner_struct_fields.clone(), vec![Arc::new(x_array), Arc::new(y_array)], @@ -108,16 +111,17 @@ pub mod fixtures { true, )]); - let outer_struct_nulls = NullBuffer::from(vec![true, true, true, true, false, true]); + let outer_struct_nulls = + NullBuffer::from(vec![true, true, true, true, false, true, true, true]); let outer_struct = StructArray::new( outer_struct_fields, vec![Arc::new(inner_struct)], Some(outer_struct_nulls), ); - let list_offsets = OffsetBuffer::from_lengths([1, 1, 0, 0, 2, 2]); + let list_offsets = OffsetBuffer::from_lengths([1, 1, 0, 0, 2, 2, 2]); - let list_nulls = NullBuffer::from(vec![true, true, true, false, true, true]); + let list_nulls = NullBuffer::from(vec![true, true, true, false, true, true, true]); ListArray::new( Arc::new(Field::new_list_field( @@ -134,25 +138,25 @@ pub mod fixtures { #[test] fn example_nested_list_struct_column() { let array = nested_list_struct_column(); - insta::assert_snapshot!(format!("{}", super::DisplayRB(array)), @" - ┌───────────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Struct[1]] │ - ╞═══════════════════════════════════════════════════╡ - │ [{poses: [{x: 1.0, y: 2.0}, {x: 3.0, y: 4.0}]}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{poses: [{x: 5.0, y: 6.0}]}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{poses: []}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{poses: [{x: 7.0, y: null}, {x: 9.0, y: 10.0}]}] │ - └───────────────────────────────────────────────────┘ - "); + insta::assert_snapshot!(format!("{}", super::DisplayRB(array)), @r#" + 
┌─────────────────────────────────────────────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Struct("poses": non-null List(non-null Struct("x": Float64, "y": Float64)))) │ + ╞═════════════════════════════════════════════════════════════════════════════════════════╡ + │ [{poses: [{x: 1.0, y: 2.0}, {x: 3.0, y: 4.0}]}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{poses: [{x: 5.0, y: 6.0}]}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{poses: []}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{poses: [{x: 7.0, y: null}, {x: 9.0, y: 10.0}]}] │ + └─────────────────────────────────────────────────────────────────────────────────────────┘ + "#); } pub fn nested_list_struct_column() -> ListArray { @@ -235,21 +239,21 @@ pub mod fixtures { #[test] fn example_nested_string_struct_column() { let list_array = nested_string_struct_column(); - insta::assert_snapshot!(super::DisplayRB(list_array.clone()), @r" - ┌───────────────────────────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable Struct[1]] │ - ╞═══════════════════════════════════════════════════════╡ - │ [{data: {names: alice, colors: red}}] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [null] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [{data: null}, {data: {names: dave, colors: yellow}}] │ - └───────────────────────────────────────────────────────┘ - "); + insta::assert_snapshot!(super::DisplayRB(list_array.clone()), @r#" + 
┌───────────────────────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(Struct("data": Struct("names": Utf8, "colors": Utf8))) │ + ╞═══════════════════════════════════════════════════════════════════╡ + │ [{data: {names: alice, colors: red}}] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [null] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [{data: null}, {data: {names: dave, colors: yellow}}] │ + └───────────────────────────────────────────────────────────────────┘ + "#); } /// Creates a nested struct column with string values from an underlying shared buffer. @@ -336,19 +340,104 @@ pub mod fixtures { ) } + #[test] + fn example_struct_column() { + let array = struct_column(); + insta::assert_snapshot!(format!("{}", super::DisplayRB(array)), @r#" + ┌──────────────────────────────────────────────────────────────┐ + │ col │ + │ --- │ + │ type: Struct("location": Struct("x": Float64, "y": Float64)) │ + ╞══════════════════════════════════════════════════════════════╡ + │ {location: {x: 1.0, y: 2.0}} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {location: null} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {location: {x: 3.0, y: 4.0}} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {location: {x: 5.0, y: null}} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {location: {x: 7.0, y: 8.0}} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {location: null} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {location: null} │ + └──────────────────────────────────────────────────────────────┘ + "#); + } + + /// A plain `StructArray` (not wrapped in a `ListArray`) with a single 
`"location"` field + /// containing `Struct { x: Float64, y: Float64 }`. + /// + /// This is the same as the inner data of [`nested_struct_column`] before it gets wrapped + /// in a `ListArray`. + pub fn struct_column() -> StructArray { + let values = shared_values_array(); + let values_buffer = values.to_data().buffers()[0].clone(); + + let inner_struct_fields = Fields::from(vec![ + Field::new("x", DataType::Float64, true), + Field::new("y", DataType::Float64, true), + ]); + + let x_nulls = NullBuffer::from(vec![true, false, true, true, false, true, false, false]); + let y_nulls = NullBuffer::from(vec![true, false, true, false, false, true, false, false]); + + let x_data = ArrayData::builder(DataType::Float64) + .len(8) + .null_bit_buffer(Some(x_nulls.buffer().clone())) + .add_buffer(values_buffer.slice_with_length(0, 64)) + .build() + .unwrap(); + + let y_data = ArrayData::builder(DataType::Float64) + .len(8) + .null_bit_buffer(Some(y_nulls.buffer().clone())) + .add_buffer(values_buffer.slice_with_length(48, 64)) + .build() + .unwrap(); + + let x_array = Float64Array::from(x_data); + let y_array = Float64Array::from(y_data); + + let inner_struct_nulls = + NullBuffer::from(vec![true, false, true, true, false, true, false, false]); + let inner_struct = StructArray::new( + inner_struct_fields.clone(), + vec![Arc::new(x_array), Arc::new(y_array)], + Some(inner_struct_nulls), + ); + + let outer_struct_fields = Fields::from(vec![Field::new( + "location", + DataType::Struct(inner_struct_fields), + true, + )]); + + let outer_struct_nulls = + NullBuffer::from(vec![true, true, true, true, false, true, true, true]); + StructArray::new( + outer_struct_fields, + vec![Arc::new(inner_struct)], + Some(outer_struct_nulls), + ) + } + #[test] fn example_list_not_nullable() { let array = list_not_nullable(); insta::assert_snapshot!(format!("{}", super::DisplayRB(array)), @r" - ┌─────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[u8] │ - ╞═════════════════════════╡ 
- │ [1, 2] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ [3, 4, 5] │ - └─────────────────────────┘ + ┌────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(non-null UInt8) │ + ╞════════════════════════════╡ + │ [1, 2] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [3, 4, 5] │ + └────────────────────────────┘ "); } @@ -368,15 +457,15 @@ pub mod fixtures { fn example_list_with_nulls() { let array = list_with_nulls(); insta::assert_snapshot!(format!("{}", super::DisplayRB(array)), @r" - ┌──────────────────────────────────┐ - │ col │ - │ --- │ - │ type: nullable List[nullable u8] │ - ╞══════════════════════════════════╡ - │ [1, 2] │ - ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ - │ null │ - └──────────────────────────────────┘ + ┌───────────────────┐ + │ col │ + │ --- │ + │ type: List(UInt8) │ + ╞═══════════════════╡ + │ [1, 2] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + └───────────────────┘ "); } @@ -392,4 +481,69 @@ pub mod fixtures { Some(nulls), ) } + + #[test] + fn example_deep_nested_list_column() { + let array = deep_nested_list_column(); + insta::assert_snapshot!(format!("{}", super::DisplayRB(array)), @r#" + ┌─────────────────────────────────┐ + │ col │ + │ --- │ + │ type: List(List(List(Float64))) │ + ╞═════════════════════════════════╡ + │ [[[1.0, 3.0], [5.0]], [[7.0]]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[[]]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[]] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ null │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [] │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ [[[9.0]]] │ + └─────────────────────────────────┘ + "#); + } + + pub fn deep_nested_list_column() -> ListArray { + let values = shared_values_array(); + let values_buffer = values.to_data().buffers()[0].clone(); + + let float_data = ArrayData::builder(DataType::Float64) + .len(5) + .add_buffer(values_buffer.slice_with_length(12 * 8, 5 * 8)) + .build() + .unwrap(); + let float_values = Float64Array::from(float_data); + + let inner_field = 
Arc::new(Field::new_list_field(DataType::Float64, true)); + let inner_offsets = OffsetBuffer::from_lengths([2, 1, 1, 0, 1]); + let inner_list = ListArray::new(inner_field, inner_offsets, Arc::new(float_values), None); + + let middle_field = Arc::new(Field::new_list_field( + DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))), + true, + )); + let middle_offsets = OffsetBuffer::from_lengths([2, 1, 1, 0, 1]); + let middle_list = ListArray::new(middle_field, middle_offsets, Arc::new(inner_list), None); + + // Outer ListArray (6 rows): lengths [2, 1, 1, 0, 0, 1], row 3 is null + let outer_field = Arc::new(Field::new_list_field( + DataType::List(Arc::new(Field::new_list_field( + DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))), + true, + ))), + true, + )); + let outer_offsets = OffsetBuffer::from_lengths([2, 1, 1, 0, 0, 1]); + let outer_nulls = NullBuffer::from(vec![true, true, true, false, true, true]); + + ListArray::new( + outer_field, + outer_offsets, + Arc::new(middle_list), + Some(outer_nulls), + ) + } } diff --git a/crates/store/re_log_channel/src/data_source_message.rs b/crates/store/re_log_channel/src/data_source_message.rs index 960b3eac9273..af6a65f2ae8a 100644 --- a/crates/store/re_log_channel/src/data_source_message.rs +++ b/crates/store/re_log_channel/src/data_source_message.rs @@ -11,11 +11,15 @@ use re_log_types::{LogMsg, StoreId, TableMsg, impl_into_enum}; /// May contain limited UI commands for instrumenting the state of the receiving end. #[derive(Clone, Debug)] pub enum DataSourceMessage { - /// The index of all the chunks in a recording. + /// A piece of the index of all the chunks in a recording. /// /// Some sources may send this, others may not. + /// There may be one or more of these, followed by [`Self::RrdManifestComplete`]. RrdManifest(StoreId, Arc), + /// All parts of the RRD manifest have been sent. + RrdManifestComplete(StoreId), + /// See [`LogMsg`]. 
LogMsg(LogMsg), @@ -34,7 +38,7 @@ impl re_byte_size::SizeBytes for DataSourceMessage { Self::RrdManifest(_, manifest) => manifest.heap_size_bytes(), Self::LogMsg(log_msg) => log_msg.heap_size_bytes(), Self::TableMsg(table_msg) => table_msg.heap_size_bytes(), - Self::UiCommand(_) => 0, + Self::RrdManifestComplete(_) | Self::UiCommand(_) => 0, } } } @@ -47,7 +51,8 @@ impl DataSourceMessage { /// The name of the variant, useful for error message etc pub fn variant_name(&self) -> &'static str { match self { - Self::RrdManifest { .. } => "RrdManifest", + Self::RrdManifest(..) => "RrdManifest", + Self::RrdManifestComplete(_) => "RrdManifestComplete", Self::LogMsg(_) => "LogMsg", Self::TableMsg(_) => "TableMsg", Self::UiCommand(_) => "UiCommand", @@ -59,7 +64,7 @@ impl DataSourceMessage { match self { Self::LogMsg(log_msg) => log_msg.insert_arrow_record_batch_metadata(key, value), Self::TableMsg(table_msg) => table_msg.insert_arrow_record_batch_metadata(key, value), - Self::RrdManifest { .. } | Self::UiCommand(_) => { + Self::RrdManifest(..) | Self::RrdManifestComplete(_) | Self::UiCommand(_) => { // Not everything needs latency tracking } } diff --git a/crates/store/re_log_channel/src/lib.rs b/crates/store/re_log_channel/src/lib.rs index 352cbb114b73..46581c57b2ef 100644 --- a/crates/store/re_log_channel/src/lib.rs +++ b/crates/store/re_log_channel/src/lib.rs @@ -18,6 +18,23 @@ pub use self::sender::LogSender; // --- Source --- +/// Controls how a newly loaded recording is treated by the viewer. +#[derive( + Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, serde::Deserialize, serde::Serialize, +)] +pub enum RecordingOpenBehavior { + /// Load without affecting the recording panel. + /// + /// Used for preview views. + Background, + + /// Mark as opened in the recording panel, but don't navigate to it. + Open, + + /// Mark as opened and make it the active recording. + OpenAndSelect, +} + /// An error that can occur when flushing. 
#[derive(Debug, thiserror::Error)] pub enum FlushError { @@ -78,8 +95,7 @@ pub enum LogSource { RedapGrpcStream { uri: re_uri::DatasetSegmentUri, - /// Switch to this recording once it has been loaded? - select_when_loaded: bool, + open_behavior: RecordingOpenBehavior, }, /// The data is streaming in via a message proxy. @@ -116,7 +132,7 @@ impl LogSource { } } - pub fn select_when_loaded(&self) -> bool { + pub fn open_behavior(&self) -> RecordingOpenBehavior { match self { Self::File { .. } | Self::Sdk @@ -124,11 +140,9 @@ impl LogSource { | Self::Stdin | Self::HttpStream { .. } | Self::JsChannel { .. } - | Self::MessageProxy { .. } => true, + | Self::MessageProxy { .. } => RecordingOpenBehavior::OpenAndSelect, - Self::RedapGrpcStream { - select_when_loaded, .. - } => *select_when_loaded, + Self::RedapGrpcStream { open_behavior, .. } => *open_behavior, } } @@ -149,7 +163,10 @@ impl LogSource { /// Same as [`Self::redap_uri`], but strips any extra query or fragment from the uri. pub fn stripped_redap_uri(&self) -> Option { self.redap_uri().map(|uri| match uri { - RedapUri::Catalog(_) | RedapUri::Entry(_) | RedapUri::Proxy(_) => uri, + RedapUri::Catalog(_) + | RedapUri::Entry(_) + | RedapUri::Folder(_) + | RedapUri::Proxy(_) => uri, RedapUri::DatasetData(uri) => RedapUri::DatasetData(uri.without_query_and_fragment()), }) } diff --git a/crates/store/re_log_encoding/Cargo.toml b/crates/store/re_log_encoding/Cargo.toml index e8a538ffb8a9..ea2ec81ee1c8 100644 --- a/crates/store/re_log_encoding/Cargo.toml +++ b/crates/store/re_log_encoding/Cargo.toml @@ -83,6 +83,7 @@ web-sys = { workspace = true, optional = true, features = ["Window", "MessageEve re_arrow_util = { workspace = true, features = ["test"] } re_sdk_types.workspace = true +tempfile.workspace = true criterion.workspace = true insta.workspace = true mimalloc.workspace = true diff --git a/crates/store/re_log_encoding/src/lib.rs b/crates/store/re_log_encoding/src/lib.rs index f8f2dccd451f..3dfdb291cd1c 100644 --- 
a/crates/store/re_log_encoding/src/lib.rs +++ b/crates/store/re_log_encoding/src/lib.rs @@ -7,7 +7,7 @@ //! that case. You can learn more about these traits below. //! //! If you are working with actual RRD streams (i.e. everything that does not go through gRPC: -//! files, standard I/O, HTTP, data loaders, etc), then have a look into the [`rrd`] module. +//! files, standard I/O, HTTP, importers, etc), then have a look into the [`rrd`] module. //! The [`ToTransport`]/[`ToApplication`] traits will also be useful to you. You can learn more //! about these traits below. //! @@ -59,7 +59,7 @@ //! * SDK comms: our legacy gRPC-based protocol, currently used by everything relying on the old //! `StoreHub` model (logging, message proxy, etc). //! * RRD streams: the binary protocol that we use for all stream-based interfaces (files, stdio, -//! data-loaders, HTTP fetches, etc). +//! importers, HTTP fetches, etc). //! //! *All these protocols use the exact same encoding*. There is only one encoding: the Rerun encoding. //! It often happens that one protocol makes use of some types while others don't (e.g. the diff --git a/crates/store/re_log_encoding/src/rrd/chunk_reader.rs b/crates/store/re_log_encoding/src/rrd/chunk_reader.rs new file mode 100644 index 000000000000..979325e985d8 --- /dev/null +++ b/crates/store/re_log_encoding/src/rrd/chunk_reader.rs @@ -0,0 +1,291 @@ +use std::fs::File; +use std::io::{Read as _, Seek as _, SeekFrom}; +use std::sync::Arc; + +use re_chunk::{Chunk, ChunkId}; +use re_span::Span; + +use crate::RrdManifest; +use crate::ToApplication as _; +use crate::rrd::CodecError; + +/// Maximum gap between two chunk spans that will still be merged into a single I/O read. +/// Spans separated by more than this are read independently. +const MERGE_GAP_BYTES: u64 = 64 * 1024; // 64 KiB + +/// Read chunks from an open RRD file by their IDs, using byte offsets from the manifest. 
+/// +/// Internally sorts requested chunks by byte offset for sequential I/O, +/// and merges adjacent/nearby spans (within 64 KiB) into single reads. +/// +/// Returns [`CodecError::ChunkNotInManifest`] if any chunk ID is not in the manifest. +/// Aborts on first error (no partial results). The returned chunks are ordered by ascending byte offset, not by their position in `chunk_ids`. +pub fn read_chunks( + file: &mut File, + manifest: &RrdManifest, + chunk_ids: &[ChunkId], +) -> Result>, CodecError> { + if chunk_ids.is_empty() { + return Ok(Vec::new()); + } + + let all_ids = manifest.col_chunk_ids(); + let offsets = manifest.col_chunk_byte_offset(); + let sizes = manifest.col_chunk_byte_size(); + + // Build a temporary lookup for the manifest's chunk IDs. + let id_to_row: std::collections::HashMap = + all_ids.iter().enumerate().map(|(i, &id)| (id, i)).collect(); + + // Resolve chunk IDs to (chunk_id, byte_span). + let mut entries: Vec<(ChunkId, Span)> = chunk_ids + .iter() + .map(|&id| -> Result<_, CodecError> { + let &row = id_to_row + .get(&id) + .ok_or(CodecError::ChunkNotInManifest { chunk_id: id })?; + Ok(( + id, + Span { + start: offsets[row], + len: sizes[row], + }, + )) + }) + .collect::>()?; + + if entries.is_empty() { + return Ok(Vec::new()); + } + + // Sort by offset for sequential I/O. + entries.sort_by_key(|&(_, span)| span.start); + + // Merge nearby spans into coalesced reads. + let groups = coalesce_spans(&entries); + + let mut result = Vec::with_capacity(entries.len()); + + for group in &groups { + // Read the entire merged span in one I/O call. + file.seek(SeekFrom::Start(group.byte_span.start))?; + let mut buf = vec![0u8; usize::try_from(group.byte_span.len)?]; + file.read_exact(&mut buf)?; + + // Slice out individual chunks and decode them.
+ for &(_chunk_id, chunk_span) in &entries[group.entry_range.clone()] { + let local_span = Span { + start: usize::try_from(chunk_span.start - group.byte_span.start)?, + len: usize::try_from(chunk_span.len)?, + }; + let chunk = decode_chunk_from_bytes(&buf[local_span.range()])?; + result.push(Arc::new(chunk)); + } + } + + Ok(result) +} + +/// A contiguous byte range covering one or more chunk spans. +struct CoalescedSpan { + byte_span: Span, + + /// Which entries (index range into the sorted entries slice) this span covers. + entry_range: std::ops::Range, +} + +/// Merge chunk spans that are adjacent or within [`MERGE_GAP_BYTES`] of each other. +/// Input must be sorted by offset. +fn coalesce_spans(entries: &[(ChunkId, Span)]) -> Vec { + let mut groups: Vec = Vec::new(); + + for (i, &(_id, span)) in entries.iter().enumerate() { + if let Some(last) = groups.last_mut() { + let last_end = last.byte_span.end(); + if span.start <= last_end + MERGE_GAP_BYTES { + // Extend the current group. + last.byte_span.len = span.end().max(last_end) - last.byte_span.start; + last.entry_range.end = i + 1; + continue; + } + } + // Start a new group. + groups.push(CoalescedSpan { + byte_span: span, + entry_range: i..i + 1, + }); + } + + groups +} + +/// Decode a chunk from raw protobuf `ArrowMsg` bytes. 
+fn decode_chunk_from_bytes(buf: &[u8]) -> Result { + use crate::rrd::Decodable as _; + + let transport_arrow_msg = re_protos::log_msg::v1alpha1::ArrowMsg::from_rrd_bytes(buf)?; + let app_arrow_msg = transport_arrow_msg.to_application(())?; + let chunk = Chunk::from_arrow_msg(&app_arrow_msg)?; + Ok(chunk) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::rrd::test_util::{ + encode_test_rrd, encode_test_rrd_to_file_with_options, make_test_chunks, + }; + + #[test] + fn test_read_chunks_roundtrip() { + let chunks = make_test_chunks(5); + let (rrd, store_id) = encode_test_rrd(&chunks); + let mut file = File::open(rrd.path()).unwrap(); + + let footer = crate::read_rrd_footer(&mut file).unwrap().unwrap(); + let raw_manifest = &footer.manifests[&store_id]; + let manifest = RrdManifest::try_new(raw_manifest).unwrap(); + + let chunk_ids = manifest.col_chunk_ids(); + assert_eq!(chunk_ids.len(), chunks.len()); + + // Read all chunks. + let loaded = read_chunks(&mut file, &manifest, chunk_ids).unwrap(); + assert_eq!(loaded.len(), chunks.len()); + + for (i, loaded_chunk) in loaded.iter().enumerate() { + assert_eq!(loaded_chunk.entity_path(), chunks[i].entity_path()); + assert_eq!(loaded_chunk.num_rows(), chunks[i].num_rows()); + } + } + + #[test] + fn test_read_chunks_subset() { + let chunks = make_test_chunks(5); + let (rrd, store_id) = encode_test_rrd(&chunks); + let mut file = File::open(rrd.path()).unwrap(); + + let footer = crate::read_rrd_footer(&mut file).unwrap().unwrap(); + let raw_manifest = &footer.manifests[&store_id]; + let manifest = RrdManifest::try_new(raw_manifest).unwrap(); + + // Read only the first and last chunk. 
+ let chunk_ids = manifest.col_chunk_ids(); + let subset = [chunk_ids[0], chunk_ids[chunk_ids.len() - 1]]; + let loaded = read_chunks(&mut file, &manifest, &subset).unwrap(); + assert_eq!(loaded.len(), 2); + } + + #[test] + fn test_read_chunks_unknown_id_errors() { + let chunks = make_test_chunks(3); + let (rrd, store_id) = encode_test_rrd(&chunks); + let mut file = File::open(rrd.path()).unwrap(); + + let footer = crate::read_rrd_footer(&mut file).unwrap().unwrap(); + let raw_manifest = &footer.manifests[&store_id]; + let manifest = RrdManifest::try_new(raw_manifest).unwrap(); + + let bogus_id = ChunkId::new(); + let result = read_chunks(&mut file, &manifest, &[bogus_id]); + assert!( + matches!(result, Err(crate::CodecError::ChunkNotInManifest { .. })), + "Expected ChunkNotInManifest error, got: {result:?}" + ); + } + + /// Shorthand for tests. + fn span(start: u64, len: u64) -> Span { + Span { start, len } + } + + #[test] + fn test_coalesce_spans_single_group() { + // All entries within MERGE_GAP_BYTES of each other → one group. + let entries = vec![ + (ChunkId::new(), span(100, 50)), + (ChunkId::new(), span(150, 50)), // adjacent + (ChunkId::new(), span(200, 50)), // adjacent + ]; + let groups = coalesce_spans(&entries); + assert_eq!(groups.len(), 1); + assert_eq!(groups[0].byte_span, span(100, 150)); // 100..250 + assert_eq!(groups[0].entry_range, 0..3); + } + + #[test] + fn test_coalesce_spans_multiple_groups() { + // Two clusters separated by more than MERGE_GAP_BYTES. 
+ let gap = MERGE_GAP_BYTES + 1; + let entries = vec![ + (ChunkId::new(), span(0, 100)), + (ChunkId::new(), span(100, 100)), // adjacent to first → same group + (ChunkId::new(), span(200 + gap, 100)), // far from second → new group + (ChunkId::new(), span(200 + gap + 100, 50)), // adjacent to third → same group + ]; + let groups = coalesce_spans(&entries); + assert_eq!(groups.len(), 2); + + assert_eq!(groups[0].byte_span, span(0, 200)); // 0..200 + assert_eq!(groups[0].entry_range, 0..2); + + assert_eq!(groups[1].byte_span, span(200 + gap, 150)); // (200+gap)..(200+gap+150) + assert_eq!(groups[1].entry_range, 2..4); + } + + #[test] + fn test_coalesce_spans_merge_gap_boundary() { + // Exactly at MERGE_GAP_BYTES → should still merge. + let entries = vec![ + (ChunkId::new(), span(0, 100)), + (ChunkId::new(), span(100 + MERGE_GAP_BYTES, 50)), // gap == MERGE_GAP_BYTES → merged + ]; + let groups = coalesce_spans(&entries); + assert_eq!(groups.len(), 1); + assert_eq!(groups[0].byte_span, span(0, 100 + MERGE_GAP_BYTES + 50)); + assert_eq!(groups[0].entry_range, 0..2); + + // One byte beyond → separate groups. 
+ let entries = vec![ + (ChunkId::new(), span(0, 100)), + (ChunkId::new(), span(100 + MERGE_GAP_BYTES + 1, 50)), + ]; + let groups = coalesce_spans(&entries); + assert_eq!(groups.len(), 2); + } + + #[test] + fn test_coalesce_spans_empty() { + let entries: Vec<(ChunkId, Span)> = vec![]; + let groups = coalesce_spans(&entries); + assert!(groups.is_empty()); + } + + #[test] + fn test_read_chunks_uncompressed() { + let chunks = make_test_chunks(3); + let rrd = tempfile::NamedTempFile::new().unwrap(); + + let store_id = re_log_types::StoreId::random(re_log_types::StoreKind::Recording, "test"); + encode_test_rrd_to_file_with_options( + rrd.path(), + &chunks, + &store_id, + true, + crate::EncodingOptions::PROTOBUF_UNCOMPRESSED, + ); + + let mut file = File::open(rrd.path()).unwrap(); + let footer = crate::read_rrd_footer(&mut file).unwrap().unwrap(); + let raw_manifest = &footer.manifests[&store_id]; + let manifest = RrdManifest::try_new(raw_manifest).unwrap(); + + let loaded = read_chunks(&mut file, &manifest, manifest.col_chunk_ids()).unwrap(); + assert_eq!(loaded.len(), chunks.len()); + + for (i, loaded_chunk) in loaded.iter().enumerate() { + assert_eq!(loaded_chunk.entity_path(), chunks[i].entity_path()); + assert_eq!(loaded_chunk.num_rows(), chunks[i].num_rows()); + } + } +} diff --git a/crates/store/re_log_encoding/src/rrd/decoder/iterator.rs b/crates/store/re_log_encoding/src/rrd/decoder/iterator.rs index 5b7c1ac8b05e..7d1931ed03fd 100644 --- a/crates/store/re_log_encoding/src/rrd/decoder/iterator.rs +++ b/crates/store/re_log_encoding/src/rrd/decoder/iterator.rs @@ -295,10 +295,7 @@ mod tests { ) -> Result { re_tracing::profile_function!(); let mut encoder = Encoder::new_eager(version, options, write)?; - let mut size_bytes = 0; - for message in messages { - size_bytes += encoder.append(message?.borrow())?; - } + let size_bytes = encoder.extend(messages)?; { encoder.flush_blocking()?; @@ -387,7 +384,7 @@ mod tests { for message in messages.clone() { unsafe { 
encoder - .append_transport(&message) + .append_transport_without_footer(&message) .expect("encoding should succeed"); } } @@ -428,7 +425,7 @@ mod tests { for message in out_of_order_messages.clone() { unsafe { encoder - .append_transport(&message) + .append_transport_without_footer(&message) .expect("encoding should succeed"); } } diff --git a/crates/store/re_log_encoding/src/rrd/decoder/state_machine.rs b/crates/store/re_log_encoding/src/rrd/decoder/state_machine.rs index d5ee15e40b47..804a827b3bd9 100644 --- a/crates/store/re_log_encoding/src/rrd/decoder/state_machine.rs +++ b/crates/store/re_log_encoding/src/rrd/decoder/state_machine.rs @@ -266,7 +266,7 @@ impl Decoder { return self.try_read(); } - err @ Err(_) => err?, + Err(err) => Err(err)?, }; self.byte_chunks diff --git a/crates/store/re_log_encoding/src/rrd/encoder.rs b/crates/store/re_log_encoding/src/rrd/encoder.rs index 13515c1fbd4b..a7854d249613 100644 --- a/crates/store/re_log_encoding/src/rrd/encoder.rs +++ b/crates/store/re_log_encoding/src/rrd/encoder.rs @@ -85,7 +85,7 @@ pub struct Encoder { /// /// If set to `None`, the footer will not be computed. /// - /// Calling [`Self::append_transport`] will automatically disable footers. + /// Calling [`Self::append_transport_without_footer`] will automatically disable footers. footer_state: Option, /// Tracks whether the end-of-stream marker, and optionally the associated footer, have been @@ -213,9 +213,7 @@ impl Encoder> { ) -> Result, EncodeError> { re_tracing::profile_function!(); let mut encoder = Self::local()?; - for message in messages { - encoder.append(message?.borrow())?; - } + encoder.extend(messages)?; encoder.finish()?; encoder.into_inner() } @@ -258,40 +256,11 @@ impl Encoder { /// Returns the size in bytes of the encoded data. 
pub fn append(&mut self, message: &re_log_types::LogMsg) -> Result { - if self.is_finished { - return Err(EncodeError::AlreadyFinished); - } - - let Some(w) = self.write.as_mut() else { - return Err(EncodeError::AlreadyUnwrapped); - }; - re_tracing::profile_function!(); let transport = message.to_transport(self.compression)?; - let byte_offset_excluding_header = - self.num_written + crate::MessageHeader::ENCODED_SIZE_BYTES as u64; - - self.scratch.clear(); - let n = match self.serializer { - Serializer::Protobuf => { - transport.to_rrd_bytes(&mut self.scratch)?; - let n = w - .write_all(&self.scratch) - .map(|_| self.scratch.len() as u64) - .map_err(EncodeError::Write)?; - self.num_written += n; - n - } - }; - - let byte_size_excluding_header = n - crate::MessageHeader::ENCODED_SIZE_BYTES as u64; - - let byte_span_excluding_header = re_span::Span { - start: byte_offset_excluding_header, - len: byte_size_excluding_header, - }; + let (n, byte_span_excluding_header) = self.write_encodable(&transport)?; if let Some(footer_state) = self.footer_state.as_mut() { footer_state.append( @@ -304,6 +273,21 @@ impl Encoder { Ok(n) } + /// Returns the size in bytes of the encoded data. + pub fn extend( + &mut self, + messages: impl IntoIterator>>, + ) -> Result { + re_tracing::profile_function!(); + + let mut size_bytes = 0; + // TODO(emilk): call `.to_transport` in parallel. + for message in messages { + size_bytes += self.append(message?.borrow())?; + } + Ok(size_bytes) + } + /// Instructs the encoder to _not_ emit a footer at the end of the stream. /// /// This cannot be reverted. @@ -320,21 +304,31 @@ impl Encoder { /// `message` must respect the global settings of the encoder (e.g. the compression used), /// otherwise the resulting RRD stream will be corrupt and unreadable. 
#[expect(unsafe_code)] - pub unsafe fn append_transport( + pub unsafe fn append_transport_without_footer( &mut self, message: &re_protos::log_msg::v1alpha1::log_msg::Msg, ) -> Result<(re_span::Span, u64), EncodeError> { - if self.is_finished { - return Err(EncodeError::AlreadyFinished); - } - - re_tracing::profile_function!(); - // We cannot update the RRD manifest without decoding the message, which would defeat the // entire purposes of using this method in the first place. // Therefore, we disable footers if and when this method is used. self.do_not_emit_footer(); + let (_, byte_span_excluding_header) = self.write_encodable(message)?; + + Ok((byte_span_excluding_header, message.byte_size_uncompressed())) + } + + /// Encode and write a message, returning `(total_bytes_written, byte_span_excluding_header)`. + fn write_encodable( + &mut self, + encodable: &dyn crate::Encodable, + ) -> Result<(u64, re_span::Span), EncodeError> { + re_tracing::profile_function!(); + + if self.is_finished { + return Err(EncodeError::AlreadyFinished); + } + let Some(w) = self.write.as_mut() else { return Err(EncodeError::AlreadyUnwrapped); }; @@ -345,10 +339,10 @@ impl Encoder { self.scratch.clear(); let n = match self.serializer { Serializer::Protobuf => { - message.to_rrd_bytes(&mut self.scratch)?; + encodable.to_rrd_bytes(&mut self.scratch)?; let n = w .write_all(&self.scratch) - .map(|_| self.scratch.len() as u64) + .map(|()| self.scratch.len() as u64) .map_err(EncodeError::Write)?; self.num_written += n; n @@ -362,7 +356,7 @@ impl Encoder { len: byte_size_excluding_header, }; - Ok((byte_span_excluding_header, message.byte_size_uncompressed())) + Ok((n, byte_span_excluding_header)) } /// Like [`Self::finish`], but appends the specified, custom RRD footer. 
@@ -503,11 +497,7 @@ impl Encoder { ) -> Result { re_tracing::profile_function!(); let mut encoder = Encoder::new_eager(version, options, write)?; - let mut size_bytes = 0; - for message in messages { - size_bytes += encoder.append(message?.borrow())?; - } - Ok(size_bytes) + encoder.extend(messages) } } diff --git a/crates/store/re_log_encoding/src/rrd/errors.rs b/crates/store/re_log_encoding/src/rrd/errors.rs index ad745e295c01..208526cea3c2 100644 --- a/crates/store/re_log_encoding/src/rrd/errors.rs +++ b/crates/store/re_log_encoding/src/rrd/errors.rs @@ -81,6 +81,12 @@ pub enum CodecError { #[error("Integer overflow: {0}")] Overflow(#[from] std::num::TryFromIntError), + + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + #[error("Chunk {chunk_id} not found in manifest")] + ChunkNotInManifest { chunk_id: re_chunk::ChunkId }, } const _: () = assert!( diff --git a/crates/store/re_log_encoding/src/rrd/footer/builders.rs b/crates/store/re_log_encoding/src/rrd/footer/builders.rs index 3fecaa70cd3b..b1bfadb2057a 100644 --- a/crates/store/re_log_encoding/src/rrd/footer/builders.rs +++ b/crates/store/re_log_encoding/src/rrd/footer/builders.rs @@ -345,8 +345,8 @@ impl RrdManifestBuilder { )) as ArrayRef; let columns_static = columns_static - .into_iter() - .flat_map(|(_desc, col)| [create_index_has_data_array(col.has_static_data)]); + .into_values() + .flat_map(|col| [create_index_has_data_array(col.has_static_data)]); let columns_temporal = columns_temporal.values().flat_map(|col| { [ @@ -355,7 +355,7 @@ impl RrdManifestBuilder { ] }); - let columns = columns.into_iter().flat_map(|(_key, col)| { + let columns = columns.into_values().flat_map(|col| { [ create_index_bound_array(col.timeline.typ(), &col.index.starts_inclusive), create_index_bound_array(col.timeline.typ(), &col.index.ends_inclusive), diff --git a/crates/store/re_log_encoding/src/rrd/footer/raw_rrd_manifest.rs b/crates/store/re_log_encoding/src/rrd/footer/raw_rrd_manifest.rs index 
e65c41c99b79..862a4fb9566c 100644 --- a/crates/store/re_log_encoding/src/rrd/footer/raw_rrd_manifest.rs +++ b/crates/store/re_log_encoding/src/rrd/footer/raw_rrd_manifest.rs @@ -1,8 +1,12 @@ use std::collections::{BTreeMap, HashMap}; -use arrow::array::{BinaryArray, BooleanArray, FixedSizeBinaryArray, StringArray, UInt64Array}; +use arrow::array::RecordBatch; use arrow::buffer::NullBuffer; use arrow::datatypes::Field; +use arrow::{ + array::{BinaryArray, BooleanArray, FixedSizeBinaryArray, StringArray, UInt64Array}, + error::ArrowError, +}; use itertools::Itertools as _; use re_chunk::external::nohash_hasher::IntMap; use re_chunk::external::re_byte_size; @@ -11,7 +15,7 @@ use re_log_types::external::re_tuid::Tuid; use re_log_types::{AbsoluteTimeRange, EntityPath, StoreId, TimeType}; use re_types_core::ComponentDescriptor; -use crate::{CodecResult, Decodable as _, StreamFooterEntry, ToApplication as _}; +use crate::{CodecError, CodecResult, Decodable as _, StreamFooterEntry, ToApplication as _}; /// The payload found in [`super::RrdFooter`]s. /// @@ -233,6 +237,123 @@ impl std::fmt::Display for RrdManifestSha256 { } impl RawRrdManifest { + /// Concatenate multiple manifests by appending all rows together. + /// + /// All manifests must be for the same recording (same `store_id` and sorbet schema). + /// The data `RecordBatch`es are concatenated. + /// + /// This is used when the server sends a manifest in multiple parts. + pub fn concat(manifests: &[&Self]) -> Result { + re_tracing::profile_function!(); + + let first = manifests.first().ok_or_else(|| { + ArrowError::InvalidArgumentError("No manifests to concatenate".to_owned()) + })?; + + for other in &manifests[1..] 
{ + if first.store_id != other.store_id { + return Err(ArrowError::SchemaError( + "Mismatching store_id in RawRrdManifest::concat".to_owned(), + )); + } + + if first.sorbet_schema_sha256 != other.sorbet_schema_sha256 { + return Err(ArrowError::SchemaError( + "Mismatching sorbet recording schemas in RawRrdManifest::concat".to_owned(), + )); + } + + if first.data.schema() != other.data.schema() { + re_log::debug!( + "Different schemas in the RrdManifest ({} columns in existing, {} in the new part)", + first.data.num_columns(), + other.data.num_columns(), + ); + } + } + + let batches: Vec<&RecordBatch> = manifests.iter().map(|m| &m.data).collect(); + let data = arrow::compute::concat_batches(&first.data.schema(), batches)?; + + Ok(Self { + store_id: first.store_id.clone(), + sorbet_schema: first.sorbet_schema.clone(), + sorbet_schema_sha256: first.sorbet_schema_sha256, + data, + }) + } + + /// Merges multiple manifests into one, tolerating schema differences. + /// + /// Unlike [`Self::concat`] — which requires identical schemas and `store_id`s — this allows + /// each input to carry its own schema. Fields that exist in some manifests but not others are + /// padded with nulls during merging. Sorbet schemas are unified via + /// [`arrow::datatypes::Schema::try_merge`] and the hash is recomputed. + /// + /// The resulting manifest's `store_id` is set to the provided value — the individual inputs' + /// `store_id`s are ignored, since this method is designed for cases where the inputs come from + /// distinct recordings (e.g. per-layer manifests being merged into a segment-scoped one). + /// + /// The `:has_static_data` and `:num_rows` columns are non-nullable in the `RawRrdManifest` + /// contract: cross-layer null values are collapsed to the underlying buffer default + /// (`false` / `0`) so that clients get the same shape regardless of whether a column came + /// from one layer or many. 
This mirrors what `ExtendedRrdManifest::try_into_standard_manifest` + /// does on the commercial side. + pub fn merge(store_id: StoreId, manifests: Vec) -> CodecResult { + re_tracing::profile_function!(); + + if manifests.is_empty() { + return Err(CodecError::ArrowDeserialization( + ArrowError::InvalidArgumentError("cannot merge 0 manifests".to_owned()), + )); + } + + let parts: Vec<_> = manifests + .into_iter() + .map(|m| (m.sorbet_schema, m.data)) + .collect(); + + let (sorbet_schema, sorbet_schema_sha256, data) = Self::merge_polymorphic_parts(parts)?; + + let data = strip_null_mask_on_default_columns(data)?; + + Ok(Self { + store_id, + sorbet_schema, + sorbet_schema_sha256, + data, + }) + } + + /// Polymorphic-merge kernel: unify sorbet schemas and concatenate data batches. + /// + /// Takes one `(sorbet_schema, data_batch)` pair per logical input manifest, and returns + /// `(merged_sorbet_schema, merged_sha256, merged_data_batch)`. + pub fn merge_polymorphic_parts( + parts: Vec<(arrow::datatypes::Schema, RecordBatch)>, + ) -> CodecResult<(arrow::datatypes::Schema, [u8; 32], RecordBatch)> { + use re_arrow_util::{RecordBatchExt as _, concat_polymorphic_batches}; + + let (sorbet_schemas, data_batches): (Vec<_>, Vec<_>) = parts.into_iter().unzip(); + + let sorbet_schema = arrow::datatypes::Schema::try_merge(sorbet_schemas) + .map_err(CodecError::ArrowDeserialization)?; + let sorbet_schema_sha256 = Self::compute_sorbet_schema_sha256(&sorbet_schema) + .map_err(CodecError::ArrowSerialization)?; + + // Make all fields nullable so `concat_polymorphic_batches` can backfill missing columns + // with nulls. + let nullable_batches: Vec = data_batches + .into_iter() + .map(|b| b.make_nullable()) + .collect(); + + let data = concat_polymorphic_batches(&nullable_batches) + .map_err(CodecError::ArrowDeserialization)?; + + Ok((sorbet_schema, sorbet_schema_sha256, data)) + } + /// High-level helper to parse [`RawRrdManifest`]s from raw RRD bytes. 
/// /// This does not decode all the data, but rather goes straight to the RRD footer (if any). @@ -253,9 +374,9 @@ impl RawRrdManifest { Ok(footer) => footer, // That was in fact _not_ a footer. - Err(crate::CodecError::FrameDecoding(_)) => return Ok(vec![]), + Err(CodecError::FrameDecoding(_)) => return Ok(vec![]), - err @ Err(_) => err?, + Err(err) => Err(err)?, }; let mut manifests = Vec::new(); @@ -271,7 +392,7 @@ impl RawRrdManifest { let rrd_footer_byte_span = rrd_footer_byte_span .try_cast::() .ok_or_else(|| { - crate::CodecError::FrameDecoding( + CodecError::FrameDecoding( "RRD footer too large for native bit width".to_owned(), ) })? @@ -281,7 +402,7 @@ impl RawRrdManifest { let crc = crate::StreamFooter::compute_crc(rrd_footer_bytes); if crc != crc_excluding_header { - return Err(crate::CodecError::CrcMismatch { + return Err(CodecError::CrcMismatch { expected: crc_excluding_header, got: crc, }); @@ -345,7 +466,7 @@ impl RawRrdManifest { /// Computes the sha256 hash of the manifest's data, which can be used as a unique ID. // // TODO(cmc): very expensive, should be cached somewhere. 
- pub fn compute_sha256(&self) -> Result { + pub fn compute_sha256(&self) -> Result { re_tracing::profile_function!(); let data_ipc = { @@ -385,13 +506,11 @@ impl RawRrdManifest { .map(|(f, c)| { c.downcast_array_ref::() .ok_or_else(|| { - crate::CodecError::ArrowDeserialization( - arrow::error::ArrowError::SchemaError(format!( - "'{}' should be a BooleanArray, but it's a {} instead", - f.name(), - c.data_type(), - )), - ) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "'{}' should be a BooleanArray, but it's a {} instead", + f.name(), + c.data_type(), + ))) }) .map(|c| (f, c)) }) @@ -411,7 +530,7 @@ impl RawRrdManifest { } let Some(component) = f.metadata().get("rerun:component") else { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!( "column '{}' is missing rerun:component metadata", f.name() @@ -479,58 +598,48 @@ impl RawRrdManifest { continue; } - pub fn get_index_name(field: &arrow::datatypes::Field) -> Option<&str> { - field.metadata().get("rerun:index").map(|s| s.as_str()) - } - - pub fn is_specific_index(field: &arrow::datatypes::Field, index_name: &str) -> bool { - get_index_name(field) == Some(index_name) - } - let Some((_, col_start)) = itertools::izip!(fields, columns).find(|(f, _col)| { - is_specific_index(f, index) + Self::is_specific_index(f, index) && f.name().ends_with(":start") && f.metadata().get("rerun:component") == Some(component) }) else { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!("start index is missing for {component}"), })); }; let Some((_, col_end)) = itertools::izip!(fields, columns).find(|(f, _col)| { - is_specific_index(f, index) + Self::is_specific_index(f, index) && f.name().ends_with(":end") && f.metadata().get("rerun:component") == Some(component) }) else { - return Err(crate::CodecError::from(ChunkError::Malformed { + return 
Err(CodecError::from(ChunkError::Malformed { reason: format!("end index is missing for {component}"), })); }; let Some((field_num_rows, col_num_rows)) = itertools::izip!(fields, columns).find(|(f, _col)| { - is_specific_index(f, index) + Self::is_specific_index(f, index) && f.name().ends_with(":num_rows") && f.metadata().get("rerun:component") == Some(component) }) else { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!("num_rows index is missing for {component}"), })); }; - let (time_type, col_start_raw) = TimeType::from_arrow_array(col_start) - .map_err(crate::CodecError::ArrowDeserialization)?; - let (_, col_end_raw) = TimeType::from_arrow_array(col_end) - .map_err(crate::CodecError::ArrowDeserialization)?; + let (time_type, col_start_raw) = + TimeType::from_arrow_array(col_start).map_err(CodecError::ArrowDeserialization)?; + let (_, col_end_raw) = + TimeType::from_arrow_array(col_end).map_err(CodecError::ArrowDeserialization)?; let col_num_rows_raw: &[u64] = col_num_rows .downcast_array_ref::() .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!( - "'{}' should be a BooleanArray, but it's a {} instead", - field_num_rows.name(), - col_num_rows.data_type(), - ), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "'{}' should be a UInt64Array, but it's a {} instead", + field_num_rows.name(), + col_num_rows.data_type(), + ))) })? .values(); @@ -635,11 +744,78 @@ impl PartialEq for RawRrdManifest { } } +// Index-column helpers. +// +// Rerun index columns are tagged with `rerun:*` metadata keys that describe what kind of index +// they represent (static vs temporal, sequence vs timestamp, start/end/len marker, etc). These +// helpers centralize the key-name conventions so downstream code doesn't reach into the metadata +// map directly.
+impl RawRrdManifest { + /// `true` if the field is a Rerun index column (temporal or static). + pub fn is_index(field: &Field) -> bool { + field.metadata().contains_key("rerun:index") + } + + /// The index name (e.g. `"frame_nr"`, `"log_time"`, `"static"`) for a field, if any. + pub fn get_index_name(field: &Field) -> Option<&str> { + field.metadata().get("rerun:index").map(|s| s.as_str()) + } + + /// The index kind (`"sequence"`, `"timestamp"`, `"duration"`) for a field, if any. + pub fn get_index_kind(field: &Field) -> Option<&str> { + field.metadata().get("rerun:index_kind").map(|s| s.as_str()) + } + + /// `true` if the field is a Rerun index column with the given name. + pub fn is_specific_index(field: &Field, index_name: &str) -> bool { + Self::get_index_name(field) == Some(index_name) + } + + /// `true` if the field belongs to the static-data pseudo-index. + pub fn is_index_static(field: &Field) -> bool { + Self::is_specific_index(field, "static") + } + + /// `true` if the field is the `:start` marker of an index. + pub fn is_index_start(field: &Field) -> bool { + field + .metadata() + .get("rerun:index_marker") + .map(|s| s.as_str()) + == Some("start") + } + + /// `true` if the field is the `:end` marker of an index. + pub fn is_index_end(field: &Field) -> bool { + field + .metadata() + .get("rerun:index_marker") + .map(|s| s.as_str()) + == Some("end") + } + + /// `true` if the field is the `:len` marker of an index. + pub fn is_index_length(field: &Field) -> bool { + field + .metadata() + .get("rerun:index_marker") + .map(|s| s.as_str()) + == Some("len") + } + + /// `true` if the field is a temporal index column (not static, not per-component). 
+ pub fn is_index_global_temporal(field: &Field) -> bool { + Self::is_index(field) + && !Self::is_index_static(field) + && !field.metadata().contains_key("rerun:component") + } +} + // Helpers impl RawRrdManifest { pub fn compute_sorbet_schema_sha256( schema: &arrow::datatypes::Schema, - ) -> Result<[u8; 32], arrow::error::ArrowError> { + ) -> Result<[u8; 32], ArrowError> { let schema = { // Sort and remove top-level metadata before hashing. let mut fields = schema.fields().to_vec(); @@ -708,6 +884,33 @@ impl RawRrdManifest { // Remove leading underscore if present sanitized.trim_start_matches('_').to_owned() } + + // Prune the RecordBatch to keep only columns needed for chunk fetching. + // + // The full manifest may have 1000+ sparse columns (one per timeline×component pair), + // which Arrow inflates because it allocates full-length buffers even for + // mostly-null columns. The maps above already captured all indexing data from these + // sparse columns; the RecordBatch is only used for `take_record_batch` → `FetchChunks`, + // which needs exactly: chunk_id, chunk_key, chunk_partition_id, rerun_partition_layer. + // + // By dropping the sparse columns here, we reduce memory ~10-20x + // and make `take_record_batch` 100x faster. 
+ pub(super) fn chunk_fetcher_record_batch(&self) -> RecordBatch { + let columns_to_keep = super::RrdManifest::CHUNK_FETCHER_COLUMNS; + + let schema = self.data.schema_ref(); + let indices: Vec = schema + .fields() + .iter() + .enumerate() + .filter(|(_, field)| columns_to_keep.contains(&field.name().as_str())) + .map(|(i, _)| i) + .collect(); + + self.data + .project(&indices) + .unwrap_or_else(|_| self.data.clone()) + } } // Sanity checks @@ -775,7 +978,7 @@ impl RawRrdManifest { "has_static_data" => { if field.data_type() != Self::field_chunk_is_static().data_type() { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!( "field '{}' should be {} but is actually {}", field.name(), @@ -788,7 +991,7 @@ impl RawRrdManifest { "num_rows" => { if field.data_type() != Self::field_chunk_num_rows().data_type() { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!( "field '{}' should be {} but is actually {}", field.name(), @@ -800,7 +1003,7 @@ impl RawRrdManifest { } suffix => { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!( "field '{}' has invalid suffix '{suffix}'", field.name(), @@ -823,7 +1026,7 @@ impl RawRrdManifest { name if Self::COMMON_IMPL_SPECIFIC_FIELDS.contains(&name) => {} name => { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!( "unexpected field '{name}' should not be present in an RRD manifest", ), @@ -850,7 +1053,7 @@ impl RawRrdManifest { .schema_ref() .field_with_name(&format!("{prefix}:{counterpart}")) .map_err(|_err| { - crate::CodecError::from(ChunkError::Malformed { + CodecError::from(ChunkError::Malformed { reason: format!( "field '{}' does not have matching `:{counterpart}` field", field.name() @@ -864,7 +1067,7 @@ impl 
RawRrdManifest { | arrow::datatypes::DataType::Duration(_) => {} datatype => { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!( "field '{}' is {datatype} which is not a supported index datatype", field.name(), @@ -874,7 +1077,7 @@ impl RawRrdManifest { } if field.data_type() != field_counterpart.data_type() { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!( "field '{}' is {} but field '{}' is {}", field.name(), @@ -897,7 +1100,7 @@ impl RawRrdManifest { .schema_ref() .field_with_name(&format!("{prefix}:num_rows")) .map_err(|_err| { - crate::CodecError::from(ChunkError::Malformed { + CodecError::from(ChunkError::Malformed { reason: format!( "field '{}' does not have matching `:num_rows` field", field.name() @@ -908,7 +1111,7 @@ impl RawRrdManifest { match field_num_rows.data_type() { arrow::datatypes::DataType::UInt64 => {} datatype => { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!( "field '{}' is {datatype} while it should be UInt64Array", field_num_rows.name(), @@ -953,7 +1156,7 @@ impl RawRrdManifest { for column in &sorbet_columns { let md = column.metadata(); let Some(component) = md.get("rerun:component") else { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!( "column '{}' is missing rerun:component metadata", column.name() @@ -979,7 +1182,7 @@ impl RawRrdManifest { .schema_ref() .field_with_name(&column_name) .map_err(|_err| { - crate::CodecError::from(ChunkError::Malformed { + CodecError::from(ChunkError::Malformed { reason: format!("static index '{column_name}' is missing"), }) })?; @@ -1004,7 +1207,7 @@ impl RawRrdManifest { for suffix in ["start", "end"] { let field = 
rrd_manifest_fields.remove(&format!("{sorbet_index_name_normalized}:{suffix}")) .ok_or_else(|| { - crate::CodecError::from(ChunkError::Malformed { + CodecError::from(ChunkError::Malformed { reason: format!( "global index '{sorbet_index}' does not have matching `:{suffix}` field" ), @@ -1012,7 +1215,7 @@ impl RawRrdManifest { })?; if sorbet_index.data_type() != field.data_type() { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!( "global index '{}' is {} but '{}' is {}", sorbet_index.name(), @@ -1029,7 +1232,7 @@ impl RawRrdManifest { let md = sorbet_column.metadata(); let Some(component) = md.get("rerun:component") else { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!( "column '{}' is missing rerun:component metadata", sorbet_column.name() @@ -1069,7 +1272,7 @@ impl RawRrdManifest { }; if sorbet_index.data_type() != field.data_type() { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!( "local index '{}' is {} but '{}' is {}", sorbet_index.name(), @@ -1084,7 +1287,7 @@ impl RawRrdManifest { } if !rrd_manifest_fields.is_empty() { - return Err(crate::CodecError::from(ChunkError::Malformed { + return Err(CodecError::from(ChunkError::Malformed { reason: format!( "detected dangling indexes (present in manifest but not in Sorbet schema): {:?}", rrd_manifest_fields.keys() @@ -1098,11 +1301,11 @@ impl RawRrdManifest { /// Costly. 
fn check_sorbet_schema_sha256_is_correct(&self) -> CodecResult<()> { let expected_sorbet_schema_sha256 = Self::compute_sorbet_schema_sha256(&self.sorbet_schema) - .map_err(crate::CodecError::ArrowDeserialization)?; + .map_err(CodecError::ArrowDeserialization)?; if self.sorbet_schema_sha256 != expected_sorbet_schema_sha256 { - return Err(crate::CodecError::ArrowDeserialization( - arrow::error::ArrowError::SchemaError(format!( + return Err(CodecError::ArrowDeserialization(ArrowError::SchemaError( + format!( "invalid schema hash: expected {} but got {}", expected_sorbet_schema_sha256 .iter() @@ -1112,8 +1315,8 @@ impl RawRrdManifest { .iter() .map(|b| format!("{b:02x}")) .collect::(), - )), - )); + ), + ))); } Ok(()) } @@ -1306,15 +1509,15 @@ impl RawRrdManifest { self.data .column_by_name(name) .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot read column: '{name}' is missing from batch",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot read column: '{name}' is missing from batch", + ))) })? .downcast_array_ref::() .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot downcast column: '{name}' is not a StringArray",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot downcast column: '{name}' is not a StringArray", + ))) }) } @@ -1334,15 +1537,15 @@ impl RawRrdManifest { self.data .column_by_name(name) .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot read column: '{name}' is missing from batch",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot read column: '{name}' is missing from batch", + ))) })? 
.downcast_array_ref::() .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot downcast column: '{name}' is not a FixedSizeBinaryArray",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot downcast column: '{name}' is not a FixedSizeBinaryArray", + ))) }) } @@ -1376,15 +1579,15 @@ impl RawRrdManifest { self.data .column_by_name(name) .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot read column: '{name}' is missing from batch",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot read column: '{name}' is missing from batch", + ))) })? .downcast_array_ref::() .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot downcast column: '{name}' is not a BooleanArray",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot downcast column: '{name}' is not a BooleanArray", + ))) }) } @@ -1402,15 +1605,15 @@ impl RawRrdManifest { self.data .column_by_name(name) .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot read column: '{name}' is missing from batch",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot read column: '{name}' is missing from batch", + ))) })? 
.downcast_array_ref::() .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot downcast column: '{name}' is not a UInt64Array",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot downcast column: '{name}' is not a UInt64Array", + ))) }) } @@ -1428,15 +1631,15 @@ impl RawRrdManifest { self.data .column_by_name(name) .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot read column: '{name}' is missing from batch",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot read column: '{name}' is missing from batch", + ))) })? .downcast_array_ref::() .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot downcast column: '{name}' is not a UInt64Array",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot downcast column: '{name}' is not a UInt64Array", + ))) }) } @@ -1456,15 +1659,15 @@ impl RawRrdManifest { self.data .column_by_name(name) .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot read column: '{name}' is missing from batch",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot read column: '{name}' is missing from batch", + ))) })? 
.downcast_array_ref::() .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot downcast column: '{name}' is not a UInt64Array",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot downcast column: '{name}' is not a UInt64Array", + ))) }) } @@ -1486,15 +1689,15 @@ impl RawRrdManifest { self.data .column_by_name(name) .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot read column: '{name}' is missing from batch",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot read column: '{name}' is missing from batch", + ))) })? .downcast_array_ref::() .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot downcast column: '{name}' is not a UInt64Array",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot downcast column: '{name}' is not a UInt64Array", + ))) }) } @@ -1517,15 +1720,72 @@ impl RawRrdManifest { self.data .column_by_name(name) .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot read column: '{name}' is missing from batch",), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot read column: '{name}' is missing from batch", + ))) })? .downcast_array_ref::() .ok_or_else(|| { - crate::CodecError::ArrowDeserialization(arrow::error::ArrowError::SchemaError( - format!("cannot downcast column: '{name}' is not a BinaryArray"), - )) + CodecError::ArrowDeserialization(ArrowError::SchemaError(format!( + "cannot downcast column: '{name}' is not a BinaryArray" + ))) }) } } + +/// Strip the null mask from columns that the [`RawRrdManifest`] contract says are non-nullable +/// with a default value (`:has_static_data` → `false`, `:num_rows` → `0`). 
+/// +/// After [`crate::RrdManifestBuilder`] emits a manifest for a single layer, these columns are +/// fully populated. But once we merge multiple per-layer manifests via polymorphic concat, a +/// component only known to one layer will have nulls in the rows from other layers. The contract +/// — shared with commercial (see `ExtendedRrdManifest::try_into_standard_manifest`) — is that +/// the output manifest has these columns non-nullable with the underlying buffer's zero bits +/// (which Arrow zero-initializes when `new_null_array` is called). +fn strip_null_mask_on_default_columns(data: RecordBatch) -> CodecResult { + use re_arrow_util::ArrowArrayDowncastRef as _; + + let (schema, mut columns, num_rows) = data.into_parts(); + let mut new_fields = schema.fields.to_vec(); + + for (field, column) in itertools::izip!(&mut new_fields, &mut columns) { + if column.null_count() == 0 { + continue; + } + + let name = field.name().as_str(); + if name.ends_with(":has_static_data") { + let Some(c) = column.downcast_array_ref::() else { + return Err(CodecError::ArrowDeserialization(ArrowError::SchemaError( + format!( + "'{name}' should be a BooleanArray, got {}", + column.data_type() + ), + ))); + }; + let (bools, _nulls) = c.clone().into_parts(); + *column = std::sync::Arc::new(BooleanArray::new(bools, None)); + *field = std::sync::Arc::new((**field).clone().with_nullable(false)); + } else if name.ends_with(":num_rows") { + let Some(c) = column.downcast_array_ref::() else { + return Err(CodecError::ArrowDeserialization(ArrowError::SchemaError( + format!( + "'{name}' should be a UInt64Array, got {}", + column.data_type() + ), + ))); + }; + let (_dt, ints, _nulls) = c.clone().into_parts(); + *column = std::sync::Arc::new(UInt64Array::new(ints, None)); + *field = std::sync::Arc::new((**field).clone().with_nullable(false)); + } + } + + let schema = arrow::datatypes::Schema::new_with_metadata(new_fields, schema.metadata.clone()); + RecordBatch::try_new_with_options( + 
std::sync::Arc::new(schema), + columns, + &arrow::array::RecordBatchOptions::new().with_row_count(Some(num_rows)), + ) + .map_err(CodecError::ArrowSerialization) +} diff --git a/crates/store/re_log_encoding/src/rrd/footer/rrd_manifest.rs b/crates/store/re_log_encoding/src/rrd/footer/rrd_manifest.rs index f079cfdc088e..7d03d26d56ee 100644 --- a/crates/store/re_log_encoding/src/rrd/footer/rrd_manifest.rs +++ b/crates/store/re_log_encoding/src/rrd/footer/rrd_manifest.rs @@ -1,13 +1,15 @@ use std::sync::Arc; -use arrow::array::{Array as _, BinaryArray, FixedSizeBinaryArray, StringArray}; +use arrow::array::{Array as _, BinaryArray, FixedSizeBinaryArray, RecordBatch, StringArray}; use arrow::buffer::{BooleanBuffer, ScalarBuffer}; +use arrow::datatypes::Field; use re_chunk::external::re_byte_size; use re_chunk::{ChunkId, EntityPath}; use re_log_types::StoreId; +use re_sorbet::SorbetSchema; -use super::{RawRrdManifest, RrdManifestSha256, RrdManifestStaticMap, RrdManifestTemporalMap}; -use crate::CodecResult; +use super::{RawRrdManifest, RrdManifestStaticMap, RrdManifestTemporalMap}; +use crate::{CodecError, CodecResult}; /// A pre-validated and parsed [`RawRrdManifest`]. /// @@ -20,9 +22,16 @@ use crate::CodecResult; /// and does not duplicate the actual data. /// /// Use [`RrdManifest::try_new`] to create an instance from a [`RawRrdManifest`]. -#[derive(Clone, PartialEq)] +#[derive(Clone)] pub struct RrdManifest { - raw: RawRrdManifest, + // NOTE: the `chunk_fetcher_rb` only contains the columns listed in + // [`Self::CHUNK_FETCHER_COLUMNS`]. All other manifest columns are pre-extracted + // into the typed fields below (or into the static/temporal maps). 
+ chunk_fetcher_rb: RecordBatch, + + store_id: StoreId, + recording_schema: SorbetSchema, + sorbet_schema: arrow::datatypes::Schema, chunk_ids: FixedSizeBinaryArray, chunk_entity_paths: StringArray, @@ -37,6 +46,46 @@ pub struct RrdManifest { temporal_data_map: RrdManifestTemporalMap, } +impl PartialEq for RrdManifest { + fn eq(&self, other: &Self) -> bool { + // Destructure to get a compile error when new fields are added, + // ensuring we consciously decide whether to include them. + let Self { + chunk_fetcher_rb, + store_id, + recording_schema, + // We skip `sorbet_schema` (the raw `arrow::datatypes::Schema`) because it is + // redundant with `recording_schema` for semantic equality, and its field order + // is not preserved through protobuf round-trips. + sorbet_schema: _, + chunk_ids, + chunk_entity_paths, + chunk_is_static, + chunk_num_rows, + chunk_byte_offsets, + chunk_byte_sizes, + chunk_byte_sizes_uncompressed, + chunk_keys, + static_data_map, + temporal_data_map, + } = self; + + *chunk_fetcher_rb == other.chunk_fetcher_rb + && *store_id == other.store_id + && *recording_schema == other.recording_schema + && *chunk_ids == other.chunk_ids + && *chunk_entity_paths == other.chunk_entity_paths + && *chunk_is_static == other.chunk_is_static + && *chunk_num_rows == other.chunk_num_rows + && *chunk_byte_offsets == other.chunk_byte_offsets + && *chunk_byte_sizes == other.chunk_byte_sizes + && *chunk_byte_sizes_uncompressed == other.chunk_byte_sizes_uncompressed + && *chunk_keys == other.chunk_keys + && *static_data_map == other.static_data_map + && *temporal_data_map == other.temporal_data_map + } +} + impl std::fmt::Debug for RrdManifest { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("RrdManifest").finish_non_exhaustive() @@ -47,13 +96,54 @@ impl re_byte_size::SizeBytes for RrdManifest { fn heap_size_bytes(&self) -> u64 { re_tracing::profile_function!(); - // The Arrow arrays are clones (Arc-based), so they share memory 
with the manifest. - self.raw.heap_size_bytes() + // After `try_new`, some extracted arrays (chunk_ids, chunk_is_static, …) share their + // underlying `Arc` with the pruned `RecordBatch` columns, so they are already + // covered by `chunk_fetcher_rb.heap_size_bytes()`. However, after `concat` all arrays + // are independently allocated, so the pruned-batch size alone would undercount. + // We intentionally accept that minor double-count (via Arc sharing) from `try_new` + // in exchange for always being correct after `concat`. + // + // Fields that are never in the pruned batch must always be counted separately: + self.chunk_fetcher_rb.heap_size_bytes() + + re_byte_size::SizeBytes::heap_size_bytes( + &self.chunk_entity_paths as &dyn arrow::array::Array, + ) + + self.chunk_num_rows.heap_size_bytes() + + self.chunk_byte_sizes.heap_size_bytes() + + self.chunk_byte_sizes_uncompressed.heap_size_bytes() + + self.sorbet_schema.heap_size_bytes() + self.static_data_map.heap_size_bytes() + self.temporal_data_map.heap_size_bytes() } } +// Columns retained in the pruned `chunk_fetcher_rb`. +// +// The full manifest can have 1000+ sparse columns (one per timeline x component pair). +// After extracting all indexing data into typed fields and maps, we prune the +// `RecordBatch` down to only the columns needed for chunk fetching. This list is the +// single source of truth for which columns survive that pruning — it is used by +// [`RawRrdManifest::chunk_fetcher_record_batch`] to do the pruning, and should be +// referenced by any code that accesses the pruned batch (e.g. sorting, sending over gRPC). 
+impl RrdManifest { + pub const FIELD_CHUNK_ID: &str = RawRrdManifest::FIELD_CHUNK_ID; + pub const FIELD_CHUNK_KEY: &str = RawRrdManifest::FIELD_CHUNK_KEY; + pub const FIELD_CHUNK_IS_STATIC: &str = RawRrdManifest::FIELD_CHUNK_IS_STATIC; + pub const FIELD_CHUNK_BYTE_OFFSET: &str = RawRrdManifest::FIELD_CHUNK_BYTE_OFFSET; + pub const FIELD_CHUNK_PARTITION_ID: &str = "chunk_partition_id"; + pub const FIELD_RERUN_PARTITION_LAYER: &str = "rerun_partition_layer"; + + /// All columns present in the pruned batch returned by [`Self::chunk_fetcher_rb()`]. + pub const CHUNK_FETCHER_COLUMNS: &[&str] = &[ + Self::FIELD_CHUNK_ID, + Self::FIELD_CHUNK_KEY, + Self::FIELD_CHUNK_IS_STATIC, + Self::FIELD_CHUNK_BYTE_OFFSET, + Self::FIELD_CHUNK_PARTITION_ID, + Self::FIELD_RERUN_PARTITION_LAYER, + ]; +} + impl RrdManifest { /// Creates a new [`RrdManifest`]. /// @@ -61,7 +151,7 @@ impl RrdManifest { /// or any required column is missing/malformed, an error is returned. /// /// All arrays must be non-null (no missing values). - pub fn try_new(manifest: RawRrdManifest) -> CodecResult { + pub fn try_new(manifest: &RawRrdManifest) -> CodecResult { re_tracing::profile_function!(); if cfg!(debug_assertions) { @@ -151,8 +241,202 @@ impl RrdManifest { let static_data_map = manifest.calc_static_map()?; let temporal_data_map = manifest.calc_temporal_map()?; + let mut recording_schema = + SorbetSchema::try_from_raw_arrow_schema(Arc::new(manifest.sorbet_schema.clone()))?; + // Sort columns so that PartialEq is stable across protobuf round-trips, + // which do not preserve column ordering. 
+ recording_schema.columns.columns.sort(); + + let pruned_batch = manifest.chunk_fetcher_record_batch(); + Ok(Self { - raw: manifest, + chunk_fetcher_rb: pruned_batch, + store_id: manifest.store_id.clone(), + recording_schema, + sorbet_schema: manifest.sorbet_schema.clone(), + chunk_ids, + chunk_entity_paths, + chunk_is_static, + chunk_num_rows, + chunk_byte_offsets, + chunk_byte_sizes, + chunk_byte_sizes_uncompressed, + chunk_keys, + static_data_map, + temporal_data_map, + }) + } + + /// The schema for the entire recording. + pub fn recording_schema(&self) -> &SorbetSchema { + &self.recording_schema + } + + pub fn concat(manifests: &[&Self]) -> CodecResult { + re_tracing::profile_function!(); + + let first = manifests + .first() + .ok_or_else(|| CodecError::FrameDecoding("No manifests to concatenate".to_owned()))?; + + let any_has_chunk_keys = manifests.iter().any(|m| m.chunk_keys.is_some()); + + // Concatenate the (already pruned) raw manifests — used for `take_record_batch`. + // + // When some manifests have `chunk_key` and others don't, we must normalize + // the schemas before calling `concat_batches` (which requires matching schemas). + let normalized_batches: Vec; + let batches_to_concat: Vec<&RecordBatch> = + if any_has_chunk_keys && manifests.iter().any(|m| m.chunk_keys.is_none()) { + // Some have chunk_key, some don't — normalize by adding a null column. 
+ normalized_batches = manifests + .iter() + .map(|m| { + if m.chunk_keys.is_some() { + m.chunk_fetcher_rb.clone() + } else { + Self::add_null_chunk_key_column(&m.chunk_fetcher_rb) + } + }) + .collect(); + normalized_batches.iter().collect() + } else { + manifests.iter().map(|m| &m.chunk_fetcher_rb).collect() + }; + + let combined_schema = batches_to_concat + .first() + .map(|b| b.schema()) + .unwrap_or_else(|| first.chunk_fetcher_rb.schema()); + let combined_batches = arrow::compute::concat_batches(&combined_schema, batches_to_concat) + .map_err(|err| { + CodecError::FrameDecoding(format!( + "Failed to concatenate RRD manifest parts: {err}" + )) + })?; + + // Concatenate pre-extracted Arrow arrays directly, avoiding a round-trip + // through `try_new` which would fail on pruned data (missing sparse columns). + let chunk_ids = { + let arrays: Vec<&dyn arrow::array::Array> = + manifests.iter().map(|m| &m.chunk_ids as _).collect(); + re_arrow_util::concat_arrays(&arrays) + .map_err(|err| CodecError::FrameDecoding(format!("concat chunk_ids: {err}")))? + .as_any() + .downcast_ref::() + .expect("concat of FixedSizeBinaryArray should yield FixedSizeBinaryArray") + .clone() + }; + let chunk_entity_paths = { + let arrays: Vec<&dyn arrow::array::Array> = manifests + .iter() + .map(|m| &m.chunk_entity_paths as _) + .collect(); + re_arrow_util::concat_arrays(&arrays) + .map_err(|err| { + CodecError::FrameDecoding(format!("concat chunk_entity_paths: {err}")) + })? 
+ .as_any() + .downcast_ref::() + .expect("concat of StringArray should yield StringArray") + .clone() + }; + let chunk_is_static = manifests + .iter() + .flat_map(|m| m.chunk_is_static.iter()) + .collect::(); + let chunk_num_rows = ScalarBuffer::from( + manifests + .iter() + .flat_map(|m| m.chunk_num_rows.iter().copied()) + .collect::>(), + ); + let chunk_byte_offsets = ScalarBuffer::from( + manifests + .iter() + .flat_map(|m| m.chunk_byte_offsets.iter().copied()) + .collect::>(), + ); + let chunk_byte_sizes = ScalarBuffer::from( + manifests + .iter() + .flat_map(|m| m.chunk_byte_sizes.iter().copied()) + .collect::>(), + ); + let chunk_byte_sizes_uncompressed = ScalarBuffer::from( + manifests + .iter() + .flat_map(|m| m.chunk_byte_sizes_uncompressed.iter().copied()) + .collect::>(), + ); + // When some manifests have chunk_keys and others don't, create all-null + // BinaryArrays for the keyless manifests to maintain row alignment. + let chunk_keys = if any_has_chunk_keys { + let null_arrays: Vec = manifests + .iter() + .filter(|m| m.chunk_keys.is_none()) + .map(|m| BinaryArray::new_null(m.num_chunks())) + .collect(); + let mut null_idx = 0; + let arrays: Vec<&dyn arrow::array::Array> = manifests + .iter() + .map(|m| { + if let Some(keys) = &m.chunk_keys { + keys as &dyn arrow::array::Array + } else { + let arr = &null_arrays[null_idx] as &dyn arrow::array::Array; + null_idx += 1; + arr + } + }) + .collect(); + Some( + re_arrow_util::concat_arrays(&arrays) + .map_err(|err| CodecError::FrameDecoding(format!("concat chunk_keys: {err}")))? + .as_any() + .downcast_ref::() + .expect("concat of BinaryArray should yield BinaryArray") + .clone(), + ) + } else { + None + }; + + // Merge pre-computed maps. + let mut static_data_map = first.static_data_map.clone(); + for m in &manifests[1..] 
{ + for (entity, components) in &m.static_data_map { + let entry = static_data_map.entry(entity.clone()).or_default(); + for (component, chunk_id) in components { + entry + .entry(*component) + .and_modify(|id| *id = *chunk_id) + .or_insert(*chunk_id); + } + } + } + + let mut temporal_data_map = first.temporal_data_map.clone(); + for m in &manifests[1..] { + for (entity, timelines) in &m.temporal_data_map { + let entity_entry = temporal_data_map.entry(entity.clone()).or_default(); + for (timeline, components) in timelines { + let timeline_entry = entity_entry.entry(*timeline).or_default(); + for (component, chunks) in components { + let component_entry = timeline_entry.entry(*component).or_default(); + for (chunk_id, map_entry) in chunks { + component_entry.insert(*chunk_id, *map_entry); + } + } + } + } + } + + Ok(Self { + chunk_fetcher_rb: combined_batches, + store_id: first.store_id.clone(), + recording_schema: first.recording_schema.clone(), + sorbet_schema: first.sorbet_schema.clone(), chunk_ids, chunk_entity_paths, chunk_is_static, @@ -177,13 +461,13 @@ impl RrdManifest { chunks: impl Iterator, ) -> CodecResult> { let raw = RawRrdManifest::build_in_memory_from_chunks(store_id, chunks)?; - Ok(Arc::new(Self::try_new(raw)?)) + Ok(Arc::new(Self::try_new(&raw)?)) } - /// Returns a reference to the underlying [`RawRrdManifest`]. + /// Returns the store ID this manifest belongs to. #[inline] - pub fn raw(&self) -> &RawRrdManifest { - &self.raw + pub fn store_id(&self) -> &StoreId { + &self.store_id } /// Returns the number of chunks (rows) in this manifest. @@ -192,28 +476,18 @@ impl RrdManifest { self.chunk_ids.len() } - /// Returns the recording ID that was used to identify the original recording. - #[inline] - pub fn store_id(&self) -> &StoreId { - &self.raw.store_id - } - /// Returns the Sorbet schema of the recording. 
#[inline] pub fn sorbet_schema(&self) -> &arrow::datatypes::Schema { - &self.raw.sorbet_schema - } - - /// Returns the SHA256 hash of the Sorbet schema. - #[inline] - pub fn sorbet_schema_sha256(&self) -> &[u8; 32] { - &self.raw.sorbet_schema_sha256 + &self.sorbet_schema } - /// Returns the actual manifest data as a `RecordBatch`. + /// Returns the `RecordBatch` with only the columns needed to do a `FetchChunk` request. + /// + /// See [`Self::CHUNK_FETCHER_COLUMNS`]. #[inline] - pub fn data(&self) -> &arrow::array::RecordBatch { - &self.raw.data + pub fn chunk_fetcher_rb(&self) -> &arrow::array::RecordBatch { + &self.chunk_fetcher_rb } /// Returns all the chunk ids @@ -287,13 +561,6 @@ impl RrdManifest { self.chunk_keys.as_ref() } - /// Computes the sha256 hash of the manifest's data, which can be used as a unique ID. - /// - /// Note: This is expensive to compute and delegates to [`RawRrdManifest::compute_sha256`]. - pub fn compute_sha256(&self) -> Result { - self.raw.compute_sha256() - } - /// Returns the map-based representation of the static data in this RRD manifest. #[inline] pub fn static_map(&self) -> &RrdManifestStaticMap { @@ -305,4 +572,34 @@ impl RrdManifest { pub fn temporal_map(&self) -> &RrdManifestTemporalMap { &self.temporal_data_map } + + /// Add an all-null `chunk_key` column to a `RecordBatch` that doesn't have one. + /// + /// Used by [`Self::concat`] to normalize schemas when some manifests have chunk keys + /// and others don't. 
+ fn add_null_chunk_key_column(batch: &RecordBatch) -> RecordBatch { + let num_rows = batch.num_rows(); + let null_keys = BinaryArray::new_null(num_rows); + + let schema = batch.schema(); + let mut fields: Vec<_> = schema.fields().iter().cloned().collect(); + let mut columns: Vec<_> = batch.columns().to_vec(); + + fields.push(Arc::new(Field::new( + Self::FIELD_CHUNK_KEY, + arrow::datatypes::DataType::Binary, + true, + ))); + columns.push(Arc::new(null_keys)); + + RecordBatch::try_new_with_options( + Arc::new(arrow::datatypes::Schema::new_with_metadata( + fields, + schema.metadata().clone(), + )), + columns, + &arrow::array::RecordBatchOptions::new().with_row_count(Some(num_rows)), + ) + .expect("adding a null column to a valid batch should not fail") + } } diff --git a/crates/store/re_log_encoding/src/rrd/footer_reader.rs b/crates/store/re_log_encoding/src/rrd/footer_reader.rs new file mode 100644 index 000000000000..49b517aa10b3 --- /dev/null +++ b/crates/store/re_log_encoding/src/rrd/footer_reader.rs @@ -0,0 +1,157 @@ +use std::fs::File; +use std::io::{Read as _, Seek as _, SeekFrom}; + +use crate::rrd::{CodecError, Decodable as _, StreamFooter, StreamHeader}; +use crate::{RrdFooter, ToApplication as _}; + +/// Read the full RRD footer from an open file using seek-based I/O. +/// +/// The file position is moved during reading (seeks to header, footer, payload). +/// +/// Returns `Ok(None)` if the file is a valid RRD but has no footer (legacy RRD). +/// Returns `Err` if the file is not a valid RRD or is corrupted. +/// +/// The returned [`RrdFooter`] contains manifests for ALL stores in the file. +/// Caller is responsible for selecting the desired store. +pub fn read_rrd_footer(file: &mut File) -> Result, CodecError> { + let file_len = file.metadata()?.len(); + + // 1. Validate the StreamHeader to confirm this is actually an RRD file. 
+ if file_len < StreamHeader::ENCODED_SIZE_BYTES as u64 { + return Err(CodecError::FrameDecoding( + "file too small to be an RRD".to_owned(), + )); + } + file.seek(SeekFrom::Start(0))?; + let mut header_buf = [0u8; StreamHeader::ENCODED_SIZE_BYTES]; + file.read_exact(&mut header_buf)?; + StreamHeader::from_rrd_bytes(&header_buf)?; // validates FourCC + version + + // 2. Read the StreamFooter from the end of the file. + if file_len < StreamFooter::ENCODED_SIZE_BYTES as u64 { + return Ok(None); // File too small to have a footer. + } + // SAFETY: ENCODED_SIZE_BYTES is a small constant (32), fits in i64. + #[expect(clippy::cast_possible_wrap)] + file.seek(SeekFrom::End(-(StreamFooter::ENCODED_SIZE_BYTES as i64)))?; + let mut footer_buf = [0u8; StreamFooter::ENCODED_SIZE_BYTES]; + file.read_exact(&mut footer_buf)?; + + let Ok(stream_footer) = StreamFooter::from_rrd_bytes(&footer_buf) else { + return Ok(None); // Valid RRD, but no footer (legacy). + }; + + // 2. For each entry, read and validate the RrdFooter payload. + // In practice there is always exactly one entry. + let Some(entry) = stream_footer.entries.first() else { + return Ok(None); + }; + + let span = &entry.rrd_footer_byte_span_from_start_excluding_header; + let payload_len = usize::try_from(span.len)?; + + // Sanity check: payload must fit within the file. + if span.start + span.len > file_len { + return Err(CodecError::FrameDecoding(format!( + "RrdFooter payload span ({start}..{end}) exceeds file size ({file_len})", + start = span.start, + end = span.start + span.len, + ))); + } + + // 3. Seek to the RrdFooter payload and read it. + file.seek(SeekFrom::Start(span.start))?; + let mut payload_buf = vec![0u8; payload_len]; + file.read_exact(&mut payload_buf)?; + + // 4. Validate CRC. + let actual_crc = StreamFooter::compute_crc(&payload_buf); + if actual_crc != entry.crc_excluding_header { + return Err(CodecError::CrcMismatch { + expected: entry.crc_excluding_header, + got: actual_crc, + }); + } + + // 5. 
Decode protobuf RrdFooter → application-level RrdFooter. + let transport_footer = re_protos::log_msg::v1alpha1::RrdFooter::from_rrd_bytes(&payload_buf)?; + let rrd_footer = transport_footer.to_application(())?; + + Ok(Some(rrd_footer)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::rrd::test_util::{encode_test_rrd, encode_test_rrd_to_file, make_test_chunks}; + + #[test] + fn test_read_footer_roundtrip() { + let chunks = make_test_chunks(5); + let (file, _store_id) = encode_test_rrd(&chunks); + + let footer = read_rrd_footer(&mut File::open(file.path()).unwrap()).unwrap(); + assert!(footer.is_some(), "Footer should be present"); + let footer = footer.unwrap(); + assert!( + !footer.manifests.is_empty(), + "Should have at least one manifest" + ); + } + + #[test] + fn test_read_footer_no_footer() { + // Needs with_footer: false, so uses the lower-level helper. + let file = tempfile::NamedTempFile::new().unwrap(); + let chunks = make_test_chunks(3); + encode_test_rrd_to_file(file.path(), &chunks, false); + + let footer = read_rrd_footer(&mut File::open(file.path()).unwrap()).unwrap(); + assert!(footer.is_none(), "Legacy RRD should have no footer"); + } + + #[test] + fn test_read_footer_not_an_rrd() { + let file = tempfile::NamedTempFile::new().unwrap(); + std::fs::write(file.path(), b"this is not an rrd file at all").unwrap(); + + let result = read_rrd_footer(&mut File::open(file.path()).unwrap()); + assert!(result.is_err(), "Non-RRD file should return an error"); + } + + #[test] + fn test_read_footer_too_small() { + let file = tempfile::NamedTempFile::new().unwrap(); + std::fs::write(file.path(), b"tiny").unwrap(); + + let result = read_rrd_footer(&mut File::open(file.path()).unwrap()); + assert!( + result.is_err(), + "File too small for StreamHeader should error" + ); + } + + #[test] + fn test_read_footer_corrupted_crc() { + let chunks = make_test_chunks(3); + let (file, _store_id) = encode_test_rrd(&chunks); + + let mut data = 
std::fs::read(file.path()).unwrap(); + let file_len = data.len(); + + let footer_bytes = &data[file_len - StreamFooter::ENCODED_SIZE_BYTES..]; + let stream_footer = StreamFooter::from_rrd_bytes(footer_bytes).unwrap(); + let entry = &stream_footer.entries[0]; + let payload_start = entry.rrd_footer_byte_span_from_start_excluding_header.start as usize; + + // Flip a byte in the payload. + data[payload_start] ^= 0xFF; + std::fs::write(file.path(), &data).unwrap(); + + let result = read_rrd_footer(&mut File::open(file.path()).unwrap()); + assert!( + matches!(result, Err(CodecError::CrcMismatch { .. })), + "Expected CRC mismatch, got: {result:?}" + ); + } +} diff --git a/crates/store/re_log_encoding/src/rrd/mod.rs b/crates/store/re_log_encoding/src/rrd/mod.rs index 5ba9c2828b41..ca851d350b9e 100644 --- a/crates/store/re_log_encoding/src/rrd/mod.rs +++ b/crates/store/re_log_encoding/src/rrd/mod.rs @@ -2,7 +2,7 @@ //! //! ⚠️Make sure to familiarize yourself with the [crate-level docs](crate) first. ⚠️ //! -//! RRD streams are used everywhere gRPC isn't: files, standard I/O, HTTP fetches, data-loaders, etc. +//! RRD streams are used everywhere gRPC isn't: files, standard I/O, HTTP fetches, importers, etc. //! This module is completely unrelated to the Rerun Data Protocol (Redap) gRPC API. //! This module is also completely unrelated to the legacy SDK comms gRPC API. //! 
@@ -26,6 +26,18 @@ mod decoder; #[cfg(feature = "encoder")] mod encoder; +#[cfg(test)] +#[cfg(not(target_arch = "wasm32"))] +pub(crate) mod test_util; + +#[cfg(feature = "decoder")] +#[cfg(not(target_arch = "wasm32"))] +mod chunk_reader; + +#[cfg(feature = "decoder")] +#[cfg(not(target_arch = "wasm32"))] +mod footer_reader; + #[cfg(feature = "encoder")] #[cfg(not(target_arch = "wasm32"))] mod file_sink; @@ -33,6 +45,9 @@ mod file_sink; #[cfg(feature = "stream_from_http")] pub mod stream_from_http; +#[cfg(feature = "decoder")] +#[cfg(not(target_arch = "wasm32"))] +pub use self::chunk_reader::read_chunks; #[cfg(feature = "decoder")] pub use self::decoder::{ DecodeError, Decoder, DecoderApp, DecoderEntrypoint, DecoderIterator, DecoderStream, @@ -48,6 +63,9 @@ pub use self::footer::{ RawRrdManifest, RrdFooter, RrdManifest, RrdManifestBuilder, RrdManifestSha256, RrdManifestStaticMap, RrdManifestTemporalMap, RrdManifestTemporalMapEntry, }; +#[cfg(feature = "decoder")] +#[cfg(not(target_arch = "wasm32"))] +pub use self::footer_reader::read_rrd_footer; pub use self::frames::{ Compression, CrateVersion, EncodingOptions, MessageHeader, MessageKind, Serializer, StreamFooter, StreamFooterEntry, StreamHeader, diff --git a/crates/store/re_log_encoding/src/rrd/test_util.rs b/crates/store/re_log_encoding/src/rrd/test_util.rs new file mode 100644 index 000000000000..237a123e4fb1 --- /dev/null +++ b/crates/store/re_log_encoding/src/rrd/test_util.rs @@ -0,0 +1,84 @@ +use std::path::Path; +use std::sync::Arc; + +use re_chunk::{Chunk, RowId, TimePoint, Timeline}; +use re_log_types::{ + EntityPath, LogMsg, SetStoreInfo, StoreId, StoreInfo, StoreKind, StoreSource, + example_components::{MyPoint, MyPoints}, +}; +use tempfile::NamedTempFile; + +/// Create simple test chunks with temporal data. 
+pub fn make_test_chunks(num_chunks: usize) -> Vec> { + let entity_path = EntityPath::from("/test/entity"); + let timeline = Timeline::new_sequence("frame"); + + (0..num_chunks) + .map(|i| { + let row_id = RowId::new(); + let points = MyPoint::from_iter(i as u32..i as u32 + 1); + let chunk = Chunk::builder(entity_path.clone()) + .with_sparse_component_batches( + row_id, + #[expect(clippy::cast_possible_wrap)] + TimePoint::default().with(timeline, i as i64), + [(MyPoints::descriptor_points(), Some(&points as _))], + ) + .build() + .unwrap(); + Arc::new(chunk) + }) + .collect() +} + +/// Encode test chunks into a temporary RRD file (with footer). +/// +/// Returns the temp file (keeps it alive) and the `StoreId`. +/// Use `.path()` on the returned file to get the path. +pub fn encode_test_rrd(chunks: &[Arc]) -> (NamedTempFile, StoreId) { + let file = NamedTempFile::new().unwrap(); + let store_id = encode_test_rrd_to_file(file.path(), chunks, true); + (file, store_id) +} + +/// Encode chunks into an RRD file at the given path. Returns the `StoreId` used. +pub fn encode_test_rrd_to_file(path: &Path, chunks: &[Arc], with_footer: bool) -> StoreId { + let store_id = StoreId::random(StoreKind::Recording, "test"); + encode_test_rrd_to_file_with_options( + path, + chunks, + &store_id, + with_footer, + crate::EncodingOptions::PROTOBUF_COMPRESSED, + ); + store_id +} + +/// Encode chunks with specific options. 
+pub fn encode_test_rrd_to_file_with_options( + path: &Path, + chunks: &[Arc], + store_id: &StoreId, + with_footer: bool, + options: crate::EncodingOptions, +) { + let store_info = StoreInfo::new(store_id.clone(), StoreSource::Unknown); + let set_store_info = LogMsg::SetStoreInfo(SetStoreInfo { + row_id: *RowId::ZERO, + info: store_info, + }); + + let mut file = std::fs::File::create(path).unwrap(); + let mut encoder = + crate::Encoder::new_eager(re_build_info::CrateVersion::LOCAL, options, &mut file).unwrap(); + if !with_footer { + encoder.do_not_emit_footer(); + } + encoder.append(&set_store_info).unwrap(); + for chunk in chunks { + let arrow_msg = chunk.to_arrow_msg().unwrap(); + let msg = LogMsg::ArrowMsg(store_id.clone(), arrow_msg); + encoder.append(&msg).unwrap(); + } + encoder.finish().unwrap(); +} diff --git a/crates/store/re_log_encoding/src/transport_to_app.rs b/crates/store/re_log_encoding/src/transport_to_app.rs index f28bc3f64e12..247c2ad99cd8 100644 --- a/crates/store/re_log_encoding/src/transport_to_app.rs +++ b/crates/store/re_log_encoding/src/transport_to_app.rs @@ -55,7 +55,7 @@ impl ToTransport for crate::RrdFooter { type Output = re_protos::log_msg::v1alpha1::RrdFooter; type Context<'a> = (); - fn to_transport(&self, _: Self::Context<'_>) -> Result { + fn to_transport(&self, (): Self::Context<'_>) -> Result { let manifests: Result, _> = self .manifests .values() diff --git a/crates/store/re_log_encoding/tests/arrow_encode_roundtrip.rs b/crates/store/re_log_encoding/tests/arrow_encode_roundtrip.rs index befbaa0c2f29..14b305ac26ed 100644 --- a/crates/store/re_log_encoding/tests/arrow_encode_roundtrip.rs +++ b/crates/store/re_log_encoding/tests/arrow_encode_roundtrip.rs @@ -1,3 +1,5 @@ +#![cfg(all(feature = "encoder", feature = "decoder"))] + use re_chunk::{Chunk, RowId, TimePoint, Timeline}; use re_log_encoding::{DecoderApp, Encoder}; use re_log_types::{LogMsg, StoreId}; diff --git a/crates/store/re_log_encoding/tests/footers_and_manifests.rs 
b/crates/store/re_log_encoding/tests/footers_and_manifests.rs index dfac76dc13ce..647a6a2c2055 100644 --- a/crates/store/re_log_encoding/tests/footers_and_manifests.rs +++ b/crates/store/re_log_encoding/tests/footers_and_manifests.rs @@ -1,13 +1,16 @@ #![expect(clippy::unwrap_used)] use std::collections::BTreeMap; +use std::sync::Arc; +use arrow::array::{Array as _, BinaryArray, RecordBatch}; +use arrow::datatypes::Field; use itertools::Itertools as _; use re_arrow_util::RecordBatchTestExt as _; use re_chunk::{Chunk, ChunkId, RowId, TimePoint}; use re_log_encoding::{ - Decodable as _, DecoderApp, Encoder, RrdManifest, RrdManifestBuilder, StreamFooter, - StreamFooterEntry, ToApplication as _, ToTransport as _, + Decodable as _, DecoderApp, Encoder, RawRrdManifest, RrdManifest, RrdManifestBuilder, + StreamFooter, StreamFooterEntry, ToApplication as _, ToTransport as _, }; use re_log_types::external::re_tuid::Tuid; use re_log_types::{ArrowMsg, LogMsg, StoreId, StoreKind, build_log_time}; @@ -160,10 +163,10 @@ fn footer_roundtrip() { re_protos::log_msg::v1alpha1::RrdFooter::from_rrd_bytes(rrd_footer_bytes).unwrap(); let mut rrd_footer = rrd_footer.to_application(()).unwrap(); - let rrd_manifest_recording = - RrdManifest::try_new(rrd_footer.manifests.remove(&store_id_recording).unwrap()).unwrap(); - let rrd_manifest_blueprint = - RrdManifest::try_new(rrd_footer.manifests.remove(&store_id_blueprint).unwrap()).unwrap(); + let raw_manifest_recording = rrd_footer.manifests.remove(&store_id_recording).unwrap(); + let raw_manifest_blueprint = rrd_footer.manifests.remove(&store_id_blueprint).unwrap(); + let rrd_manifest_recording = RrdManifest::try_new(&raw_manifest_recording).unwrap(); + let rrd_manifest_blueprint = RrdManifest::try_new(&raw_manifest_blueprint).unwrap(); fn decode_messages(msgs_encoded: &[u8], rrd_manifest: &RrdManifest) -> Vec { itertools::izip!( @@ -220,28 +223,39 @@ fn footer_roundtrip() { .format_schema_snapshot(), ); + // Note: we compare semantic fields 
rather than raw data because `RrdManifest::try_new` + // prunes sparse columns from the RecordBatch for memory efficiency. The sequential decoder + // returns the full unpruned `RawRrdManifest`, so raw data comparison would fail. similar_asserts::assert_eq!( - rrd_manifest_recording_sequential.data.format_snapshot(true), - rrd_manifest_recording.data().format_snapshot(true), - "RRD manifest decoded sequentially should be identical to the one decoded by jumping via the footer", + rrd_manifest_recording_sequential.store_id, + raw_manifest_recording.store_id, + "RRD manifest decoded sequentially should have the same store_id as the one decoded via the footer", + ); + similar_asserts::assert_eq!( + rrd_manifest_recording_sequential.sorbet_schema, + *rrd_manifest_recording.sorbet_schema(), + "RRD manifest decoded sequentially should have the same sorbet_schema as the one decoded via the footer", ); - // Same test but check everything, not just the manifest data (we do both cause we want a nice diff for the manifest data) similar_asserts::assert_eq!( - &rrd_manifest_recording_sequential, - rrd_manifest_recording.raw(), - "RRD manifest decoded sequentially should be identical to the one decoded by jumping via the footer", + rrd_manifest_recording_sequential.sorbet_schema_sha256, + raw_manifest_recording.sorbet_schema_sha256, + "RRD manifest decoded sequentially should have the same sorbet_schema_sha256 as the one decoded via the footer", ); similar_asserts::assert_eq!( - rrd_manifest_blueprint_sequential.data.format_snapshot(true), - rrd_manifest_blueprint.data().format_snapshot(true), - "RRD manifest decoded sequentially should be identical to the one decoded by jumping via the footer", + rrd_manifest_blueprint_sequential.store_id, + raw_manifest_blueprint.store_id, + "RRD manifest decoded sequentially should have the same store_id as the one decoded via the footer", + ); + similar_asserts::assert_eq!( + rrd_manifest_blueprint_sequential.sorbet_schema, + 
*rrd_manifest_blueprint.sorbet_schema(), + "RRD manifest decoded sequentially should have the same sorbet_schema as the one decoded via the footer", ); - // Same test but check everything, not just the manifest data (we do both cause we want a nice diff for the manifest data) similar_asserts::assert_eq!( - &rrd_manifest_blueprint_sequential, - rrd_manifest_blueprint.raw(), - "RRD manifest decoded sequentially should be identical to the one decoded by jumping via the footer", + rrd_manifest_blueprint_sequential.sorbet_schema_sha256, + raw_manifest_blueprint.sorbet_schema_sha256, + "RRD manifest decoded sequentially should have the same sorbet_schema_sha256 as the one decoded via the footer", ); // Check that the data decoded "traditionally" matches the data decoded via random access / footer. @@ -299,25 +313,25 @@ fn footer_roundtrip() { .unwrap(); let mut reencoded_rrd_footer = reencoded_rrd_footer.to_application(()).unwrap(); - let reencoded_rrd_manifest_recording = RrdManifest::try_new( - reencoded_rrd_footer - .manifests - .remove(&store_id_recording) - .unwrap(), - ) - .unwrap(); - let reencoded_rrd_manifest_blueprint = RrdManifest::try_new( - reencoded_rrd_footer - .manifests - .remove(&store_id_blueprint) - .unwrap(), - ) - .unwrap(); + let reencoded_raw_recording = reencoded_rrd_footer + .manifests + .remove(&store_id_recording) + .unwrap(); + let reencoded_raw_blueprint = reencoded_rrd_footer + .manifests + .remove(&store_id_blueprint) + .unwrap(); + let reencoded_rrd_manifest_recording = + RrdManifest::try_new(&reencoded_raw_recording).unwrap(); + let reencoded_rrd_manifest_blueprint = + RrdManifest::try_new(&reencoded_raw_blueprint).unwrap(); similar_asserts::assert_eq!( - rrd_manifest_recording.data().format_snapshot(true), + rrd_manifest_recording + .chunk_fetcher_rb() + .format_snapshot(true), reencoded_rrd_manifest_recording - .data() + .chunk_fetcher_rb() .format_snapshot(true), "Reencoded RRD manifest should be identical to the original one", ); @@ 
-329,9 +343,11 @@ fn footer_roundtrip() { ); similar_asserts::assert_eq!( - rrd_manifest_blueprint.data().format_snapshot(true), + rrd_manifest_blueprint + .chunk_fetcher_rb() + .format_snapshot(true), reencoded_rrd_manifest_blueprint - .data() + .chunk_fetcher_rb() .format_snapshot(true), "Reencoded RRD manifest should be identical to the original one", ); @@ -603,3 +619,259 @@ fn next_row_id_generator(prefix: u64) -> impl FnMut() -> RowId { row_id } } + +/// Helper: add a `chunk_key` column to a `RawRrdManifest`, returning a new manifest. +fn add_chunk_keys_to_raw(raw: &RawRrdManifest) -> RawRrdManifest { + let num_rows = raw.data.num_rows(); + let keys: Vec> = (0..num_rows) + .map(|i| format!("key_{i}").into_bytes()) + .collect(); + let key_refs: Vec<&[u8]> = keys.iter().map(|k| k.as_slice()).collect(); + let chunk_key_array = BinaryArray::from_vec(key_refs); + + let schema = raw.data.schema(); + let mut fields: Vec<_> = schema.fields().iter().cloned().collect(); + let mut columns: Vec<_> = raw.data.columns().to_vec(); + + fields.push(Arc::new(Field::new( + RawRrdManifest::FIELD_CHUNK_KEY, + arrow::datatypes::DataType::Binary, + true, + ))); + columns.push(Arc::new(chunk_key_array)); + + let new_schema = Arc::new(arrow::datatypes::Schema::new_with_metadata( + fields, + schema.metadata().clone(), + )); + let num_rows = raw.data.num_rows(); + let new_batch = RecordBatch::try_new_with_options( + new_schema, + columns, + &arrow::array::RecordBatchOptions::new().with_row_count(Some(num_rows)), + ) + .unwrap(); + + RawRrdManifest { + store_id: raw.store_id.clone(), + sorbet_schema: raw.sorbet_schema.clone(), + sorbet_schema_sha256: raw.sorbet_schema_sha256, + data: new_batch, + } +} + +/// Verifies that concatenating manifests where some have `chunk_keys` and others don't +/// produces a correctly aligned result (null keys for manifests without them). 
+#[test] +fn concat_with_mixed_chunk_keys() { + use re_log_types::example_components::{MyPoint, MyPoints}; + use re_log_types::{TimeInt, build_frame_nr}; + + let store_id = generate_recording_store_id(); + + let mut next_chunk_id = next_chunk_id_generator(200); + let mut next_row_id = next_row_id_generator(200); + + let mut make_chunk = |entity: &str, frame: i64| -> Chunk { + let points = MyPoint::from_iter(0..1); + let timepoint = TimePoint::from([build_frame_nr(TimeInt::new_temporal(frame))]); + Chunk::builder_with_id(next_chunk_id(), entity) + .with_sparse_component_batches( + next_row_id(), + timepoint, + [(MyPoints::descriptor_points(), Some(&points as _))], + ) + .build() + .unwrap() + }; + + let chunks1 = [make_chunk("entity_a", 10), make_chunk("entity_a", 20)]; + let chunks2 = [make_chunk("entity_a", 30), make_chunk("entity_a", 40)]; + + let raw1 = + RawRrdManifest::build_in_memory_from_chunks(store_id.clone(), chunks1.iter()).unwrap(); + let raw2 = + RawRrdManifest::build_in_memory_from_chunks(store_id.clone(), chunks2.iter()).unwrap(); + + // raw1 gets chunk_keys, raw2 does not + let raw1_with_keys = add_chunk_keys_to_raw(&raw1); + + let m1 = RrdManifest::try_new(&raw1_with_keys).unwrap(); + let m2 = RrdManifest::try_new(&raw2).unwrap(); + + assert!(m1.col_chunk_key_raw().is_some()); + assert!(m2.col_chunk_key_raw().is_none()); + + // Concat should handle mixed chunk_keys gracefully + let combined = RrdManifest::concat(&[&m1, &m2]).unwrap(); + + // Total chunks must equal sum of parts + assert_eq!(combined.num_chunks(), 4); + + // chunk_keys should be present and aligned with the total number of chunks + let combined_keys = combined + .col_chunk_key_raw() + .expect("combined manifest should have chunk_keys when any part has them"); + assert_eq!( + combined_keys.len(), + 4, + "chunk_keys array must have one entry per chunk" + ); + + // First two entries (from m1) should be non-null + assert!(!combined_keys.is_null(0)); + assert!(!combined_keys.is_null(1)); 
+ // Last two entries (from m2, which had no keys) should be null + assert!(combined_keys.is_null(2)); + assert!(combined_keys.is_null(3)); +} + +/// Verifies that `heap_size_bytes` accounts for pre-extracted arrays that are NOT +/// in the pruned `chunk_fetcher_rb`. +#[test] +fn size_bytes_accounts_for_extracted_arrays() { + use re_chunk::external::re_byte_size::SizeBytes as _; + use re_log_types::example_components::{MyPoint, MyPoints}; + use re_log_types::{TimeInt, build_frame_nr}; + + let store_id = generate_recording_store_id(); + + let mut next_chunk_id = next_chunk_id_generator(300); + let mut next_row_id = next_row_id_generator(300); + + let mut make_chunk = |entity: &str, frame: i64| -> Chunk { + let points = MyPoint::from_iter(0..1); + let timepoint = TimePoint::from([build_frame_nr(TimeInt::new_temporal(frame))]); + Chunk::builder_with_id(next_chunk_id(), entity) + .with_sparse_component_batches( + next_row_id(), + timepoint, + [(MyPoints::descriptor_points(), Some(&points as _))], + ) + .build() + .unwrap() + }; + + let chunks: Vec<_> = (0..10).map(|i| make_chunk("entity_a", i * 10)).collect(); + let manifest = RrdManifest::build_in_memory_from_chunks(store_id, chunks.iter()).unwrap(); + + // Call heap_size_bytes on RrdManifest directly, not through Arc (which adds struct size overhead). + let total_size = + re_chunk::external::re_byte_size::SizeBytes::heap_size_bytes(manifest.as_ref()); + + // chunk_entity_paths, chunk_num_rows, chunk_byte_sizes, chunk_byte_sizes_uncompressed + // are NOT in the pruned chunk_fetcher_rb. They hold their own Arrow buffer allocations + // (not shared with the pruned batch) and must be counted in heap_size_bytes. + // + // Compute what the pruned batch + maps alone would give us, then verify the total + // is strictly larger — meaning the extracted arrays are actually being counted. 
+ let pruned_batch_and_maps_only = manifest.chunk_fetcher_rb().heap_size_bytes() + + manifest.static_map().heap_size_bytes() + + manifest.temporal_map().heap_size_bytes(); + + assert!( + total_size > pruned_batch_and_maps_only, + "heap_size_bytes ({total_size}) must be strictly greater than the pruned batch + maps \ + ({pruned_batch_and_maps_only}). The extracted chunk_entity_paths, chunk_num_rows, \ + chunk_byte_sizes, and chunk_byte_sizes_uncompressed hold their own allocations and \ + must be counted." + ); +} + +/// Verifies that `RawRrdManifest::concat` → `RrdManifest::try_new` produces the same result +/// as `RrdManifest::try_new` on each part → `RrdManifest::concat`. +#[test] +fn concat_raw_then_validate_vs_validate_then_concat() { + use re_log_types::example_components::{MyColor, MyPoint, MyPoints}; + use re_log_types::{TimeInt, build_frame_nr}; + + let store_id = generate_recording_store_id(); + + let mut next_chunk_id = next_chunk_id_generator(100); + let mut next_row_id = next_row_id_generator(100); + + // Helper: build a chunk with points and colors, either temporal or static. + let mut make_chunk = |entity: &str, frame: Option| -> Chunk { + let points = MyPoint::from_iter(0..1); + let colors = MyColor::from_iter(0..1); + let timepoint = match frame { + Some(f) => TimePoint::from([build_frame_nr(TimeInt::new_temporal(f))]), + None => TimePoint::default(), + }; + Chunk::builder_with_id(next_chunk_id(), entity) + .with_sparse_component_batches( + next_row_id(), + timepoint, + [ + (MyPoints::descriptor_points(), Some(&points as _)), + (MyPoints::descriptor_colors(), Some(&colors as _)), + ], + ) + .build() + .unwrap() + }; + + // Three groups of chunks. Each group has the same component/timeline structure + // so that the sorbet schemas match across manifests. 
+ let chunks1 = [ + make_chunk("entity_a", Some(10)), + make_chunk("entity_a", Some(20)), + make_chunk("entity_a", None), + ]; + let chunks2 = [ + make_chunk("entity_a", Some(30)), + make_chunk("entity_a", Some(40)), + make_chunk("entity_a", None), + ]; + let chunks3 = [ + make_chunk("entity_a", Some(50)), + make_chunk("entity_a", Some(60)), + make_chunk("entity_a", None), + ]; + + let raw1 = + RawRrdManifest::build_in_memory_from_chunks(store_id.clone(), chunks1.iter()).unwrap(); + let raw2 = + RawRrdManifest::build_in_memory_from_chunks(store_id.clone(), chunks2.iter()).unwrap(); + let raw3 = + RawRrdManifest::build_in_memory_from_chunks(store_id.clone(), chunks3.iter()).unwrap(); + + // Path A: concat raw manifests first, then validate into RrdManifest. + let raw_concatenated = RawRrdManifest::concat(&[&raw1, &raw2, &raw3]).unwrap(); + let path_a = RrdManifest::try_new(&raw_concatenated).unwrap(); + + // Path B: validate each raw manifest into RrdManifest first, then concat. + let m1 = RrdManifest::try_new(&raw1).unwrap(); + let m2 = RrdManifest::try_new(&raw2).unwrap(); + let m3 = RrdManifest::try_new(&raw3).unwrap(); + let path_b = RrdManifest::concat(&[&m1, &m2, &m3]).unwrap(); + + // Both paths must produce identical results. 
+ assert_eq!(path_a.num_chunks(), path_b.num_chunks(), "num_chunks"); + + similar_asserts::assert_eq!(path_a.col_chunk_ids(), path_b.col_chunk_ids()); + similar_asserts::assert_eq!( + path_a.col_chunk_entity_path().collect::>(), + path_b.col_chunk_entity_path().collect::>(), + ); + similar_asserts::assert_eq!( + path_a.col_chunk_is_static().collect::>(), + path_b.col_chunk_is_static().collect::>(), + ); + similar_asserts::assert_eq!(path_a.col_chunk_num_rows(), path_b.col_chunk_num_rows()); + similar_asserts::assert_eq!( + path_a.col_chunk_byte_offset(), + path_b.col_chunk_byte_offset(), + ); + similar_asserts::assert_eq!(path_a.col_chunk_byte_size(), path_b.col_chunk_byte_size()); + similar_asserts::assert_eq!( + path_a.col_chunk_byte_size_uncompressed(), + path_b.col_chunk_byte_size_uncompressed(), + ); + + assert_eq!(path_a.static_map(), path_b.static_map(), "static_map"); + assert_eq!(path_a.temporal_map(), path_b.temporal_map(), "temporal_map"); + + similar_asserts::assert_eq!(path_a.recording_schema(), path_b.recording_schema()); + similar_asserts::assert_eq!(path_a.sorbet_schema(), path_b.sorbet_schema()); +} diff --git a/crates/store/re_log_encoding/tests/snapshots/footers_and_manifests__rrd_manifest_blueprint_schema.snap b/crates/store/re_log_encoding/tests/snapshots/footers_and_manifests__rrd_manifest_blueprint_schema.snap index 9f4efc6799e1..15d182c51389 100644 --- a/crates/store/re_log_encoding/tests/snapshots/footers_and_manifests__rrd_manifest_blueprint_schema.snap +++ b/crates/store/re_log_encoding/tests/snapshots/footers_and_manifests__rrd_manifest_blueprint_schema.snap @@ -2,40 +2,40 @@ source: crates/store/re_log_encoding/tests/footers_and_manifests.rs expression: rrd_manifest_blueprint_sequential.data.format_schema_snapshot() --- -TimePanelBlueprint:fps:has_static_data: bool [ +TimePanelBlueprint:fps:has_static_data: non-null Boolean [ rerun:archetype: "rerun.blueprint.archetypes.TimePanelBlueprint" rerun:component: "TimePanelBlueprint:fps" 
rerun:component_type: "rerun.blueprint.components.Fps" rerun:index: "rerun:static" ] -chunk_byte_offset: u64 -chunk_byte_size: u64 -chunk_byte_size_uncompressed: u64 -chunk_entity_path: Utf8 -chunk_id: FixedSizeBinary[16] -chunk_is_static: bool -chunk_num_rows: u64 -frame_nr:TimePanelBlueprint:fps:end: nullable i64 [ +chunk_byte_offset: non-null UInt64 +chunk_byte_size: non-null UInt64 +chunk_byte_size_uncompressed: non-null UInt64 +chunk_entity_path: non-null Utf8 +chunk_id: non-null FixedSizeBinary(16) +chunk_is_static: non-null Boolean +chunk_num_rows: non-null UInt64 +frame_nr:TimePanelBlueprint:fps:end: Int64 [ rerun:archetype: "rerun.blueprint.archetypes.TimePanelBlueprint" rerun:component: "TimePanelBlueprint:fps" rerun:component_type: "rerun.blueprint.components.Fps" rerun:index: "frame_nr" ] -frame_nr:TimePanelBlueprint:fps:num_rows: nullable u64 [ +frame_nr:TimePanelBlueprint:fps:num_rows: UInt64 [ rerun:archetype: "rerun.blueprint.archetypes.TimePanelBlueprint" rerun:component: "TimePanelBlueprint:fps" rerun:component_type: "rerun.blueprint.components.Fps" rerun:index: "frame_nr" ] -frame_nr:TimePanelBlueprint:fps:start: nullable i64 [ +frame_nr:TimePanelBlueprint:fps:start: Int64 [ rerun:archetype: "rerun.blueprint.archetypes.TimePanelBlueprint" rerun:component: "TimePanelBlueprint:fps" rerun:component_type: "rerun.blueprint.components.Fps" rerun:index: "frame_nr" ] -frame_nr:end: nullable i64 [ +frame_nr:end: Int64 [ rerun:index: "frame_nr" ] -frame_nr:start: nullable i64 [ +frame_nr:start: Int64 [ rerun:index: "frame_nr" ] diff --git a/crates/store/re_log_encoding/tests/snapshots/footers_and_manifests__rrd_manifest_recording_schema.snap b/crates/store/re_log_encoding/tests/snapshots/footers_and_manifests__rrd_manifest_recording_schema.snap index d2fbf335fd0f..d1c6326d5c06 100644 --- a/crates/store/re_log_encoding/tests/snapshots/footers_and_manifests__rrd_manifest_recording_schema.snap +++ 
b/crates/store/re_log_encoding/tests/snapshots/footers_and_manifests__rrd_manifest_recording_schema.snap @@ -2,154 +2,154 @@ source: crates/store/re_log_encoding/tests/footers_and_manifests.rs expression: rrd_manifest_recording_sequential.data.format_schema_snapshot() --- -chunk_byte_offset: u64 -chunk_byte_size: u64 -chunk_byte_size_uncompressed: u64 -chunk_entity_path: Utf8 -chunk_id: FixedSizeBinary[16] -chunk_is_static: bool -chunk_num_rows: u64 -elapsed_time:end: nullable Duration(ns) [ +chunk_byte_offset: non-null UInt64 +chunk_byte_size: non-null UInt64 +chunk_byte_size_uncompressed: non-null UInt64 +chunk_entity_path: non-null Utf8 +chunk_id: non-null FixedSizeBinary(16) +chunk_is_static: non-null Boolean +chunk_num_rows: non-null UInt64 +elapsed_time:end: Duration(ns) [ rerun:index: "elapsed time" ] -elapsed_time:example_MyPoints:colors:end: nullable Duration(ns) [ +elapsed_time:example_MyPoints:colors:end: Duration(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "elapsed time" ] -elapsed_time:example_MyPoints:colors:num_rows: nullable u64 [ +elapsed_time:example_MyPoints:colors:num_rows: UInt64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "elapsed time" ] -elapsed_time:example_MyPoints:colors:start: nullable Duration(ns) [ +elapsed_time:example_MyPoints:colors:start: Duration(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "elapsed time" ] -elapsed_time:example_MyPoints:points:end: nullable Duration(ns) [ +elapsed_time:example_MyPoints:points:end: Duration(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "elapsed time" ] -elapsed_time:example_MyPoints:points:num_rows: nullable u64 [ 
+elapsed_time:example_MyPoints:points:num_rows: UInt64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "elapsed time" ] -elapsed_time:example_MyPoints:points:start: nullable Duration(ns) [ +elapsed_time:example_MyPoints:points:start: Duration(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "elapsed time" ] -elapsed_time:start: nullable Duration(ns) [ +elapsed_time:start: Duration(ns) [ rerun:index: "elapsed time" ] -example_MyPoints:colors:has_static_data: bool [ +example_MyPoints:colors:has_static_data: non-null Boolean [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "rerun:static" ] -example_MyPoints:labels:has_static_data: bool [ +example_MyPoints:labels:has_static_data: non-null Boolean [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: "example.MyLabel" rerun:index: "rerun:static" ] -example_MyPoints:points:has_static_data: bool [ +example_MyPoints:points:has_static_data: non-null Boolean [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "rerun:static" ] -frame_nr:end: nullable i64 [ +frame_nr:end: Int64 [ rerun:index: "frame_nr" ] -frame_nr:example_MyPoints:colors:end: nullable i64 [ +frame_nr:example_MyPoints:colors:end: Int64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "frame_nr" ] -frame_nr:example_MyPoints:colors:num_rows: nullable u64 [ +frame_nr:example_MyPoints:colors:num_rows: UInt64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "frame_nr" ] -frame_nr:example_MyPoints:colors:start: 
nullable i64 [ +frame_nr:example_MyPoints:colors:start: Int64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "frame_nr" ] -frame_nr:example_MyPoints:points:end: nullable i64 [ +frame_nr:example_MyPoints:points:end: Int64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "frame_nr" ] -frame_nr:example_MyPoints:points:num_rows: nullable u64 [ +frame_nr:example_MyPoints:points:num_rows: UInt64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "frame_nr" ] -frame_nr:example_MyPoints:points:start: nullable i64 [ +frame_nr:example_MyPoints:points:start: Int64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "frame_nr" ] -frame_nr:start: nullable i64 [ +frame_nr:start: Int64 [ rerun:index: "frame_nr" ] -log_time:end: nullable Timestamp(ns) [ +log_time:end: Timestamp(ns) [ rerun:index: "log_time" ] -log_time:example_MyPoints:colors:end: nullable Timestamp(ns) [ +log_time:example_MyPoints:colors:end: Timestamp(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "log_time" ] -log_time:example_MyPoints:colors:num_rows: nullable u64 [ +log_time:example_MyPoints:colors:num_rows: UInt64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "log_time" ] -log_time:example_MyPoints:colors:start: nullable Timestamp(ns) [ +log_time:example_MyPoints:colors:start: Timestamp(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "log_time" ] -log_time:example_MyPoints:points:end: nullable Timestamp(ns) [ 
+log_time:example_MyPoints:points:end: Timestamp(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "log_time" ] -log_time:example_MyPoints:points:num_rows: nullable u64 [ +log_time:example_MyPoints:points:num_rows: UInt64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "log_time" ] -log_time:example_MyPoints:points:start: nullable Timestamp(ns) [ +log_time:example_MyPoints:points:start: Timestamp(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "log_time" ] -log_time:start: nullable Timestamp(ns) [ +log_time:start: Timestamp(ns) [ rerun:index: "log_time" ] diff --git a/crates/store/re_log_encoding/tests/snapshots/footers_and_manifests__simple_manifest_batch_schema.snap b/crates/store/re_log_encoding/tests/snapshots/footers_and_manifests__simple_manifest_batch_schema.snap index 747b93d1227a..edb89f8e796f 100644 --- a/crates/store/re_log_encoding/tests/snapshots/footers_and_manifests__simple_manifest_batch_schema.snap +++ b/crates/store/re_log_encoding/tests/snapshots/footers_and_manifests__simple_manifest_batch_schema.snap @@ -2,154 +2,154 @@ source: crates/store/re_log_encoding/tests/footers_and_manifests.rs expression: rrd_manifest_batch.format_schema_snapshot() --- -chunk_byte_offset: u64 -chunk_byte_size: u64 -chunk_byte_size_uncompressed: u64 -chunk_entity_path: Utf8 -chunk_id: FixedSizeBinary[16] -chunk_is_static: bool -chunk_num_rows: u64 -elapsed_time:end: nullable Duration(ns) [ +chunk_byte_offset: non-null UInt64 +chunk_byte_size: non-null UInt64 +chunk_byte_size_uncompressed: non-null UInt64 +chunk_entity_path: non-null Utf8 +chunk_id: non-null FixedSizeBinary(16) +chunk_is_static: non-null Boolean +chunk_num_rows: non-null UInt64 +elapsed_time:end: Duration(ns) [ rerun:index: "elapsed time" ] 
-elapsed_time:example_MyPoints:colors:end: nullable Duration(ns) [ +elapsed_time:example_MyPoints:colors:end: Duration(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "elapsed time" ] -elapsed_time:example_MyPoints:colors:num_rows: nullable u64 [ +elapsed_time:example_MyPoints:colors:num_rows: UInt64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "elapsed time" ] -elapsed_time:example_MyPoints:colors:start: nullable Duration(ns) [ +elapsed_time:example_MyPoints:colors:start: Duration(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "elapsed time" ] -elapsed_time:example_MyPoints:points:end: nullable Duration(ns) [ +elapsed_time:example_MyPoints:points:end: Duration(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "elapsed time" ] -elapsed_time:example_MyPoints:points:num_rows: nullable u64 [ +elapsed_time:example_MyPoints:points:num_rows: UInt64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "elapsed time" ] -elapsed_time:example_MyPoints:points:start: nullable Duration(ns) [ +elapsed_time:example_MyPoints:points:start: Duration(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "elapsed time" ] -elapsed_time:start: nullable Duration(ns) [ +elapsed_time:start: Duration(ns) [ rerun:index: "elapsed time" ] -example_MyPoints:colors:has_static_data: bool [ +example_MyPoints:colors:has_static_data: non-null Boolean [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: 
"rerun:static" ] -example_MyPoints:labels:has_static_data: bool [ +example_MyPoints:labels:has_static_data: non-null Boolean [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: "example.MyLabel" rerun:index: "rerun:static" ] -example_MyPoints:points:has_static_data: bool [ +example_MyPoints:points:has_static_data: non-null Boolean [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "rerun:static" ] -frame_nr:end: nullable i64 [ +frame_nr:end: Int64 [ rerun:index: "frame_nr" ] -frame_nr:example_MyPoints:colors:end: nullable i64 [ +frame_nr:example_MyPoints:colors:end: Int64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "frame_nr" ] -frame_nr:example_MyPoints:colors:num_rows: nullable u64 [ +frame_nr:example_MyPoints:colors:num_rows: UInt64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "frame_nr" ] -frame_nr:example_MyPoints:colors:start: nullable i64 [ +frame_nr:example_MyPoints:colors:start: Int64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "frame_nr" ] -frame_nr:example_MyPoints:points:end: nullable i64 [ +frame_nr:example_MyPoints:points:end: Int64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "frame_nr" ] -frame_nr:example_MyPoints:points:num_rows: nullable u64 [ +frame_nr:example_MyPoints:points:num_rows: UInt64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "frame_nr" ] -frame_nr:example_MyPoints:points:start: nullable i64 [ +frame_nr:example_MyPoints:points:start: Int64 [ rerun:archetype: 
"example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "frame_nr" ] -frame_nr:start: nullable i64 [ +frame_nr:start: Int64 [ rerun:index: "frame_nr" ] -log_time:end: nullable Timestamp(ns) [ +log_time:end: Timestamp(ns) [ rerun:index: "log_time" ] -log_time:example_MyPoints:colors:end: nullable Timestamp(ns) [ +log_time:example_MyPoints:colors:end: Timestamp(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "log_time" ] -log_time:example_MyPoints:colors:num_rows: nullable u64 [ +log_time:example_MyPoints:colors:num_rows: UInt64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "log_time" ] -log_time:example_MyPoints:colors:start: nullable Timestamp(ns) [ +log_time:example_MyPoints:colors:start: Timestamp(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:index: "log_time" ] -log_time:example_MyPoints:points:end: nullable Timestamp(ns) [ +log_time:example_MyPoints:points:end: Timestamp(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "log_time" ] -log_time:example_MyPoints:points:num_rows: nullable u64 [ +log_time:example_MyPoints:points:num_rows: UInt64 [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "log_time" ] -log_time:example_MyPoints:points:start: nullable Timestamp(ns) [ +log_time:example_MyPoints:points:start: Timestamp(ns) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:index: "log_time" ] -log_time:start: nullable Timestamp(ns) [ +log_time:start: Timestamp(ns) [ rerun:index: "log_time" ] diff --git 
a/crates/store/re_log_types/Cargo.toml b/crates/store/re_log_types/Cargo.toml index ec431edc66e8..5ead290476ef 100644 --- a/crates/store/re_log_types/Cargo.toml +++ b/crates/store/re_log_types/Cargo.toml @@ -74,6 +74,7 @@ thiserror.workspace = true typenum.workspace = true uuid = { workspace = true, features = ["serde", "v4", "js"] } web-time.workspace = true +xxhash-rust.workspace = true # Optional dependencies: diff --git a/crates/store/re_log_types/src/entry_name.rs b/crates/store/re_log_types/src/entry_name.rs new file mode 100644 index 000000000000..daeedf8a5598 --- /dev/null +++ b/crates/store/re_log_types/src/entry_name.rs @@ -0,0 +1,93 @@ +use std::sync::Arc; + +/// Maximum length of an entry name. +const MAX_ENTRY_NAME_LENGTH: usize = 180; + +#[derive(Debug)] +pub struct InvalidEntryNameError(String); + +impl std::fmt::Display for InvalidEntryNameError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl std::error::Error for InvalidEntryNameError {} + +impl From for InvalidEntryNameError { + fn from(s: String) -> Self { + Self(s) + } +} + +/// A validated entry name. +/// +/// Entry names must: +/// - Be at most 180 characters long +/// - Only contain ASCII alphanumeric characters, underscores, hyphens, dots, spaces, +/// brackets, and colons +/// +/// Uses an `Arc` internally to allow for cheap cloning. +// TODO(RR-3718): Entry names should support a broader set of characters. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(serde::Serialize))] // Only used for tests +#[cfg_attr(feature = "serde", serde(transparent))] +pub struct EntryName(Arc); + +impl EntryName { + /// The name of the system entries table (`__entries`). + pub fn entries_table() -> Self { + #[expect(clippy::unwrap_used)] // Constant string, cannot fail validation. + Self::new("__entries").unwrap() + } + + /// Create a new entry name, validating that it conforms to the naming rules. 
+ pub fn new(name: impl Into) -> Result { + let name = name.into(); + + if MAX_ENTRY_NAME_LENGTH < name.len() { + return Err(InvalidEntryNameError(format!( + "name '{name}' exceeds maximum length of {MAX_ENTRY_NAME_LENGTH} characters (got {})", + name.len() + ))); + } + + if let Some(ch) = name.chars().find(|c| { + !c.is_ascii_alphanumeric() + && *c != '_' + && *c != '-' + && *c != '.' + && *c != ' ' + && *c != '[' + && *c != ']' + && *c != ':' + }) { + return Err(InvalidEntryNameError(format!( + "name '{name}' contains invalid character '{ch}'" + ))); + } + + Ok(Self(Arc::from(name))) + } + + /// The name of the blueprint dataset associated with a given dataset entry. + pub fn blueprint_for(dataset_id: crate::EntryId) -> Self { + Self::new(format!("__bp_{dataset_id}")) + .expect("EntryId can always be converted to a valid entry name") + } + + /// Hidden entries have names starting with `__` (e.g. `__entries`, `__bp_…`). + pub fn is_hidden(&self) -> bool { + self.0.starts_with("__") + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl std::fmt::Display for EntryName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} diff --git a/crates/store/re_log_types/src/index/mod.rs b/crates/store/re_log_types/src/index/mod.rs index bcb64a89fd52..26408a8a1505 100644 --- a/crates/store/re_log_types/src/index/mod.rs +++ b/crates/store/re_log_types/src/index/mod.rs @@ -24,4 +24,4 @@ pub use self::time_type::TimeType; pub use self::timeline::Timeline; pub use self::timeline_point::TimelinePoint; pub use self::timestamp::Timestamp; -pub use self::timestamp_format::{TimestampFormat, TimestampFormatKind}; +pub use self::timestamp_format::{DateVisibility, TimestampFormat, TimestampFormatKind}; diff --git a/crates/store/re_log_types/src/index/timeline.rs b/crates/store/re_log_types/src/index/timeline.rs index a30a6194fd78..d5b6584ef22e 100644 --- a/crates/store/re_log_types/src/index/timeline.rs +++ 
b/crates/store/re_log_types/src/index/timeline.rs @@ -101,6 +101,51 @@ impl Timeline { pub fn datatype(&self) -> arrow::datatypes::DataType { self.typ.datatype() } + + /// Whether this is a built-in timeline (`log_time` or `log_tick`) as opposed to a + /// user-defined one. + #[inline] + pub fn is_builtin(&self) -> bool { + *self == Self::log_time() || *self == Self::log_tick() + } + + /// Pick the most likely "default" timeline from a set of candidates. + /// + /// Priority (highest first): + /// 1. `message_log_time` (present in MCAP imports, common in robotics) + /// 2. Other user-defined timelines + /// 3. `log_time` + /// 4. `log_tick` + /// + /// Among timelines of the same priority, the one with the higher `score` wins + /// (e.g. row count). + /// Falls back to `log_time` if the iterator is empty. + pub fn pick_best_timeline<'a>( + timelines: impl IntoIterator, + score: impl Fn(&Self) -> u64, + ) -> Self { + fn priority(timeline: &Timeline) -> u8 { + if timeline.name().as_str() == "message_log_time" { + 3 + } else if *timeline == Timeline::log_tick() { + 0 + } else if *timeline == Timeline::log_time() { + 1 + } else { + 2 // user-defined + } + } + + timelines + .into_iter() + .max_by(|a, b| { + priority(a) + .cmp(&priority(b)) + .then_with(|| score(a).cmp(&score(b))) + }) + .copied() + .unwrap_or_else(Self::log_time) + } } impl nohash_hasher::IsEnabled for Timeline {} @@ -124,3 +169,83 @@ impl std::hash::Hash for Timeline { state.write_u64(self.name.hash() ^ self.typ.hash()); } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pick_best_timeline() { + let log_time = Timeline::log_time(); + let log_tick = Timeline::log_tick(); + let custom_timeline0 = Timeline::new("my_timeline0", TimeType::DurationNs); + let custom_timeline1 = Timeline::new("my_timeline1", TimeType::DurationNs); + + // With equal row counts, priority alone decides. 
+ let equal = |_: &Timeline| 42_u64; + + assert_eq!(Timeline::pick_best_timeline([], equal), log_time); + assert_eq!(Timeline::pick_best_timeline([&log_tick], equal), log_tick); + assert_eq!(Timeline::pick_best_timeline([&log_time], equal), log_time); + assert_eq!( + Timeline::pick_best_timeline([&log_time, &log_tick], equal), + log_time + ); + assert_eq!( + Timeline::pick_best_timeline([&log_time, &log_tick, &custom_timeline0], equal,), + custom_timeline0 + ); + assert_eq!( + Timeline::pick_best_timeline([&custom_timeline0, &log_time, &log_tick], equal,), + custom_timeline0 + ); + assert_eq!( + Timeline::pick_best_timeline([&log_time, &custom_timeline0, &log_tick], equal,), + custom_timeline0 + ); + assert_eq!( + Timeline::pick_best_timeline([&custom_timeline0, &log_time], equal), + custom_timeline0 + ); + assert_eq!( + Timeline::pick_best_timeline([&custom_timeline0, &log_tick], equal), + custom_timeline0 + ); + assert_eq!( + Timeline::pick_best_timeline([&log_time, &custom_timeline0], equal), + custom_timeline0 + ); + assert_eq!( + Timeline::pick_best_timeline([&log_tick, &custom_timeline0], equal), + custom_timeline0 + ); + assert_eq!( + Timeline::pick_best_timeline([&custom_timeline0], equal), + custom_timeline0 + ); + + // Row count breaks ties between timelines with the same priority. + let more_rows_on_1 = |t: &Timeline| { + if *t == custom_timeline1 { 100 } else { 10 } + }; + assert_eq!( + Timeline::pick_best_timeline([&custom_timeline0, &custom_timeline1], more_rows_on_1), + custom_timeline1 + ); + assert_eq!( + Timeline::pick_best_timeline([&custom_timeline1, &custom_timeline0], more_rows_on_1), + custom_timeline1 + ); + + // `message_log_time` beats all other timelines (even user-defined ones). 
+ let message_log_time = Timeline::new("message_log_time", TimeType::TimestampNs); + assert_eq!( + Timeline::pick_best_timeline([&custom_timeline0, &message_log_time, &log_time], equal), + message_log_time + ); + assert_eq!( + Timeline::pick_best_timeline([&log_time, &log_tick, &message_log_time], equal), + message_log_time + ); + } +} diff --git a/crates/store/re_log_types/src/index/timestamp.rs b/crates/store/re_log_types/src/index/timestamp.rs index 8e37f0f0d849..b724d052da0a 100644 --- a/crates/store/re_log_types/src/index/timestamp.rs +++ b/crates/store/re_log_types/src/index/timestamp.rs @@ -2,8 +2,8 @@ use std::ops::RangeInclusive; use std::str::FromStr as _; use super::{Duration, TimestampFormat}; -use crate::TimestampFormatKind; use crate::external::re_types_core; +use crate::{DateVisibility, TimestampFormatKind}; /// Encodes a timestamp in nanoseconds since unix epoch. /// @@ -197,11 +197,15 @@ impl Timestamp { let tz = timestamp_format.to_jiff_time_zone(); let zoned = timestamp.to_zoned(tz.clone()); - let is_today = zoned.date() == jiff::Timestamp::now().to_zoned(tz.clone()).date(); - - let formatted = if timestamp_format.short() - || (timestamp_format.hide_today_date() && is_today) - { + let hide_date = timestamp_format.short() + || match timestamp_format.date_visibility() { + DateVisibility::ShowDate => false, + DateVisibility::HideDate => true, + DateVisibility::HideDateToday => { + zoned.date() == jiff::Timestamp::now().to_zoned(tz.clone()).date() + } + }; + let formatted = if hide_date { zoned.strftime("%H:%M:%S").to_string() } else { zoned.strftime("%Y-%m-%d %H:%M:%S").to_string() @@ -289,7 +293,10 @@ impl Timestamp { // Parse as seconds and convert to nanoseconds let seconds = s.parse::().ok()?; Some(Self::from_secs_since_epoch(seconds)) - } else if timestamp_format.hide_today_date() { + } else if matches!( + timestamp_format.date_visibility(), + DateVisibility::HideDateToday | DateVisibility::HideDate + ) { // Maybe this is a naked timestamp 
without any date? let tz = timestamp_format.to_jiff_time_zone(); @@ -439,7 +446,7 @@ mod tests { .unwrap(); let datetime = Timestamp::from(today.clone()); assert_eq!( - datetime.format(TimestampFormat::utc().with_hide_today_date(false)), + datetime.format(TimestampFormat::utc().with_date_visibility(DateVisibility::ShowDate)), format!("{} 22:35:42Z", today.strftime("%Y-%m-%d")) ); } diff --git a/crates/store/re_log_types/src/index/timestamp_format.rs b/crates/store/re_log_types/src/index/timestamp_format.rs index b1279b6d6c3b..fe97dbff4ce4 100644 --- a/crates/store/re_log_types/src/index/timestamp_format.rs +++ b/crates/store/re_log_types/src/index/timestamp_format.rs @@ -18,33 +18,35 @@ pub enum TimestampFormatKind { SecondsSinceUnixEpoch, } +/// Controls whether the date part of a timestamp is shown. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub enum DateVisibility { + /// Always show the date. + ShowDate, + + /// Hide the date when it's today. + #[default] + HideDateToday, + + /// Always hide the date. + HideDate, +} + /// How to display a [`crate::Timestamp`]. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct TimestampFormat { /// What kind of format to use. format_kind: TimestampFormatKind, - /// For date-time format kinds, should we omit the date part when it's today? - /// - /// By default, we do, but having this toggle is convenient for the uses-cases where omitting - /// the date part is not desirable. - hide_today_date: bool, + /// For date-time format kinds, controls whether the date part is shown. + date_visibility: DateVisibility, /// For date-time format kinds, should we omit date, nanos and suffix? 
short: bool, } -impl Default for TimestampFormat { - fn default() -> Self { - Self { - format_kind: Default::default(), - hide_today_date: true, - short: false, - } - } -} - impl From for TimestampFormat { fn from(value: TimestampFormatKind) -> Self { Self { @@ -75,8 +77,8 @@ impl TimestampFormat { self.format_kind } - pub fn with_hide_today_date(mut self, hide_date_when_today: bool) -> Self { - self.hide_today_date = hide_date_when_today; + pub fn with_date_visibility(mut self, date_visibility: DateVisibility) -> Self { + self.date_visibility = date_visibility; self } @@ -85,8 +87,8 @@ impl TimestampFormat { self } - pub fn hide_today_date(&self) -> bool { - self.hide_today_date + pub fn date_visibility(&self) -> DateVisibility { + self.date_visibility } pub fn short(&self) -> bool { diff --git a/crates/store/re_log_types/src/lib.rs b/crates/store/re_log_types/src/lib.rs index fee37040e135..14e1068971f1 100644 --- a/crates/store/re_log_types/src/lib.rs +++ b/crates/store/re_log_types/src/lib.rs @@ -19,6 +19,7 @@ pub mod arrow_msg; mod entry_id; +mod entry_name; pub mod example_components; pub mod hash; mod index; @@ -40,10 +41,11 @@ pub use re_types_core::TimelineName; pub use self::arrow_msg::{ArrowMsg, ArrowRecordBatchReleaseCallback}; pub use self::entry_id::{EntryId, EntryIdOrName}; +pub use self::entry_name::{EntryName, InvalidEntryNameError}; pub use self::index::{ - AbsoluteTimeRange, AbsoluteTimeRangeF, Duration, NonMinI64, TimeCell, TimeInt, TimePoint, - TimeReal, TimeType, Timeline, TimelinePoint, Timestamp, TimestampFormat, TimestampFormatKind, - TryFromIntError, + AbsoluteTimeRange, AbsoluteTimeRangeF, DateVisibility, Duration, NonMinI64, TimeCell, TimeInt, + TimePoint, TimeReal, TimeType, Timeline, TimelinePoint, Timestamp, TimestampFormat, + TimestampFormatKind, TryFromIntError, }; pub use self::instance::Instance; pub use self::path::*; diff --git a/crates/store/re_log_types/src/path/entity_path.rs b/crates/store/re_log_types/src/path/entity_path.rs 
index 6d71cc91ac70..9ef0d1e612ca 100644 --- a/crates/store/re_log_types/src/path/entity_path.rs +++ b/crates/store/re_log_types/src/path/entity_path.rs @@ -1,3 +1,4 @@ +use std::hash::{Hash as _, Hasher as _}; use std::sync::Arc; use ahash::{HashMap, HashSet}; @@ -78,7 +79,7 @@ impl std::fmt::Debug for EntityPathHash { /// When written as a string, some characters in the parts need to be escaped with a `\` /// (only character, numbers, `.`, `-`, `_` does not need escaping). /// -/// See for more on entity paths. +/// See for more on entity paths. /// /// This is basically implemented as a list of strings, but is reference-counted internally, so it is cheap to clone. /// It also has a precomputed hash and implemented [`nohash_hasher::IsEnabled`], @@ -269,6 +270,16 @@ impl EntityPath { self.hash.hash64() } + /// Calculates a deterministic hash of the entity path. + /// + /// This is useful for generating deterministic IDs for visualizer instructions. By default, + /// self.hash is generated using ahash, which works differently on web and native. + pub fn calculate_deterministic_hash(&self) -> u64 { + let mut hasher = xxhash_rust::xxh64::Xxh64::new(0); + self.parts.hash(&mut hasher); + hasher.finish() + } + /// Return [`None`] if root. #[must_use] pub fn parent(&self) -> Option { diff --git a/crates/store/re_log_types/src/path/entity_path_filter.rs b/crates/store/re_log_types/src/path/entity_path_filter.rs index 2eb784b37d4e..1fb870587306 100644 --- a/crates/store/re_log_types/src/path/entity_path_filter.rs +++ b/crates/store/re_log_types/src/path/entity_path_filter.rs @@ -431,10 +431,9 @@ impl EntityPathFilter { /// Resolve variables & parse paths, ignoring any errors. /// - /// If there is no mention of [`EntityPath::properties`] in the filter, it will be added. + /// Automatically excludes `__properties/**` unless the user explicitly + /// provides a rule for that exact subtree. 
pub fn resolve_forgiving(&self, subst_env: &EntityPathSubs) -> ResolvedEntityPathFilter { - let mut seen_properties = false; - let mut rules: BTreeMap = self .rules .iter() @@ -444,19 +443,15 @@ impl EntityPathFilter { *effect, ) }) - .inspect(|(ResolvedEntityPathRule { resolved_path, .. }, _)| { - if resolved_path.starts_with(&EntityPath::properties()) { - seen_properties = true; - } - }) .collect(); - if !seen_properties { - rules.insert( - ResolvedEntityPathRule::including_subtree(&EntityPath::properties()), - RuleEffect::Exclude, - ); - } + // Default-exclude `__properties/**`, but don't overwrite if the user + // already has an explicit rule for that exact subtree. + rules + .entry(ResolvedEntityPathRule::including_subtree( + &EntityPath::properties(), + )) + .or_insert(RuleEffect::Exclude); ResolvedEntityPathFilter { rules } } @@ -466,29 +461,21 @@ impl EntityPathFilter { self, subst_env: &EntityPathSubs, ) -> Result { - let mut seen_properties = false; - let mut rules = self .rules .into_iter() .map(|(rule, effect)| { ResolvedEntityPathRule::parse_strict(&rule, subst_env).map(|r| (r, effect)) }) - .inspect(|maybe_rule| { - if let Ok((ResolvedEntityPathRule { resolved_path, .. }, _)) = maybe_rule - && resolved_path.starts_with(&EntityPath::properties()) - { - seen_properties = true; - } - }) .collect::, _>>()?; - if !seen_properties { - rules.insert( - ResolvedEntityPathRule::including_subtree(&EntityPath::properties()), - RuleEffect::Exclude, - ); - } + // Default-exclude `__properties/**`, but don't overwrite if the user + // already has an explicit rule for that exact subtree. 
+ rules + .entry(ResolvedEntityPathRule::including_subtree( + &EntityPath::properties(), + )) + .or_insert(RuleEffect::Exclude); Ok(ResolvedEntityPathFilter { rules }) } @@ -1566,6 +1553,46 @@ mod tests { ) ); + // Including a specific __properties subpath should not expose sibling + // __properties paths — the auto-exclusion of __properties/** must still + // apply, with the specific path winning via specificity. + let filter = EntityPathFilter::parse_forgiving( + r#" + + /** + + /__properties/this + "#, + ); + let resolved = filter.resolve_forgiving(&EntityPathSubs::empty()); + assert!( + resolved.matches(&EntityPath::from("/__properties/this")), + "explicitly included __properties subpath should match" + ); + assert!( + !resolved.matches(&EntityPath::from("/__properties/that")), + "sibling __properties path should still be excluded by auto-exclusion" + ); + assert!( + !resolved.matches(&EntityPath::from("/__properties")), + "__properties root should still be excluded by auto-exclusion" + ); + + // But explicitly including __properties/** should override the auto-exclusion. + let filter = EntityPathFilter::parse_forgiving( + r#" + + /** + + /__properties/** + "#, + ); + let resolved = filter.resolve_forgiving(&EntityPathSubs::empty()); + assert!( + resolved.matches(&EntityPath::from("/__properties/this")), + "__properties subpath should match when subtree is explicitly included" + ); + assert!( + resolved.matches(&EntityPath::from("/__properties/that")), + "__properties subpath should match when subtree is explicitly included" + ); + // If the subpaths of properties are _excluded_ they should be present. // We should omit the properties from the unresolved filter. 
let filter = EntityPathFilter::parse_forgiving( diff --git a/crates/store/re_log_types/src/path/entity_path_part.rs b/crates/store/re_log_types/src/path/entity_path_part.rs index a3cbe1b123af..e0c4da3e74bc 100644 --- a/crates/store/re_log_types/src/path/entity_path_part.rs +++ b/crates/store/re_log_types/src/path/entity_path_part.rs @@ -1,3 +1,5 @@ +use std::fmt::Write as _; + use re_string_interner::InternedString; use crate::PathParseError; @@ -203,7 +205,7 @@ impl EntityPathPart { } c => { // Rust-style unicode escape, e.g. `\u{262E}`. - s.push_str(&format!("\\u{{{:04X}}}", c as u32)); + write!(s, "\\u{{{:04X}}}", c as u32).ok(); } } } diff --git a/crates/store/re_log_types/src/path/mod.rs b/crates/store/re_log_types/src/path/mod.rs index ad51db9077a6..dda78e624f52 100644 --- a/crates/store/re_log_types/src/path/mod.rs +++ b/crates/store/re_log_types/src/path/mod.rs @@ -19,7 +19,7 @@ pub use entity_path_filter::{ ResolvedEntityPathFilter, ResolvedEntityPathRule, RuleEffect, }; pub use entity_path_part::EntityPathPart; -pub use parse_path::{PathParseError, tokenize_by}; +pub use parse_path::{PathParseError, forgiving_parse_cache_bytes_used, tokenize_by}; // ---------------------------------------------------------------------------- diff --git a/crates/store/re_log_types/src/path/parse_path.rs b/crates/store/re_log_types/src/path/parse_path.rs index 25c530e911b1..8576f629e1e7 100644 --- a/crates/store/re_log_types/src/path/parse_path.rs +++ b/crates/store/re_log_types/src/path/parse_path.rs @@ -11,6 +11,12 @@ use crate::{ComponentPath, DataPath, EntityPath, EntityPathPart, Instance, hash: static FORGIVING_PARSE_CACHE: LazyLock>> = LazyLock::new(|| RwLock::new(IntMap::default())); +/// Approximate heap usage of the global forgiving path parse cache. 
+pub fn forgiving_parse_cache_bytes_used() -> u64 { + use re_byte_size::SizeBytes as _; + FORGIVING_PARSE_CACHE.read().heap_size_bytes() +} + #[derive(thiserror::Error, Debug, PartialEq, Eq)] pub enum PathParseError { #[error("Expected path, found empty string")] diff --git a/crates/store/re_mcap/Cargo.toml b/crates/store/re_mcap/Cargo.toml index 51a1860667f8..f19d1e8f6a94 100644 --- a/crates/store/re_mcap/Cargo.toml +++ b/crates/store/re_mcap/Cargo.toml @@ -29,6 +29,7 @@ byteorder.workspace = true cdr-encoding.workspace = true mcap.workspace = true prost-reflect.workspace = true +regex-lite.workspace = true saturating_cast.workspace = true serde.workspace = true serde_bytes.workspace = true @@ -36,7 +37,7 @@ strum = { workspace = true, features = ["derive"] } thiserror.workspace = true [dev-dependencies] -crossbeam.workspace = true insta = { workspace = true, features = ["filters", "redactions"] } prost-reflect = { workspace = true, features = ["text-format"] } +re_arrow_util.workspace = true re_log = { workspace = true, features = ["setup"] } diff --git a/crates/store/re_mcap/src/decoders/metadata.rs b/crates/store/re_mcap/src/decoders/metadata.rs new file mode 100644 index 000000000000..8a861fb570f2 --- /dev/null +++ b/crates/store/re_mcap/src/decoders/metadata.rs @@ -0,0 +1,214 @@ +use std::collections::BTreeMap; + +use re_chunk::{Chunk, EntityPath, RowId, TimePoint}; +use re_sdk_types::{ + Component as _, ComponentBatch as _, ComponentDescriptor, SerializedComponentBatch, components, + datatypes, +}; + +use super::{Decoder, DecoderIdentifier}; +use crate::Error; + +/// Extracts [`mcap::records::Metadata`] records from an MCAP file as a single static chunk. +/// +/// Outputs a single `McapMetadata` archetype at [`EntityPath::properties()`], +/// with one [`components::KeyValuePairs`] component per metadata record. 
+#[derive(Debug, Default)] +pub struct McapMetadataDecoder; + +const ARCHETYPE_NAME: &str = "McapMetadata"; +const ROSBAG2_METADATA_NAME: &str = "rosbag2"; + +impl Decoder for McapMetadataDecoder { + fn identifier() -> DecoderIdentifier { + "metadata".into() + } + + fn process( + &mut self, + mcap_bytes: &[u8], + summary: &mcap::Summary, + _topic_filter: &super::TopicFilter, + emit: &mut dyn FnMut(Chunk), + ) -> Result<(), Error> { + if summary.metadata_indexes.is_empty() { + return Ok(()); + } + + // We can encounter multiple metadata records with the same name. + // Collect all metadata records by name, merging key-value pairs from records with the same name. + let mut metadata_by_name: BTreeMap> = BTreeMap::new(); + + for index in &summary.metadata_indexes { + let metadata = match mcap::read::metadata(mcap_bytes, index) { + Ok(metadata) => metadata, + Err(err) => { + re_log::warn_once!( + "Failed to read MCAP metadata record '{}': {err}", + index.name + ); + continue; + } + }; + + if metadata.name == ROSBAG2_METADATA_NAME { + // "rosbag2" is a dump of the metadata YAML file that is specific to ROS2's rosbag2 tool. + // It's mainly a backwards-compatibility feature for conversion to the legacy SQL rosbag format, + // so we can safely ignore it (it is potentially large). 
+ // See also: https://docs.ros.org/en/kilted/Releases/Release-Jazzy-Jalisco.html#store-serialized-metadata-in-bag-files-directly + re_log::debug_once!( + "Skipping ROS MCAP metadata record '{}' as it is not relevant for Rerun.", + ROSBAG2_METADATA_NAME + ); + continue; + } + + re_log::debug!( + "Processing MCAP metadata record '{}' with {} entries", + metadata.name, + metadata.metadata.len(), + ); + + let entries = metadata_by_name.entry(metadata.name.clone()).or_default(); + for (key, value) in &metadata.metadata { + if entries.insert(key.clone(), value.clone()).is_some() { + re_log::warn_once!( + "Key '{key}' appears in multiple MCAP metadata records named '{}'", + metadata.name + ); + } + } + } + + let mut batches: Vec = Vec::new(); + for (name, entries) in metadata_by_name { + let pairs: Vec<_> = entries + .into_iter() + .map(|(key, value)| datatypes::Utf8Pair { + first: key.into(), + second: value.into(), + }) + .collect(); + let kv = components::KeyValuePairs(pairs); + batches.push(kv.try_serialized(ComponentDescriptor { + archetype: Some(ARCHETYPE_NAME.into()), + component: name.into(), + component_type: Some(components::KeyValuePairs::name()), + })?); + } + + if !batches.is_empty() { + let chunk = Chunk::builder(EntityPath::properties()) + .with_serialized_batches(RowId::new(), TimePoint::STATIC, batches) + .build()?; + emit(chunk); + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::collections::BTreeMap; + use std::io; + + use re_chunk::Chunk; + + use re_log_types::TimeType; + + use crate::DecoderRegistry; + + use super::*; + + /// Helper function to run the metadata decoder and collect emitted chunks. 
+ fn run_metadata_decoder(buffer: &[u8]) -> Vec { + let reader = io::Cursor::new(buffer); + let summary = crate::read_summary(reader) + .expect("failed to read summary") + .expect("no summary found"); + + let mut chunks = Vec::new(); + let registry = DecoderRegistry::empty().register_file_decoder::(); + registry + .plan(buffer, &summary, &crate::TopicFilter::default()) + .expect("failed to plan") + .run(buffer, &summary, TimeType::TimestampNs, &mut |chunk| { + chunks.push(chunk); + }) + .expect("failed to run decoder"); + chunks + } + + /// Tests that multiple metadata records are merged into a single chunk with one component per metadata. + #[test] + fn test_multiple_metadata_records() { + let buffer = { + let cursor = io::Cursor::new(Vec::new()); + let mut writer = mcap::Writer::new(cursor).expect("failed to create writer"); + + for i in 0..3 { + let mut key_values = BTreeMap::new(); + key_values.insert("index".to_owned(), i.to_string()); + writer + .write_metadata(&mcap::records::Metadata { + name: format!("meta_{i}"), + metadata: key_values, + }) + .expect("failed to write metadata"); + } + + writer.finish().expect("failed to finish writer"); + writer.into_inner().into_inner() + }; + + let chunks = run_metadata_decoder(&buffer); + assert_eq!(chunks.len(), 1, "all metadata in a single chunk"); + + let chunk = &chunks[0]; + assert_eq!(chunk.entity_path(), &EntityPath::properties()); + assert!(chunk.is_static()); + assert_eq!(chunk.num_components(), 3); + } + + /// Tests that two metadata records with the same name are merged into one component. 
+ #[test] + fn test_duplicate_metadata_names() { + let buffer = { + const METADATA_NAME: &str = "duplicated_metadata_name"; + let cursor = io::Cursor::new(Vec::new()); + let mut writer = mcap::Writer::new(cursor).expect("failed to create writer"); + + let mut first = BTreeMap::new(); + first.insert("key_a".to_owned(), "value_a".to_owned()); + writer + .write_metadata(&mcap::records::Metadata { + name: METADATA_NAME.to_owned(), + metadata: first, + }) + .expect("failed to write metadata"); + + let mut second = BTreeMap::new(); + second.insert("key_b".to_owned(), "value_b".to_owned()); + writer + .write_metadata(&mcap::records::Metadata { + name: METADATA_NAME.to_owned(), + metadata: second, + }) + .expect("failed to write metadata"); + + writer.finish().expect("failed to finish writer"); + writer.into_inner().into_inner() + }; + + let chunks = run_metadata_decoder(&buffer); + assert_eq!(chunks.len(), 1); + + let chunk = &chunks[0]; + assert_eq!( + chunk.num_components(), + 1, + "duplicates merged into one component" + ); + } +} diff --git a/crates/store/re_mcap/src/decoders/mod.rs b/crates/store/re_mcap/src/decoders/mod.rs new file mode 100644 index 000000000000..814951458d8e --- /dev/null +++ b/crates/store/re_mcap/src/decoders/mod.rs @@ -0,0 +1,898 @@ +mod metadata; +mod protobuf; +mod raw; +mod recording_info; +mod ros2; +mod ros2_reflection; +mod schema; +mod stats; + +use std::collections::{BTreeMap, BTreeSet}; + +use re_chunk::external::nohash_hasher::IntMap; +use re_chunk::{Chunk, EntityPath}; +use re_log_types::TimeType; + +pub use self::metadata::McapMetadataDecoder; +pub use self::protobuf::McapProtobufDecoder; +pub use self::raw::McapRawDecoder; +pub use self::recording_info::McapRecordingInfoDecoder; +pub use self::ros2::McapRos2Decoder; +pub use self::ros2_reflection::McapRos2ReflectionDecoder; +pub use self::schema::McapSchemaDecoder; +pub use self::stats::McapStatisticDecoder; +use crate::Error; +use crate::parsers::{ChannelId, MessageParser, 
ParserContext}; +use crate::util::collect_empty_channels; + +/// Globally unique identifier for a decoder. +#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] +#[repr(transparent)] +pub struct DecoderIdentifier(String); + +impl From<&'static str> for DecoderIdentifier { + fn from(value: &'static str) -> Self { + Self(value.to_owned()) + } +} + +impl From for DecoderIdentifier { + fn from(value: String) -> Self { + Self(value) + } +} + +impl std::fmt::Display for DecoderIdentifier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +/// A decoder describes information that can be extracted from an MCAP file. +/// +/// It is the most general level at which we can interpret an MCAP file and can +/// be used to either output general information about the MCAP file or to call +/// into decoders that work on a per-message basis via the [`MessageDecoder`] trait. +pub trait Decoder { + /// Globally unique identifier for this decoder. + /// + /// [`DecoderIdentifier`]s are also used to select only a subset of active decoders. + fn identifier() -> DecoderIdentifier + where + Self: Sized; + + /// The processing that needs to happen for this decoder. + /// + /// This function has access to the entire MCAP file via `mcap_bytes`. + /// + /// `topic_filter` is provided so per-channel decoders can skip channels + /// the user has filtered out. File-level decoders (those that don't iterate + /// channels) may ignore it. + // TODO(michael): consider introducing a `DecoderContext` for holding the processing context. + // TODO(#10862): Consider abstracting over `Summary` to allow more convenient / performant indexing. + // For example, we probably don't want to store the entire file in memory. + fn process( + &mut self, + mcap_bytes: &[u8], + summary: &::mcap::Summary, + topic_filter: &TopicFilter, + emit: &mut dyn FnMut(Chunk), + ) -> Result<(), Error>; +} + +/// Can be used to extract per-message information from an MCAP file.
+/// +/// This is a specialization of [`Decoder`] that allows defining [`MessageParser`]s +/// to interpret the contents of MCAP chunks. +pub trait MessageDecoder { + fn identifier() -> DecoderIdentifier + where + Self: Sized; + + fn init(&mut self, _summary: &::mcap::Summary) -> Result<(), Error> { + Ok(()) + } + + /// Returns `true` if this decoder can handle the given channel. + /// + /// This method is used to determine which channels should be processed by which decoders, + /// particularly for implementing fallback behavior where one decoder handles channels + /// that other decoders cannot process. + fn supports_channel(&self, channel: &mcap::Channel<'_>) -> bool; + + /// Instantiates a new [`MessageParser`] that expects `num_rows` if it is interested in the current channel. + /// + /// Otherwise returns `None`. + /// + /// The `num_rows` argument allows parsers to pre-allocate storage with the + /// correct capacity, avoiding reallocations during message processing. + fn message_parser( + &self, + channel: &mcap::Channel<'_>, + num_rows: usize, + ) -> Option>; +} + +type Parser = (ParserContext, Box); + +/// Decodes batches of messages from an MCAP into Rerun chunks using previously registered parsers. +struct McapChunkDecoder { + parsers: IntMap, + time_type: TimeType, +} + +impl McapChunkDecoder { + pub fn new(parsers: IntMap, time_type: TimeType) -> Self { + Self { parsers, time_type } + } + + /// Decode the next message in the chunk + pub fn decode_next(&mut self, msg: &::mcap::Message<'_>) -> Result<(), Error> { + re_tracing::profile_function!(); + + let channel = msg.channel.as_ref(); + let channel_id = ChannelId(channel.id); + + if let Some((ctx, parser)) = self.parsers.get_mut(&channel_id) { + // If the parser fails, we should _not_ append the timepoint + parser.append(ctx, msg)?; + for timepoint in parser.get_log_and_publish_timepoints(msg, self.time_type)?
{ + ctx.add_timepoint(timepoint); + } + } else { + // TODO(#10862): If we encounter a message that we can't parse at all we should emit a warning. + // Note that this is quite easy to achieve when using decoders and only selecting a subset. + // However, to not overwhelm the user this should be reported in a _single_ static chunk, + // so this is not the right place for this. Maybe we need to introduce something like a "report". + } + Ok(()) + } + + /// Finish the decoding process and return the chunks. + pub fn finish(self) -> impl Iterator> { + self.parsers + .into_values() + .flat_map(|(ctx, parser)| match parser.finalize(ctx) { + Ok(chunks) => chunks.into_iter().map(Ok).collect::>(), + Err(err) => vec![Err(Error::Other(err))], + }) + } +} + +/// Used to select certain decoders. +#[derive(Clone, Debug)] +pub enum SelectedDecoders { + All, + Subset(BTreeSet), +} + +impl SelectedDecoders { + /// Checks if a decoder is part of the current selection. + pub fn contains(&self, value: &DecoderIdentifier) -> bool { + match self { + Self::All => true, + Self::Subset(subset) => subset.contains(value), + } + } +} + +/// Regex-based filter selecting which MCAP topics to decode. +/// +/// Patterns use [RE2 syntax](https://github.com/google/re2/wiki/Syntax). +/// +/// A topic is kept if: +/// - `include` is empty, **or** any pattern in `include` matches; **and** +/// - no pattern in `exclude` matches. +/// +/// Patterns are not implicitly anchored; use `^` / `$` if you need anchoring.
+#[derive(Default, Clone, Debug)] +pub struct TopicFilter { + include: Vec, + exclude: Vec, +} + +impl TopicFilter { + pub fn with_include_patterns(mut self, include: &[String]) -> Result { + self.include = include + .iter() + .map(|pattern| regex_lite::Regex::new(pattern)) + .collect::, _>>()?; + Ok(self) + } + + pub fn with_exclude_patterns(mut self, exclude: &[String]) -> Result { + self.exclude = exclude + .iter() + .map(|pattern| regex_lite::Regex::new(pattern)) + .collect::, _>>()?; + Ok(self) + } + + /// Returns `true` if the given topic passes the filter. + pub fn matches(&self, topic: &str) -> bool { + let included = self.include.is_empty() || self.include.iter().any(|r| r.is_match(topic)); + let excluded = self.exclude.iter().any(|r| r.is_match(topic)); + included && !excluded + } + + /// Returns `true` if no patterns are configured (i.e. all topics pass). + pub fn is_empty(&self) -> bool { + self.include.is_empty() && self.exclude.is_empty() + } +} + +/// Registry fallback strategy. +#[derive(Clone, Debug, Default)] +pub enum Fallback { + /// No fallback – channels without a handler are simply unassigned. + #[default] + None, + + /// Single global fallback message decoder (e.g. `raw`). + Global(DecoderIdentifier), +} + +/// A runner that constrains a [`MessageDecoder`] to a specific set of channels. +pub struct MessageDecoderRunner { + inner: Box, + allowed: BTreeSet, +} + +impl MessageDecoderRunner { + fn new(inner: Box, allowed: BTreeSet) -> Self { + Self { inner, allowed } + } + + fn process( + &mut self, + mcap_bytes: &[u8], + summary: &mcap::Summary, + time_type: TimeType, + emit: &mut dyn FnMut(Chunk), + ) -> Result<(), Error> { + self.inner.init(summary)?; + + for chunk in &summary.chunk_indexes { + let parsers = summary + .read_message_indexes(mcap_bytes, chunk)? 
+ .iter() + .filter_map(|(channel, msg_offsets)| { + let channel_id = ChannelId::from(channel.id); + if !self.allowed.contains(&channel_id) { + return None; + } + + let parser = self.inner.message_parser(channel, msg_offsets.len())?; + let entity_path = EntityPath::from(channel.topic.as_str()); + let ctx = ParserContext::new(entity_path, channel.topic.clone(), time_type); + Some((channel_id, (ctx, parser))) + }) + .collect::>(); + + let mut decoder = McapChunkDecoder::new(parsers, time_type); + + for msg in summary.stream_chunk(mcap_bytes, chunk)? { + match msg { + Ok(message) => { + if let Err(err) = decoder.decode_next(&message) { + re_log::error_once!( + "Failed to decode message on channel {}: {err}", + message.channel.topic + ); + } + } + Err(err) => re_log::error!("Failed to read message from MCAP file: {err}"), + } + } + + for mut chunk in decoder.finish() { + if let Ok(chunk) = &mut chunk { + chunk.sort_if_unsorted(); + for (name, column) in chunk.timelines() { + if !column.is_sorted() { + let entity_path = chunk.entity_path(); + re_log::warn_once!( + "Found unsorted timeline '{name}' for entity '{entity_path}'. This may lead to suboptimal performance.", + ); + } + } + } + + match chunk { + Ok(c) => emit(c), + Err(err) => re_log::error!("Failed to decode chunk: {err}"), + } + } + } + + Ok(()) + } +} + +/// A printable assignment used for dry-runs / UI. +#[derive(Clone, Debug)] +pub struct DecoderAssignment { + pub channel_id: ChannelId, + pub topic: String, + pub encoding: String, + pub schema_name: Option, + pub decoder: DecoderIdentifier, +} + +/// A concrete execution plan for a given MCAP source. 
+pub struct ExecutionPlan { + pub file_decoders: Vec>, + pub runners: Vec, + pub assignments: Vec, + pub topic_filter: TopicFilter, +} + +impl ExecutionPlan { + pub fn run( + mut self, + mcap_bytes: &[u8], + summary: &mcap::Summary, + time_type: TimeType, + emit: &mut dyn FnMut(Chunk), + ) -> anyhow::Result<()> { + for mut decoder in self.file_decoders { + decoder.process(mcap_bytes, summary, &self.topic_filter, emit)?; + } + + for runner in &mut self.runners { + runner.process(mcap_bytes, summary, time_type, emit)?; + } + Ok(()) + } +} + +/// Holds a set of all known decoders, split into file-scoped and message-scoped. +pub struct DecoderRegistry { + file_factories: BTreeMap Box>, + msg_factories: BTreeMap Box>, + msg_order: Vec, + fallback: Fallback, +} + +impl DecoderRegistry { + /// Creates an empty registry. + pub fn empty() -> Self { + Self { + file_factories: Default::default(), + msg_factories: Default::default(), + msg_order: Vec::new(), + fallback: Fallback::None, + } + } + + /// Creates a registry with all builtin decoders and raw fallback enabled. + pub fn all_with_raw_fallback() -> Self { + Self::all_builtin(true) + } + + /// Creates a registry with all builtin decoders and raw fallback disabled. + pub fn all_without_raw_fallback() -> Self { + Self::all_builtin(false) + } + + /// Creates a registry with all builtin decoders with configurable raw fallback. 
+ pub fn all_builtin(raw_fallback_enabled: bool) -> Self { + let mut registry = Self::empty() + // file decoders: + .register_file_decoder::() + .register_file_decoder::() + .register_file_decoder::() + .register_file_decoder::() + // message decoders (priority order): + .register_message_decoder::() + .register_message_decoder::() + .register_message_decoder::(); + + if raw_fallback_enabled { + registry = registry + .register_message_decoder::() + .with_global_fallback::(); + } else { + // still register raw so users can explicitly select it, just no fallback + registry = registry.register_message_decoder::(); + } + + registry + } + + /// Register a file-scoped decoder (runs once over the file/summary). + pub fn register_file_decoder(mut self) -> Self { + let id = L::identifier(); + if self + .file_factories + .insert(id.clone(), || Box::new(L::default())) + .is_some() + { + re_log::warn_once!("Inserted file decoder {} twice.", id); + } + self + } + + /// Register a message-scoped decoder (eligible to handle channels). + pub fn register_message_decoder(mut self) -> Self { + let id = ::identifier(); + if self + .msg_factories + .insert(id.clone(), || Box::new(M::default())) + .is_some() + { + re_log::warn_once!("Inserted message decoder {} twice.", id); + } + self.msg_order.push(id); + self + } + + /// Configure a global fallback message decoder (e.g. `raw`). + pub fn with_global_fallback(mut self) -> Self { + self.fallback = Fallback::Global(::identifier()); + self + } + + /// Returns all registered decoder identifiers (file + message) as strings. + pub fn all_identifiers(&self) -> Vec { + self.file_factories + .keys() + .chain(self.msg_factories.keys()) + .map(|id| id.to_string()) + .collect() + } + + /// Produce a filtered registry that only contains `selected` decoders. 
+ pub fn select(&self, selected: &SelectedDecoders) -> Self { + let file_factories = self + .file_factories + .iter() + .filter(|(id, _)| selected.contains(id)) + .map(|(k, v)| (k.clone(), *v)) + .collect(); + + let msg_factories = self + .msg_factories + .iter() + .filter(|(id, _)| selected.contains(id)) + .map(|(k, v)| (k.clone(), *v)) + .collect(); + + let msg_order = self + .msg_order + .iter() + .filter(|&id| selected.contains(id)) + .cloned() + .collect(); + + let fallback = self.select_fallback(selected); + + Self { + file_factories, + msg_factories, + msg_order, + fallback, + } + } + + fn select_fallback(&self, selected: &SelectedDecoders) -> Fallback { + match &self.fallback { + Fallback::Global(id) if selected.contains(id) => Fallback::Global(id.clone()), + Fallback::Global(_) | Fallback::None => Fallback::None, + } + } + + /// Build a concrete execution plan for a given file. + pub fn plan( + &self, + mcap_bytes: &[u8], + summary: &mcap::Summary, + topic_filter: &TopicFilter, + ) -> anyhow::Result { + let file_decoders = self + .file_factories + .values() + .map(|f| f()) + .collect::>(); + + let empty_channels = collect_empty_channels(mcap_bytes, summary)?; + + // instantiate message decoders and init them (supports_channel may depend on init) + let mut msg_decoders: Vec<(DecoderIdentifier, Box)> = self + .msg_order + .iter() + .filter_map(|id| self.msg_factories.get(id).map(|f| (id.clone(), f()))) + .collect(); + + for (_, l) in &mut msg_decoders { + l.init(summary)?; + } + + let mut by_decoder: BTreeMap> = BTreeMap::new(); + let mut assignments: Vec = Vec::new(); + + for channel_id in summary.channels.values() { + let channel_id = ChannelId::from(channel_id.id); + let channel = summary.channels[&channel_id.0].as_ref(); + + if empty_channels.contains(&channel_id) { + re_log::debug!( + "Skipping MCAP channel '{}' (id={}) because it contains no messages.", + channel.topic, + channel_id.0, + ); + continue; + } + + if 
channel.message_encoding.trim().is_empty() { + re_log::warn_once!( + "MCAP channel '{}' does not specify a message encoding.", + channel.topic, + ); + } + + if !topic_filter.matches(&channel.topic) { + re_log::debug!( + "Skipping MCAP channel '{}' because it does not match the topic filter.", + channel.topic, + ); + continue; + } + + // explicit priority order + let mut chosen: Option = None; + for (id, decoder) in &msg_decoders { + if decoder.supports_channel(channel) { + chosen = Some(id.clone()); + break; + } + } + + if chosen.is_none() { + // fallbacks (if any) + if let Fallback::Global(id) = &self.fallback + && self.msg_factories.contains_key(id) + { + chosen = Some(id.clone()); + } + } + + let schema_name = channel.schema.as_ref().map(|s| s.name.clone()); + + let schema_encoding = channel + .schema + .as_ref() + .map(|s| s.encoding.as_str()) + .unwrap_or("Unknown"); + + if let Some(id) = chosen { + by_decoder.entry(id.clone()).or_default().insert(channel_id); + + assignments.push(DecoderAssignment { + channel_id, + topic: channel.topic.clone(), + encoding: schema_encoding.to_owned(), + schema_name: channel.schema.as_ref().map(|s| s.name.clone()), + decoder: id, + }); + } else { + re_log::debug!( + "No message decoder selected for topic '{}' (encoding='{}', schema='{:?}')", + channel.topic, + schema_encoding, + schema_name, + ); + } + } + + let mut runners = Vec::new(); + for (decoder_id, allowed) in by_decoder { + if let Some(factory) = self.msg_factories.get(&decoder_id) { + let inner = factory(); + runners.push(MessageDecoderRunner::new(inner, allowed)); + } + } + + Ok(ExecutionPlan { + file_decoders, + runners, + assignments, + topic_filter: topic_filter.clone(), + }) + } +} + +#[cfg(test)] +mod tests { + use std::io; + + use re_chunk::Chunk; + use re_log_types::TimeType; + use re_sdk_types::archetypes::McapMessage; + + use super::*; + + #[test] + fn skips_channels_without_messages() { + let (summary, buffer, empty_channel_id, active_channel_id) = { + let 
cursor = io::Cursor::new(Vec::new()); + let mut writer = mcap::Writer::new(cursor).expect("failed to create writer"); + + let empty_channel_id = writer + .add_channel(0, "empty_topic", "raw", &Default::default()) + .expect("failed to add empty channel"); + let active_channel_id = writer + .add_channel(0, "active_topic", "raw", &Default::default()) + .expect("failed to add active channel"); + + writer + .write_to_known_channel( + &mcap::records::MessageHeader { + channel_id: active_channel_id, + sequence: 0, + log_time: 1, + publish_time: 1, + }, + &[1, 2, 3], + ) + .expect("failed to write message"); + + let summary = writer.finish().expect("failed to finish writer"); + let buffer = writer.into_inner().into_inner(); + + (summary, buffer, empty_channel_id, active_channel_id) + }; + + let plan = DecoderRegistry::empty() + .register_file_decoder::() + .register_message_decoder::() + .plan(&buffer, &summary, &TopicFilter::default()) + .expect("failed to plan"); + + assert_eq!(plan.assignments.len(), 1); + assert_eq!(plan.assignments[0].channel_id, ChannelId(active_channel_id)); + assert_ne!(plan.assignments[0].channel_id, ChannelId(empty_channel_id)); + + let mut chunks = Vec::::new(); + plan.run(&buffer, &summary, TimeType::TimestampNs, &mut |chunk| { + chunks.push(chunk); + }) + .expect("failed to run plan"); + + assert_eq!(chunks.len(), 2); + assert!( + chunks + .iter() + .all(|chunk| !chunk.entity_path().to_string().ends_with("empty_topic")) + ); + assert!( + chunks + .iter() + .any(|chunk| chunk.entity_path().to_string().ends_with("active_topic")) + ); + } + + /// Test helper for creating an MCAP summary & blob with a ros2msg-schema channel. 
+ fn ros2_summary_with_message_encoding( + schema_name: &str, + topic: &str, + message_encoding: &str, + payload: &[u8], + ) -> (mcap::Summary, Vec) { + let cursor = io::Cursor::new(Vec::new()); + let mut writer = mcap::Writer::new(cursor).expect("failed to create writer"); + let schema_id = writer + .add_schema(schema_name, "ros2msg", b"string data") + .expect("failed to add schema"); + let channel_id = writer + .add_channel(schema_id, topic, message_encoding, &Default::default()) + .expect("failed to add channel"); + + writer + .write_to_known_channel( + &mcap::records::MessageHeader { + channel_id, + sequence: 0, + log_time: 1, + publish_time: 1, + }, + payload, + ) + .expect("failed to write message"); + + let summary = writer.finish().expect("failed to finish writer"); + let buffer = writer.into_inner().into_inner(); + (summary, buffer) + } + + /// We expect CDR as encoding for ros2msg-schema messages. + /// Test that a non-CDR channel that claims to have ros2msg + /// falls back to raw forwarding instead of message reflection. 
+ #[test] + fn non_cdr_ros2msg_channel_is_forwarded_as_raw_blob() { + let (summary, buffer) = ros2_summary_with_message_encoding( + "custom_msgs/msg/Foo", + "non_cdr_topic", + "json", + br#"{"data":"hello"}"#, + ); + + let plan = DecoderRegistry::all_with_raw_fallback() + .plan(&buffer, &summary, &TopicFilter::default()) + .expect("failed to plan"); + + let assignment = plan + .assignments + .iter() + .find(|assignment| assignment.topic == "non_cdr_topic") + .expect("missing assignment"); + assert_eq!(assignment.decoder.to_string(), "raw"); + + let mut chunks = Vec::::new(); + plan.run(&buffer, &summary, TimeType::TimestampNs, &mut |chunk| { + chunks.push(chunk); + }) + .expect("failed to run plan"); + + assert!(chunks.iter().any(|chunk| { + chunk.entity_path().to_string().ends_with("non_cdr_topic") + && chunk + .component_descriptors() + .any(|descr| descr.component == McapMessage::descriptor_data().component) + })); + } + + /// Tests that semantic ROS 2 parsers also reject non-CDR channels. + #[test] + fn semantic_ros2_decoder_does_not_claim_non_cdr_channels() { + let (summary, buffer) = ros2_summary_with_message_encoding( + "std_msgs/msg/String", + "non_cdr_string_topic", + "json", + br#"{"data":"hello"}"#, + ); + + let plan = DecoderRegistry::all_with_raw_fallback() + .plan(&buffer, &summary, &TopicFilter::default()) + .expect("failed to plan"); + + let assignment = plan + .assignments + .iter() + .find(|assignment| assignment.topic == "non_cdr_string_topic") + .expect("missing assignment"); + assert_eq!(assignment.decoder.to_string(), "raw"); + } + + #[test] + fn topic_filter_matches() { + // Empty filter accepts everything. + let filter = TopicFilter::default(); + assert!(filter.is_empty()); + assert!(filter.matches("/anything")); + assert!(filter.matches("/foo/bar")); + + // Pure include: only matching topics pass. 
+ let filter = TopicFilter { + include: vec![regex_lite::Regex::new(r"^/camera/").unwrap()], + exclude: vec![], + }; + assert!(!filter.is_empty()); + assert!(filter.matches("/camera/rgb")); + assert!(filter.matches("/camera/depth")); + assert!(!filter.matches("/imu")); + + // Pure exclude: empty include means everything passes except excluded. + let filter = TopicFilter { + include: vec![], + exclude: vec![regex_lite::Regex::new(r"^/diagnostics").unwrap()], + }; + assert!(filter.matches("/camera/rgb")); + assert!(!filter.matches("/diagnostics/agg")); + + // Combined: exclude takes precedence over include. + let filter = TopicFilter { + include: vec![regex_lite::Regex::new(r"^/camera/").unwrap()], + exclude: vec![regex_lite::Regex::new(r"depth$").unwrap()], + }; + assert!(filter.matches("/camera/rgb")); + assert!(!filter.matches("/camera/depth")); + assert!(!filter.matches("/imu")); + + // Multiple includes: match if ANY matches. + let filter = TopicFilter { + include: vec![ + regex_lite::Regex::new(r"^/camera/").unwrap(), + regex_lite::Regex::new(r"^/imu$").unwrap(), + ], + exclude: vec![], + }; + assert!(filter.matches("/camera/rgb")); + assert!(filter.matches("/imu")); + assert!(!filter.matches("/lidar")); + } + + #[test] + fn filter_skips_unselected_topics() { + let (summary, buffer) = { + let cursor = io::Cursor::new(Vec::new()); + let mut writer = mcap::Writer::new(cursor).expect("failed to create writer"); + + let camera_rgb = writer + .add_channel(0, "/camera/rgb", "raw", &Default::default()) + .expect("failed to add channel"); + let camera_depth = writer + .add_channel(0, "/camera/depth", "raw", &Default::default()) + .expect("failed to add channel"); + let imu = writer + .add_channel(0, "/imu", "raw", &Default::default()) + .expect("failed to add channel"); + + for channel_id in [camera_rgb, camera_depth, imu] { + writer + .write_to_known_channel( + &mcap::records::MessageHeader { + channel_id, + sequence: 0, + log_time: 1, + publish_time: 1, + }, + &[1, 
2, 3], + ) + .expect("failed to write message"); + } + + let summary = writer.finish().expect("failed to finish writer"); + let buffer = writer.into_inner().into_inner(); + (summary, buffer) + }; + + // Include only /camera/* topics. + let filter = TopicFilter { + include: vec![regex_lite::Regex::new(r"^/camera/").unwrap()], + exclude: vec![], + }; + + let plan = DecoderRegistry::empty() + .register_message_decoder::() + .plan(&buffer, &summary, &filter) + .expect("failed to plan"); + + assert_eq!(plan.assignments.len(), 2); + let topics: BTreeSet<_> = plan.assignments.iter().map(|a| a.topic.as_str()).collect(); + assert!(topics.contains("/camera/rgb")); + assert!(topics.contains("/camera/depth")); + assert!(!topics.contains("/imu")); + + // Exclude /camera/depth. + let filter = TopicFilter { + include: vec![], + exclude: vec![regex_lite::Regex::new(r"depth$").unwrap()], + }; + + let plan = DecoderRegistry::empty() + .register_message_decoder::() + .plan(&buffer, &summary, &filter) + .expect("failed to plan"); + + let topics: BTreeSet<_> = plan.assignments.iter().map(|a| a.topic.as_str()).collect(); + assert_eq!(topics.len(), 2); + assert!(topics.contains("/camera/rgb")); + assert!(topics.contains("/imu")); + assert!(!topics.contains("/camera/depth")); + + // Include + exclude combined. 
+ let filter = TopicFilter { + include: vec![regex_lite::Regex::new(r"^/camera/").unwrap()], + exclude: vec![regex_lite::Regex::new(r"depth$").unwrap()], + }; + + let plan = DecoderRegistry::empty() + .register_message_decoder::() + .plan(&buffer, &summary, &filter) + .expect("failed to plan"); + + assert_eq!(plan.assignments.len(), 1); + assert_eq!(plan.assignments[0].topic, "/camera/rgb"); + } +} diff --git a/crates/store/re_mcap/src/layers/protobuf.rs b/crates/store/re_mcap/src/decoders/protobuf.rs similarity index 63% rename from crates/store/re_mcap/src/layers/protobuf.rs rename to crates/store/re_mcap/src/decoders/protobuf.rs index f939ecb2a205..cc50aab33961 100644 --- a/crates/store/re_mcap/src/layers/protobuf.rs +++ b/crates/store/re_mcap/src/decoders/protobuf.rs @@ -1,19 +1,21 @@ +use std::sync::Arc; + use arrow::array::{ ArrayBuilder, BinaryBuilder, BooleanBuilder, FixedSizeListBuilder, Float32Builder, - Float64Builder, Int32Builder, Int64Builder, ListBuilder, StringBuilder, StructBuilder, - UInt32Builder, UInt64Builder, + Float64Builder, Int32Builder, Int64Builder, ListBuilder, MapBuilder, MapFieldNames, + StringBuilder, StructBuilder, UInt32Builder, UInt64Builder, }; use arrow::datatypes::{DataType, Field, Fields}; use prost_reflect::{ - DescriptorPool, DynamicMessage, FieldDescriptor, Kind, MessageDescriptor, ReflectMessage as _, - Value, + DescriptorPool, DynamicMessage, FieldDescriptor, Kind, MapKey, MessageDescriptor, + OneofDescriptor, ReflectMessage as _, Value, }; use re_chunk::{Chunk, ChunkId}; use re_sdk_types::ComponentDescriptor; use re_sdk_types::reflection::ComponentDescriptorExt as _; use crate::parsers::{MessageParser, ParserContext}; -use crate::{Error, LayerIdentifier, MessageLayer}; +use crate::{DecoderIdentifier, Error, MessageDecoder}; struct ProtobufMessageParser { message_descriptor: MessageDescriptor, @@ -41,6 +43,9 @@ enum ProtobufError { actual: prost_reflect::Kind, }, + #[error("appending to array failed: {0}")] + 
AppendFailed(#[from] arrow::error::ArrowError), + #[error("unknown enum number {0}")] UnknownEnumNumber(i32), @@ -80,31 +85,57 @@ fn append_message_fields( .map(|(field_desc, value)| (field_desc.number(), value)) .collect(); - // TODO(#11221): Support `oneof` values in protobuf MCAP files. - let non_oneof_fields = descriptor - .fields() - .filter(|f| f.containing_oneof().is_none()); + let grouped = grouped_fields(&descriptor); - for (field_builder, field_desc) in struct_builder + for (field_builder, grouped_field) in struct_builder .field_builders_mut() .iter_mut() - .zip(non_oneof_fields) + .zip(grouped.iter()) { - // Use the actual field number from the schema, not index-based numbering. - // Protobuf schemas can have gaps (e.g., fields 1, 2, 5, 8 after deprecating 3, 4). - let protobuf_number = field_desc.number(); + match grouped_field { + GroupedField::Regular(field_desc) => { + // Use the actual field number from the schema, not index-based numbering. + // Protobuf schemas can have gaps (e.g., fields 1, 2, 5, 8 after deprecating 3, 4). + let protobuf_number = field_desc.number(); + + if let Some(val) = set_fields.get(&protobuf_number) { + append_value(field_builder, field_desc, val)?; + } else { + // For proto3 optional fields, maps, and lists: append null for unset fields. + // For regular proto3 fields: append default values. + if field_desc.supports_presence() || field_desc.is_map() || field_desc.is_list() + { + append_null_to_builder(field_builder)?; + } else { + // Use the default value for this field type. + let default_value = field_desc.default_value(); + append_value(field_builder, field_desc, &default_value)?; + } + } + } + GroupedField::OneOf(oneof) => { + let oneof_builder = field_builder + .as_any_mut() + .downcast_mut::() + .expect("oneof wrapper builder should be a StructBuilder"); + + // Find which variant (if any) is set in this message. 
+ let mut any_set = false; + for (variant_builder, variant_field) in oneof_builder + .field_builders_mut() + .iter_mut() + .zip(oneof.fields()) + { + if let Some(val) = set_fields.get(&variant_field.number()) { + append_value(variant_builder, &variant_field, val)?; + any_set = true; + } else { + append_null_to_builder(variant_builder)?; + } + } - if let Some(val) = set_fields.get(&protobuf_number) { - append_value(field_builder, &field_desc, val)?; - } else { - // For proto3 optional fields, append null for unset fields. - // For regular proto3 fields, append default values. - if field_desc.supports_presence() { - append_null_to_builder(field_builder)?; - } else { - // Use the default value for this field type. - let default_value = field_desc.default_value(); - append_value(field_builder, &field_desc, &default_value)?; + // Append the wrapper struct: true if any variant is set, false (null) otherwise. + oneof_builder.append(any_set); } } } @@ -205,6 +236,11 @@ fn append_null_to_builder(builder: &mut dyn ArrayBuilder) -> Result<(), Protobuf .downcast_mut::>>() { b.append_null(); + } else if let Some(b) = builder + .as_any_mut() + .downcast_mut::, Box>>() + { + b.append(false)?; } else { return Err(ProtobufError::UnsupportedType( "Unknown builder type for append_null", @@ -246,9 +282,30 @@ fn append_value( list_builder.append(true); re_log::trace!("Finished append on list with elements {val}"); } - Value::Map(_hash_map) => { - // We should not encounter hash maps in protobufs. 
- return Err(ProtobufError::UnsupportedType("HashMap")); + Value::Map(hash_map) => { + let Kind::Message(entry_msg) = field.kind() else { + return Err(ProtobufError::UnexpectedType { + expected: "message (map entry)", + actual: field.kind(), + }); + }; + let key_field = entry_msg.map_entry_key_field(); + let value_field = entry_msg.map_entry_value_field(); + + let map_builder = downcast_err::< + MapBuilder, Box>, + >(builder, val)?; + + let mut sorted_entries: Vec<_> = hash_map.iter().collect(); + sorted_entries.sort_by_key(|(k, _)| (*k).clone()); + for (map_key, map_value) in sorted_entries { + let key_value = map_key_to_value(map_key); + append_value(map_builder.keys(), &key_field, &key_value)?; + append_value(map_builder.values(), &value_field, map_value)?; + } + map_builder + .append(true) + .map_err(|_err| ProtobufError::UnsupportedType("MapBuilder append failed"))?; } Value::EnumNumber(x) => { let kind = field.kind(); @@ -279,26 +336,85 @@ fn append_value( Ok(()) } -fn struct_builder_from_message(message_descriptor: &MessageDescriptor) -> StructBuilder { - // TODO(#11221): Support `oneof` values in protobuf MCAP files. - // Warn about oneof fields in this message (both top-level and nested) - if message_descriptor.oneofs().len() > 0 { - re_log::warn_once!( - "Ignoring {} message: Protobuf schemas containing `oneof` are not supported yet.", - message_descriptor.full_name() - ); +/// Converts a [`MapKey`] to a [`Value`] so it can be appended via `append_value`. 
+fn map_key_to_value(key: &MapKey) -> Value { + match key { + MapKey::Bool(b) => Value::Bool(*b), + MapKey::I32(i) => Value::I32(*i), + MapKey::I64(i) => Value::I64(*i), + MapKey::U32(u) => Value::U32(*u), + MapKey::U64(u) => Value::U64(*u), + MapKey::String(s) => Value::String(s.clone()), } +} - let fields = message_descriptor - .fields() - .filter(|f| f.containing_oneof().is_none()) - .map(|f| arrow_field_from(&f)) - .collect::(); - let field_builders = message_descriptor - .fields() - .filter(|f| f.containing_oneof().is_none()) - .map(|f| arrow_builder_from_field(&f)) - .collect::>(); +enum GroupedField { + Regular(FieldDescriptor), + OneOf(OneofDescriptor), +} + +/// Iterates the fields of a message descriptor, collapsing real oneof variants +/// (those with more than one field) into a single [`GroupedField::OneOf`] entry +/// at the position of the first variant. +fn grouped_fields(descriptor: &MessageDescriptor) -> Vec { + let mut result = Vec::new(); + let mut seen = ahash::HashSet::default(); + for field in descriptor.fields() { + // Synthetic oneofs (single-field, proto3 optional) remain as [`GroupedField::Regular`]. + if let Some(oneof) = field.containing_oneof() + && oneof.fields().len() > 1 + { + if seen.insert(oneof.full_name().to_owned()) { + result.push(GroupedField::OneOf(oneof)); + } + continue; + } + result.push(GroupedField::Regular(field)); + } + result +} + +/// Creates Arrow [`Fields`] from a message descriptor, wrapping real oneof +/// variants in a single struct field named after the oneof with +/// `ProtobufOneOf` metadata on the wrapper. 
+fn fields_from_message(descriptor: &MessageDescriptor) -> Fields { + grouped_fields(descriptor) + .into_iter() + .map(|gf| match gf { + GroupedField::Regular(f) => arrow_field_from(&f), + GroupedField::OneOf(oneof) => { + let inner: Fields = oneof.fields().map(|f| arrow_field_from(&f)).collect(); + Field::new(oneof.name(), DataType::Struct(inner), true).with_metadata( + std::iter::once(( + "ARROW:extension:name".to_owned(), + "rerun.datatypes.ProtobufOneOf".to_owned(), + )) + .collect(), + ) + } + }) + .collect() +} + +fn struct_builder_from_message(message_descriptor: &MessageDescriptor) -> StructBuilder { + let fields = fields_from_message(message_descriptor); + let field_builders: Vec> = grouped_fields(message_descriptor) + .into_iter() + .map(|gf| -> Box { + match gf { + GroupedField::Regular(f) => arrow_builder_from_field(&f), + GroupedField::OneOf(oneof) => { + let inner_fields: Fields = + oneof.fields().map(|f| arrow_field_from(&f)).collect(); + let inner_builders: Vec> = oneof + .fields() + .map(|f| arrow_builder_from_field(&f)) + .collect(); + Box::new(StructBuilder::new(inner_fields, inner_builders)) + } + } + }) + .collect(); re_log::debug_assert_eq!(fields.len(), field_builders.len()); @@ -321,6 +437,18 @@ fn arrow_builder_from_field(descr: &FieldDescriptor) -> Box { Kind::Bool => Box::new(BooleanBuilder::new()), Kind::String => Box::new(StringBuilder::new()), Kind::Bytes => Box::new(BinaryBuilder::new()), + Kind::Message(message_descriptor) if descr.is_map() => { + let key_field = message_descriptor.map_entry_key_field(); + let val_field = message_descriptor.map_entry_value_field(); + let field_names = MapFieldNames { + entry: "entries".to_owned(), + key: key_field.name().to_owned(), + value: val_field.name().to_owned(), + }; + let key_builder = arrow_builder_from_field(&key_field); + let val_builder = arrow_builder_from_field(&val_field); + return Box::new(MapBuilder::new(Some(field_names), key_builder, val_builder)); + } 
Kind::Message(message_descriptor) => { Box::new(struct_builder_from_message(&message_descriptor)) as Box } @@ -376,14 +504,29 @@ fn datatype_from(descr: &FieldDescriptor) -> DataType { Kind::Bool => DataType::Boolean, Kind::String => DataType::Utf8, Kind::Bytes => DataType::Binary, + Kind::Message(message_descriptor) if descr.is_map() => { + let proto_key_field = message_descriptor.map_entry_key_field(); + let proto_val_field = message_descriptor.map_entry_value_field(); + let key_field = Field::new( + proto_key_field.name(), + datatype_from(&proto_key_field), + false, + ); + let val_field = Field::new( + proto_val_field.name(), + datatype_from(&proto_val_field), + true, + ); + let entry_field = Field::new( + "entries", + DataType::Struct(Fields::from(vec![key_field, val_field])), + false, + ); + // TODO(grtlr): We actually store the data sorted, but `MapBuilder` does not allow that. + DataType::Map(Arc::new(entry_field), false) + } Kind::Message(message_descriptor) => { - // TODO(#11221): Support `oneof` values in protobuf MCAP files. - let fields = message_descriptor - .fields() - .filter(|f| f.containing_oneof().is_none()) - .map(|f| arrow_field_from(&f)) - .collect::(); - DataType::Struct(fields) + DataType::Struct(fields_from_message(&message_descriptor)) } Kind::Enum(_) => { // Struct with "name" (String) and "value" (Int32) fields. @@ -406,16 +549,16 @@ fn datatype_from(descr: &FieldDescriptor) -> DataType { /// Provides reflection-based conversion of protobuf-encoded MCAP messages. /// -/// Applying this layer will result in a direct Arrow representation of the fields. +/// Applying this decoder will result in a direct Arrow representation of the fields. /// This is useful for querying certain fields from an MCAP file, but wont result /// in semantic types that can be picked up by the Rerun viewer. 
#[derive(Debug, Default)] -pub struct McapProtobufLayer { +pub struct McapProtobufDecoder { descrs_per_topic: ahash::HashMap, } -impl MessageLayer for McapProtobufLayer { - fn identifier() -> LayerIdentifier { +impl MessageDecoder for McapProtobufDecoder { + fn identifier() -> DecoderIdentifier { "protobuf".into() } @@ -483,7 +626,7 @@ mod unit_tests { /// Verifies that `append_null_to_builder` properly handles `StructBuilder` /// by recursively appending nulls to child builders to maintain length consistency. #[test] - fn struct_builder_null_append() { + fn test_struct_builder_null_append() { // Create a StructBuilder with 2 child fields. let fields = Fields::from(vec![ Field::new("a", DataType::Utf8, true), @@ -512,25 +655,61 @@ mod unit_tests { mod integration_tests { use std::io; - use crossbeam::channel::Receiver; - use prost_reflect::prost::Message as _; use prost_reflect::prost_types::{ DescriptorProto, EnumDescriptorProto, EnumValueDescriptorProto, FieldDescriptorProto, - FileDescriptorProto, FileDescriptorSet, OneofDescriptorProto, field_descriptor_proto, + FileDescriptorProto, FileDescriptorSet, MessageOptions, OneofDescriptorProto, + field_descriptor_proto, }; use prost_reflect::{DescriptorPool, DynamicMessage, MessageDescriptor}; use re_chunk::Chunk; - use re_log::LogMsg; - use crate::LayerRegistry; - use crate::layers::McapProtobufLayer; + use re_log_types::TimeType; - /// Helper to mark a field descriptor as proto3 optional. 
- fn mark_optional(mut field: FieldDescriptorProto) -> FieldDescriptorProto { - field.label = Some(field_descriptor_proto::Label::Optional as i32); - field.proto3_optional = Some(true); - field + use crate::DecoderRegistry; + use crate::decoders::McapProtobufDecoder; + + fn format_chunk(chunk: &Chunk) -> String { + let batch = chunk.to_record_batch().expect("failed to convert chunk"); + re_arrow_util::RecordBatchFormatOpts { + width: Some(240), + max_cell_content_width: usize::MAX, + redact_non_deterministic: true, + ..Default::default() + } + .format(&batch) + .to_string() + } + + /// Helper to mark fields as proto3 optional with proper synthetic oneof declarations. + /// + /// Returns the modified fields and the synthetic `OneofDescriptorProto` entries that + /// must be added to the parent message's `oneof_decl`. The `oneof_index_offset` is + /// used when the message already has real oneofs declared before these synthetic ones. + #[expect( + clippy::cast_possible_wrap, + reason = "test helper with small field counts" + )] + fn make_fields_optional( + fields: Vec, + oneof_index_offset: i32, + ) -> (Vec, Vec) { + let mut oneof_decls = Vec::new(); + let fields = fields + .into_iter() + .enumerate() + .map(|(i, mut field)| { + field.label = Some(field_descriptor_proto::Label::Optional as i32); + field.proto3_optional = Some(true); + field.oneof_index = Some(oneof_index_offset + i as i32); + oneof_decls.push(OneofDescriptorProto { + name: Some(format!("_{}", field.name.as_ref().unwrap())), + ..Default::default() + }); + field + }) + .collect(); + (fields, oneof_decls) } /// Helper to create a [`MessageDescriptor`] from a list of [`DescriptorProto`]. 
@@ -556,6 +735,32 @@ mod integration_tests { .expect("failed to get message descriptor") } + /// Creates a `Person` message descriptor equivalent to: + /// + /// ```protobuf + /// message Person { + /// enum Status { + /// UNKNOWN = 0; + /// ACTIVE = 1; + /// INACTIVE = 2; + /// } + /// + /// message Address { + /// optional string street = 1; + /// optional string city = 2; + /// } + /// + /// optional string name = 1; + /// reserved 2 to 4; + /// optional int32 id = 5; + /// optional Status status = 8; + /// optional Address address = 9; + /// map tags = 10; + /// } + /// ``` + /// + /// If `use_proto3_optional` is `false`, the `optional` keywords are removed and + /// fields will not have presence tracking (unset fields show default values). fn create_person_descriptor(use_proto3_optional: bool) -> (&'static str, DescriptorProto) { let status = EnumDescriptorProto { name: Some("Status".into()), @@ -582,7 +787,7 @@ mod integration_tests { }; // Create a nested Address message. - let mut address_fields = vec![ + let address_fields = vec![ FieldDescriptorProto { name: Some("street".into()), number: Some(1), @@ -597,20 +802,23 @@ mod integration_tests { }, ]; - if use_proto3_optional { - address_fields = address_fields.into_iter().map(mark_optional).collect(); - } + let (address_fields, address_oneof_decls) = if use_proto3_optional { + make_fields_optional(address_fields, 0) + } else { + (address_fields, vec![]) + }; let address_message = DescriptorProto { name: Some("Address".into()), field: address_fields, + oneof_decl: address_oneof_decls, ..Default::default() }; // Create field descriptors with gaps in field numbering to test handling of schemas // with non-contiguous field numbers and reserved ranges between actual fields. // Field 1: name, Reserved: 2-4, Field 5: id, Field 8: status, Field 9: address. 
- let mut fields = vec![ + let fields = vec![ FieldDescriptorProto { name: Some("name".into()), number: Some(1), @@ -639,16 +847,54 @@ mod integration_tests { }, ]; - if use_proto3_optional { - fields = fields.into_iter().map(mark_optional).collect(); - } + let (mut fields, person_oneof_decls) = if use_proto3_optional { + make_fields_optional(fields, 0) + } else { + (fields, vec![]) + }; + + // Map field added *after* `make_fields_optional` so it is not wrapped in a synthetic oneof. + let tags_entry = DescriptorProto { + name: Some("TagsEntry".into()), + field: vec![ + FieldDescriptorProto { + name: Some("key".into()), + number: Some(1), + label: Some(field_descriptor_proto::Label::Optional as i32), + r#type: Some(field_descriptor_proto::Type::String as i32), + ..Default::default() + }, + FieldDescriptorProto { + name: Some("value".into()), + number: Some(2), + label: Some(field_descriptor_proto::Label::Optional as i32), + r#type: Some(field_descriptor_proto::Type::String as i32), + ..Default::default() + }, + ], + options: Some(MessageOptions { + map_entry: Some(true), + ..Default::default() + }), + ..Default::default() + }; + + fields.push(FieldDescriptorProto { + name: Some("tags".into()), + number: Some(10), + label: Some(field_descriptor_proto::Label::Repeated as i32), + r#type: Some(field_descriptor_proto::Type::Message as i32), + type_name: Some("TagsEntry".into()), + ..Default::default() + }); // Create a message descriptor with reserved field numbers (2, 3, 4) between actual fields. 
let person_proto = DescriptorProto { name: Some("Person".into()), field: fields, - nested_type: vec![address_message], + nested_type: vec![address_message, tags_entry], enum_type: vec![status], + oneof_decl: person_oneof_decls, reserved_range: vec![ prost_reflect::prost_types::descriptor_proto::ReservedRange { start: Some(2), @@ -697,32 +943,41 @@ mod integration_tests { Ok(()) } - fn run_layer(summary: &mcap::Summary, buffer: &[u8]) -> Vec { + fn run_decoder(summary: &mcap::Summary, buffer: &[u8]) -> Vec { let mut chunks = Vec::new(); let mut send_chunk = |chunk| { chunks.push(chunk); }; - let registry = LayerRegistry::empty().register_message_layer::(); + let registry = DecoderRegistry::empty().register_message_decoder::(); registry - .plan(summary) + .plan(buffer, summary, &crate::TopicFilter::default()) .expect("failed to plan") - .run(buffer, summary, &mut send_chunk) - .expect("failed to run layer"); + .run(buffer, summary, TimeType::TimestampNs, &mut send_chunk) + .expect("failed to run decoder"); chunks } /// Helper to create test messages with various field combinations. fn create_test_messages(person_message: &MessageDescriptor) -> Vec { + use prost_reflect::{MapKey, Value}; + vec![ - // Message 1: has all fields including nested address. - DynamicMessage::parse_text_format( - person_message.clone(), - "name: \"Alice\" id: 123 status: 1 address: { street: \"Main St\" city: \"NYC\" }", - ) - .expect("failed to parse text format"), + // Message 1: has all fields including nested address and tags. 
+ { + let mut msg = DynamicMessage::parse_text_format( + person_message.clone(), + "name: \"Alice\" id: 123 status: 1 address: { street: \"Main St\" city: \"NYC\" }", + ) + .expect("failed to parse text format"); + let mut tags = std::collections::HashMap::new(); + tags.insert(MapKey::String("role".into()), Value::String("admin".into())); + tags.insert(MapKey::String("org".into()), Value::String("rerun".into())); + msg.set_field_by_name("tags", Value::Map(tags)); + msg + }, // Message 2: has name and status, with partial address (only street). DynamicMessage::parse_text_format( person_message.clone(), @@ -732,22 +987,28 @@ mod integration_tests { // Message 3: has name and id, no address. DynamicMessage::parse_text_format(person_message.clone(), "name: \"Charlie\" id: 456") .expect("failed to parse text format"), - // Message 4: has only name and nested address. - DynamicMessage::parse_text_format( - person_message.clone(), - "name: \"Dave\" address: { city: \"LA\" }", - ) - .expect("failed to parse text format"), + // Message 4: has only name, nested address, and tags. + { + let mut msg = DynamicMessage::parse_text_format( + person_message.clone(), + "name: \"Dave\" address: { city: \"LA\" }", + ) + .expect("failed to parse text format"); + let mut tags = std::collections::HashMap::new(); + tags.insert(MapKey::String("role".into()), Value::String("admin".into())); + msg.set_field_by_name("tags", Value::Map(tags)); + msg + }, // Message 5: has only id (name, status, and address missing). { let mut msg = DynamicMessage::new(person_message.clone()); - msg.set_field_by_name("id", prost_reflect::Value::I32(789)); + msg.set_field_by_name("id", Value::I32(789)); msg }, // Message 6: has only status (name, id, and address missing). 
{ let mut msg = DynamicMessage::new(person_message.clone()); - msg.set_field_by_name("status", prost_reflect::Value::EnumNumber(1)); + msg.set_field_by_name("status", Value::EnumNumber(1)); msg }, // Message 7: empty message (all fields missing). @@ -782,30 +1043,30 @@ mod integration_tests { "there should be only one chunk" ); - let chunks = run_layer(&summary, buffer.as_slice()); + let chunks = run_decoder(&summary, buffer.as_slice()); assert_eq!(chunks.len(), 1); - insta::assert_snapshot!(snapshot_name, format!("{:-240}", &chunks[0])); + insta::assert_snapshot!(snapshot_name, format_chunk(&chunks[0])); } /// Test various field combinations with proto3 optional (presence tracking). /// This includes messages with all fields, partial fields, and missing fields. #[test] - fn field_combinations_with_presence_tracking() { + fn test_field_combinations_with_presence_tracking() { test_field_combinations_helper(true, "field_combinations_with_presence_tracking"); } /// Test various field combinations without proto3 optional (no presence tracking). /// Unset fields will show default values instead of null. #[test] - fn field_combinations_without_presence_tracking() { + fn test_field_combinations_without_presence_tracking() { test_field_combinations_helper(false, "field_combinations_without_presence_tracking"); } /// This test verifies that we are resilient to decode failures. When messages fail to decode, /// they should be logged and skipped without causing length mismatches. #[test] - fn decode_failure_resilience() { + fn test_decode_failure_resilience() { use prost_reflect::prost::Message as _; let (summary, buffer) = { @@ -852,15 +1113,32 @@ mod integration_tests { (summary, writer.into_inner().into_inner()) }; - let chunks = run_layer(&summary, buffer.as_slice()); + let chunks = run_decoder(&summary, buffer.as_slice()); assert_eq!(chunks.len(), 1); // We wrote 10 messages (5 valid, 5 invalid), so we should get 5 rows. 
assert_eq!(chunks[0].num_rows(), 5); - insta::assert_snapshot!("decode_failure_resilience", format!("{:-240}", &chunks[0])); + insta::assert_snapshot!("decode_failure_resilience", format_chunk(&chunks[0])); } - fn create_color_descriptor() -> (&'static str, DescriptorProto) { + /// Creates a `Color` message descriptor equivalent to: + /// + /// ```protobuf + /// message Color { + /// string object = 1; + /// oneof color { + /// string rgb = 2; + /// string hsv = 3; + /// string bgr = 4; + /// } + /// optional float gamma = 5; + /// } + /// ``` + /// + /// If `set_proto3_optional_flag` is `false`, the `gamma` field will still be in a + /// single-field oneof but without the `proto3_optional` flag. This simulates protobuf + /// compilers/tools that don't emit the flag. + fn create_color_descriptor(set_proto3_optional_flag: bool) -> (&'static str, DescriptorProto) { let color_proto = DescriptorProto { name: Some("Color".into()), field: vec![ @@ -877,7 +1155,7 @@ mod integration_tests { number: Some(2), label: Some(field_descriptor_proto::Label::Optional as i32), r#type: Some(field_descriptor_proto::Type::String as i32), - oneof_index: Some(0), // Part of oneof. + oneof_index: Some(0), // Part of real oneof "color". ..Default::default() }, FieldDescriptorProto { @@ -885,7 +1163,7 @@ mod integration_tests { number: Some(3), label: Some(field_descriptor_proto::Label::Optional as i32), r#type: Some(field_descriptor_proto::Type::String as i32), - oneof_index: Some(0), // Part of oneof. + oneof_index: Some(0), // Part of real oneof "color". ..Default::default() }, FieldDescriptorProto { @@ -893,20 +1171,45 @@ mod integration_tests { number: Some(4), label: Some(field_descriptor_proto::Label::Optional as i32), r#type: Some(field_descriptor_proto::Type::String as i32), - oneof_index: Some(0), // Part of oneof. + oneof_index: Some(0), // Part of real oneof "color". + ..Default::default() + }, + // Proto3 optional field (synthetic oneof "_gamma"). 
+ FieldDescriptorProto { + name: Some("gamma".into()), + number: Some(5), + label: Some(field_descriptor_proto::Label::Optional as i32), + r#type: Some(field_descriptor_proto::Type::Float as i32), + proto3_optional: set_proto3_optional_flag.then_some(true), + oneof_index: Some(1), // Synthetic oneof "_gamma". + ..Default::default() + }, + ], + oneof_decl: vec![ + // Real oneof (index 0). + OneofDescriptorProto { + name: Some("color".into()), + ..Default::default() + }, + // Synthetic oneof for proto3 optional "gamma" (index 1). + OneofDescriptorProto { + name: Some("_gamma".into()), ..Default::default() }, ], - oneof_decl: vec![OneofDescriptorProto { - name: Some("color".into()), - ..Default::default() - }], ..Default::default() }; ("com.example.Color", color_proto) } + /// Creates a `Scene` message descriptor equivalent to: + /// + /// ```protobuf + /// message Scene { + /// Color object = 1; + /// } + /// ``` fn create_scene_descriptor() -> (&'static str, DescriptorProto) { let scene_proto = DescriptorProto { name: Some("Scene".into()), @@ -927,75 +1230,48 @@ mod integration_tests { ("com.example.Scene", scene_proto) } - fn create_color_test_messages(color_message: &MessageDescriptor) -> Vec { + fn create_scene_test_messages(scene_message: &MessageDescriptor) -> Vec { vec![ - // Message 1: object and rgb color. + // Message 1: scene with nested Color using rgb. DynamicMessage::parse_text_format( - color_message.clone(), - "object: \"box\" rgb: \"255,0,0\"", + scene_message.clone(), + "object: { object: \"cube\" rgb: \"128,64,32\" }", ) .expect("failed to parse text format"), - // Message 2: object and hsv color. + // Message 2: scene with nested Color using bgr. DynamicMessage::parse_text_format( - color_message.clone(), - "object: \"sphere\" hsv: \"120,1.0,1.0\"", + scene_message.clone(), + "object: { object: \"pyramid\" bgr: \"0,255,0\" }", ) .expect("failed to parse text format"), - // Message 3: only object (no color oneof field set). 
- DynamicMessage::parse_text_format(color_message.clone(), "object: \"cone\"") - .expect("failed to parse text format"), ] } - fn create_scene_test_messages(scene_message: &MessageDescriptor) -> Vec { + fn create_color_test_messages(color_message: &MessageDescriptor) -> Vec { vec![ - // Message 1: scene with nested Color using rgb. + // Message 1: object, gamma, and rgb color. DynamicMessage::parse_text_format( - scene_message.clone(), - "object: { object: \"cube\" rgb: \"128,64,32\" }", + color_message.clone(), + "object: \"box\" gamma: 2.2 rgb: \"255,0,0\"", ) .expect("failed to parse text format"), - // Message 2: scene with nested Color using bgr. + // Message 2: object and hsv color (no gamma). DynamicMessage::parse_text_format( - scene_message.clone(), - "object: { object: \"pyramid\" bgr: \"0,255,0\" }", + color_message.clone(), + "object: \"sphere\" hsv: \"120,1.0,1.0\"", ) .expect("failed to parse text format"), + // Message 3: only object (no gamma, no color oneof field set). + DynamicMessage::parse_text_format(color_message.clone(), "object: \"cone\"") + .expect("failed to parse text format"), ] } - fn check_single_warning(log_rx: &Receiver) { - // Verify warning was emitted - let warning = log_rx - .try_recv() - .expect("Expected warning for oneof field in message"); - assert_eq!(warning.level, re_log::Level::Warn); - assert!( - warning.msg.contains("oneof"), - "Expected warning to mention 'oneof', but got: {}", - warning.msg - ); - - // Verify no additional warnings - assert!( - log_rx.try_recv().is_err(), - "Expected exactly one warning, but found additional warnings" - ); - } - - // TODO(#11221): Support `oneof` values in protobuf MCAP files. - /// This test verifies that top-level `oneof` fields triggers a warning and are filtered out. 
- #[test] - fn oneof_top_level() { - // Setup logging to capture warnings - re_log::setup_logging(); - let (logger, log_rx) = re_log::ChannelLogger::new(re_log::LevelFilter::Warn); - re_log::add_boxed_logger(Box::new(logger)).expect("failed to add logger"); - - let (color_name, color_proto) = create_color_descriptor(); + /// Helper to test oneof fields with or without the `proto3_optional` flag on `gamma`. + fn test_oneof_fields_helper(set_proto3_optional_flag: bool) { + let (color_name, color_proto) = create_color_descriptor(set_proto3_optional_flag); let color_message = create_message_descriptor(vec![color_proto], color_name); - // Create MCAP with test messages. let buffer = Vec::new(); let cursor = io::Cursor::new(buffer); let mut writer = mcap::Writer::new(cursor).expect("failed to create writer"); @@ -1012,30 +1288,35 @@ mod integration_tests { let summary = writer.finish().expect("finishing writer failed"); let buffer = writer.into_inner().into_inner(); - let chunks = run_layer(&summary, buffer.as_slice()); - - check_single_warning(&log_rx); + let chunks = run_decoder(&summary, buffer.as_slice()); assert_eq!(chunks.len(), 1); assert_eq!(chunks[0].num_rows(), 3); - insta::assert_snapshot!("oneof_top_level", format!("{:-240}", &chunks[0])); + insta::assert_snapshot!("oneof_fields", format_chunk(&chunks[0])); + } + + /// This test verifies that all oneof fields (both real and synthetic) are included + /// in the Arrow output. + #[test] + fn test_oneof_fields() { + test_oneof_fields_helper(true); } - // TODO(#11221): Support `oneof` values in protobuf MCAP files. - /// This test verifies that nested `oneof` fields triggers a warning and are filtered out. + /// Same as `test_oneof_fields` but without the `proto3_optional` flag on `gamma`. + /// Verifies that the output is identical regardless of whether the flag is set. 
#[test] - fn oneof_nested() { - // Setup logging to capture warnings - re_log::setup_logging(); - let (logger, log_rx) = re_log::ChannelLogger::new(re_log::LevelFilter::Warn); - re_log::add_boxed_logger(Box::new(logger)).expect("failed to add logger"); + fn test_oneof_fields_without_proto3_optional_flag() { + test_oneof_fields_helper(false); + } - let (_, color_proto) = create_color_descriptor(); + /// This test verifies that nested oneof fields are included in the Arrow output. + #[test] + fn test_oneof_nested() { + let (_, color_proto) = create_color_descriptor(true); let (scene_name, scene_proto) = create_scene_descriptor(); let scene_message = create_message_descriptor(vec![color_proto, scene_proto], scene_name); - // Create MCAP with test messages. let buffer = Vec::new(); let cursor = io::Cursor::new(buffer); let mut writer = mcap::Writer::new(cursor).expect("failed to create writer"); @@ -1052,13 +1333,11 @@ mod integration_tests { let summary = writer.finish().expect("finishing writer failed"); let buffer = writer.into_inner().into_inner(); - let chunks = run_layer(&summary, buffer.as_slice()); + let chunks = run_decoder(&summary, buffer.as_slice()); - check_single_warning(&log_rx); - - assert_eq!(chunks.len(), 1); // One chunk for scene_topic + assert_eq!(chunks.len(), 1); assert_eq!(chunks[0].num_rows(), 2); - insta::assert_snapshot!("oneof_nested", format!("{:-240}", &chunks[0])); + insta::assert_snapshot!("oneof_nested", format_chunk(&chunks[0])); } } diff --git a/crates/store/re_mcap/src/layers/raw.rs b/crates/store/re_mcap/src/decoders/raw.rs similarity index 93% rename from crates/store/re_mcap/src/layers/raw.rs rename to crates/store/re_mcap/src/decoders/raw.rs index 06a320553c32..839bc613a73f 100644 --- a/crates/store/re_mcap/src/layers/raw.rs +++ b/crates/store/re_mcap/src/decoders/raw.rs @@ -5,7 +5,7 @@ use re_sdk_types::archetypes::McapMessage; use crate::parsers::util::blob_list_builder; use crate::parsers::{MessageParser, ParserContext}; -use 
crate::{Error, LayerIdentifier, MessageLayer}; +use crate::{DecoderIdentifier, Error, MessageDecoder}; struct RawMcapMessageParser { data: FixedSizeListBuilder>, @@ -57,10 +57,10 @@ impl MessageParser for RawMcapMessageParser { /// The result will be verbatim copies of the original messages without decoding /// or imposing any semantic meaning on the data. #[derive(Default, Debug)] -pub struct McapRawLayer; +pub struct McapRawDecoder; -impl MessageLayer for McapRawLayer { - fn identifier() -> LayerIdentifier { +impl MessageDecoder for McapRawDecoder { + fn identifier() -> DecoderIdentifier { "raw".into() } diff --git a/crates/store/re_mcap/src/layers/recording_info.rs b/crates/store/re_mcap/src/decoders/recording_info.rs similarity index 82% rename from crates/store/re_mcap/src/layers/recording_info.rs rename to crates/store/re_mcap/src/decoders/recording_info.rs index 1dbbb52d06e0..fc2e6a49b711 100644 --- a/crates/store/re_mcap/src/layers/recording_info.rs +++ b/crates/store/re_mcap/src/decoders/recording_info.rs @@ -2,15 +2,15 @@ use re_chunk::{Chunk, EntityPath, RowId, TimePoint}; use re_sdk_types::archetypes::RecordingInfo; use saturating_cast::SaturatingCast as _; -use super::Layer; +use super::Decoder; use crate::Error; /// Build the [`RecordingInfo`] chunk using the message statistics from a [`mcap::Summary`]. 
#[derive(Debug, Default)] -pub struct McapRecordingInfoLayer; +pub struct McapRecordingInfoDecoder; -impl Layer for McapRecordingInfoLayer { - fn identifier() -> super::LayerIdentifier { +impl Decoder for McapRecordingInfoDecoder { + fn identifier() -> super::DecoderIdentifier { "recording_info".into() } @@ -18,6 +18,7 @@ impl Layer for McapRecordingInfoLayer { &mut self, _mcap_bytes: &[u8], summary: &mcap::Summary, + _topic_filter: &super::TopicFilter, emit: &mut dyn FnMut(Chunk), ) -> std::result::Result<(), Error> { let properties = summary diff --git a/crates/store/re_mcap/src/layers/ros2.rs b/crates/store/re_mcap/src/decoders/ros2.rs similarity index 69% rename from crates/store/re_mcap/src/layers/ros2.rs rename to crates/store/re_mcap/src/decoders/ros2.rs index 3afeaf92e610..f363f8cba398 100644 --- a/crates/store/re_mcap/src/layers/ros2.rs +++ b/crates/store/re_mcap/src/decoders/ros2.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; -use super::MessageLayer; +use super::MessageDecoder; use crate::parsers::MessageParser; use crate::parsers::ros2msg::Ros2MessageParser; use crate::parsers::ros2msg::geometry_msgs::PoseStampedMessageParser; @@ -20,11 +20,11 @@ use crate::parsers::ros2msg::tf2_msgs::tf_message::TfMessageParser; type ParserFactory = fn(usize) -> Box; #[derive(Debug)] -pub struct McapRos2Layer { +pub struct McapRos2Decoder { registry: BTreeMap, } -impl McapRos2Layer { +impl McapRos2Decoder { const ENCODING: &str = "ros2msg"; fn empty() -> Self { @@ -33,7 +33,7 @@ impl McapRos2Layer { } } - /// Creates a new [`McapRos2Layer`] with all supported message types pre-registered + /// Creates a new [`McapRos2Decoder`] with all supported message types pre-registered pub fn new() -> Self { Self::empty() // geometry_msgs @@ -81,27 +81,33 @@ impl McapRos2Layer { self } - /// Returns true if the given schema is supported by this layer + /// Returns true if the given schema is supported by this decoder pub fn supports_schema(&self, schema_name: &str) -> bool { 
self.registry.contains_key(schema_name) } } -impl Default for McapRos2Layer { +impl Default for McapRos2Decoder { fn default() -> Self { Self::new() } } -impl MessageLayer for McapRos2Layer { - fn identifier() -> super::LayerIdentifier { +impl MessageDecoder for McapRos2Decoder { + fn identifier() -> super::DecoderIdentifier { "ros2msg".into() } fn supports_channel(&self, channel: &mcap::Channel<'_>) -> bool { - channel.schema.as_ref().is_some_and(|s| { - s.encoding.as_str() == Self::ENCODING && self.registry.contains_key(&s.name) - }) + let Some(schema) = channel.schema.as_ref() else { + return false; + }; + + if !self.registry.contains_key(&schema.name) { + return false; + } + + supports_ros2_cdr_channel(channel) } fn message_parser( @@ -126,3 +132,49 @@ impl MessageLayer for McapRos2Layer { } } } + +fn is_cdr_message_encoding(message_encoding: &str) -> bool { + message_encoding.eq_ignore_ascii_case("cdr") +} + +/// Returns true for channels that explicitly advertise CDR payloads. +pub(super) fn is_cdr_encoded_channel(channel: &mcap::Channel<'_>) -> bool { + is_cdr_message_encoding(&channel.message_encoding) +} + +/// Warns once if a ROS2 schema is not encoded as CDR. +pub(super) fn warn_if_ros2msg_non_cdr_channel(channel: &mcap::Channel<'_>) { + // Note: empty encodings have a separate, ROS-independent warning. + if channel.message_encoding.trim().is_empty() + || is_cdr_message_encoding(&channel.message_encoding) + { + return; + } + + re_log::warn_once!( + concat!( + "MCAP channel '{}' has a ROS2 message schema, but unknown encoding '{}'. ", + "ROS 2 deserialization is only supported for CDR-encoded messages." + ), + channel.topic, + channel.message_encoding, + ); +} + +/// Returns true if the channel carries a ROS2 message schema with CDR payloads. 
+pub(super) fn supports_ros2_cdr_channel(channel: &mcap::Channel<'_>) -> bool { + let Some(schema) = channel.schema.as_ref() else { + return false; + }; + + if schema.encoding.as_str() != McapRos2Decoder::ENCODING { + return false; + } + + if !is_cdr_encoded_channel(channel) { + warn_if_ros2msg_non_cdr_channel(channel); + return false; + } + + true +} diff --git a/crates/store/re_mcap/src/layers/ros2_reflection.rs b/crates/store/re_mcap/src/decoders/ros2_reflection.rs similarity index 75% rename from crates/store/re_mcap/src/layers/ros2_reflection.rs rename to crates/store/re_mcap/src/decoders/ros2_reflection.rs index 3856be9bc619..fd3088246cea 100644 --- a/crates/store/re_mcap/src/layers/ros2_reflection.rs +++ b/crates/store/re_mcap/src/decoders/ros2_reflection.rs @@ -21,8 +21,9 @@ use re_sdk_types::ComponentDescriptor; use re_sdk_types::reflection::ComponentDescriptorExt as _; use serde::de::DeserializeSeed as _; +use super::ros2::supports_ros2_cdr_channel; use crate::parsers::{MessageParser, ParserContext, dds}; -use crate::{Error, LayerIdentifier, MessageLayer}; +use crate::{DecoderIdentifier, Error, MessageDecoder}; pub fn decode_bytes(top: &MessageSchema, buf: &[u8]) -> anyhow::Result { // 4-byte encapsulation header @@ -32,6 +33,10 @@ pub fn decode_bytes(top: &MessageSchema, buf: &[u8]) -> anyhow::Result { let representation_identifier = dds::RepresentationIdentifier::from_bytes([buf[0], buf[1]]) .with_context(|| "failed to parse CDR representation identifier")?; + anyhow::ensure!( + representation_identifier.is_cdr() || representation_identifier.is_cdr2(), + "message is not encoded using a CDR representation: {representation_identifier:?}" + ); let resolver = MapResolver::new(top.dependencies.iter().map(|dep| (dep.name.clone(), dep))); @@ -55,7 +60,7 @@ struct Ros2ReflectionMessageParser { #[derive(Debug, thiserror::Error)] pub enum Ros2ReflectionError { - #[error("Invalid message on channel {channel} for schema {schema}: {source}")] + #[error("Invalid 
message on channel {channel} for schema {schema}: {source:#}")] InvalidMessage { schema: String, channel: String, @@ -330,26 +335,19 @@ fn arrow_builder_from_type( dependencies: &[MessageSpecification], ) -> anyhow::Result> { Ok(match ty { - Type::BuiltIn(p) => match p { - BuiltInType::Bool => Box::new(BooleanBuilder::new()), - BuiltInType::Byte | BuiltInType::UInt8 => Box::new(UInt8Builder::new()), - BuiltInType::Char | BuiltInType::Int8 => Box::new(Int8Builder::new()), - BuiltInType::Int16 => Box::new(Int16Builder::new()), - BuiltInType::UInt16 => Box::new(UInt16Builder::new()), - BuiltInType::Int32 => Box::new(Int32Builder::new()), - BuiltInType::UInt32 => Box::new(UInt32Builder::new()), - BuiltInType::Int64 => Box::new(Int64Builder::new()), - BuiltInType::UInt64 => Box::new(UInt64Builder::new()), - BuiltInType::Float32 => Box::new(Float32Builder::new()), - BuiltInType::Float64 => Box::new(Float64Builder::new()), - BuiltInType::String(_) | BuiltInType::WString(_) => Box::new(StringBuilder::new()), - }, + Type::BuiltIn(p) => arrow_builder_from_builtin_type(p), Type::Complex(complex_type) => { - // Look up the message spec in dependencies let spec = resolve_complex_type(complex_type, dependencies).ok_or_else(|| { anyhow::anyhow!("Could not resolve complex type: {complex_type:?}") })?; - Box::new(struct_builder_from_message_spec(spec, dependencies)?) + // Some user-defined ROS 2 enum message definitions may only contain + // primitive-type constants (of same type) and no data field. + // We should use that common primitive type in this special case. + if let Some(primitive_type) = spec.underlying_type_if_enum_like()? { + arrow_builder_from_builtin_type(primitive_type) + } else { + Box::new(struct_builder_from_message_spec(spec, dependencies)?) + } } Type::Array { ty, .. 
} => { Box::new(ListBuilder::new(arrow_builder_from_type(ty, dependencies)?)) @@ -357,6 +355,23 @@ fn arrow_builder_from_type( }) } +fn arrow_builder_from_builtin_type(ty: &BuiltInType) -> Box { + match ty { + BuiltInType::Bool => Box::new(BooleanBuilder::new()), + BuiltInType::Byte | BuiltInType::UInt8 => Box::new(UInt8Builder::new()), + BuiltInType::Char | BuiltInType::Int8 => Box::new(Int8Builder::new()), + BuiltInType::Int16 => Box::new(Int16Builder::new()), + BuiltInType::UInt16 => Box::new(UInt16Builder::new()), + BuiltInType::Int32 => Box::new(Int32Builder::new()), + BuiltInType::UInt32 => Box::new(UInt32Builder::new()), + BuiltInType::Int64 => Box::new(Int64Builder::new()), + BuiltInType::UInt64 => Box::new(UInt64Builder::new()), + BuiltInType::Float32 => Box::new(Float32Builder::new()), + BuiltInType::Float64 => Box::new(Float64Builder::new()), + BuiltInType::String(_) | BuiltInType::WString(_) => Box::new(StringBuilder::new()), + } +} + fn arrow_field_from_type( ty: &Type, name: &str, @@ -370,30 +385,24 @@ fn datatype_from_type( dependencies: &[MessageSpecification], ) -> anyhow::Result { Ok(match ty { - Type::BuiltIn(p) => match p { - BuiltInType::Bool => DataType::Boolean, - BuiltInType::Byte | BuiltInType::UInt8 => DataType::UInt8, - BuiltInType::Char | BuiltInType::Int8 => DataType::Int8, - BuiltInType::Int16 => DataType::Int16, - BuiltInType::UInt16 => DataType::UInt16, - BuiltInType::Int32 => DataType::Int32, - BuiltInType::UInt32 => DataType::UInt32, - BuiltInType::Int64 => DataType::Int64, - BuiltInType::UInt64 => DataType::UInt64, - BuiltInType::Float32 => DataType::Float32, - BuiltInType::Float64 => DataType::Float64, - BuiltInType::String(_) | BuiltInType::WString(_) => DataType::Utf8, // No wstring in Arrow - }, + Type::BuiltIn(p) => datatype_from_builtin_type(p), Type::Complex(complex_type) => { let spec = resolve_complex_type(complex_type, dependencies).ok_or_else(|| { anyhow::anyhow!("Could not resolve complex type: {complex_type:?}") })?; 
- let fields = spec - .fields - .iter() - .map(|f| arrow_field_from_type(&f.ty, &f.name, dependencies)) - .collect::>()?; - DataType::Struct(fields) + // Some user-defined ROS 2 enum message definitions may only contain + // primitive-type constants (of same type) and no data field. + // We should use that common primitive type in this special case. + if let Some(primitive_type) = spec.underlying_type_if_enum_like()? { + datatype_from_builtin_type(primitive_type) + } else { + let fields = spec + .fields + .iter() + .map(|f| arrow_field_from_type(&f.ty, &f.name, dependencies)) + .collect::>()?; + DataType::Struct(fields) + } } Type::Array { ty, size } => match size { ArraySize::Fixed(_) | ArraySize::Bounded(_) | ArraySize::Unbounded => { @@ -403,6 +412,23 @@ fn datatype_from_type( }) } +fn datatype_from_builtin_type(ty: &BuiltInType) -> DataType { + match ty { + BuiltInType::Bool => DataType::Boolean, + BuiltInType::Byte | BuiltInType::UInt8 => DataType::UInt8, + BuiltInType::Char | BuiltInType::Int8 => DataType::Int8, + BuiltInType::Int16 => DataType::Int16, + BuiltInType::UInt16 => DataType::UInt16, + BuiltInType::Int32 => DataType::Int32, + BuiltInType::UInt32 => DataType::UInt32, + BuiltInType::Int64 => DataType::Int64, + BuiltInType::UInt64 => DataType::UInt64, + BuiltInType::Float32 => DataType::Float32, + BuiltInType::Float64 => DataType::Float64, + BuiltInType::String(_) | BuiltInType::WString(_) => DataType::Utf8, // No wstring in Arrow + } +} + fn resolve_complex_type<'a>( complex_type: &ComplexType, dependencies: &'a [MessageSpecification], @@ -419,15 +445,15 @@ fn resolve_complex_type<'a>( /// Provides reflection-based conversion of ROS2-encoded MCAP messages. /// -/// This layer dynamically parses ROS2 messages at runtime, allowing for -/// a direct arrow representation of the messages fields, similar to the protobuf layer. 
+/// This decoder dynamically parses ROS2 messages at runtime, allowing for +/// a direct arrow representation of the messages fields, similar to the protobuf decoder. #[derive(Debug, Default)] -pub struct McapRos2ReflectionLayer { +pub struct McapRos2ReflectionDecoder { schemas_per_topic: ahash::HashMap, } -impl MessageLayer for McapRos2ReflectionLayer { - fn identifier() -> LayerIdentifier { +impl MessageDecoder for McapRos2ReflectionDecoder { + fn identifier() -> DecoderIdentifier { "ros2_reflection".into() } @@ -474,15 +500,19 @@ impl MessageLayer for McapRos2ReflectionLayer { return false; } - // Only support channels if the semantic layer doesn't support them + // Only support channels if the semantic decoder doesn't support them // First check if we have parsed the schema successfully if !self.schemas_per_topic.contains_key(&channel.topic) { return false; } - // Check if the semantic layer would handle this message type - let semantic_layer = super::McapRos2Layer::new(); - !semantic_layer.supports_schema(&schema.name) + if !supports_ros2_cdr_channel(channel) { + return false; + } + + // Check if the semantic decoder would handle this message type + let semantic_decoder = super::McapRos2Decoder::new(); + !semantic_decoder.supports_channel(channel) } fn message_parser( @@ -496,3 +526,59 @@ impl MessageLayer for McapRos2ReflectionLayer { )) } } + +#[cfg(test)] +mod tests { + use arrow::datatypes::DataType; + use re_ros_msg::MessageSchema; + use re_ros_msg::deserialize::Value; + + use super::*; + + fn enum_schema() -> MessageSchema { + MessageSchema::parse( + "test/Msg", + r#" +test/DummyEnum enum_value +bool enabled + +================================================================================ +MSG: test/DummyEnum +int8 FOO=0 +int8 BAR=1 +"#, + ) + .unwrap() + } + + /// Tests that constants-only enum-like fields deserialize as primitive values. 
+ #[test] + fn decodes_constants_only_enum_as_primitive_value() { + let schema = enum_schema(); + let value = decode_bytes(&schema, &[0x00, 0x01, 0x00, 0x00, 2, 1]).unwrap(); + + let Value::Message(fields) = value else { + panic!("expected message"); + }; + + assert_eq!(fields.get("enum_value"), Some(&Value::I8(2))); + assert_eq!(fields.get("enabled"), Some(&Value::Bool(true))); + } + + /// Tests that constants-only enum-like fields use their primitive Arrow type. + #[test] + fn constants_only_enum_uses_primitive_arrow_type() { + let schema = enum_schema(); + let mode = schema + .spec + .fields + .iter() + .find(|field| field.name == "enum_value") + .unwrap(); + + assert_eq!( + datatype_from_type(&mode.ty, &schema.dependencies).unwrap(), + DataType::Int8 + ); + } +} diff --git a/crates/store/re_mcap/src/layers/schema.rs b/crates/store/re_mcap/src/decoders/schema.rs similarity index 78% rename from crates/store/re_mcap/src/layers/schema.rs rename to crates/store/re_mcap/src/decoders/schema.rs index 1558e6eefdd2..65eea775e734 100644 --- a/crates/store/re_mcap/src/layers/schema.rs +++ b/crates/store/re_mcap/src/decoders/schema.rs @@ -4,27 +4,38 @@ use re_chunk::{Chunk, RowId, TimePoint}; use re_sdk_types::archetypes::{McapChannel, McapSchema}; use re_sdk_types::{AsComponents as _, components}; -use super::{Layer, LayerIdentifier}; +use super::{Decoder, DecoderIdentifier}; use crate::Error; /// Extracts a static summary of channel and schema information. /// /// Can be used to get an overview over the contents of an MCAP file. 
#[derive(Debug, Default)] -pub struct McapSchemaLayer; +pub struct McapSchemaDecoder; -impl Layer for McapSchemaLayer { - fn identifier() -> LayerIdentifier { +impl Decoder for McapSchemaDecoder { + fn identifier() -> DecoderIdentifier { "schema".into() } fn process( &mut self, - _mcap_bytes: &[u8], + mcap_bytes: &[u8], summary: &mcap::Summary, + topic_filter: &super::TopicFilter, emit: &mut dyn FnMut(Chunk), ) -> Result<(), Error> { + let empty_channels = crate::util::collect_empty_channels(mcap_bytes, summary)?; + for channel in summary.channels.values() { + if empty_channels.contains(&crate::parsers::ChannelId(channel.id)) { + continue; + } + + if !topic_filter.matches(&channel.topic) { + continue; + } + let mut components = from_channel(channel).as_serialized_batches(); if let Some(schema) = channel.schema.as_ref() { components.extend( diff --git a/crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__field_combinations_with_presence_tracking.snap b/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__decode_failure_resilience.snap similarity index 50% rename from crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__field_combinations_with_presence_tracking.snap rename to crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__decode_failure_resilience.snap index a324769e3cec..3b3d303672bd 100644 --- a/crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__field_combinations_with_presence_tracking.snap +++ b/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__decode_failure_resilience.snap @@ -1,38 +1,40 @@ --- -source: crates/store/re_mcap/src/layers/protobuf.rs -expression: "format!(\"{:-240}\", &chunks[0])" +source: crates/store/re_mcap/src/decoders/protobuf.rs +expression: "format_chunk(&chunks[0])" --- 
-┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /test_topic │ -│ * id: [**REDACTED**] │ -│ * version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬──────────────────────────────┬──────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ -│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ com.example.Person:message │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable Duration(ns) ┆ type: nullable Duration(ns) ┆ type: nullable List[nullable Struct[4]] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: message_log_time ┆ index_name: message_publish_time ┆ archetype: com.example.Person │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ is_sorted: true ┆ component: com.example.Person:message │ │ -│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ kind: data │ │ -│ │ kind: control ┆ ┆ ┆ │ │ -│ ╞═══════════════════════════════════════════════╪══════════════════════════════╪══════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ PT0.000000001S ┆ PT0.000000001S ┆ [{name: Alice, id: 123, status: {name: ACTIVE, value: │ │ -│ │ ┆ ┆ ┆ 1}, address: {street: Main St, city: NYC}}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000002S ┆ PT0.000000002S ┆ [{name: Bob, id: null, status: {name: INACTIVE, value: │ │ -│ │ ┆ ┆ ┆ 2}, address: {street: Oak Ave, city: null}}] │ │ -│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000003S ┆ PT0.000000003S ┆ [{name: Charlie, id: 456, status: null, address: null}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000004S ┆ PT0.000000004S ┆ [{name: Dave, id: null, status: null, address: {street: │ │ -│ │ ┆ ┆ ┆ null, city: LA}}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000005S ┆ PT0.000000005S ┆ [{name: null, id: 789, status: null, address: null}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000006S ┆ PT0.000000006S ┆ [{name: null, id: null, status: {name: ACTIVE, value: │ │ -│ │ ┆ ┆ ┆ 1}, address: null}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000007S ┆ PT0.000000007S ┆ [{name: null, id: null, status: null, address: null}] │ │ -│ └───────────────────────────────────────────────┴──────────────────────────────┴──────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ -└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ 
+┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /test_topic │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬───────────────────────────────┬──────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ com.example.Person:message │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Struct("name": Utf8, "id": Int32, "status": │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: message_log_time ┆ index_name: message_publish_time ┆ Struct("name": Utf8, "value": Int32), metadata: │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ is_sorted: true ┆ {"ARROW:extension:name": │ │ +│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ "rerun.datatypes.ProtobufEnum"}, "address": │ │ +│ │ kind: control ┆ ┆ ┆ Struct("street": Utf8, "city": Utf8), "tags": │ │ +│ │ ┆ ┆ ┆ Map("entries": non-null Struct("key": non-null Utf8, │ │ +│ │ ┆ ┆ ┆ "value": Utf8), unsorted))) │ │ +│ │ ┆ ┆ ┆ archetype: com.example.Person │ │ +│ │ ┆ ┆ ┆ component: com.example.Person:message │ │ +│ │ ┆ ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪═══════════════════════════════╪══════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000101 ┆ 1970-01-01T00:00:00.000000101 ┆ [{name: Person1, id: 1, status: null, address: null, │ │ +│ │ ┆ ┆ ┆ tags: null}] │ │ +│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000103 ┆ 1970-01-01T00:00:00.000000103 ┆ [{name: Person3, id: 3, status: null, address: null, │ │ +│ │ ┆ ┆ ┆ tags: null}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000105 ┆ 1970-01-01T00:00:00.000000105 ┆ [{name: Person5, id: 5, status: null, address: null, │ │ +│ │ ┆ ┆ ┆ tags: null}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000107 ┆ 1970-01-01T00:00:00.000000107 ┆ [{name: Person7, id: 7, status: null, address: null, │ │ +│ │ ┆ ┆ ┆ tags: null}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000109 ┆ 1970-01-01T00:00:00.000000109 ┆ [{name: Person9, id: 9, status: null, address: null, │ │ +│ │ ┆ ┆ ┆ tags: null}] │ │ +│ └───────────────────────────────────────────────┴───────────────────────────────┴──────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__field_combinations_without_presence_tracking.snap 
b/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__field_combinations_with_presence_tracking.snap similarity index 50% rename from crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__field_combinations_without_presence_tracking.snap rename to crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__field_combinations_with_presence_tracking.snap index 6db8f919ed5e..b43033d7e5c9 100644 --- a/crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__field_combinations_without_presence_tracking.snap +++ b/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__field_combinations_with_presence_tracking.snap @@ -1,41 +1,47 @@ --- -source: crates/store/re_mcap/src/layers/protobuf.rs -expression: "format!(\"{:-240}\", &chunks[0])" +source: crates/store/re_mcap/src/decoders/protobuf.rs +expression: "format_chunk(&chunks[0])" --- -┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /test_topic │ -│ * id: [**REDACTED**] │ -│ * version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬──────────────────────────────┬──────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ -│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ com.example.Person:message │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable Duration(ns) ┆ type: nullable Duration(ns) ┆ type: nullable List[nullable Struct[4]] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: message_log_time ┆ 
index_name: message_publish_time ┆ archetype: com.example.Person │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ is_sorted: true ┆ component: com.example.Person:message │ │ -│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ kind: data │ │ -│ │ kind: control ┆ ┆ ┆ │ │ -│ ╞═══════════════════════════════════════════════╪══════════════════════════════╪══════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ PT0.000000001S ┆ PT0.000000001S ┆ [{name: Alice, id: 123, status: {name: ACTIVE, value: │ │ -│ │ ┆ ┆ ┆ 1}, address: {street: Main St, city: NYC}}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000002S ┆ PT0.000000002S ┆ [{name: Bob, id: 0, status: {name: INACTIVE, value: 2}, │ │ -│ │ ┆ ┆ ┆ address: {street: Oak Ave, city: }}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000003S ┆ PT0.000000003S ┆ [{name: Charlie, id: 456, status: {name: UNKNOWN, value: │ │ -│ │ ┆ ┆ ┆ 0}, address: null}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000004S ┆ PT0.000000004S ┆ [{name: Dave, id: 0, status: {name: UNKNOWN, value: 0}, │ │ -│ │ ┆ ┆ ┆ address: {street: , city: LA}}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000005S ┆ PT0.000000005S ┆ [{name: , id: 789, status: {name: UNKNOWN, value: 0}, │ │ -│ │ ┆ ┆ ┆ address: null}] │ │ -│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000006S ┆ PT0.000000006S ┆ [{name: , id: 0, status: {name: ACTIVE, value: 1}, │ │ -│ │ ┆ ┆ ┆ address: null}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000007S ┆ PT0.000000007S ┆ [{name: , id: 0, status: {name: UNKNOWN, value: 0}, │ │ -│ │ ┆ ┆ ┆ address: null}] │ │ -│ └───────────────────────────────────────────────┴──────────────────────────────┴──────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ -└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /test_topic │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬───────────────────────────────┬──────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ com.example.Person:message │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Struct("name": Utf8, "id": Int32, "status": │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: 
message_log_time ┆ index_name: message_publish_time ┆ Struct("name": Utf8, "value": Int32), metadata: │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ is_sorted: true ┆ {"ARROW:extension:name": │ │ +│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ "rerun.datatypes.ProtobufEnum"}, "address": │ │ +│ │ kind: control ┆ ┆ ┆ Struct("street": Utf8, "city": Utf8), "tags": │ │ +│ │ ┆ ┆ ┆ Map("entries": non-null Struct("key": non-null Utf8, │ │ +│ │ ┆ ┆ ┆ "value": Utf8), unsorted))) │ │ +│ │ ┆ ┆ ┆ archetype: com.example.Person │ │ +│ │ ┆ ┆ ┆ component: com.example.Person:message │ │ +│ │ ┆ ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪═══════════════════════════════╪══════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000001 ┆ 1970-01-01T00:00:00.000000001 ┆ [{name: Alice, id: 123, status: {name: ACTIVE, value: │ │ +│ │ ┆ ┆ ┆ 1}, address: {street: Main St, city: NYC}, tags: {org: │ │ +│ │ ┆ ┆ ┆ rerun, role: admin}}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000002 ┆ 1970-01-01T00:00:00.000000002 ┆ [{name: Bob, id: null, status: {name: INACTIVE, value: │ │ +│ │ ┆ ┆ ┆ 2}, address: {street: Oak Ave, city: null}, tags: null}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000003 ┆ 1970-01-01T00:00:00.000000003 ┆ [{name: Charlie, id: 456, status: null, address: null, │ │ +│ │ ┆ ┆ ┆ tags: null}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ 
row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000004 ┆ 1970-01-01T00:00:00.000000004 ┆ [{name: Dave, id: null, status: null, address: {street: │ │ +│ │ ┆ ┆ ┆ null, city: LA}, tags: {role: admin}}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000005 ┆ 1970-01-01T00:00:00.000000005 ┆ [{name: null, id: 789, status: null, address: null, │ │ +│ │ ┆ ┆ ┆ tags: null}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000006 ┆ 1970-01-01T00:00:00.000000006 ┆ [{name: null, id: null, status: {name: ACTIVE, value: │ │ +│ │ ┆ ┆ ┆ 1}, address: null, tags: null}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000007 ┆ 1970-01-01T00:00:00.000000007 ┆ [{name: null, id: null, status: null, address: null, │ │ +│ │ ┆ ┆ ┆ tags: null}] │ │ +│ └───────────────────────────────────────────────┴───────────────────────────────┴──────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__field_combinations_without_presence_tracking.snap b/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__field_combinations_without_presence_tracking.snap new file mode 100644 index 
000000000000..96668b4b34d2 --- /dev/null +++ b/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__field_combinations_without_presence_tracking.snap @@ -0,0 +1,47 @@ +--- +source: crates/store/re_mcap/src/decoders/protobuf.rs +expression: "format_chunk(&chunks[0])" +--- +┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /test_topic │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬───────────────────────────────┬──────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ com.example.Person:message │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Struct("name": Utf8, "id": Int32, "status": │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: message_log_time ┆ index_name: message_publish_time ┆ Struct("name": Utf8, "value": Int32), metadata: │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ is_sorted: true ┆ {"ARROW:extension:name": │ │ +│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ "rerun.datatypes.ProtobufEnum"}, "address": │ │ +│ │ kind: control ┆ ┆ ┆ Struct("street": Utf8, "city": Utf8), "tags": │ │ +│ │ ┆ ┆ ┆ Map("entries": non-null Struct("key": non-null Utf8, │ │ +│ │ ┆ ┆ ┆ "value": Utf8), unsorted))) │ │ +│ │ ┆ ┆ ┆ archetype: com.example.Person │ │ +│ │ ┆ ┆ ┆ component: com.example.Person:message │ │ +│ │ ┆ ┆ ┆ kind: data │ │ +│ 
╞═══════════════════════════════════════════════╪═══════════════════════════════╪══════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000001 ┆ 1970-01-01T00:00:00.000000001 ┆ [{name: Alice, id: 123, status: {name: ACTIVE, value: │ │ +│ │ ┆ ┆ ┆ 1}, address: {street: Main St, city: NYC}, tags: {org: │ │ +│ │ ┆ ┆ ┆ rerun, role: admin}}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000002 ┆ 1970-01-01T00:00:00.000000002 ┆ [{name: Bob, id: 0, status: {name: INACTIVE, value: 2}, │ │ +│ │ ┆ ┆ ┆ address: {street: Oak Ave, city: }, tags: null}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000003 ┆ 1970-01-01T00:00:00.000000003 ┆ [{name: Charlie, id: 456, status: {name: UNKNOWN, value: │ │ +│ │ ┆ ┆ ┆ 0}, address: null, tags: null}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000004 ┆ 1970-01-01T00:00:00.000000004 ┆ [{name: Dave, id: 0, status: {name: UNKNOWN, value: 0}, │ │ +│ │ ┆ ┆ ┆ address: {street: , city: LA}, tags: {role: admin}}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000005 ┆ 1970-01-01T00:00:00.000000005 ┆ [{name: , id: 789, status: {name: UNKNOWN, value: 0}, │ │ +│ │ ┆ ┆ ┆ address: null, tags: null}] │ │ +│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000006 ┆ 1970-01-01T00:00:00.000000006 ┆ [{name: , id: 0, status: {name: ACTIVE, value: 1}, │ │ +│ │ ┆ ┆ ┆ address: null, tags: null}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000007 ┆ 1970-01-01T00:00:00.000000007 ┆ [{name: , id: 0, status: {name: UNKNOWN, value: 0}, │ │ +│ │ ┆ ┆ ┆ address: null, tags: null}] │ │ +│ └───────────────────────────────────────────────┴───────────────────────────────┴──────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__oneof_fields.snap b/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__oneof_fields.snap new file mode 100644 index 000000000000..849bbbcf4777 --- /dev/null +++ b/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__oneof_fields.snap @@ -0,0 +1,30 @@ +--- +source: crates/store/re_mcap/src/decoders/protobuf.rs +expression: "format_chunk(&chunks[0])" +--- +┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /color_topic │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ 
+├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬───────────────────────────────┬──────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ com.example.Color:message │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Struct("object": Utf8, "color": Struct("rgb": │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: message_log_time ┆ index_name: message_publish_time ┆ Utf8, "hsv": Utf8, "bgr": Utf8), metadata: │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ is_sorted: true ┆ {"ARROW:extension:name": │ │ +│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ "rerun.datatypes.ProtobufOneOf"}, "gamma": Float32)) │ │ +│ │ kind: control ┆ ┆ ┆ archetype: com.example.Color │ │ +│ │ ┆ ┆ ┆ component: com.example.Color:message │ │ +│ │ ┆ ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪═══════════════════════════════╪══════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000001 ┆ 1970-01-01T00:00:00.000000001 ┆ [{object: box, color: {rgb: 255,0,0, hsv: null, bgr: │ │ +│ │ ┆ ┆ ┆ null}, gamma: 2.2}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000002 ┆ 1970-01-01T00:00:00.000000002 ┆ [{object: sphere, color: {rgb: null, hsv: 120,1.0,1.0, │ │ +│ │ ┆ ┆ ┆ bgr: null}, gamma: null}] │ │ +│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000003 ┆ 1970-01-01T00:00:00.000000003 ┆ [{object: cone, color: null, gamma: null}] │ │ +│ └───────────────────────────────────────────────┴───────────────────────────────┴──────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__decode_failure_resilience.snap b/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__oneof_message_variants.snap similarity index 66% rename from crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__decode_failure_resilience.snap rename to crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__oneof_message_variants.snap index 8ce10acdcc35..e545788444a3 100644 --- a/crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__decode_failure_resilience.snap +++ b/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__oneof_message_variants.snap @@ -1,30 +1,31 @@ --- -source: crates/store/re_mcap/src/layers/protobuf.rs -expression: "format!(\"{:-240}\", &chunks[0])" +source: crates/store/re_mcap/src/decoders/protobuf.rs +expression: "format_chunk(&chunks[0])" --- -┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /test_topic │ -│ * id: [**REDACTED**] │ -│ * version: 
[**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬──────────────────────────────┬──────────────────────────────────┬───────────────────────────────────────────────────────┐ │ -│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ com.example.Person:message │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable Duration(ns) ┆ type: nullable Duration(ns) ┆ type: nullable List[nullable Struct[4]] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: message_log_time ┆ index_name: message_publish_time ┆ archetype: com.example.Person │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ is_sorted: true ┆ component: com.example.Person:message │ │ -│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ kind: data │ │ -│ │ kind: control ┆ ┆ ┆ │ │ -│ ╞═══════════════════════════════════════════════╪══════════════════════════════╪══════════════════════════════════╪═══════════════════════════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ PT0.000000101S ┆ PT0.000000101S ┆ [{name: Person1, id: 1, status: null, address: null}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000103S ┆ PT0.000000103S ┆ [{name: Person3, id: 3, status: null, address: null}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000105S ┆ PT0.000000105S ┆ [{name: Person5, id: 5, status: null, address: null}] │ │ -│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000107S ┆ PT0.000000107S ┆ [{name: Person7, id: 7, status: null, address: null}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000109S ┆ PT0.000000109S ┆ [{name: Person9, id: 9, status: null, address: null}] │ │ -│ └───────────────────────────────────────────────┴──────────────────────────────┴──────────────────────────────────┴───────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /shape_topic │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────────────────┬──────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ com.example.Shape:message │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Duration(ns) ┆ type: Duration(ns) ┆ type: List(Struct("name": Utf8, "geometry": │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: message_log_time ┆ index_name: message_publish_time ┆ Struct("circle": Struct("radius": Float32), 
"rectangle": │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ is_sorted: true ┆ Struct("width": Float32, "height": Float32)), metadata: │ │ +│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ {"ARROW:extension:name": │ │ +│ │ kind: control ┆ ┆ ┆ "rerun.datatypes.ProtobufOneOf"})) │ │ +│ │ ┆ ┆ ┆ archetype: com.example.Shape │ │ +│ │ ┆ ┆ ┆ component: com.example.Shape:message │ │ +│ │ ┆ ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════════════════╪══════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ PT0.000000001S ┆ PT0.000000001S ┆ [{name: wheel, geometry: {circle: {radius: 5.0}, │ │ +│ │ ┆ ┆ ┆ rectangle: null}}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ PT0.000000002S ┆ PT0.000000002S ┆ [{name: door, geometry: {circle: null, rectangle: │ │ +│ │ ┆ ┆ ┆ {width: 3.0, height: 7.0}}}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ PT0.000000003S ┆ PT0.000000003S ┆ [{name: unknown, geometry: null}] │ │ +│ └───────────────────────────────────────────────┴──────────────────────────────┴──────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__oneof_nested.snap b/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__oneof_nested.snap new file mode 100644 index 
000000000000..b5f042b38548 --- /dev/null +++ b/crates/store/re_mcap/src/decoders/snapshots/re_mcap__decoders__protobuf__integration_tests__oneof_nested.snap @@ -0,0 +1,28 @@ +--- +source: crates/store/re_mcap/src/decoders/protobuf.rs +expression: "format_chunk(&chunks[0])" +--- +┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /scene_topic │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬───────────────────────────────┬──────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ +│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ com.example.Scene:message │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Timestamp(ns) ┆ type: Timestamp(ns) ┆ type: List(Struct("object": Struct("object": Utf8, │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: message_log_time ┆ index_name: message_publish_time ┆ "color": Struct("rgb": Utf8, "hsv": Utf8, "bgr": Utf8), │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ is_sorted: true ┆ metadata: {"ARROW:extension:name": │ │ +│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ "rerun.datatypes.ProtobufOneOf"}, "gamma": Float32))) │ │ +│ │ kind: control ┆ ┆ ┆ archetype: com.example.Scene │ │ +│ │ ┆ ┆ ┆ component: com.example.Scene:message │ │ +│ │ ┆ ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪═══════════════════════════════╪══════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000001 ┆ 1970-01-01T00:00:00.000000001 ┆ [{object: 
{object: cube, color: {rgb: 128,64,32, hsv: │ │ +│ │ ┆ ┆ ┆ null, bgr: null}, gamma: null}}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1970-01-01T00:00:00.000000002 ┆ 1970-01-01T00:00:00.000000002 ┆ [{object: {object: pyramid, color: {rgb: null, hsv: │ │ +│ │ ┆ ┆ ┆ null, bgr: 0,255,0}, gamma: null}}] │ │ +│ └───────────────────────────────────────────────┴───────────────────────────────┴──────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_mcap/src/layers/stats.rs b/crates/store/re_mcap/src/decoders/stats.rs similarity index 91% rename from crates/store/re_mcap/src/layers/stats.rs rename to crates/store/re_mcap/src/decoders/stats.rs index 9dbd3c04c412..09e2566c358e 100644 --- a/crates/store/re_mcap/src/layers/stats.rs +++ b/crates/store/re_mcap/src/decoders/stats.rs @@ -3,17 +3,17 @@ use re_sdk_types::archetypes::McapStatistics; use re_sdk_types::{components, datatypes}; use saturating_cast::SaturatingCast as _; -use super::{Layer, LayerIdentifier}; +use super::{Decoder, DecoderIdentifier}; use crate::Error; /// Extracts [`mcap::records::Statistics`], such as message count, from an MCAP file. /// /// The results will be stored as recording properties. 
#[derive(Debug, Default)] -pub struct McapStatisticLayer; +pub struct McapStatisticDecoder; -impl Layer for McapStatisticLayer { - fn identifier() -> LayerIdentifier { +impl Decoder for McapStatisticDecoder { + fn identifier() -> DecoderIdentifier { "stats".into() } @@ -21,6 +21,7 @@ impl Layer for McapStatisticLayer { &mut self, _mcap_bytes: &[u8], summary: &mcap::Summary, + _topic_filter: &super::TopicFilter, emit: &mut dyn FnMut(Chunk), ) -> Result<(), Error> { if let Some(statistics) = summary.stats.as_ref() { diff --git a/crates/store/re_mcap/src/layers/mod.rs b/crates/store/re_mcap/src/layers/mod.rs deleted file mode 100644 index 5e6d2d500235..000000000000 --- a/crates/store/re_mcap/src/layers/mod.rs +++ /dev/null @@ -1,500 +0,0 @@ -mod protobuf; -mod raw; -mod recording_info; -mod ros2; -mod ros2_reflection; -mod schema; -mod stats; - -use std::collections::{BTreeMap, BTreeSet}; - -use re_chunk::external::nohash_hasher::IntMap; -use re_chunk::{Chunk, EntityPath}; - -pub use self::protobuf::McapProtobufLayer; -pub use self::raw::McapRawLayer; -pub use self::recording_info::McapRecordingInfoLayer; -pub use self::ros2::McapRos2Layer; -pub use self::ros2_reflection::McapRos2ReflectionLayer; -pub use self::schema::McapSchemaLayer; -pub use self::stats::McapStatisticLayer; -use crate::Error; -use crate::parsers::{ChannelId, MessageParser, ParserContext}; - -/// Globally unique identifier for a layer. -#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] -#[repr(transparent)] -pub struct LayerIdentifier(String); - -impl From<&'static str> for LayerIdentifier { - fn from(value: &'static str) -> Self { - Self(value.to_owned()) - } -} - -impl From for LayerIdentifier { - fn from(value: String) -> Self { - Self(value) - } -} - -impl std::fmt::Display for LayerIdentifier { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.0.fmt(f) - } -} - -/// A layer describes information that can be extracted from an MCAP file. 
-/// -/// It is the most general level at which we can interpret an MCAP file and can -/// be used to either output general information about the MCAP file or to call -/// into layers that work on a per-message basis via the [`MessageLayer`] trait. -pub trait Layer { - /// Globally unique identifier for this layer. - /// - /// [`LayerIdentifier`]s are also be used to select only a subset of active layers. - fn identifier() -> LayerIdentifier - where - Self: Sized; - - /// The processing that needs to happen for this layer. - /// - /// This function has access to the entire MCAP file via `mcap_bytes`. - // TODO(#10862): Consider abstracting over `Summary` to allow more convenient / performant indexing. - // For example, we probably don't want to store the entire file in memory. - fn process( - &mut self, - mcap_bytes: &[u8], - summary: &::mcap::Summary, - emit: &mut dyn FnMut(Chunk), - ) -> Result<(), Error>; -} - -/// Can be used to extract per-message information from an MCAP file. -/// -/// This is a specialization of [`Layer`] that allows defining [`MessageParser`]s. -/// to interpret the contents of MCAP chunks. -pub trait MessageLayer { - fn identifier() -> LayerIdentifier - where - Self: Sized; - - fn init(&mut self, _summary: &::mcap::Summary) -> Result<(), Error> { - Ok(()) - } - - /// Returns `true` if this layer can handle the given channel. - /// - /// This method is used to determine which channels should be processed by which layers, - /// particularly for implementing fallback behavior where one layer handles channels - /// that other layers cannot process. - fn supports_channel(&self, channel: &mcap::Channel<'_>) -> bool; - - /// Instantites a new [`MessageParser`] that expects `num_rows` if it is interested in the current channel. - /// - /// Otherwise returns `None`. - /// - /// The `num_rows` argument allows parsers to pre-allocate storage with the - /// correct capacity, avoiding reallocations during message processing. 
- fn message_parser( - &self, - channel: &mcap::Channel<'_>, - num_rows: usize, - ) -> Option>; -} - -type Parser = (ParserContext, Box); - -/// Decodes batches of messages from an MCAP into Rerun chunks using previously registered parsers. -struct McapChunkDecoder { - parsers: IntMap, -} - -impl McapChunkDecoder { - pub fn new(parsers: IntMap) -> Self { - Self { parsers } - } - - /// Decode the next message in the chunk - pub fn decode_next(&mut self, msg: &::mcap::Message<'_>) -> Result<(), Error> { - re_tracing::profile_function!(); - - let channel = msg.channel.as_ref(); - let channel_id = ChannelId(channel.id); - - if let Some((ctx, parser)) = self.parsers.get_mut(&channel_id) { - // If the parser fails, we should _not_ append the timepoint - parser.append(ctx, msg)?; - for timepoint in parser.get_log_and_publish_timepoints(msg)? { - ctx.add_timepoint(timepoint); - } - } else { - // TODO(#10862): If we encounter a message that we can't parse at all we should emit a warning. - // Note that this quite easy to achieve when using layers and only selecting a subset. - // However, to not overwhelm the user this should be reported in a _single_ static chunk, - // so this is not the right place for this. Maybe we need to introduce something like a "report". - } - Ok(()) - } - - /// Finish the decoding process and return the chunks. - pub fn finish(self) -> impl Iterator> { - self.parsers - .into_values() - .flat_map(|(ctx, parser)| match parser.finalize(ctx) { - Ok(chunks) => chunks.into_iter().map(Ok).collect::>(), - Err(err) => vec![Err(Error::Other(err))], - }) - } -} - -/// Used to select certain layers. -#[derive(Clone, Debug)] -pub enum SelectedLayers { - All, - Subset(BTreeSet), -} - -impl SelectedLayers { - /// Checks if a layer is part of the current selection. - pub fn contains(&self, value: &LayerIdentifier) -> bool { - match self { - Self::All => true, - Self::Subset(subset) => subset.contains(value), - } - } -} - -/// Registry fallback strategy. 
-#[derive(Clone, Debug, Default)] -pub enum Fallback { - /// No fallback – channels without a handler are simply unassigned. - #[default] - None, - - /// Single global fallback message layer (e.g. `raw`). - Global(LayerIdentifier), -} - -/// A runner that constrains a [`MessageLayer`] to a specific set of channels. -pub struct MessageLayerRunner { - inner: Box, - allowed: BTreeSet, -} - -impl MessageLayerRunner { - fn new(inner: Box, allowed: BTreeSet) -> Self { - Self { inner, allowed } - } -} - -impl Layer for MessageLayerRunner { - fn identifier() -> LayerIdentifier - where - Self: Sized, - { - // static identifier isn't used for trait objects; unreachable in practice. - "message_layer_runner".into() - } - - fn process( - &mut self, - mcap_bytes: &[u8], - summary: &mcap::Summary, - emit: &mut dyn FnMut(Chunk), - ) -> Result<(), Error> { - self.inner.init(summary)?; - - for chunk in &summary.chunk_indexes { - let parsers = summary - .read_message_indexes(mcap_bytes, chunk)? - .iter() - .filter_map(|(channel, msg_offsets)| { - let channel_id = ChannelId::from(channel.id); - if !self.allowed.contains(&channel_id) { - return None; - } - - let parser = self.inner.message_parser(channel, msg_offsets.len())?; - let entity_path = EntityPath::from(channel.topic.as_str()); - let ctx = ParserContext::new(entity_path); - Some((channel_id, (ctx, parser))) - }) - .collect::>(); - - let mut decoder = McapChunkDecoder::new(parsers); - - for msg in summary.stream_chunk(mcap_bytes, chunk)? 
{ - match msg { - Ok(message) => { - if let Err(err) = decoder.decode_next(&message) { - re_log::error_once!( - "Failed to decode message on channel {}: {err}", - message.channel.topic - ); - } - } - Err(err) => re_log::error!("Failed to read message from MCAP file: {err}"), - } - } - - for chunk in decoder.finish() { - match chunk { - Ok(c) => emit(c), - Err(err) => re_log::error!("Failed to decode chunk: {err}"), - } - } - } - - Ok(()) - } -} - -/// A printable assignment used for dry-runs / UI. -#[derive(Clone, Debug)] -pub struct LayerAssignment { - pub channel_id: ChannelId, - pub topic: String, - pub encoding: String, - pub schema_name: Option, - pub layer: LayerIdentifier, -} - -/// A concrete execution plan for a given MCAP source. -pub struct ExecutionPlan { - pub file_layers: Vec>, - pub runners: Vec, - pub assignments: Vec, -} - -impl ExecutionPlan { - pub fn run( - mut self, - mcap_bytes: &[u8], - summary: &mcap::Summary, - emit: &mut dyn FnMut(Chunk), - ) -> anyhow::Result<()> { - for mut layer in self.file_layers { - layer.process(mcap_bytes, summary, emit)?; - } - - for runner in &mut self.runners { - runner.process(mcap_bytes, summary, emit)?; - } - Ok(()) - } -} - -/// Holds a set of all known layers, split into file-scoped and message-scoped. -pub struct LayerRegistry { - file_factories: BTreeMap Box>, - msg_factories: BTreeMap Box>, - msg_order: Vec, - fallback: Fallback, -} - -impl LayerRegistry { - /// Creates an empty registry. - pub fn empty() -> Self { - Self { - file_factories: Default::default(), - msg_factories: Default::default(), - msg_order: Vec::new(), - fallback: Fallback::None, - } - } - - /// Creates a registry with all builtin layers and raw fallback enabled. - pub fn all_with_raw_fallback() -> Self { - Self::all_builtin(true) - } - - /// Creates a registry with all builtin layers and raw fallback disabled. 
- pub fn all_without_raw_fallback() -> Self { - Self::all_builtin(false) - } - - /// Creates a registry with all builtin layers with configurable raw fallback. - pub fn all_builtin(raw_fallback_enabled: bool) -> Self { - let mut registry = Self::empty() - // file layers: - .register_file_layer::() - .register_file_layer::() - .register_file_layer::() - // message layers (priority order): - .register_message_layer::() - .register_message_layer::() - .register_message_layer::(); - - if raw_fallback_enabled { - registry = registry - .register_message_layer::() - .with_global_fallback::(); - } else { - // still register raw so users can explicitly select it, just no fallback - registry = registry.register_message_layer::(); - } - - registry - } - - /// Register a file-scoped layer (runs once over the file/summary). - pub fn register_file_layer(mut self) -> Self { - let id = L::identifier(); - if self - .file_factories - .insert(id.clone(), || Box::new(L::default())) - .is_some() - { - re_log::warn_once!("Inserted file layer {} twice.", id); - } - self - } - - /// Register a message-scoped layer (eligible to handle channels). - pub fn register_message_layer(mut self) -> Self { - let id = ::identifier(); - if self - .msg_factories - .insert(id.clone(), || Box::new(M::default())) - .is_some() - { - re_log::warn_once!("Inserted message layer {} twice.", id); - } - self.msg_order.push(id); - self - } - - /// Configure a global fallback message layer (e.g. `raw`). - pub fn with_global_fallback(mut self) -> Self { - self.fallback = Fallback::Global(::identifier()); - self - } - - /// Produce a filtered registry that only contains `selected` layers. 
- pub fn select(&self, selected: &SelectedLayers) -> Self { - let file_factories = self - .file_factories - .iter() - .filter(|(id, _)| selected.contains(id)) - .map(|(k, v)| (k.clone(), *v)) - .collect(); - - let msg_factories = self - .msg_factories - .iter() - .filter(|(id, _)| selected.contains(id)) - .map(|(k, v)| (k.clone(), *v)) - .collect(); - - let msg_order = self - .msg_order - .iter() - .filter(|&id| selected.contains(id)) - .cloned() - .collect(); - - let fallback = self.select_fallback(selected); - - Self { - file_factories, - msg_factories, - msg_order, - fallback, - } - } - - fn select_fallback(&self, selected: &SelectedLayers) -> Fallback { - match &self.fallback { - Fallback::Global(id) if selected.contains(id) => Fallback::Global(id.clone()), - Fallback::Global(_) | Fallback::None => Fallback::None, - } - } - - /// Build a concrete execution plan for a given file. - pub fn plan(&self, summary: &mcap::Summary) -> anyhow::Result { - let file_layers = self - .file_factories - .values() - .map(|f| f()) - .collect::>(); - - // instantiate message layers and init them (supports_channel may depend on init) - let mut msg_layers: Vec<(LayerIdentifier, Box)> = self - .msg_order - .iter() - .filter_map(|id| self.msg_factories.get(id).map(|f| (id.clone(), f()))) - .collect(); - - for (_, l) in &mut msg_layers { - l.init(summary)?; - } - - let mut by_layer: BTreeMap> = BTreeMap::new(); - let mut assignments: Vec = Vec::new(); - - for channel_id in summary.channels.values() { - // explicit priority order - let mut chosen: Option = None; - for (id, layer) in &msg_layers { - if layer.supports_channel(channel_id.as_ref()) { - chosen = Some(id.clone()); - break; - } - } - - if chosen.is_none() { - // fallbacks (if any) - if let Fallback::Global(id) = &self.fallback - && self.msg_factories.contains_key(id) - { - chosen = Some(id.clone()); - } - } - - let schema_name = channel_id.schema.as_ref().map(|s| s.name.clone()); - - let schema_encoding = channel_id - .schema 
- .as_ref() - .map(|s| s.encoding.as_str()) - .unwrap_or("Unknown"); - - if let Some(id) = chosen { - by_layer - .entry(id.clone()) - .or_default() - .insert(ChannelId::from(channel_id.id)); - - assignments.push(LayerAssignment { - channel_id: ChannelId::from(channel_id.id), - topic: channel_id.topic.clone(), - encoding: schema_encoding.to_owned(), - schema_name: channel_id.schema.as_ref().map(|s| s.name.clone()), - layer: id, - }); - } else { - re_log::debug!( - "No message layer selected for topic '{}' (encoding='{}', schema='{:?}')", - channel_id.topic, - schema_encoding, - schema_name, - ); - } - } - - let mut runners = Vec::new(); - for (layer_id, allowed) in by_layer { - if let Some(factory) = self.msg_factories.get(&layer_id) { - let inner = factory(); - runners.push(MessageLayerRunner::new(inner, allowed)); - } - } - - Ok(ExecutionPlan { - file_layers, - runners, - assignments, - }) - } -} diff --git a/crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__oneof_nested.snap b/crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__oneof_nested.snap deleted file mode 100644 index 3d0c99a7c7fc..000000000000 --- a/crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__oneof_nested.snap +++ /dev/null @@ -1,24 +0,0 @@ ---- -source: crates/store/re_mcap/src/layers/protobuf.rs -expression: "format!(\"{:-240}\", &chunks[0])" ---- -┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /scene_topic │ -│ * id: [**REDACTED**] │ -│ * version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 
┌───────────────────────────────────────────────┬──────────────────────────────┬──────────────────────────────────┬─────────────────────────────────────────┐ │ -│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ com.example.Scene:message │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable Duration(ns) ┆ type: nullable Duration(ns) ┆ type: nullable List[nullable Struct[1]] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: message_log_time ┆ index_name: message_publish_time ┆ archetype: com.example.Scene │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ is_sorted: true ┆ component: com.example.Scene:message │ │ -│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ kind: data │ │ -│ │ kind: control ┆ ┆ ┆ │ │ -│ ╞═══════════════════════════════════════════════╪══════════════════════════════╪══════════════════════════════════╪═════════════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ PT0.000000001S ┆ PT0.000000001S ┆ [{object: {object: cube}}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000002S ┆ PT0.000000002S ┆ [{object: {object: pyramid}}] │ │ -│ └───────────────────────────────────────────────┴──────────────────────────────┴──────────────────────────────────┴─────────────────────────────────────────┘ │ -└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__oneof_top_level.snap b/crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__oneof_top_level.snap deleted file mode 100644 index fd1cac65cc28..000000000000 --- 
a/crates/store/re_mcap/src/layers/snapshots/re_mcap__layers__protobuf__integration_tests__oneof_top_level.snap +++ /dev/null @@ -1,26 +0,0 @@ ---- -source: crates/store/re_mcap/src/layers/protobuf.rs -expression: "format!(\"{:-240}\", &chunks[0])" ---- -┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /color_topic │ -│ * id: [**REDACTED**] │ -│ * version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬──────────────────────────────┬──────────────────────────────────┬─────────────────────────────────────────┐ │ -│ │ RowId ┆ message_log_time ┆ message_publish_time ┆ com.example.Color:message │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable Duration(ns) ┆ type: nullable Duration(ns) ┆ type: nullable List[nullable Struct[1]] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: message_log_time ┆ index_name: message_publish_time ┆ archetype: com.example.Color │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ is_sorted: true ┆ component: com.example.Color:message │ │ -│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ kind: data │ │ -│ │ kind: control ┆ ┆ ┆ │ │ -│ ╞═══════════════════════════════════════════════╪══════════════════════════════╪══════════════════════════════════╪═════════════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ PT0.000000001S ┆ PT0.000000001S ┆ [{object: box}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000002S ┆ PT0.000000002S ┆ [{object: sphere}] │ │ -│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ PT0.000000003S ┆ PT0.000000003S ┆ [{object: cone}] │ │ -│ └───────────────────────────────────────────────┴──────────────────────────────┴──────────────────────────────────┴─────────────────────────────────────────┘ │ -└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_mcap/src/lib.rs b/crates/store/re_mcap/src/lib.rs index f4a9d8e1d399..cd6dea65b36d 100644 --- a/crates/store/re_mcap/src/lib.rs +++ b/crates/store/re_mcap/src/lib.rs @@ -1,13 +1,16 @@ //! Library providing utilities to load MCAP files with Rerun. +pub mod decoders; mod error; -pub mod layers; pub(crate) mod parsers; pub(crate) mod util; +pub use decoders::{ + Decoder, DecoderIdentifier, DecoderRegistry, MessageDecoder, SelectedDecoders, TopicFilter, +}; + pub use error::Error; -pub use layers::{Layer, LayerIdentifier, LayerRegistry, MessageLayer, SelectedLayers}; pub use parsers::ros2msg::sensor_msgs::{ ImageEncoding, decode_image_encoding, decode_image_format, }; diff --git a/crates/store/re_mcap/src/parsers/decode.rs b/crates/store/re_mcap/src/parsers/decode.rs index 023414e7b311..b65c65f461e2 100644 --- a/crates/store/re_mcap/src/parsers/decode.rs +++ b/crates/store/re_mcap/src/parsers/decode.rs @@ -4,7 +4,7 @@ use re_chunk::external::nohash_hasher::{IntMap, IsEnabled}; use re_chunk::{ Chunk, EntityPath, TimeColumn, TimeColumnBuilder, TimePoint, Timeline, TimelineName, }; -use re_log_types::TimeCell; +use re_log_types::{TimeCell, TimeType}; use crate::util::{TimestampCell, log_and_publish_timepoint_from_msg}; @@ -45,8 +45,9 @@ pub trait MessageParser { fn get_log_and_publish_timepoints( &self, msg: &mcap::Message<'_>, + time_type: TimeType, ) -> anyhow::Result> { - 
Ok(vec![log_and_publish_timepoint_from_msg(msg)]) + Ok(vec![log_and_publish_timepoint_from_msg(msg, time_type)]) } /// Consume the parser and convert all accumulated data into Rerun chunks. @@ -70,18 +71,31 @@ impl IsEnabled for ChannelId {} /// Common context used by parsers to build timelines and store entity paths. pub struct ParserContext { entity_path: EntityPath, + channel_topic: String, + time_type: TimeType, pub timelines: IntMap, } impl ParserContext { - /// Construct a new parser context with the given [`EntityPath`]. - pub fn new(entity_path: EntityPath) -> Self { + /// Construct a new parser context with the given [`EntityPath`] and [`TimeType`]. + pub fn new( + entity_path: EntityPath, + channel_topic: impl Into, + time_type: TimeType, + ) -> Self { Self { entity_path, + channel_topic: channel_topic.into(), + time_type, timelines: IntMap::default(), } } + /// The [`TimeType`] to use for timestamp timelines. + pub fn time_type(&self) -> TimeType { + self.time_type + } + /// Add an additional [`TimePoint`] to the timelines in this context. /// /// # Note @@ -122,7 +136,6 @@ impl ParserContext { /// Add a timestamp to the timeline using the provided timestamp cell. /// /// The timeline name and [`TimeCell`] are automatically determined from the timestamp cell. - /// For Unix epochs, creates a timestamp cell. For custom epochs, creates a duration cell. pub fn add_timestamp_cell(&mut self, timestamp_cell: TimestampCell) -> &mut Self { let timeline_name = TimelineName::from(timestamp_cell.timeline_name()); let cell = timestamp_cell.into_time_cell(); @@ -147,4 +160,9 @@ impl ParserContext { pub fn entity_path(&self) -> &EntityPath { &self.entity_path } + + /// Get the MCAP channel topic associated with this context. 
+ pub fn channel_topic(&self) -> &str { + &self.channel_topic + } } diff --git a/crates/store/re_mcap/src/parsers/ros2msg/geometry_msgs/pose_stamped.rs b/crates/store/re_mcap/src/parsers/ros2msg/geometry_msgs/pose_stamped.rs index d1e4a1a7c957..effda1d54a08 100644 --- a/crates/store/re_mcap/src/parsers/ros2msg/geometry_msgs/pose_stamped.rs +++ b/crates/store/re_mcap/src/parsers/ros2msg/geometry_msgs/pose_stamped.rs @@ -38,8 +38,9 @@ impl MessageParser for PoseStampedMessageParser { } = pose; // Add the header timestamp to the context, `log_time` and `publish_time` are added automatically - ctx.add_timestamp_cell(TimestampCell::guess_from_nanos_ros2( + ctx.add_timestamp_cell(TimestampCell::from_nanos_ros2( header.stamp.as_nanos() as u64, + ctx.time_type(), )); self.frame_ids.push(header.frame_id); @@ -88,7 +89,7 @@ impl MessageParser for PoseStampedMessageParser { timelines.clone(), pose_components .into_iter() - .chain(frame_components.into_iter()) + .chain(frame_components) .collect(), )?; diff --git a/crates/store/re_mcap/src/parsers/ros2msg/rcl_interfaces/log.rs b/crates/store/re_mcap/src/parsers/ros2msg/rcl_interfaces/log.rs index 519f37dd4cb6..28a0770c1214 100644 --- a/crates/store/re_mcap/src/parsers/ros2msg/rcl_interfaces/log.rs +++ b/crates/store/re_mcap/src/parsers/ros2msg/rcl_interfaces/log.rs @@ -73,8 +73,9 @@ impl MessageParser for LogMessageParser { .context("Failed to decode `rcl_interfaces::Log` message from CDR data")?; // add the sensor timestamp to the context, `log_time` and `publish_time` are added automatically - ctx.add_timestamp_cell(crate::util::TimestampCell::guess_from_nanos_ros2( + ctx.add_timestamp_cell(crate::util::TimestampCell::from_nanos_ros2( stamp.as_nanos() as u64, + ctx.time_type(), )); self.text_entries.push(format!("[{name}] {log_msg}")); diff --git a/crates/store/re_mcap/src/parsers/ros2msg/scalar_parser.rs b/crates/store/re_mcap/src/parsers/ros2msg/scalar_parser.rs index 76b93e2548fe..8aaee9ab62b3 100644 --- 
a/crates/store/re_mcap/src/parsers/ros2msg/scalar_parser.rs +++ b/crates/store/re_mcap/src/parsers/ros2msg/scalar_parser.rs @@ -88,8 +88,9 @@ impl MessageParser for ScalarMessageParser { })?; // Add the sensor timestamp to the context, `log_time` and `publish_time` are added automatically - ctx.add_timestamp_cell(crate::util::TimestampCell::guess_from_nanos_ros2( + ctx.add_timestamp_cell(crate::util::TimestampCell::from_nanos_ros2( message.header().stamp.as_nanos() as u64, + ctx.time_type(), )); let scalar_values = message.extract_scalars(); diff --git a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/camera_info.rs b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/camera_info.rs index 081fb9cbbcf2..4fe46357a576 100644 --- a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/camera_info.rs +++ b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/camera_info.rs @@ -35,8 +35,9 @@ impl MessageParser for CameraInfoMessageParser { } = cdr::try_decode_message::(&msg.data)?; // add the sensor timestamp to the context, `log_time` and `publish_time` are added automatically - ctx.add_timestamp_cell(crate::util::TimestampCell::guess_from_nanos_ros2( + ctx.add_timestamp_cell(crate::util::TimestampCell::from_nanos_ros2( header.stamp.as_nanos() as u64, + ctx.time_type(), )); self.frame_ids.push(header.frame_id); diff --git a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/compressed_image.rs b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/compressed_image.rs index 88a056fc1560..b95fade8e72f 100644 --- a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/compressed_image.rs +++ b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/compressed_image.rs @@ -40,8 +40,9 @@ impl MessageParser for CompressedImageMessageParser { } = cdr::try_decode_message::>(&msg.data)?; // add the sensor timestamp to the context, `log_time` and `publish_time` are added automatically - ctx.add_timestamp_cell(TimestampCell::guess_from_nanos_ros2( + 
ctx.add_timestamp_cell(TimestampCell::from_nanos_ros2( header.stamp.as_nanos() as u64, + ctx.time_type(), )); self.frame_ids.push(header.frame_id); diff --git a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/image.rs b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/image.rs index 7e8e304be03d..1caef01230d6 100644 --- a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/image.rs +++ b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/image.rs @@ -44,8 +44,9 @@ impl MessageParser for ImageMessageParser { .context("Failed to decode sensor_msgs::Image message from CDR data")?; // add the sensor timestamp to the context, `log_time` and `publish_time` are added automatically - ctx.add_timestamp_cell(crate::util::TimestampCell::guess_from_nanos_ros2( + ctx.add_timestamp_cell(crate::util::TimestampCell::from_nanos_ros2( header.stamp.as_nanos() as u64, + ctx.time_type(), )); self.frame_ids.push(header.frame_id); diff --git a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/imu.rs b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/imu.rs index 2b3b01e4c164..adfd5930fe20 100644 --- a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/imu.rs +++ b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/imu.rs @@ -62,8 +62,9 @@ impl MessageParser for ImuMessageParser { .map_err(|err| Error::Other(anyhow::anyhow!(err)))?; // add the sensor timestamp to the context, `log_time` and `publish_time` are added automatically - ctx.add_timestamp_cell(crate::util::TimestampCell::guess_from_nanos_ros2( + ctx.add_timestamp_cell(crate::util::TimestampCell::from_nanos_ros2( imu.header.stamp.as_nanos() as u64, + ctx.time_type(), )); self.frame_ids.values().append_value(imu.header.frame_id); diff --git a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/joint_state.rs b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/joint_state.rs index 4cf52ab73991..3d8a601692b3 100644 --- a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/joint_state.rs +++ 
b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/joint_state.rs @@ -39,8 +39,9 @@ impl MessageParser for JointStateMessageParser { .map_err(|err| Error::Other(anyhow::anyhow!(err)))?; // add the sensor timestamp to the context, `log_time` and `publish_time` are added automatically - ctx.add_timestamp_cell(crate::util::TimestampCell::guess_from_nanos_ros2( + ctx.add_timestamp_cell(crate::util::TimestampCell::from_nanos_ros2( header.stamp.as_nanos() as u64, + ctx.time_type(), )); self.frame_ids.values().append_value(header.frame_id); diff --git a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/joy.rs b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/joy.rs index 7b45d13f94bf..47044f4a1b6e 100644 --- a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/joy.rs +++ b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/joy.rs @@ -33,8 +33,9 @@ impl MessageParser for JoyMessageParser { .map_err(|err| Error::Other(anyhow::anyhow!(err)))?; // add the sensor timestamp to the context, `log_time` and `publish_time` are added automatically - ctx.add_timestamp_cell(crate::util::TimestampCell::guess_from_nanos_ros2( + ctx.add_timestamp_cell(crate::util::TimestampCell::from_nanos_ros2( header.stamp.as_nanos() as u64, + ctx.time_type(), )); self.frame_ids.values().append_value(header.frame_id); diff --git a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/magnetic_field.rs b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/magnetic_field.rs index 94bea9f84538..b607e161ead0 100644 --- a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/magnetic_field.rs +++ b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/magnetic_field.rs @@ -28,8 +28,9 @@ impl MessageParser for MagneticFieldMessageParser { .map_err(|err| Error::Other(anyhow::anyhow!(err)))?; // add the sensor timestamp to the context, `log_time` and `publish_time` are added automatically - ctx.add_timestamp_cell(crate::util::TimestampCell::guess_from_nanos_ros2( + 
ctx.add_timestamp_cell(crate::util::TimestampCell::from_nanos_ros2( magnetic_field.header.stamp.as_nanos() as u64, + ctx.time_type(), )); self.frame_ids.push(magnetic_field.header.frame_id); diff --git a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/nav_sat_fix.rs b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/nav_sat_fix.rs index 8633d7141427..d30933aa115c 100644 --- a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/nav_sat_fix.rs +++ b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/nav_sat_fix.rs @@ -52,8 +52,9 @@ impl MessageParser for NavSatFixMessageParser { .context("Failed to decode sensor_msgs::NavSatFix message from CDR data")?; // add the sensor timestamp to the context, `log_time` and `publish_time` are added automatically - ctx.add_timestamp_cell(crate::util::TimestampCell::guess_from_nanos_ros2( + ctx.add_timestamp_cell(crate::util::TimestampCell::from_nanos_ros2( header.stamp.as_nanos() as u64, + ctx.time_type(), )); self.frame_ids.push(header.frame_id); diff --git a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs index 45a3dd8c03b4..b6c6f5ee0267 100644 --- a/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs +++ b/crates/store/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs @@ -154,6 +154,10 @@ impl<'a> Position3DIter<'a> { is_big_endian: bool, fields: &[PointField], ) -> Option { + if step == 0 { + return None; + } + let mut x_accessor: Option<(usize, PointFieldDatatype)> = None; let mut y_accessor: Option<(usize, PointFieldDatatype)> = None; let mut z_accessor: Option<(usize, PointFieldDatatype)> = None; @@ -331,8 +335,9 @@ impl MessageParser for PointCloud2MessageParser { let point_cloud = cdr::try_decode_message::(msg.data.as_ref()) .map_err(|err| Error::Other(anyhow::anyhow!(err)))?; - ctx.add_timestamp_cell(crate::util::TimestampCell::guess_from_nanos_ros2( + 
ctx.add_timestamp_cell(crate::util::TimestampCell::from_nanos_ros2( point_cloud.header.stamp.as_nanos() as u64, + ctx.time_type(), )); let Self { @@ -369,7 +374,7 @@ impl MessageParser for PointCloud2MessageParser { // We lazily initialize the builders that store the extracted fields from // the blob when we receive the first message. - if extracted_fields.len() != point_cloud.fields.len() { + if extracted_fields.is_empty() && !point_cloud.fields.is_empty() { *extracted_fields = point_cloud .fields .iter() @@ -382,12 +387,17 @@ impl MessageParser for PointCloud2MessageParser { .collect(); } - for point in point_cloud.data.chunks(point_cloud.point_step as usize) { - for (field, (_name, builder)) in - point_cloud.fields.iter().zip(extracted_fields.iter_mut()) - { - let field_builder = builder.values(); - add_field_value(field_builder, field, point_cloud.is_bigendian, point)?; + // `PointCloud2` occasionally shows up as an empty placeholder message + // with `point_step == 0` and no payload. Skip per-point extraction in + // that case instead of panicking when chunking the blob. 
+ if point_cloud.point_step != 0 { + for point in point_cloud.data.chunks(point_cloud.point_step as usize) { + for (field, (_name, builder)) in + point_cloud.fields.iter().zip(extracted_fields.iter_mut()) + { + let field_builder = builder.values(); + add_field_value(field_builder, field, point_cloud.is_bigendian, point)?; + } } } @@ -590,3 +600,130 @@ impl MessageParser for PointCloud2MessageParser { Ok(chunks) } } + +#[cfg(test)] +mod tests { + use std::{borrow::Cow, collections::BTreeMap, sync::Arc}; + + use byteorder::LittleEndian; + use cdr_encoding::to_vec; + use mcap::{Channel, Message}; + use re_log_types::TimeType; + + use super::*; + use crate::parsers::decode::ParserContext; + use crate::parsers::ros2msg::definitions::{builtin_interfaces, std_msgs}; + use re_chunk::EntityPath; + + fn cdr_message(point_cloud: &sensor_msgs::PointCloud2) -> Message<'static> { + let mut data = vec![0x00, 0x01, 0x00, 0x00]; + data.extend(to_vec::(point_cloud).unwrap()); + + Message { + channel: Arc::new(Channel { + id: 1, + topic: "/nav2_percep_cloud".to_owned(), + schema: None, + message_encoding: "cdr".to_owned(), + metadata: BTreeMap::default(), + }), + sequence: 0, + log_time: 0, + publish_time: 0, + data: Cow::Owned(data), + } + } + + #[test] + fn ignores_zero_point_step_after_valid_message() { + let mut parser = PointCloud2MessageParser::new(2); + let mut ctx = ParserContext::new( + EntityPath::from("/nav2_percep_cloud"), + "/nav2_percep_cloud", + TimeType::TimestampNs, + ); + + let valid_message = sensor_msgs::PointCloud2 { + header: std_msgs::Header { + stamp: builtin_interfaces::Time { sec: 1, nanosec: 0 }, + frame_id: "map".to_owned(), + }, + height: 1, + width: 1, + fields: vec![ + PointField { + name: "x".to_owned(), + offset: 0, + datatype: PointFieldDatatype::Float32, + count: 1, + }, + PointField { + name: "y".to_owned(), + offset: 4, + datatype: PointFieldDatatype::Float32, + count: 1, + }, + PointField { + name: "z".to_owned(), + offset: 8, + datatype: 
PointFieldDatatype::Float32, + count: 1, + }, + PointField { + name: "intensity".to_owned(), + offset: 12, + datatype: PointFieldDatatype::Float32, + count: 1, + }, + ], + is_bigendian: false, + point_step: 16, + row_step: 16, + data: [1.0_f32, 2.0, 3.0, 4.0] + .into_iter() + .flat_map(f32::to_le_bytes) + .collect(), + is_dense: true, + }; + + let empty_message = sensor_msgs::PointCloud2 { + header: std_msgs::Header { + stamp: builtin_interfaces::Time { sec: 2, nanosec: 0 }, + frame_id: "map".to_owned(), + }, + height: 0, + width: 0, + fields: Vec::new(), + is_bigendian: false, + point_step: 0, + row_step: 0, + data: Vec::new(), + is_dense: false, + }; + + parser + .append(&mut ctx, &cdr_message(&valid_message)) + .unwrap(); + + parser + .append(&mut ctx, &cdr_message(&empty_message)) + .unwrap(); + + let chunks = Box::new(parser).finalize(ctx).unwrap(); + let row_counts = chunks.iter().map(Chunk::num_rows).collect::>(); + let data_chunk = chunks + .iter() + .find(|chunk| chunk.num_rows() == 2 && chunk.num_components() > 1) + .unwrap(); + let intensity_descriptor = ComponentDescriptor::partial("intensity") + .with_builtin_archetype(archetypes::Points3D::name()); + + assert_eq!(chunks.len(), 3); + assert_eq!(row_counts, vec![2, 1, 2]); + assert!( + data_chunk + .component_descriptors() + .any(|descriptor| descriptor == &intensity_descriptor) + ); + } +} diff --git a/crates/store/re_mcap/src/parsers/ros2msg/tf2_msgs/tf_message.rs b/crates/store/re_mcap/src/parsers/ros2msg/tf2_msgs/tf_message.rs index fd055c46e787..b6dc7e342c1f 100644 --- a/crates/store/re_mcap/src/parsers/ros2msg/tf2_msgs/tf_message.rs +++ b/crates/store/re_mcap/src/parsers/ros2msg/tf2_msgs/tf_message.rs @@ -14,6 +14,14 @@ use crate::parsers::{ }; use crate::util::{TimestampCell, log_and_publish_timepoint_from_msg}; +const STATIC_TF_TOPIC: &str = "/tf_static"; + +fn static_chunk_timelines() +-> re_chunk::external::nohash_hasher::IntMap { + // Chunks without any timelines are treated as static by 
Rerun. + re_chunk::external::nohash_hasher::IntMap::default() +} + pub struct TfMessageParser { translations: Vec, quaternions: Vec, @@ -38,12 +46,13 @@ impl MessageParser for TfMessageParser { fn get_log_and_publish_timepoints( &self, msg: &mcap::Message<'_>, + time_type: re_log_types::TimeType, ) -> anyhow::Result> { // We need a custom implementation of this method because we have a 1-to-N relationship between input messages and output rows. // Assign each output row the same log and publish time as the input message. let TFMessage { transforms } = cdr::try_decode_message::(&msg.data)?; Ok(vec![ - log_and_publish_timepoint_from_msg(msg); + log_and_publish_timepoint_from_msg(msg, time_type); transforms.len() ]) } @@ -62,7 +71,10 @@ impl MessageParser for TfMessageParser { // Add the header timestamp to the context. // `log_time` and `publish_time` are added via `log_and_publish_time_from_msg`. let Header { stamp, frame_id } = header; - ctx.add_timestamp_cell(TimestampCell::guess_from_nanos_ros2(stamp.as_nanos() as u64)); + ctx.add_timestamp_cell(TimestampCell::from_nanos_ros2( + stamp.as_nanos() as u64, + ctx.time_type(), + )); self.parent_frame_ids.push(frame_id); self.child_frame_ids.push(child_frame_id); @@ -100,7 +112,11 @@ impl MessageParser for TfMessageParser { } = *self; let entity_path = ctx.entity_path().clone(); - let timelines = ctx.build_timelines(); + let timelines = if ctx.channel_topic() == STATIC_TF_TOPIC { + static_chunk_timelines() + } else { + ctx.build_timelines() + }; let chunk = Chunk::from_auto_row_ids( ChunkId::new(), @@ -118,3 +134,41 @@ impl MessageParser for TfMessageParser { Ok(vec![chunk]) } } + +#[cfg(test)] +mod tests { + use re_chunk::TimePoint; + use re_log_types::{TimeCell, TimeType, TimelineName}; + + use super::*; + + fn test_parser() -> TfMessageParser { + TfMessageParser { + translations: vec![Translation3D::new(1.0, 2.0, 3.0)], + quaternions: vec![Quaternion::from_xyzw([0.0, 0.0, 0.0, 1.0]).into()], + parent_frame_ids: 
vec!["parent".to_owned()], + child_frame_ids: vec!["child".to_owned()], + } + } + + #[test] + fn tf_static_topic_produces_static_chunk() { + let ctx = ParserContext::new("/tf_static".into(), STATIC_TF_TOPIC, TimeType::TimestampNs); + let chunk = Box::new(test_parser()).finalize(ctx).unwrap().remove(0); + + assert!(chunk.is_static()); + } + + #[test] + fn non_tf_static_topic_stays_temporal() { + let mut ctx = ParserContext::new("/tf".into(), "tf", TimeType::TimestampNs); + ctx.add_timepoint(TimePoint::from([( + TimelineName::log_time(), + TimeCell::from_timestamp_nanos_since_epoch(123), + )])); + + let chunk = Box::new(test_parser()).finalize(ctx).unwrap().remove(0); + + assert!(!chunk.is_static()); + } +} diff --git a/crates/store/re_mcap/src/util.rs b/crates/store/re_mcap/src/util.rs index de566ed8c967..f270b033abb7 100644 --- a/crates/store/re_mcap/src/util.rs +++ b/crates/store/re_mcap/src/util.rs @@ -1,11 +1,15 @@ +use std::collections::BTreeSet; use std::io::{Read, Seek}; use mcap::Summary; use mcap::sans_io::{SummaryReadEvent, SummaryReader}; use re_chunk::TimePoint; -use re_log_types::TimeCell; +use re_log_types::{TimeCell, TimeType}; use saturating_cast::SaturatingCast as _; +use crate::Error; +use crate::parsers::ChannelId; + /// Read out the summary of an MCAP file. 
pub fn read_summary(mut reader: R) -> anyhow::Result> { let mut summary_reader = SummaryReader::new(); @@ -24,76 +28,118 @@ pub fn read_summary(mut reader: R) -> anyhow::Result Result, Error> { + let all_channels = summary + .channels + .keys() + .copied() + .map(ChannelId) + .collect::>(); + + if let Some(stats) = &summary.stats { + let nonempty_channels = stats + .channel_message_counts + .iter() + .filter_map(|(&channel_id, &count)| (count > 0).then_some(ChannelId(channel_id))) + .collect::>(); + + return Ok(all_channels + .difference(&nonempty_channels) + .copied() + .collect()); + } + + let mut empty_channels = all_channels; + + for chunk in &summary.chunk_indexes { + for (channel, msg_offsets) in summary.read_message_indexes(mcap_bytes, chunk)? { + if !msg_offsets.is_empty() { + // Channel has at least one message, so it's not empty. + empty_channels.remove(&ChannelId(channel.id)); + } + } + } + + Ok(empty_channels) +} + /// Extracts log and publish time from an MCAP message as a `TimePoint`. -pub fn log_and_publish_timepoint_from_msg(msg: &mcap::Message<'_>) -> TimePoint { - let log_time_cell = crate::util::TimestampCell::guess_from_nanos(msg.log_time); - let publish_time_cell = crate::util::TimestampCell::guess_from_nanos(msg.publish_time); +/// +/// The `time_type` parameter controls whether the timelines are created as +/// [`TimeType::TimestampNs`] or [`TimeType::DurationNs`]. +pub fn log_and_publish_timepoint_from_msg( + msg: &mcap::Message<'_>, + time_type: TimeType, +) -> TimePoint { + let log_time_cell = crate::util::TimestampCell::from_nanos_default(msg.log_time, time_type); + let publish_time_cell = + crate::util::TimestampCell::from_nanos_default(msg.publish_time, time_type); re_chunk::TimePoint::from([ ("message_log_time", log_time_cell.into_time_cell()), ("message_publish_time", publish_time_cell.into_time_cell()), ]) } -/// Timestamp + epoch interpretation. +/// A timestamp or duration on a specific timeline. 
#[derive(Debug, Clone, PartialEq, Eq)] -pub enum TimestampCell { - /// Unix epoch (nanoseconds since 1970-01-01). - Unix { timeline: String, time: TimeCell }, - - /// User-understood epoch with a named timeline (nanoseconds since custom zero). - Custom { timeline: String, time: TimeCell }, +pub struct TimestampCell { + pub timeline: String, + pub time: TimeCell, } impl TimestampCell { - // Unix range we consider "reasonable" for raw ns values. - const YEAR_1990_NS: i64 = 631_148_400_000_000_000; // 1990-01-01 - const YEAR_2100_NS: i64 = 4_102_444_800_000_000_000; // 2100-01-01 - - /// Make a best-effort guess on the epoch type based on the provided raw timestamp. - pub fn guess_from_nanos_with_names( - timestamp_ns: u64, - timestamp_timeline: impl Into, - duration_timeline: impl Into, - ) -> Self { + /// Create a Unix-epoch timestamp cell with a custom timeline name. + /// + /// Always interprets the value as a timestamp, regardless of magnitude. + /// Use [`Self::from_nanos_with_type`] for configurable [`TimeType`]. + pub fn from_nanos(timestamp_ns: u64, timeline: impl Into) -> Self { let ns = timestamp_ns.saturating_cast::(); + Self { + timeline: timeline.into(), + time: TimeCell::from_timestamp_nanos_since_epoch(ns), + } + } - if Self::YEAR_1990_NS <= ns && ns <= Self::YEAR_2100_NS { - Self::Unix { - timeline: timestamp_timeline.into(), - time: TimeCell::from_timestamp_nanos_since_epoch(ns), - } - } else { - Self::Custom { - timeline: duration_timeline.into(), - time: TimeCell::from_duration_nanos(ns), - } + /// Create a time cell with a configurable [`TimeType`] and custom timeline name. 
+ pub fn from_nanos_with_type( + nanos: u64, + timeline: impl Into, + time_type: TimeType, + ) -> Self { + let ns = nanos.saturating_cast::(); + let time = match time_type { + TimeType::TimestampNs => TimeCell::from_timestamp_nanos_since_epoch(ns), + TimeType::DurationNs => TimeCell::from_duration_nanos(ns), + TimeType::Sequence => TimeCell::from_sequence(ns), + }; + Self { + timeline: timeline.into(), + time, } } - /// Make a best-effort guess on the epoch type based on the provided raw timestamp, using - /// the default timeline names `timestamp` and `duration`. - pub fn guess_from_nanos(timestamp_ns: u64) -> Self { - Self::guess_from_nanos_with_names(timestamp_ns, "timestamp", "duration") + /// Create a time cell on the `"timestamp"` timeline with the given [`TimeType`]. + pub fn from_nanos_default(timestamp_ns: u64, time_type: TimeType) -> Self { + Self::from_nanos_with_type(timestamp_ns, "timestamp", time_type) } - /// Make a best-effort guess on the epoch type based on the provided raw timestamp, using - /// the default timeline names `ros2_timestamp` and `ros2_duration`. - pub fn guess_from_nanos_ros2(timestamp_ns: u64) -> Self { - Self::guess_from_nanos_with_names(timestamp_ns, "ros2_timestamp", "ros2_duration") + /// Create a time cell on the `"ros2_timestamp"` timeline with the given [`TimeType`]. + pub fn from_nanos_ros2(timestamp_ns: u64, time_type: TimeType) -> Self { + Self::from_nanos_with_type(timestamp_ns, "ros2_timestamp", time_type) } - /// The timeline name for this timestamp. + /// The timeline name for this time cell. pub fn timeline_name(&self) -> &str { - match self { - Self::Custom { timeline, .. } | Self::Unix { timeline, .. } => timeline, - } + &self.timeline } /// Extract the contained [`TimeCell`]. pub fn into_time_cell(self) -> TimeCell { - match self { - Self::Unix { time, .. } | Self::Custom { time, .. 
} => time, - } + self.time } } @@ -106,115 +152,37 @@ mod tests { use super::*; #[test] - fn test_guess_from_nanos() { - // within reasonable unix range for `TimestampCell::Unix` - let unix_ts_2023: u64 = 1_672_531_200_000_000_000; // 2023-01-01 - let cell = TimestampCell::guess_from_nanos(unix_ts_2023); - let TimestampCell::Unix { timeline: _, time } = cell else { - panic!("expected `TimestampCell::Unix` variant") - }; - - assert!(matches!(time.typ, TimeType::TimestampNs)); - assert_eq!( - time, - TimeCell::from_timestamp_nanos_since_epoch(unix_ts_2023 as i64) - ); + fn test_from_nanos() { + let ts: u64 = 1_672_531_200_000_000_000; // 2023-01-01 + let cell = TimestampCell::from_nanos_default(ts, TimeType::TimestampNs); assert_eq!(cell.timeline_name(), "timestamp"); - - // early date for `TimestampCell::Custom` - let early: u64 = 100_000_000; - let cell = TimestampCell::guess_from_nanos(early); - let TimestampCell::Custom { timeline, time } = cell else { - panic!("expected `TimestampCell::Custom` variant") - }; - assert_eq!(timeline, "duration"); - assert!(matches!(time.typ, TimeType::DurationNs)); - assert_eq!(time, TimeCell::from_duration_nanos(early as i64)); - - // after 2100 for `TimestampCell::Custom` - let far_future: u64 = 5_000_000_000_000_000_000; - let cell = TimestampCell::guess_from_nanos(far_future); - let TimestampCell::Custom { timeline, time } = cell else { - panic!("expected `TimestampCell::Custom` variant") - }; - assert_eq!(timeline, "duration"); - assert!(matches!(time.typ, TimeType::DurationNs)); - assert_eq!(time, TimeCell::from_duration_nanos(far_future as i64)); - - // exactly 1990-01-01 for `TimestampCell::Unix` - let year_1990 = TimestampCell::YEAR_1990_NS as u64; - let cell = TimestampCell::guess_from_nanos(year_1990); - let TimestampCell::Unix { timeline: _, time } = cell else { - panic!("expected `TimestampCell::Unix` at lower boundary") - }; - assert!(matches!(time.typ, TimeType::TimestampNs)); + assert!(matches!(cell.time.typ, 
TimeType::TimestampNs)); assert_eq!( - time, - TimeCell::from_timestamp_nanos_since_epoch(year_1990 as i64) - ); - - // exactly 2100-01-01 for `TimestampCell::Unix` - let year_2100 = TimestampCell::YEAR_2100_NS as u64; - let cell = TimestampCell::guess_from_nanos(year_2100); - let TimestampCell::Unix { timeline: _, time } = cell else { - panic!("expected `TimestampCell::Unix` at upper boundary") - }; - assert!(matches!(time.typ, TimeType::TimestampNs)); - assert_eq!( - time, - TimeCell::from_timestamp_nanos_since_epoch(year_2100 as i64) + cell.time, + TimeCell::from_timestamp_nanos_since_epoch(ts as i64) ); - // just outside lower boundary for `TimestampCell::Custom` - let before_1990 = (TimestampCell::YEAR_1990_NS - 1) as u64; - let cell = TimestampCell::guess_from_nanos(before_1990); - let TimestampCell::Custom { timeline, time } = cell else { - panic!("expected `TimestampCell::Custom` just before lower boundary") - }; - assert_eq!(timeline, "duration"); - assert!(matches!(time.typ, TimeType::DurationNs)); - assert_eq!(time, TimeCell::from_duration_nanos(before_1990 as i64)); - - // just outside upper boundary for `TimestampCell::Custom` - let after_2100 = (TimestampCell::YEAR_2100_NS + 1) as u64; - let cell = TimestampCell::guess_from_nanos(after_2100); - let TimestampCell::Custom { timeline, time } = cell else { - panic!("expected `TimestampCell::Custom` just after upper boundary") - }; - assert_eq!(timeline, "duration"); - assert!(matches!(time.typ, TimeType::DurationNs)); - assert_eq!(time, TimeCell::from_duration_nanos(after_2100 as i64)); + let cell = TimestampCell::from_nanos_default(ts, TimeType::DurationNs); + assert_eq!(cell.timeline_name(), "timestamp"); + assert!(matches!(cell.time.typ, TimeType::DurationNs)); + assert_eq!(cell.time, TimeCell::from_duration_nanos(ts as i64)); } #[test] - fn test_timeline_name() { - let unix = TimestampCell::Unix { - timeline: "timestamp".to_owned(), - time: TimeCell::from_timestamp_nanos_since_epoch(1_234_567_890), - 
}; - assert_eq!(unix.timeline_name(), "timestamp"); - - let custom = TimestampCell::Custom { - timeline: "sensor/imu".to_owned(), - time: TimeCell::from_duration_nanos(1_234_567_890), - }; - assert_eq!(custom.timeline_name(), "sensor/imu"); + fn test_from_nanos_ros2() { + let ts: u64 = 1_672_531_200_000_000_000; + let cell = TimestampCell::from_nanos_ros2(ts, TimeType::TimestampNs); + assert_eq!(cell.timeline_name(), "ros2_timestamp"); + assert!(matches!(cell.time.typ, TimeType::TimestampNs)); } #[test] - fn test_into_time_cell() { - let timestamp1 = TimeCell::from_timestamp_nanos_since_epoch(42); - let unix = TimestampCell::Unix { - timeline: "timestamp".to_owned(), - time: timestamp1, - }; - assert_eq!(unix.into_time_cell(), timestamp1); - - let timestamp2 = TimeCell::from_duration_nanos(1337); - let custom = TimestampCell::Custom { - timeline: "foo".into(), - time: timestamp2, - }; - assert_eq!(custom.into_time_cell(), timestamp2); + fn test_from_nanos_custom_timeline() { + let cell = TimestampCell::from_nanos(42, "my_timeline"); + assert_eq!(cell.timeline_name(), "my_timeline"); + assert_eq!( + cell.into_time_cell(), + TimeCell::from_timestamp_nanos_since_epoch(42) + ); } } diff --git a/crates/store/re_parquet/Cargo.toml b/crates/store/re_parquet/Cargo.toml new file mode 100644 index 000000000000..f7567df26070 --- /dev/null +++ b/crates/store/re_parquet/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "re_parquet" +authors.workspace = true +description = "Core parquet-to-chunk loading logic for Rerun" +edition.workspace = true +homepage.workspace = true +license.workspace = true +publish = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[lints] +workspace = true + +[dependencies] +re_chunk.workspace = true +re_log.workspace = true +re_log_types.workspace = true +re_sdk_types.workspace = true +re_tracing.workspace = true + +arrow.workspace = true +anyhow.workspace = true +bytes.workspace = true +parquet = { workspace = 
true, features = ["arrow", "snap", "zstd"] } +thiserror.workspace = true + +[dev-dependencies] diff --git a/crates/store/re_parquet/src/config.rs b/crates/store/re_parquet/src/config.rs new file mode 100644 index 000000000000..f42ddcdd20e8 --- /dev/null +++ b/crates/store/re_parquet/src/config.rs @@ -0,0 +1,232 @@ +//! Configuration types for parquet loading. + +use re_chunk::EntityPath; +use re_sdk_types::ComponentDescriptor; + +/// Strategy for grouping parquet columns into Rerun chunks. +/// +/// Grouping reduces RRD size by sharing timeline data and row IDs across +/// columns in the same chunk instead of duplicating them per column. +#[derive(Debug, Clone)] +pub enum ColumnGrouping { + /// Each column becomes its own entity/chunk (no deduplication). + Individual, + + /// Group columns that share a common prefix before `delimiter`. + /// + /// For example, with `delimiter: '_'`, columns `camera_rgb` and + /// `camera_depth` are grouped under entity `/camera` with components + /// `rgb` and `depth`. Columns without the delimiter are placed in + /// their own single-column group. + Prefix { delimiter: char, use_structs: bool }, + + /// Group columns by explicit prefix strings. + /// + /// Each column is checked against the prefixes in longest-first order. + /// The first matching prefix is stripped, and the column is added to that + /// prefix's group. One leading underscore is also stripped from the + /// remainder (so prefix `"cat"` on column `"cat_foo"` gives component `"foo"`). + /// + /// Columns that don't match any prefix become individual groups. + /// + /// **Note:** Matching uses simple `str::starts_with`, not delimiter-aware + /// boundaries. Prefix `"cat"` will match column `"catdog"` (remainder + /// `"dog"`). 
To avoid unintended matches, choose prefixes that are + /// unambiguous in your column namespace, or include the delimiter in the + /// prefix (e.g., `"cat_"` — though the leading-underscore strip then + /// becomes a no-op since there is no underscore to strip). + ExplicitPrefixes { + prefixes: Vec, + use_structs: bool, + }, +} + +impl Default for ColumnGrouping { + fn default() -> Self { + Self::Prefix { + delimiter: '_', + use_structs: true, + } + } +} + +/// What to produce from a group of matched columns. +/// Highly experimental and will definitely change as +/// we add tools to support this more generically +#[derive(Debug, Clone)] +pub enum ColumnMapping { + /// N columns → a Rerun component. Interleaved into `FixedSizeList(N, Float32)`. + Component { + /// Archetype + component descriptor used for the output chunk. + descriptor: ComponentDescriptor, + }, + + /// N columns → multi-instance Scalars with named series. + /// Interleaved into `FixedSizeList(N, Float64)` + companion names field. + Scalars { + /// Display name for each series, in the same order as `suffixes`. + names: Vec, + }, + + /// Translation + rotation columns → a `Transform3D` archetype. + /// + /// The translation suffixes come from the parent [`ColumnRule::suffixes`] field. + /// When both suffix sets match with the same sub-prefix, the columns are + /// combined into a `Transform3D` with translation and quaternion components. + /// + /// In struct mode this produces a nested struct with `translation` and + /// `quaternion` fields. In flat mode, two components at the same entity path. + Transform { + /// Ordered suffixes that identify the rotation columns + /// (e.g., `["_quat_x", "_quat_y", "_quat_z", "_quat_w"]`). + rotation_suffixes: Vec, + }, +} + +impl ColumnMapping { + /// `Translation3D` component mapping. 
+ pub fn translation3d() -> Self { + use re_sdk_types::archetypes::Transform3D; + Self::Component { + descriptor: Transform3D::descriptor_translation(), + } + } + + /// `RotationQuat` component mapping. + pub fn rotation_quat() -> Self { + use re_sdk_types::archetypes::Transform3D; + Self::Component { + descriptor: Transform3D::descriptor_quaternion(), + } + } + + /// `RotationAxisAngle` component mapping. + pub fn rotation_axis_angle() -> Self { + use re_sdk_types::archetypes::Transform3D; + Self::Component { + descriptor: Transform3D::descriptor_rotation_axis_angle(), + } + } + + /// `Scale3D` component mapping. + pub fn scale3d() -> Self { + use re_sdk_types::archetypes::Transform3D; + Self::Component { + descriptor: Transform3D::descriptor_scale(), + } + } + + /// `Transform3D` mapping (translation + rotation quaternion). + pub fn transform(rotation_suffixes: Vec) -> Self { + Self::Transform { rotation_suffixes } + } +} + +/// Rule for combining columns with matching suffixes into a typed component. +/// +/// When a set of columns whose names end with the specified `suffixes` (in order) +/// share a common prefix, they are combined according to `mapping`. +/// +/// Rules are processed in list order; the first rule whose suffixes match a set +/// of columns wins. Put specific rules before broad catch-all rules. +/// +/// Experimental: this API may change or be removed. +#[derive(Debug, Clone)] +pub struct ColumnRule { + /// Ordered suffixes that identify columns (e.g., `["_pos_x", "_pos_y", "_pos_z"]`). + pub suffixes: Vec, + + /// What to produce from the matched columns. + pub mapping: ColumnMapping, + + /// Optional override appended to the sub-prefix to form the struct field name. + /// + /// When present and `sub_prefix` is non-empty: `field_name = "{sub_prefix}{override}"`. + /// When present and `sub_prefix` is empty: `field_name = override` (leading `_` stripped). + /// The `suffix_fallback` is ignored when override is set. 
+ pub field_name_override: Option, +} + +/// Configuration for parquet loading. +/// +/// Fields marked "Experimental" are expected to change or be removed +/// as the parquet loading API matures. `column_grouping`, `index_columns`, +/// and `static_columns` are considered stable. +#[derive(Debug, Clone, Default)] +pub struct ParquetConfig { + /// How to group columns into chunks. + pub column_grouping: ColumnGrouping, + + /// Columns to use as timeline indices. When empty, a synthetic + /// `row_index` sequence is generated automatically. + pub index_columns: Vec, + + /// Column names with constant values — emitted as static data. + pub static_columns: Vec, + + // TODO(parquet): Ad-hoc; will be replaced by lenses in py-chunk. + /// Experimental: suffix-based column combination rules. + pub column_rules: Vec, +} + +impl ParquetConfig { + /// Default entity path prefix used when none is specified by the caller. + pub fn default_entity_path_prefix() -> EntityPath { + EntityPath::from("/") + } +} + +/// Specifies how a parquet column maps to a Rerun timeline. +#[derive(Debug, Clone)] +pub struct IndexColumn { + /// Column name in the parquet file. + pub name: String, + + /// What kind of timeline this represents. + pub index_type: IndexType, +} + +/// The type and scale of an index column. +#[derive(Debug, Clone, Copy)] +pub enum IndexType { + /// Timestamp (time since epoch). Raw values are scaled to nanoseconds. + Timestamp(TimeUnit), + + /// Duration (elapsed time). Raw values are scaled to nanoseconds. + Duration(TimeUnit), + + /// Ordinal sequence index. No scaling applied. + Sequence, +} + +impl IndexType { + /// Multiplier to convert raw values to nanoseconds. Returns 1 for Sequence. + pub(crate) fn ns_multiplier(self) -> i64 { + match self { + Self::Timestamp(unit) | Self::Duration(unit) => unit.ns_multiplier(), + Self::Sequence => 1, + } + } +} + +/// Scale of raw time values. Determines the multiplier to convert to nanoseconds. 
+#[derive(Debug, Clone, Copy, Default)] +pub enum TimeUnit { + #[default] + Nanoseconds, + Microseconds, + Milliseconds, + Seconds, +} + +impl TimeUnit { + /// Multiplier to convert a raw value in this unit to nanoseconds. + pub fn ns_multiplier(self) -> i64 { + match self { + Self::Nanoseconds => 1, + Self::Microseconds => 1_000, + Self::Milliseconds => 1_000_000, + Self::Seconds => 1_000_000_000, + } + } +} diff --git a/crates/store/re_parquet/src/grouping.rs b/crates/store/re_parquet/src/grouping.rs new file mode 100644 index 000000000000..20a735081024 --- /dev/null +++ b/crates/store/re_parquet/src/grouping.rs @@ -0,0 +1,629 @@ +//! Column grouping algorithm for mapping parquet columns to Rerun entities. + +use re_chunk::EntityPath; +use re_sdk_types::ComponentDescriptor; + +use crate::config::{ColumnGrouping, ColumnMapping, ColumnRule}; + +/// An entry inside a [`ColumnGroup`]. +pub(crate) enum ColumnGroupEntry { + /// A raw column, emitted as-is with `wrap_in_fixed_size_list`. + Raw { col_idx: usize, comp_name: String }, + + /// Multiple columns combined into a typed archetype component. + Component { + col_indices: Vec, + descriptor: ComponentDescriptor, + + /// Struct field name when this entry is part of a multi-entry prefix group. + field_name: String, + }, + + /// Multiple columns combined into N-instance `Scalars` with named series. + ScalarGroup { + col_indices: Vec, + names: Vec, + + /// Struct field name when this entry is part of a multi-entry prefix group. + field_name: String, + }, + + /// Translation + rotation columns combined into a `Transform3D`. + /// + /// In struct mode, emitted as a nested struct with `translation` and + /// `quaternion` fields. In flat mode, emitted as two separate components. + Transform { + translation_col_indices: Vec, + rotation_col_indices: Vec, + translation_descriptor: ComponentDescriptor, + rotation_descriptor: ComponentDescriptor, + + /// Struct field name when this entry is part of a multi-entry prefix group. 
+ field_name: String, + }, +} + +/// A set of columns that will be emitted as a single chunk. +pub(crate) struct ColumnGroup { + pub entity_path: EntityPath, + pub entries: Vec, +} + +/// Compute column groups: prefix-split first, then apply column rules within each group. +pub(crate) fn compute_column_groups( + schema: &arrow::datatypes::Schema, + excluded: &std::collections::HashSet, + entity_path_prefix: &EntityPath, + grouping: &ColumnGrouping, + column_rules: &[ColumnRule], +) -> Vec { + warn_shadowed_rules(column_rules); + + match grouping { + ColumnGrouping::Individual => { + let (mut groups, consumed) = + match_rules_raw(schema, excluded, entity_path_prefix, column_rules); + for (i, field) in schema.fields().iter().enumerate() { + if excluded.contains(&i) || consumed.contains(&i) { + continue; + } + groups.push(ColumnGroup { + entity_path: entity_path_prefix.join(&EntityPath::from(field.name().as_str())), + entries: vec![ColumnGroupEntry::Raw { + col_idx: i, + comp_name: field.name().clone(), + }], + }); + } + groups + } + + ColumnGrouping::Prefix { + delimiter, + use_structs: _, + } => { + let mut prefix_groups: std::collections::BTreeMap> = + std::collections::BTreeMap::new(); + + for (i, field) in schema.fields().iter().enumerate() { + if excluded.contains(&i) { + continue; + } + let name = field.name().as_str(); + let (prefix, comp_name) = match name.find(*delimiter) { + Some(pos) if pos + delimiter.len_utf8() < name.len() => { + (&name[..pos], &name[pos + delimiter.len_utf8()..]) + } + _ => (name, name), + }; + prefix_groups + .entry(prefix.to_owned()) + .or_default() + .push((i, comp_name.to_owned())); + } + + let mut groups = Vec::new(); + for (prefix, comp_entries) in prefix_groups { + let base_path = entity_path_prefix.join(&EntityPath::from(prefix.as_str())); + let all_entries = match_rules_in_group(&comp_entries, column_rules); + if !all_entries.is_empty() { + groups.push(ColumnGroup { + entity_path: base_path, + entries: all_entries, + }); + } + 
} + groups + } + + ColumnGrouping::ExplicitPrefixes { + prefixes, + use_structs: _, + } => { + // Sort prefixes longest-first so "catalog" is tried before "cat". + let mut sorted_prefixes = prefixes.clone(); + sorted_prefixes.sort_by_key(|b| std::cmp::Reverse(b.len())); + + let mut prefix_groups: std::collections::BTreeMap> = + std::collections::BTreeMap::new(); + let mut unmatched: Vec<(usize, String)> = Vec::new(); + + for (i, field) in schema.fields().iter().enumerate() { + if excluded.contains(&i) { + continue; + } + let name = field.name().as_str(); + let mut matched = false; + for prefix in &sorted_prefixes { + if let Some(remainder) = name.strip_prefix(prefix.as_str()) { + if remainder.is_empty() { + // Exact match (column name == prefix): treat as individual. + break; + } + // Strip one leading underscore so prefix "cat" on "cat_foo" → "foo". + let comp_name = remainder.strip_prefix('_').unwrap_or(remainder); + prefix_groups + .entry(prefix.clone()) + .or_default() + .push((i, comp_name.to_owned())); + matched = true; + break; + } + } + if !matched { + unmatched.push((i, name.to_owned())); + } + } + + let mut groups = Vec::new(); + + for (prefix, comp_entries) in prefix_groups { + let base_path = entity_path_prefix.join(&EntityPath::from(prefix.as_str())); + let all_entries = match_rules_in_group(&comp_entries, column_rules); + if !all_entries.is_empty() { + groups.push(ColumnGroup { + entity_path: base_path, + entries: all_entries, + }); + } + } + + // Unmatched columns: each gets its own individual group. + for (i, name) in unmatched { + groups.push(ColumnGroup { + entity_path: entity_path_prefix.join(&EntityPath::from(name.as_str())), + entries: vec![ColumnGroupEntry::Raw { + col_idx: i, + comp_name: name, + }], + }); + } + + groups + } + } +} + +/// Apply column rules within a prefix group. +/// +/// Returns a flat list of all entries (component + scalar + raw). 
+fn match_rules_in_group( + entries: &[(usize, String)], + column_rules: &[ColumnRule], +) -> Vec { + let mut consumed = std::collections::HashSet::new(); + let mut all_entries = Vec::new(); + + let name_to_idx: std::collections::HashMap<&str, usize> = entries + .iter() + .map(|(idx, name)| (name.as_str(), *idx)) + .collect(); + + let try_match_suffixes = |suffixes: &[String], + consumed: &mut std::collections::HashSet| + -> Vec<(String, Vec)> { + if suffixes.is_empty() { + return vec![]; + } + + // Strip the leading `_` from suffixes (it corresponds to the delimiter + // already consumed by prefix splitting), but require that `raw_sub` + // is either empty or ends with `_`. This enforces that the suffix + // matched at an underscore boundary within the comp_name. + // + // Example: suffix `_x`, stripped to `x`. + // - comp_name `accel_x` → raw_sub `accel_` → ends with `_` ✓ + // - comp_name `accel_ax` → raw_sub `accel_a` → NOT ending with `_` ✗ + // - comp_name `x` → raw_sub `` → empty ✓ (matched at start) + let stripped: Vec<&str> = suffixes + .iter() + .map(|s| s.strip_prefix('_').unwrap_or(s.as_str())) + .collect(); + let first = stripped[0]; + + let mut matches = vec![]; + for &(idx, ref comp_name) in entries { + if consumed.contains(&idx) { + continue; + } + let Some(raw_sub) = comp_name.strip_suffix(first) else { + continue; + }; + + // Enforce underscore boundary: raw_sub must be empty or end with '_'. + if !raw_sub.is_empty() && !raw_sub.ends_with('_') { + continue; + } + + let mut col_indices = vec![idx]; + let mut all_found = true; + + for &suffix in &stripped[1..] 
{ + let expected = format!("{raw_sub}{suffix}"); + match name_to_idx.get(expected.as_str()) { + Some(&other_idx) if !consumed.contains(&other_idx) => { + col_indices.push(other_idx); + } + _ => { + all_found = false; + break; + } + } + } + + if all_found { + for &ci in &col_indices { + consumed.insert(ci); + } + let sub_prefix = raw_sub.strip_suffix('_').unwrap_or(raw_sub).to_owned(); + matches.push((sub_prefix, col_indices)); + } + } + matches + }; + + for rule in column_rules { + match &rule.mapping { + ColumnMapping::Transform { rotation_suffixes } => { + // Match translation and rotation suffix sets independently, + // then join on sub_prefix to form Transform entries. + use re_sdk_types::archetypes::Transform3D; + + let translation_matches = try_match_suffixes(&rule.suffixes, &mut consumed); + let rotation_matches = try_match_suffixes(rotation_suffixes, &mut consumed); + + // Index rotation matches by sub_prefix for joining. + let mut rot_by_prefix: std::collections::HashMap> = + std::collections::HashMap::new(); + for (sub_prefix, col_indices) in &rotation_matches { + rot_by_prefix.insert(sub_prefix.clone(), col_indices.clone()); + } + + let mut unmatched_translations: Vec<(String, Vec)> = Vec::new(); + for (sub_prefix, trans_indices) in translation_matches { + if let Some(rot_indices) = rot_by_prefix.remove(&sub_prefix) { + let field_name = derive_field_name( + &sub_prefix, + &suffix_common_prefix(&rule.suffixes), + rule.field_name_override.as_deref(), + ); + all_entries.push(ColumnGroupEntry::Transform { + translation_col_indices: trans_indices, + rotation_col_indices: rot_indices, + translation_descriptor: Transform3D::descriptor_translation(), + rotation_descriptor: Transform3D::descriptor_quaternion(), + field_name, + }); + } else { + unmatched_translations.push((sub_prefix, trans_indices)); + } + } + + // Unconsume columns from unmatched translation/rotation sets + // so they can be picked up by later rules. 
+ for (_prefix, indices) in &unmatched_translations { + for &ci in indices { + consumed.remove(&ci); + } + } + for indices in rot_by_prefix.values() { + for &ci in indices { + consumed.remove(&ci); + } + } + } + mapping => { + let suffix_fallback = suffix_common_prefix(&rule.suffixes); + for (sub_prefix, col_indices) in try_match_suffixes(&rule.suffixes, &mut consumed) { + let field_name = derive_field_name( + &sub_prefix, + &suffix_fallback, + rule.field_name_override.as_deref(), + ); + match mapping { + ColumnMapping::Component { descriptor } => { + all_entries.push(ColumnGroupEntry::Component { + col_indices, + descriptor: descriptor.clone(), + field_name, + }); + } + ColumnMapping::Scalars { names } => { + let mut field_name = field_name; + if field_name.is_empty() { + field_name = "scalars".to_owned(); + } + all_entries.push(ColumnGroupEntry::ScalarGroup { + col_indices, + names: names.clone(), + field_name, + }); + } + ColumnMapping::Transform { .. } => unreachable!(), + } + } + } + } + } + + all_entries.extend( + entries + .iter() + .filter(|(idx, _)| !consumed.contains(idx)) + .map(|(idx, name)| ColumnGroupEntry::Raw { + col_idx: *idx, + comp_name: name.clone(), + }), + ); + + all_entries +} + +/// Scan raw column names for suffix-pattern matches (used by [`ColumnGrouping::Individual`]). +fn match_rules_raw( + schema: &arrow::datatypes::Schema, + excluded: &std::collections::HashSet, + entity_path_prefix: &EntityPath, + rules: &[ColumnRule], +) -> (Vec, std::collections::HashSet) { + let mut consumed = std::collections::HashSet::new(); + let mut grouped_entries: std::collections::BTreeMap> = + std::collections::BTreeMap::new(); + + let name_to_idx: std::collections::HashMap<&str, usize> = schema + .fields() + .iter() + .enumerate() + .filter(|(i, _)| !excluded.contains(i)) + .map(|(i, f)| (f.name().as_str(), i)) + .collect(); + + /// Try to match all suffixes against raw column names, returning `(prefix, col_indices)` pairs. 
+ fn try_match_raw( + suffixes: &[String], + name_to_idx: &std::collections::HashMap<&str, usize>, + consumed: &std::collections::HashSet, + ) -> Vec<(String, Vec)> { + if suffixes.is_empty() { + return vec![]; + } + let first_suffix = &suffixes[0]; + let mut matches = vec![]; + for (&name, &idx) in name_to_idx { + if consumed.contains(&idx) { + continue; + } + let Some(prefix) = name.strip_suffix(first_suffix.as_str()) else { + continue; + }; + let mut col_indices = vec![idx]; + let mut all_found = true; + for suffix in &suffixes[1..] { + let expected = format!("{prefix}{suffix}"); + match name_to_idx.get(expected.as_str()) { + Some(&other_idx) if !consumed.contains(&other_idx) => { + col_indices.push(other_idx); + } + _ => { + all_found = false; + break; + } + } + } + if all_found { + matches.push((prefix.to_owned(), col_indices)); + } + } + matches + } + + for rule in rules { + if let ColumnMapping::Transform { rotation_suffixes } = &rule.mapping { + use re_sdk_types::archetypes::Transform3D; + + let trans_matches = try_match_raw(&rule.suffixes, &name_to_idx, &consumed); + // Consume translation columns first. + for (_, indices) in &trans_matches { + for &ci in indices { + consumed.insert(ci); + } + } + let rot_matches = try_match_raw(rotation_suffixes, &name_to_idx, &consumed); + for (_, indices) in &rot_matches { + for &ci in indices { + consumed.insert(ci); + } + } + + // Join on prefix. 
+ let mut rot_by_prefix: std::collections::HashMap> = + rot_matches.into_iter().collect(); + + for (prefix, trans_indices) in trans_matches { + if let Some(rot_indices) = rot_by_prefix.remove(&prefix) { + grouped_entries + .entry(prefix) + .or_default() + .push(ColumnGroupEntry::Transform { + translation_col_indices: trans_indices, + rotation_col_indices: rot_indices, + translation_descriptor: Transform3D::descriptor_translation(), + rotation_descriptor: Transform3D::descriptor_quaternion(), + field_name: String::new(), + }); + } else { + // Unconsume unmatched translation columns. + for &ci in &trans_indices { + consumed.remove(&ci); + } + } + } + // Unconsume unmatched rotation columns. + for indices in rot_by_prefix.values() { + for &ci in indices { + consumed.remove(&ci); + } + } + } else { + if rule.suffixes.is_empty() { + continue; + } + let first_suffix = &rule.suffixes[0]; + + for (&name, &idx) in &name_to_idx { + if consumed.contains(&idx) { + continue; + } + let Some(prefix) = name.strip_suffix(first_suffix.as_str()) else { + continue; + }; + + let mut col_indices = vec![idx]; + let mut all_found = true; + + for suffix in &rule.suffixes[1..] { + let expected = format!("{prefix}{suffix}"); + match name_to_idx.get(expected.as_str()) { + Some(&other_idx) if !consumed.contains(&other_idx) => { + col_indices.push(other_idx); + } + _ => { + all_found = false; + break; + } + } + } + + if all_found { + for &ci in &col_indices { + consumed.insert(ci); + } + let entry = match &rule.mapping { + ColumnMapping::Component { descriptor } => ColumnGroupEntry::Component { + col_indices, + descriptor: descriptor.clone(), + field_name: prefix.to_owned(), + }, + ColumnMapping::Scalars { names } => ColumnGroupEntry::ScalarGroup { + col_indices, + names: names.clone(), + field_name: prefix.to_owned(), + }, + ColumnMapping::Transform { .. 
} => unreachable!(),
+                    };
+                    grouped_entries
+                        .entry(prefix.to_owned())
+                        .or_default()
+                        .push(entry);
+                }
+            }
+        }
+    }
+
+    let groups = grouped_entries
+        .into_iter()
+        .map(|(prefix, entries)| {
+            let entity_path = if prefix.is_empty() {
+                entity_path_prefix.clone()
+            } else {
+                entity_path_prefix.join(&EntityPath::from(prefix.as_str()))
+            };
+            ColumnGroup {
+                entity_path,
+                entries,
+            }
+        })
+        .collect();
+
+    (groups, consumed)
+}
+
+/// Derive the struct field name from sub-prefix and optional override.
+///
+/// When `field_name_override` is `Some`, `suffix_fallback` is ignored entirely —
+/// the override replaces whatever the `suffix_fallback` would have contributed.
+fn derive_field_name(
+    sub_prefix: &str,
+    suffix_fallback: &str,
+    field_name_override: Option<&str>,
+) -> String {
+    // Treat empty override as no override.
+    let field_name_override = field_name_override.filter(|s| !s.is_empty());
+
+    match field_name_override {
+        Some(ovr) => {
+            let clean_ovr = ovr.strip_prefix('_').unwrap_or(ovr);
+            if sub_prefix.is_empty() {
+                clean_ovr.to_owned()
+            } else {
+                format!("{sub_prefix}{ovr}")
+            }
+        }
+        None => {
+            if sub_prefix.is_empty() {
+                if suffix_fallback.is_empty() {
+                    String::new()
+                } else {
+                    suffix_fallback.to_owned()
+                }
+            } else {
+                sub_prefix.to_owned()
+            }
+        }
+    }
+}
+
+/// Derive a field name from the common prefix of suffix patterns.
+///
+/// For suffixes like `["_pos_x", "_pos_y", "_pos_z"]`, returns `"pos"`.
+/// For suffixes like `["_x", "_y", "_z"]`, returns `""`.
+///
+/// One leading `_` is dropped from every suffix before comparing, and one
+/// trailing `_` is dropped from the result. An empty `suffixes` slice yields
+/// an empty string.
+///
+/// The comparison is byte-wise; the resulting length is then shortened to the
+/// nearest UTF-8 character boundary, so suffixes that diverge inside a
+/// multi-byte character (e.g. `"_é"` vs `"_è"`) produce a shorter prefix
+/// instead of a slicing panic.
+fn suffix_common_prefix(suffixes: &[String]) -> String {
+    let stripped: Vec<&str> = suffixes
+        .iter()
+        .map(|s| s.strip_prefix('_').unwrap_or(s.as_str()))
+        .collect();
+    if stripped.is_empty() {
+        return String::new();
+    }
+    let first = stripped[0].as_bytes();
+    let mut len = first.len();
+    for s in &stripped[1..] {
+        let b = s.as_bytes();
+        len = len.min(b.len());
+        for i in 0..len {
+            if first[i] != b[i] {
+                len = i;
+                break;
+            }
+        }
+    }
+    // `len` counts bytes. Back off to a char boundary before slicing, because
+    // `&str[..len]` panics when `len` falls inside a multi-byte character.
+    // Index 0 is always a boundary, so this loop terminates.
+    while !stripped[0].is_char_boundary(len) {
+        len -= 1;
+    }
+    let prefix = &stripped[0][..len];
+    prefix.strip_suffix('_').unwrap_or(prefix).to_owned()
+}
+
+/// Log a warning if an earlier rule may shadow a later, more specific rule.
+fn warn_shadowed_rules(rules: &[ColumnRule]) {
+    for i in 0..rules.len() {
+        for j in (i + 1)..rules.len() {
+            let a = &rules[i].suffixes;
+            let b = &rules[j].suffixes;
+            if a.len() == b.len() {
+                let shadows = a.iter().zip(b.iter()).all(|(sa, sb)| {
+                    let sa = sa.strip_prefix('_').unwrap_or(sa.as_str());
+                    let sb = sb.strip_prefix('_').unwrap_or(sb.as_str());
+                    sb.ends_with(sa)
+                });
+                if shadows {
+                    re_log::warn_once!(
+                        "Column rule {} (suffixes {:?}) may shadow rule {} (suffixes {:?}). \
+                         Consider reordering so more specific rules come first.",
+                        i,
+                        a,
+                        j,
+                        b
+                    );
+                }
+            }
+        }
+    }
+}
diff --git a/crates/store/re_parquet/src/lib.rs b/crates/store/re_parquet/src/lib.rs
new file mode 100644
index 000000000000..e86edf66561e
--- /dev/null
+++ b/crates/store/re_parquet/src/lib.rs
@@ -0,0 +1,44 @@
+//! Core parquet-to-chunk loading logic for Rerun.
+//!
+//! Reads any `.parquet` file, introspects its Arrow schema, and maps columns
+//! to Rerun components. Row groups are streamed as individual chunks via a
+//! pull-based iterator to reduce peak memory usage.
+
+mod config;
+mod grouping;
+mod streaming;
+mod timeline;
+
+pub use config::{
+    ColumnGrouping, ColumnMapping, ColumnRule, IndexColumn, IndexType, ParquetConfig, TimeUnit,
+};
+pub use streaming::ParquetError;
+
+use re_chunk::{Chunk, EntityPath};
+
+/// Load a parquet file and return an iterator of chunks.
+///
+/// The first chunk (if any) contains file-level metadata at `EntityPath::properties()`.
+/// Subsequent chunks contain data grouped according to the config.
+/// The caller is responsible for forwarding them to a recording, channel, etc.
+/// +/// The iterator may yield `Err` for individual record batch failures. +/// Callers who want to continue despite errors should skip `Err` items. +pub fn load_parquet( + path: &std::path::Path, + config: &ParquetConfig, + entity_path_prefix: &EntityPath, +) -> Result>, ParquetError> { + streaming::load_from_path(path, config, entity_path_prefix) +} + +/// Load parquet from in-memory bytes and return an iterator of chunks. +/// +/// See [`load_parquet`] for details on the returned iterator. +pub fn load_parquet_from_bytes( + bytes: &[u8], + config: &ParquetConfig, + entity_path_prefix: &EntityPath, +) -> Result>, ParquetError> { + streaming::load_from_bytes(bytes, config, entity_path_prefix) +} diff --git a/crates/store/re_parquet/src/streaming.rs b/crates/store/re_parquet/src/streaming.rs new file mode 100644 index 000000000000..d52b7161f800 --- /dev/null +++ b/crates/store/re_parquet/src/streaming.rs @@ -0,0 +1,887 @@ +//! Core batch→chunk conversion with an iterator-based streaming API. + +use std::collections::VecDeque; +use std::sync::Arc; + +use arrow::array::{ + Array, FixedSizeListArray, Float32Array, Float64Array, RecordBatch, RecordBatchReader as _, + StructArray, +}; +use arrow::buffer::OffsetBuffer; +use arrow::datatypes::{DataType, Field, Fields}; +use re_chunk::{Chunk, ChunkId, EntityPath, RowId, TimeColumn, TimePoint}; +// Component: for KeyValuePairs::name(), ComponentBatch: for .try_serialized() +use re_sdk_types::{Component as _, ComponentBatch as _, ComponentDescriptor, datatypes}; + +use crate::config::{ColumnGrouping, ParquetConfig}; +use crate::grouping::{ColumnGroup, ColumnGroupEntry, compute_column_groups}; +use crate::timeline::{self, TimelineInfo}; + +const PARQUET_METADATA_ARCHETYPE: &str = "ParquetMetadata"; + +/// Errors that can occur during parquet loading. 
+#[derive(Debug, thiserror::Error)] +pub enum ParquetError { + #[error(transparent)] + Arrow(#[from] arrow::error::ArrowError), + + #[error(transparent)] + Other(#[from] anyhow::Error), +} + +/// Load a parquet file from disk and return a chunk iterator. +pub(crate) fn load_from_path( + path: &std::path::Path, + config: &ParquetConfig, + entity_path_prefix: &EntityPath, +) -> Result { + use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; + + let file = + std::fs::File::open(path).map_err(|err| ParquetError::from(anyhow::Error::from(err)))?; + let builder = ParquetRecordBatchReaderBuilder::try_new(file) + .map_err(|err| ParquetError::from(anyhow::Error::from(err)))?; + + let metadata = builder.metadata().clone(); + let reader = builder + .build() + .map_err(|err| ParquetError::from(anyhow::Error::from(err)))?; + let schema = reader.schema().clone(); + + build_iterator( + Box::new(reader), + schema, + &metadata, + config, + entity_path_prefix.clone(), + ) +} + +/// Load parquet from in-memory bytes and return a chunk iterator. +pub(crate) fn load_from_bytes( + bytes: &[u8], + config: &ParquetConfig, + entity_path_prefix: &EntityPath, +) -> Result { + use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; + + let builder = ParquetRecordBatchReaderBuilder::try_new(bytes::Bytes::copy_from_slice(bytes)) + .map_err(|err| ParquetError::from(anyhow::Error::from(err)))?; + + let metadata = builder.metadata().clone(); + let reader = builder + .build() + .map_err(|err| ParquetError::from(anyhow::Error::from(err)))?; + let schema = reader.schema().clone(); + + build_iterator( + Box::new(reader), + schema, + &metadata, + config, + entity_path_prefix.clone(), + ) +} + +/// Construct a [`ParquetChunkIterator`] from a reader and config. 
+fn build_iterator( + reader: Box>>, + schema: Arc, + parquet_metadata: &parquet::file::metadata::ParquetMetaData, + config: &ParquetConfig, + entity_path_prefix: EntityPath, +) -> Result { + re_tracing::profile_function!(); + + let timeline_infos: Vec = if config.index_columns.is_empty() { + vec![] + } else { + timeline::resolve_explicit_index_columns(&schema, &config.index_columns)? + }; + + let static_col_map: Vec<(usize, String)> = config + .static_columns + .iter() + .filter_map(|name| { + schema + .fields() + .iter() + .position(|f| f.name() == name) + .map(|idx| (idx, name.clone())) + }) + .collect(); + + let excluded: std::collections::HashSet = timeline_infos + .iter() + .map(|tl| tl.column_index) + .chain(static_col_map.iter().map(|(idx, _)| *idx)) + .collect(); + + let column_groups = compute_column_groups( + &schema, + &excluded, + &entity_path_prefix, + &config.column_grouping, + &config.column_rules, + ); + + // use_structs is only meaningful for Prefix mode. Individual mode always + // produces single-entry groups, so the struct/flat dispatch is a no-op. + let use_structs = matches!( + &config.column_grouping, + ColumnGrouping::Prefix { + use_structs: true, + .. + } | ColumnGrouping::ExplicitPrefixes { + use_structs: true, + .. + } + ); + + let metadata_chunk = build_metadata_chunk(parquet_metadata).map(Box::new); + + Ok(ParquetChunkIterator { + phase: Phase::Metadata(metadata_chunk), + reader, + column_groups, + timeline_infos, + entity_path_prefix, + schema, + static_col_map, + static_reference: None, + use_structs, + row_offset: 0, + pending: VecDeque::new(), + }) +} + +// --------------------------------------------------------------------------- +// Iterator state machine +// --------------------------------------------------------------------------- + +enum Phase { + /// Yield the metadata chunk (if any), then transition to `DataBatches`. + Metadata(Option>), + + /// Read and process record batches. + DataBatches, + + /// Terminal state. 
+ Done, +} + +/// Pull-based iterator that yields [`Chunk`]s from a parquet file. +/// +/// The iterator may yield `Err` for individual record batch failures. +/// Callers who want to continue despite errors should skip `Err` items. +pub(crate) struct ParquetChunkIterator { + phase: Phase, + reader: Box>>, + column_groups: Vec, + timeline_infos: Vec, + entity_path_prefix: EntityPath, + schema: Arc, + + /// Map from column index to column name for columns designated as static/timeless. + static_col_map: Vec<(usize, String)>, + + /// First-row values for static columns, captured from the first batch. + /// Used to verify consistency across subsequent batches. + static_reference: Option)>>, + + /// Running row count across batches, used as offset for fallback `row_index` timeline. + row_offset: i64, + + /// Whether multi-entry prefix groups should be wrapped in a `StructArray`. + /// When false, each entry becomes its own chunk (flat/pre-struct layout). + use_structs: bool, + + /// Chunks queued for yield by `next()`. Filled by `build_data_chunks` (one per + /// column group per batch) and `build_finalization_chunks`. Bounded by the number + /// of column groups, not file size. 
+ pending: VecDeque>, +} + +impl Iterator for ParquetChunkIterator { + type Item = Result; + + fn next(&mut self) -> Option { + loop { + if let Some(item) = self.pending.pop_front() { + return Some(item); + } + + match self.phase { + Phase::Metadata(ref mut meta) => { + let chunk = meta.take(); + self.phase = Phase::DataBatches; + if let Some(c) = chunk { + return Some(Ok(*c)); + } + } + + Phase::DataBatches => match self.reader.next() { + Some(Ok(batch)) => { + if batch.num_rows() == 0 { + continue; + } + + if let Err(err) = self.process_static_columns(&batch) { + self.phase = Phase::Done; + return Some(Err(err)); + } + + let timelines = self.build_timelines(&batch); + self.build_data_chunks(&batch, &timelines); + + #[expect(clippy::cast_possible_wrap)] + { + self.row_offset += batch.num_rows() as i64; + } + } + Some(Err(err)) => { + return Some(Err(err.into())); + } + None => { + self.build_finalization_chunks(); + self.phase = Phase::Done; + } + }, + + Phase::Done => return None, + } + } + } +} + +impl ParquetChunkIterator { + /// Verify static columns are uniform and consistent across batches. 
+ fn process_static_columns(&mut self, batch: &RecordBatch) -> Result<(), ParquetError> { + for (col_idx, col_name) in &self.static_col_map { + let array = batch.column(*col_idx); + verify_column_uniform(array.as_ref(), col_name)?; + + if let Some(ref refs) = self.static_reference { + let ref_val = &refs + .iter() + .find(|(n, _)| n == col_name) + .expect("static reference should contain all static columns") + .1; + let current_first = format_first_value(array.as_ref()); + let stored_first = format_first_value(ref_val.as_ref()); + if current_first != stored_first { + return Err(anyhow::anyhow!( + "Static column '{col_name}' changed between batches: \ + '{stored_first}' → '{current_first}'" + ) + .into()); + } + } + } + + if self.static_reference.is_none() && !self.static_col_map.is_empty() { + self.static_reference = Some( + self.static_col_map + .iter() + .map(|(col_idx, col_name)| { + (col_name.clone(), batch.column(*col_idx).slice(0, 1)) + }) + .collect(), + ); + } + + Ok(()) + } + + /// Build timeline columns for a single batch. + fn build_timelines( + &self, + batch: &RecordBatch, + ) -> re_chunk::external::nohash_hasher::IntMap { + let mut tls: re_chunk::external::nohash_hasher::IntMap<_, TimeColumn> = Default::default(); + for tl_info in &self.timeline_infos { + let time_col = batch.column(tl_info.column_index); + if let Some(times) = + timeline::extract_time_values(time_col.as_ref(), tl_info.ns_multiplier) + { + let time_column = TimeColumn::new(Some(true), tl_info.timeline, times); + tls.insert(*tl_info.timeline.name(), time_column); + } + } + if tls.is_empty() { + timeline::fallback_sequence_timeline(self.row_offset, batch.num_rows()) + } else { + tls + } + } + + /// Build data chunks for each column group from a single batch. 
+ fn build_data_chunks( + &mut self, + batch: &RecordBatch, + timelines: &re_chunk::external::nohash_hasher::IntMap, + ) { + let num_rows = batch.num_rows(); + + for group in &self.column_groups { + if self.use_structs { + // Struct mode: one chunk per group. + // Use single-entry shortcut only for Raw/Archetype entries + // (ScalarGroups need the companion _names field → struct). + let needs_struct = group.entries.iter().any(|e| { + matches!( + e, + ColumnGroupEntry::ScalarGroup { .. } | ColumnGroupEntry::Transform { .. } + ) + }); + let components: re_chunk::ChunkComponents = if group.entries.len() == 1 + && !needs_struct + { + build_single_entry_component(&self.schema, batch, &group.entries[0], num_rows) + .into_iter() + .collect() + } else { + build_struct_component(&self.schema, batch, &group.entries, num_rows) + .into_iter() + .collect() + }; + emit_chunk( + &mut self.pending, + group.entity_path.clone(), + timelines, + components, + ); + } else { + // Flat mode: group entries by entity path, one chunk per path. + // This avoids duplicating timeline data for every entry. + let mut by_path: std::collections::BTreeMap< + EntityPath, + Vec<(ComponentDescriptor, arrow::array::ListArray)>, + > = std::collections::BTreeMap::new(); + for entry in &group.entries { + let entity_path = flat_entity_path(&group.entity_path, entry); + if let ColumnGroupEntry::Transform { + translation_col_indices, + rotation_col_indices, + translation_descriptor, + rotation_descriptor, + .. + } = entry + { + // Transform in flat mode: emit both components at the same path. 
+ if let Some(components) = build_transform_components( + batch, + translation_col_indices, + rotation_col_indices, + translation_descriptor, + rotation_descriptor, + ) { + by_path.entry(entity_path).or_default().extend(components); + } + } else if let Some(component) = + build_single_entry_component(&self.schema, batch, entry, num_rows) + { + by_path.entry(entity_path).or_default().push(component); + } + } + for (entity_path, components) in by_path { + let chunk_components: re_chunk::ChunkComponents = + components.into_iter().collect(); + emit_chunk(&mut self.pending, entity_path, timelines, chunk_components); + } + } + } + } + + /// Build finalization chunks: static columns + scalar name components. + fn build_finalization_chunks(&mut self) { + // Static columns as a single timeless chunk. + if let Some(ref refs) = self.static_reference { + let components: re_chunk::ChunkComponents = refs + .iter() + .map(|(name, array)| { + let field = Field::new(name.as_str(), array.data_type().clone(), true); + let list_array = wrap_in_fixed_size_list(&field, array.clone()); + ( + ComponentDescriptor::partial(name.as_str()), + arrow::array::ListArray::from(list_array), + ) + }) + .collect(); + emit_chunk( + &mut self.pending, + self.entity_path_prefix.clone(), + &Default::default(), + components, + ); + } + + // Flat mode: emit static Name components for scalar groups. + // In struct mode, series names are stored as a companion struct field + // (e.g., "accel_names") alongside the scalar data field. + if !self.use_structs { + for group in &self.column_groups { + for entry in &group.entries { + if let ColumnGroupEntry::ScalarGroup { names, .. 
} = entry { + let entity_path = flat_entity_path(&group.entity_path, entry); + let names_array = arrow::array::StringArray::from(names.clone()); + let inner_field = Arc::new(Field::new("item", DataType::Utf8, false)); + let n = i32::try_from(names.len()).expect("scalar suffix group too large"); + let fsl = + FixedSizeListArray::new(inner_field, n, Arc::new(names_array), None); + let components: re_chunk::ChunkComponents = std::iter::once(( + re_sdk_types::archetypes::SeriesLines::descriptor_names(), + arrow::array::ListArray::from(fsl), + )) + .collect(); + emit_chunk( + &mut self.pending, + entity_path, + &Default::default(), + components, + ); + } + } + } + } + } +} + +// --------------------------------------------------------------------------- +// Metadata chunk +// --------------------------------------------------------------------------- + +/// Build a static chunk from parquet file-level key-value metadata. +fn build_metadata_chunk(metadata: &parquet::file::metadata::ParquetMetaData) -> Option { + let kv_metadata = metadata.file_metadata().key_value_metadata()?; + + if kv_metadata.is_empty() { + return None; + } + + let pairs: Vec = kv_metadata + .iter() + .map(|kv| datatypes::Utf8Pair { + first: kv.key.clone().into(), + second: kv.value.clone().unwrap_or_default().into(), + }) + .collect(); + + let kv_component = re_sdk_types::components::KeyValuePairs(pairs); + + let batch = kv_component + .try_serialized(ComponentDescriptor { + archetype: Some(PARQUET_METADATA_ARCHETYPE.into()), + component: "file_metadata".into(), + component_type: Some(re_sdk_types::components::KeyValuePairs::name()), + }) + .ok()?; + + Chunk::builder(EntityPath::properties()) + .with_serialized_batches(RowId::new(), TimePoint::STATIC, [batch]) + .build() + .ok() +} + +// --------------------------------------------------------------------------- +// Arrow utilities +// --------------------------------------------------------------------------- + +fn emit_chunk( + pending: &mut 
VecDeque>, + entity_path: EntityPath, + timelines: &re_chunk::external::nohash_hasher::IntMap, + components: re_chunk::ChunkComponents, +) { + match Chunk::from_auto_row_ids(ChunkId::new(), entity_path, timelines.clone(), components) { + Ok(chunk) => pending.push_back(Ok(chunk)), + Err(err) => pending.push_back(Err(anyhow::anyhow!( + "Failed to build chunk from Parquet batch: {err}" + ) + .into())), + } +} + +/// Derive the entity path for a single entry in flat mode. +/// +/// Archetype/ScalarGroup entries append their `field_name` as a sub-path +/// (e.g., base `/A` + `field_name` `pos` → `/A/pos`). Raw entries stay at base. +fn flat_entity_path(base: &EntityPath, entry: &ColumnGroupEntry) -> EntityPath { + let sub = match entry { + ColumnGroupEntry::Component { field_name, .. } + | ColumnGroupEntry::ScalarGroup { field_name, .. } + | ColumnGroupEntry::Transform { field_name, .. } => field_name.as_str(), + ColumnGroupEntry::Raw { .. } => "", + }; + if sub.is_empty() { + base.clone() + } else { + base.join(&EntityPath::from(sub)) + } +} + +/// Build a single `List` component from all entries in a prefix group. +fn build_struct_component( + schema: &arrow::datatypes::Schema, + batch: &RecordBatch, + entries: &[ColumnGroupEntry], + num_rows: usize, +) -> Option<(ComponentDescriptor, arrow::array::ListArray)> { + let mut struct_fields: Vec> = Vec::new(); + let mut struct_arrays: Vec> = Vec::new(); + + for entry in entries { + match entry { + ColumnGroupEntry::Raw { col_idx, comp_name } => { + let source_field = &schema.fields()[*col_idx]; + let array = batch.column(*col_idx).clone(); + struct_fields.push(Arc::new(Field::new( + comp_name.as_str(), + array.data_type().clone(), + source_field.is_nullable(), + ))); + struct_arrays.push(array); + } + ColumnGroupEntry::Component { + col_indices, + field_name, + .. 
+ } => { + // TODO(nick): build_archetype_array ignores source null bitmaps (pre-existing gap) + let array = build_archetype_array(batch, col_indices)?; + struct_fields.push(Arc::new(Field::new( + field_name.as_str(), + array.data_type().clone(), + true, + ))); + struct_arrays.push(array); + } + ColumnGroupEntry::ScalarGroup { + col_indices, + names, + field_name, + } => { + // TODO(nick): build_scalar_fsl_array ignores source null bitmaps (pre-existing gap) + let array = build_scalar_fsl_array(batch, col_indices, num_rows)?; + struct_fields.push(Arc::new(Field::new( + field_name.as_str(), + array.data_type().clone(), + true, + ))); + struct_arrays.push(array); + + // Add a companion field with the series names so the viewer + // can associate labels with the scalar data. + let names_array = build_names_array(names, num_rows); + let names_field_name = format!("{field_name}_names"); + struct_fields.push(Arc::new(Field::new( + names_field_name.as_str(), + names_array.data_type().clone(), + true, + ))); + struct_arrays.push(names_array); + } + ColumnGroupEntry::Transform { + translation_col_indices, + rotation_col_indices, + field_name, + .. + } => { + // Build a nested struct with `translation` and `quaternion` fields. 
+ let trans_array = build_archetype_array(batch, translation_col_indices)?; + let rot_array = build_archetype_array(batch, rotation_col_indices)?; + + let inner_fields = Fields::from(vec![ + Arc::new(Field::new( + "translation", + trans_array.data_type().clone(), + true, + )), + Arc::new(Field::new( + "quaternion", + rot_array.data_type().clone(), + true, + )), + ]); + let inner_struct = + StructArray::try_new(inner_fields.clone(), vec![trans_array, rot_array], None) + .ok()?; + + struct_fields.push(Arc::new(Field::new( + field_name.as_str(), + DataType::Struct(inner_fields), + true, + ))); + struct_arrays.push(Arc::new(inner_struct)); + } + } + } + + let struct_array = + StructArray::try_new(Fields::from(struct_fields), struct_arrays, None).ok()?; + + // Each row has exactly 1 struct instance → offsets [0, 1, 2, ..., num_rows] + let offsets = OffsetBuffer::from_lengths(std::iter::repeat_n(1usize, num_rows)); + let struct_field = Arc::new(Field::new("item", struct_array.data_type().clone(), true)); + let list_array = + arrow::array::ListArray::try_new(struct_field, offsets, Arc::new(struct_array), None) + .ok()?; + + Some((ComponentDescriptor::partial("data"), list_array)) +} + +/// Build a `FixedSizeList(N, Float64)` array from `N` scalar columns (interleaved). 
+fn build_scalar_fsl_array( + batch: &RecordBatch, + col_indices: &[usize], + num_rows: usize, +) -> Option> { + let n = col_indices.len(); + let columns: Vec> = col_indices + .iter() + .map(|&idx| read_f64_column(batch.column(idx).as_ref())) + .collect::>>()?; + + let mut values = Vec::with_capacity(num_rows * n); + for i in 0..num_rows { + for col in &columns { + values.push(col[i]); + } + } + + let float_array = Float64Array::from(values); + let inner_field = Arc::new(Field::new("item", DataType::Float64, false)); + let n_i32 = i32::try_from(n).expect("scalar suffix group too large"); + Some(Arc::new(FixedSizeListArray::new( + inner_field, + n_i32, + Arc::new(float_array), + None, + ))) +} + +/// Build a `FixedSizeList(N, Utf8)` array with the same names repeated for each row. +/// +/// Used in struct mode to embed series labels alongside scalar data. +fn build_names_array(names: &[String], num_rows: usize) -> Arc { + let n = names.len(); + let mut values = Vec::with_capacity(num_rows * n); + for _ in 0..num_rows { + for name in names { + values.push(name.as_str()); + } + } + let string_array = arrow::array::StringArray::from(values); + let inner_field = Arc::new(Field::new("item", DataType::Utf8, false)); + let n_i32 = i32::try_from(n).expect("scalar suffix group too large"); + Arc::new(FixedSizeListArray::new( + inner_field, + n_i32, + Arc::new(string_array), + None, + )) +} + +/// Build a component from a single [`ColumnGroupEntry`] (no struct wrapping). +/// +/// Preserves the current behavior for single-entry groups. 
+fn build_single_entry_component( + schema: &arrow::datatypes::Schema, + batch: &RecordBatch, + entry: &ColumnGroupEntry, + num_rows: usize, +) -> Option<(ComponentDescriptor, arrow::array::ListArray)> { + match entry { + ColumnGroupEntry::Raw { col_idx, comp_name } => { + let field = &schema.fields()[*col_idx]; + let array = batch.column(*col_idx).clone(); + let list_array = wrap_in_fixed_size_list(field, array); + Some(( + ComponentDescriptor::partial(comp_name.as_str()), + arrow::array::ListArray::from(list_array), + )) + } + ColumnGroupEntry::Component { + col_indices, + descriptor, + .. + } => { + let array = build_archetype_array(batch, col_indices)?; + let inner_field = Arc::new(Field::new("item", array.data_type().clone(), true)); + let fsl = FixedSizeListArray::new(inner_field, 1, array, None); + Some((descriptor.clone(), arrow::array::ListArray::from(fsl))) + } + ColumnGroupEntry::ScalarGroup { col_indices, .. } => { + let array = build_scalar_fsl_array(batch, col_indices, num_rows)?; + let inner_field = Arc::new(Field::new("item", array.data_type().clone(), true)); + let fsl = FixedSizeListArray::new(inner_field, 1, array, None); + Some(( + re_sdk_types::archetypes::Scalars::descriptor_scalars(), + arrow::array::ListArray::from(fsl), + )) + } + ColumnGroupEntry::Transform { .. } => { + // Transform entries are handled separately (they emit two components). + None + } + } +} + +/// Build two `(descriptor, ListArray)` pairs for a `Transform` entry in flat mode. 
+fn build_transform_components( + batch: &RecordBatch, + translation_col_indices: &[usize], + rotation_col_indices: &[usize], + translation_descriptor: &ComponentDescriptor, + rotation_descriptor: &ComponentDescriptor, +) -> Option> { + let trans_array = build_archetype_array(batch, translation_col_indices)?; + let rot_array = build_archetype_array(batch, rotation_col_indices)?; + + let trans_inner = Arc::new(Field::new("item", trans_array.data_type().clone(), true)); + let trans_fsl = FixedSizeListArray::new(trans_inner, 1, trans_array, None); + + let rot_inner = Arc::new(Field::new("item", rot_array.data_type().clone(), true)); + let rot_fsl = FixedSizeListArray::new(rot_inner, 1, rot_array, None); + + Some(vec![ + ( + translation_descriptor.clone(), + arrow::array::ListArray::from(trans_fsl), + ), + ( + rotation_descriptor.clone(), + arrow::array::ListArray::from(rot_fsl), + ), + ]) +} + +/// Build a `FixedSizeList(N, Float32)` array from `N` scalar columns. +fn build_archetype_array(batch: &RecordBatch, col_indices: &[usize]) -> Option> { + let num_rows = batch.num_rows(); + let n = col_indices.len(); + + let columns: Vec> = col_indices + .iter() + .map(|&idx| read_f32_column(batch.column(idx).as_ref())) + .collect::>>()?; + + let mut values = Vec::with_capacity(num_rows * n); + for i in 0..num_rows { + for col in &columns { + values.push(col[i]); + } + } + + let float_array = Float32Array::from(values); + let inner_field = Arc::new(Field::new("item", DataType::Float32, false)); + let n_i32 = i32::try_from(n).expect("archetype element count too large"); + Some(Arc::new(FixedSizeListArray::new( + inner_field, + n_i32, + Arc::new(float_array), + None, + ))) +} + +/// Convert a numeric Arrow array to `Vec` via arrow cast. 
+fn read_f64_column(array: &dyn Array) -> Option> { + let casted = arrow::compute::cast(array, &DataType::Float64) + .map_err(|_err| { + re_log::warn_once!( + "Unsupported column type for scalar mapping: {:?}", + array.data_type() + ); + }) + .ok()?; + let arr = casted.as_any().downcast_ref::()?; + Some(arr.values().iter().copied().collect()) +} + +/// Convert a numeric Arrow array to `Vec` via arrow cast. +fn read_f32_column(array: &dyn Array) -> Option> { + let casted = arrow::compute::cast(array, &DataType::Float32) + .map_err(|_err| { + re_log::warn_once!( + "Unsupported column type for archetype mapping: {:?}", + array.data_type() + ); + }) + .ok()?; + let arr = casted.as_any().downcast_ref::()?; + Some(arr.values().iter().copied().collect()) +} + +/// Verify that every value in `array` is identical. +fn verify_column_uniform(array: &dyn Array, col_name: &str) -> Result<(), ParquetError> { + if array.len() <= 1 { + return Ok(()); + } + if !is_array_uniform(array) { + return Err( + anyhow::anyhow!("Static column '{col_name}' contains non-uniform values").into(), + ); + } + Ok(()) +} + +/// Check whether all elements in an Arrow array are equal to the first element. +fn is_array_uniform(array: &dyn Array) -> bool { + let len = array.len(); + if len <= 1 { + return true; + } + // slice returns ArrayRef which implements Datum (needed by cmp::eq). + let all = array.slice(0, len); + let first = arrow::array::Scalar::new(array.slice(0, 1)); + if let Ok(bools) = arrow::compute::kernels::cmp::eq(&all, &first) { + bools.true_count() == len + } else { + re_log::warn_once!( + "Cannot verify uniformity for column type {:?}, assuming uniform", + array.data_type() + ); + true + } +} + +/// Format the first element of an array as a string for cross-batch comparison. +fn format_first_value(array: &dyn Array) -> String { + if array.is_empty() { + return String::new(); + } + + macro_rules! 
fmt_primitive { + ($arr_ty:ty) => { + if let Some(arr) = array.as_any().downcast_ref::<$arr_ty>() { + return format!("{}", arr.value(0)); + } + }; + } + fmt_primitive!(Float64Array); + fmt_primitive!(Float32Array); + fmt_primitive!(arrow::array::Int64Array); + fmt_primitive!(arrow::array::Int32Array); + + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(0).to_owned(); + } + if let Some(arr) = array + .as_any() + .downcast_ref::() + { + return arr.value(0).to_owned(); + } + + format!("{array:?}") +} + +/// Wrap each element of an array into a `FixedSizeList` of size 1. +fn wrap_in_fixed_size_list(field: &Field, array: Arc) -> FixedSizeListArray { + let inner_field = Arc::new(Field::new( + "item", + field.data_type().clone(), + field.is_nullable(), + )); + FixedSizeListArray::new(inner_field, 1, array, None) +} diff --git a/crates/store/re_parquet/src/timeline.rs b/crates/store/re_parquet/src/timeline.rs new file mode 100644 index 000000000000..03e28ff7076b --- /dev/null +++ b/crates/store/re_parquet/src/timeline.rs @@ -0,0 +1,155 @@ +//! Timeline resolution and time-value extraction from parquet schemas. + +use arrow::array::{Array, AsArray as _}; +use arrow::buffer::ScalarBuffer; +use arrow::datatypes::DataType; +use re_chunk::TimeColumn; +use re_log_types::{TimeType, Timeline}; + +use crate::config::{IndexColumn, IndexType}; +use crate::streaming::ParquetError; + +/// Identifies which column should be used as a timeline and how to scale it. +pub(crate) struct TimelineInfo { + pub column_index: usize, + pub timeline: Timeline, + + /// Multiplier to convert raw column values to nanoseconds. + /// Always 1 for Sequence timelines. + pub ns_multiplier: i64, +} + +/// Resolve explicit [`IndexColumn`] entries to [`TimelineInfo`]. +/// +/// Returns an error if any named column does not exist in the schema. 
+pub(crate) fn resolve_explicit_index_columns( + schema: &arrow::datatypes::Schema, + columns: &[IndexColumn], +) -> Result, ParquetError> { + columns + .iter() + .map(|col| { + let (column_index, _field) = schema + .fields() + .iter() + .enumerate() + .find(|(_, f)| f.name() == &col.name) + .ok_or_else(|| { + ParquetError::from(anyhow::anyhow!( + "Index column '{}' not found in parquet schema", + col.name + )) + })?; + + let time_type = match col.index_type { + IndexType::Timestamp(_) => TimeType::TimestampNs, + IndexType::Duration(_) => TimeType::DurationNs, + IndexType::Sequence => TimeType::Sequence, + }; + + Ok(TimelineInfo { + column_index, + timeline: Timeline::new(col.name.as_str(), time_type), + ns_multiplier: col.index_type.ns_multiplier(), + }) + }) + .collect() +} + +/// Extract i64 time values from a column, applying the given scaling multiplier. +/// +/// The `ns_multiplier` converts raw values to nanoseconds (1 for ns or sequence, +/// `1_000` for us, etc.). This is determined by the user's `IndexColumn` config, +/// NOT by Arrow schema metadata. +pub(crate) fn extract_time_values( + array: &dyn Array, + ns_multiplier: i64, +) -> Option> { + let raw = extract_raw_i64(array)?; + if ns_multiplier == 1 { + Some(raw) + } else { + let scaled: Vec = raw.iter().map(|&v| v * ns_multiplier).collect(); + Some(ScalarBuffer::from(scaled)) + } +} + +/// Extract raw i64 values from an Arrow array without any unit conversion. +/// +/// For Timestamp/Duration typed arrays, the raw stored i64 is extracted by +/// reading the underlying buffer directly (all Arrow temporal types store i64). 
+fn extract_raw_i64(array: &dyn Array) -> Option> { + match array.data_type() { + DataType::Int64 => { + let arr = array.as_primitive::(); + Some(arr.values().clone()) + } + + DataType::Int32 => { + let arr = array.as_primitive::(); + let vals: Vec = arr.values().iter().map(|&v| i64::from(v)).collect(); + Some(ScalarBuffer::from(vals)) + } + + DataType::Int16 => { + let arr = array.as_primitive::(); + let vals: Vec = arr.values().iter().map(|&v| i64::from(v)).collect(); + Some(ScalarBuffer::from(vals)) + } + + DataType::UInt64 => { + let arr = array.as_primitive::(); + #[expect(clippy::cast_possible_wrap)] + let vals: Vec = arr.values().iter().map(|&v| v as i64).collect(); + Some(ScalarBuffer::from(vals)) + } + + DataType::UInt32 => { + let arr = array.as_primitive::(); + let vals: Vec = arr.values().iter().map(|&v| i64::from(v)).collect(); + Some(ScalarBuffer::from(vals)) + } + + DataType::Float64 => { + let arr = array.as_primitive::(); + #[expect(clippy::cast_possible_truncation)] + let vals: Vec = arr.values().iter().map(|&v| v as i64).collect(); + Some(ScalarBuffer::from(vals)) + } + + DataType::Float32 => { + let arr = array.as_primitive::(); + #[expect(clippy::cast_possible_truncation)] + let vals: Vec = arr.values().iter().map(|&v| v as i64).collect(); + Some(ScalarBuffer::from(vals)) + } + + // All Arrow Timestamp and Duration arrays store i64 values internally. + // We read the raw buffer directly to avoid needing the `compute` feature + // for `arrow::compute::cast`. Buffer layout is identical across all + // temporal unit variants (Nanosecond, Microsecond, Millisecond, Second). 
+ DataType::Timestamp(_, _) | DataType::Duration(_) => { + let data = array.to_data(); + let buffer = data.buffers()[0].clone(); + let values = ScalarBuffer::::new(buffer, data.offset(), data.len()); + Some(values) + } + + other => { + re_log::warn_once!("Cannot use column with type {other:?} as a timeline index"); + None + } + } +} + +/// Create a fallback sequence timeline using row indices starting at `offset`. +pub(crate) fn fallback_sequence_timeline( + offset: i64, + num_rows: usize, +) -> re_chunk::external::nohash_hasher::IntMap { + let timeline = Timeline::new("row_index", TimeType::Sequence); + #[expect(clippy::cast_possible_wrap)] + let times: Vec = (offset..offset + num_rows as i64).collect(); + let time_column = TimeColumn::new(Some(true), timeline, ScalarBuffer::from(times)); + std::iter::once((*timeline.name(), time_column)).collect() +} diff --git a/crates/store/re_parquet/tests/integration.rs b/crates/store/re_parquet/tests/integration.rs new file mode 100644 index 000000000000..97414e7e660c --- /dev/null +++ b/crates/store/re_parquet/tests/integration.rs @@ -0,0 +1,1287 @@ +//! Integration tests for `re_parquet`. + +// Test helpers intentionally use simplified Arrow constructors; the nuances +// handled by the *_with_metadata / *_with_options variants are irrelevant here. 
+#![expect(clippy::disallowed_methods)] +#![expect(clippy::unwrap_used)] + +use std::sync::Arc; + +use arrow::array::{Float64Array, Int64Array, RecordBatch, StringArray}; +use arrow::datatypes::{DataType, Field, Schema}; +use re_chunk::{Chunk, EntityPath}; +use re_log_types::TimeType; +use re_parquet::{ + ColumnGrouping, ColumnMapping, ColumnRule, IndexColumn, IndexType, ParquetConfig, TimeUnit, +}; + +// --------------------------------------------------------------------------- +// Test helpers +// --------------------------------------------------------------------------- + +fn write_parquet_tmp(batch: &RecordBatch) -> std::path::PathBuf { + use parquet::arrow::ArrowWriter; + + let dir = std::env::temp_dir().join("re_parquet_tests"); + std::fs::create_dir_all(&dir).unwrap(); + + let path = dir.join(format!("{}.parquet", re_chunk::ChunkId::new())); + let file = std::fs::File::create(&path).unwrap(); + let mut writer = ArrowWriter::try_new(file, batch.schema(), None).unwrap(); + writer.write(batch).unwrap(); + writer.close().unwrap(); + + path +} + +fn write_parquet_tmp_with_metadata( + batch: &RecordBatch, + kv: Vec, +) -> std::path::PathBuf { + use parquet::arrow::ArrowWriter; + use parquet::file::properties::WriterProperties; + + let dir = std::env::temp_dir().join("re_parquet_tests"); + std::fs::create_dir_all(&dir).unwrap(); + + let path = dir.join(format!("{}.parquet", re_chunk::ChunkId::new())); + let file = std::fs::File::create(&path).unwrap(); + + let props = WriterProperties::builder() + .set_key_value_metadata(Some(kv)) + .build(); + let mut writer = ArrowWriter::try_new(file, batch.schema(), Some(props)).unwrap(); + writer.write(batch).unwrap(); + writer.close().unwrap(); + + path +} + +fn load_chunks(path: &std::path::Path, config: &ParquetConfig) -> Vec { + let prefix = EntityPath::from("/"); + re_parquet::load_parquet(path, config, &prefix) + .unwrap() + .collect::, _>>() + .unwrap() +} + +fn data_chunks(chunks: &[Chunk]) -> Vec<&Chunk> { + chunks 
+ .iter() + .filter(|c| c.entity_path() != &EntityPath::properties()) + .collect() +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[test] +fn basic_individual_grouping() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + ])), + vec![ + Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])), + Arc::new(Float64Array::from(vec![4.0, 5.0, 6.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Individual, + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + assert_eq!(data.len(), 2); + + let x_chunk = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/x")) + .unwrap(); + assert_eq!(x_chunk.num_rows(), 3); + assert_eq!(x_chunk.num_components(), 1); + assert!(x_chunk.timelines().contains_key(&"row_index".into())); + + let y_chunk = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/y")) + .unwrap(); + assert_eq!(y_chunk.num_rows(), 3); + assert_eq!(y_chunk.num_components(), 1); + + // Prefix-named columns stay separate in individual mode. 
+ let batch2 = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("camera_rgb", DataType::Float64, false), + Field::new("camera_depth", DataType::Float64, false), + ])), + vec![ + Arc::new(Float64Array::from(vec![1.0, 2.0])), + Arc::new(Float64Array::from(vec![3.0, 4.0])), + ], + ) + .unwrap(); + + let path2 = write_parquet_tmp(&batch2); + let chunks2 = load_chunks(&path2, &config); + let data2 = data_chunks(&chunks2); + + assert_eq!(data2.len(), 2); + assert!( + data2 + .iter() + .any(|c| c.entity_path() == &EntityPath::from("/camera_rgb")) + ); + assert!( + data2 + .iter() + .any(|c| c.entity_path() == &EntityPath::from("/camera_depth")) + ); +} + +#[test] +fn prefix_grouping() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("frame_index", DataType::Int64, false), + Field::new("camera_rgb", DataType::Float64, false), + Field::new("camera_depth", DataType::Float64, false), + Field::new("joint_position", DataType::Float64, false), + Field::new("joint_velocity", DataType::Float64, false), + Field::new("action", DataType::Float64, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![0, 1, 2])), + Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])), + Arc::new(Float64Array::from(vec![4.0, 5.0, 6.0])), + Arc::new(Float64Array::from(vec![0.1, 0.2, 0.3])), + Arc::new(Float64Array::from(vec![0.4, 0.5, 0.6])), + Arc::new(Float64Array::from(vec![10.0, 20.0, 30.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Prefix { + delimiter: '_', + use_structs: true, + }, + index_columns: vec![IndexColumn { + name: "frame_index".into(), + index_type: IndexType::Sequence, + }], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + // frame_index → timeline, camera_* → 1 struct group, joint_* → 1 struct group, action → 1 single group + assert_eq!(data.len(), 3); + + // Multi-column prefix groups 
produce a single struct component named "data" + let camera = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/camera")) + .expect("should have /camera entity"); + assert_eq!(camera.num_rows(), 3); + assert_eq!( + camera.num_components(), + 1, + "struct component wraps both columns" + ); + assert!(camera.timelines().contains_key(&"frame_index".into())); + + // Verify the struct has the expected fields + let camera_list = camera.components().get_array("data".into()).unwrap(); + let camera_struct = camera_list + .values() + .as_any() + .downcast_ref::() + .expect("should be a StructArray"); + assert_eq!(camera_struct.num_columns(), 2); + assert_eq!(camera_struct.column_by_name("rgb").unwrap().len(), 3); + assert_eq!(camera_struct.column_by_name("depth").unwrap().len(), 3); + + let joint = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/joint")) + .expect("should have /joint entity"); + assert_eq!(joint.num_rows(), 3); + assert_eq!( + joint.num_components(), + 1, + "struct component wraps both columns" + ); + + let joint_list = joint.components().get_array("data".into()).unwrap(); + let joint_struct = joint_list + .values() + .as_any() + .downcast_ref::() + .expect("should be a StructArray"); + assert_eq!(joint_struct.num_columns(), 2); + assert!(joint_struct.column_by_name("position").is_some()); + assert!(joint_struct.column_by_name("velocity").is_some()); + + // Single-column prefix group: no struct wrapping + let action = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/action")) + .expect("should have /action entity"); + assert_eq!(action.num_rows(), 3); + assert_eq!(action.num_components(), 1); +} + +#[test] +fn explicit_timestamp_index() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("ts", DataType::Int64, false), + Field::new("value", DataType::Float64, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![100, 200, 300])), + Arc::new(Float64Array::from(vec![1.0, 2.0, 
3.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Individual, + index_columns: vec![IndexColumn { + name: "ts".into(), + index_type: IndexType::Timestamp(TimeUnit::Nanoseconds), + }], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + assert_eq!(data.len(), 1); + assert_eq!(data[0].entity_path(), &EntityPath::from("/value")); + assert!(data[0].timelines().contains_key(&"ts".into())); + let tl = data[0].timelines().get(&"ts".into()).unwrap(); + assert_eq!(tl.timeline().typ(), TimeType::TimestampNs); +} + +#[test] +fn explicit_sequence_index() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("frame_id", DataType::Int64, false), + Field::new("sensor", DataType::Float64, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![0, 1, 2])), + Arc::new(Float64Array::from(vec![10.0, 20.0, 30.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Individual, + index_columns: vec![IndexColumn { + name: "frame_id".into(), + index_type: IndexType::Sequence, + }], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + assert_eq!(data.len(), 1); + assert_eq!(data[0].entity_path(), &EntityPath::from("/sensor")); + assert!(data[0].timelines().contains_key(&"frame_id".into())); + let tl = data[0].timelines().get(&"frame_id".into()).unwrap(); + assert_eq!(tl.timeline().typ(), TimeType::Sequence); +} + +#[test] +fn explicit_duration_index() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("elapsed_us", DataType::Int64, false), + Field::new("value", DataType::Float64, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![100, 200, 300])), + Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])), + ], + ) + .unwrap(); + + let path = 
write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Individual, + index_columns: vec![IndexColumn { + name: "elapsed_us".into(), + index_type: IndexType::Duration(TimeUnit::Microseconds), + }], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + assert_eq!(data.len(), 1); + let tl = data[0].timelines().get(&"elapsed_us".into()).unwrap(); + assert_eq!(tl.timeline().typ(), TimeType::DurationNs); + let times: Vec = tl.times_raw().to_vec(); + assert_eq!(times, vec![100_000, 200_000, 300_000]); +} + +#[test] +fn time_unit_scaling() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("ts_ms", DataType::Int64, false), + Field::new("value", DataType::Float64, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![1, 2, 3])), + Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Individual, + index_columns: vec![IndexColumn { + name: "ts_ms".into(), + index_type: IndexType::Timestamp(TimeUnit::Milliseconds), + }], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + assert_eq!(data.len(), 1); + let tl = data[0].timelines().get(&"ts_ms".into()).unwrap(); + let times: Vec = tl.times_raw().to_vec(); + assert_eq!(times, vec![1_000_000, 2_000_000, 3_000_000]); +} + +#[test] +fn missing_index_column_is_error() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![Field::new("x", DataType::Float64, false)])), + vec![Arc::new(Float64Array::from(vec![1.0]))], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Individual, + index_columns: vec![IndexColumn { + name: "nonexistent".into(), + index_type: IndexType::Sequence, + }], + ..Default::default() + }; + let prefix = 
EntityPath::from("/"); + assert!(re_parquet::load_parquet(&path, &config, &prefix).is_err()); +} + +#[test] +fn static_columns() { + // Uniform static columns -> timeless chunk + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("frame_index", DataType::Int64, false), + Field::new("value", DataType::Float64, false), + Field::new("suite", DataType::Utf8, false), + Field::new("agg", DataType::Utf8, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![0, 1, 2])), + Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])), + Arc::new(StringArray::from(vec!["test_suite"; 3])), + Arc::new(StringArray::from(vec!["mean"; 3])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Individual, + index_columns: vec![IndexColumn { + name: "frame_index".into(), + index_type: IndexType::Sequence, + }], + static_columns: vec!["suite".into(), "agg".into()], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let all = data_chunks(&chunks); + + let static_chunks: Vec<_> = all.iter().filter(|c| c.is_static()).collect(); + assert_eq!(static_chunks.len(), 1); + assert_eq!(static_chunks[0].num_rows(), 1); + assert_eq!(static_chunks[0].num_components(), 2); + + let data_only: Vec<_> = all.iter().filter(|c| !c.is_static()).collect(); + assert_eq!(data_only.len(), 1); + assert_eq!(data_only[0].entity_path(), &EntityPath::from("/value")); + + // Non-uniform static column -> error + let bad_batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("x", DataType::Float64, false), + Field::new("suite", DataType::Utf8, false), + ])), + vec![ + Arc::new(Float64Array::from(vec![1.0, 2.0])), + Arc::new(StringArray::from(vec!["a", "b"])), + ], + ) + .unwrap(); + + let bad_path = write_parquet_tmp(&bad_batch); + let bad_config = ParquetConfig { + column_grouping: ColumnGrouping::Individual, + static_columns: vec!["suite".into()], + ..Default::default() + }; + let 
prefix = EntityPath::from("/"); + let result: Vec<_> = re_parquet::load_parquet(&bad_path, &bad_config, &prefix) + .unwrap() + .collect(); + + assert!( + result.iter().any(|r| r.is_err()), + "Non-uniform static column should produce an error" + ); +} + +#[test] +fn empty_batch() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![Field::new("x", DataType::Int64, false)])), + vec![Arc::new(Int64Array::from(Vec::::new()))], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let chunks = load_chunks(&path, &ParquetConfig::default()); + let data = data_chunks(&chunks); + assert!(data.is_empty()); +} + +#[test] +fn file_metadata() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![Field::new("x", DataType::Int64, false)])), + vec![Arc::new(Int64Array::from(vec![1]))], + ) + .unwrap(); + + let kv = vec![ + parquet::file::metadata::KeyValue::new("author".to_owned(), Some("test".to_owned())), + parquet::file::metadata::KeyValue::new("version".to_owned(), Some("1.0".to_owned())), + ]; + let path = write_parquet_tmp_with_metadata(&batch, kv); + let chunks = load_chunks(&path, &ParquetConfig::default()); + + let props = chunks + .iter() + .find(|c| c.entity_path() == &EntityPath::properties()) + .expect("should have a properties chunk"); + assert!(props.is_static()); + + assert!(!data_chunks(&chunks).is_empty()); +} + +#[test] +fn archetype_rules_transform3d() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("frame_index", DataType::Int64, false), + Field::new("A_pos_x", DataType::Float64, false), + Field::new("A_pos_y", DataType::Float64, false), + Field::new("A_pos_z", DataType::Float64, false), + Field::new("A_quat_x", DataType::Float64, false), + Field::new("A_quat_y", DataType::Float64, false), + Field::new("A_quat_z", DataType::Float64, false), + Field::new("A_quat_w", DataType::Float64, false), + Field::new("A_speed", DataType::Float64, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![0, 1])), 
+ Arc::new(Float64Array::from(vec![1.0, 4.0])), + Arc::new(Float64Array::from(vec![2.0, 5.0])), + Arc::new(Float64Array::from(vec![3.0, 6.0])), + Arc::new(Float64Array::from(vec![0.0, 0.0])), + Arc::new(Float64Array::from(vec![0.0, 0.0])), + Arc::new(Float64Array::from(vec![0.0, 0.0])), + Arc::new(Float64Array::from(vec![1.0, 1.0])), + Arc::new(Float64Array::from(vec![9.0, 8.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Prefix { + delimiter: '_', + use_structs: true, + }, + index_columns: vec![IndexColumn { + name: "frame_index".into(), + index_type: IndexType::Sequence, + }], + column_rules: vec![ + ColumnRule { + suffixes: vec!["_pos_x".into(), "_pos_y".into(), "_pos_z".into()], + mapping: ColumnMapping::translation3d(), + field_name_override: None, + }, + ColumnRule { + suffixes: vec![ + "_quat_x".into(), + "_quat_y".into(), + "_quat_z".into(), + "_quat_w".into(), + ], + mapping: ColumnMapping::rotation_quat(), + field_name_override: None, + }, + ], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + // All columns for prefix "A" collapse into a single chunk with one struct component + let a_chunks: Vec<_> = data + .iter() + .filter(|c| c.entity_path() == &EntityPath::from("/A")) + .collect(); + + assert_eq!(a_chunks.len(), 1, "all entries in one struct → one chunk"); + + let a_chunk = a_chunks[0]; + assert_eq!(a_chunk.num_rows(), 2); + assert_eq!(a_chunk.num_components(), 1, "single struct component"); + assert!(a_chunk.timelines().contains_key(&"frame_index".into())); + + // Verify the struct fields: archetype fields + raw leftover + let a_list = a_chunk.components().get_array("data".into()).unwrap(); + let a_struct = a_list + .values() + .as_any() + .downcast_ref::() + .expect("should be a StructArray"); + + // 3 struct fields: pos (FixedSizeList(3, Float32)), quat (FixedSizeList(4, Float32)), speed (Float64) + 
assert_eq!(a_struct.num_columns(), 3); + + let pos_field = a_struct + .column_by_name("pos") + .expect("should have pos field"); + assert!( + matches!(pos_field.data_type(), DataType::FixedSizeList(_, 3)), + "pos should be FixedSizeList(3, _), got {:?}", + pos_field.data_type() + ); + + let quat_field = a_struct + .column_by_name("quat") + .expect("should have quat field"); + assert!( + matches!(quat_field.data_type(), DataType::FixedSizeList(_, 4)), + "quat should be FixedSizeList(4, _), got {:?}", + quat_field.data_type() + ); + + let speed_field = a_struct + .column_by_name("speed") + .expect("should have speed field"); + assert_eq!(speed_field.data_type(), &DataType::Float64); +} + +#[test] +fn prefix_grouping_flat() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("frame_index", DataType::Int64, false), + Field::new("camera_rgb", DataType::Float64, false), + Field::new("camera_depth", DataType::Float64, false), + Field::new("joint_position", DataType::Float64, false), + Field::new("joint_velocity", DataType::Float64, false), + Field::new("action", DataType::Float64, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![0, 1, 2])), + Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])), + Arc::new(Float64Array::from(vec![4.0, 5.0, 6.0])), + Arc::new(Float64Array::from(vec![0.1, 0.2, 0.3])), + Arc::new(Float64Array::from(vec![0.4, 0.5, 0.6])), + Arc::new(Float64Array::from(vec![10.0, 20.0, 30.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Prefix { + delimiter: '_', + use_structs: false, + }, + index_columns: vec![IndexColumn { + name: "frame_index".into(), + index_type: IndexType::Sequence, + }], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + // Flat mode: entries grouped by entity path, no struct wrapping + assert_eq!( + data.len(), + 3, + "one chunk per entity path (camera, joint, 
action)" + ); + + let camera = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/camera")) + .expect("should have /camera"); + assert_eq!(camera.num_rows(), 3); + assert_eq!( + camera.num_components(), + 2, + "rgb and depth as separate components" + ); + assert!(camera.timelines().contains_key(&"frame_index".into())); + + let joint = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/joint")) + .expect("should have /joint"); + assert_eq!(joint.num_rows(), 3); + assert_eq!( + joint.num_components(), + 2, + "position and velocity as separate components" + ); + + let action = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/action")) + .expect("should have /action"); + assert_eq!(action.num_rows(), 3); + assert_eq!(action.num_components(), 1); +} + +#[test] +fn archetype_rules_transform3d_flat() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("frame_index", DataType::Int64, false), + Field::new("A_pos_x", DataType::Float64, false), + Field::new("A_pos_y", DataType::Float64, false), + Field::new("A_pos_z", DataType::Float64, false), + Field::new("A_quat_x", DataType::Float64, false), + Field::new("A_quat_y", DataType::Float64, false), + Field::new("A_quat_z", DataType::Float64, false), + Field::new("A_quat_w", DataType::Float64, false), + Field::new("A_speed", DataType::Float64, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![0, 1])), + Arc::new(Float64Array::from(vec![1.0, 4.0])), + Arc::new(Float64Array::from(vec![2.0, 5.0])), + Arc::new(Float64Array::from(vec![3.0, 6.0])), + Arc::new(Float64Array::from(vec![0.0, 0.0])), + Arc::new(Float64Array::from(vec![0.0, 0.0])), + Arc::new(Float64Array::from(vec![0.0, 0.0])), + Arc::new(Float64Array::from(vec![1.0, 1.0])), + Arc::new(Float64Array::from(vec![9.0, 8.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Prefix { + delimiter: '_', + use_structs: 
false, + }, + index_columns: vec![IndexColumn { + name: "frame_index".into(), + index_type: IndexType::Sequence, + }], + column_rules: vec![ + ColumnRule { + suffixes: vec!["_pos_x".into(), "_pos_y".into(), "_pos_z".into()], + mapping: ColumnMapping::translation3d(), + field_name_override: None, + }, + ColumnRule { + suffixes: vec![ + "_quat_x".into(), + "_quat_y".into(), + "_quat_z".into(), + "_quat_w".into(), + ], + mapping: ColumnMapping::rotation_quat(), + field_name_override: None, + }, + ], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + assert_eq!(data.len(), 3, "pos + quat + speed as separate chunks"); + + let pos = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/A/pos")) + .expect("should have /A/pos"); + assert_eq!(pos.num_rows(), 2); + assert_eq!(pos.num_components(), 1); + + let quat = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/A/quat")) + .expect("should have /A/quat"); + assert_eq!(quat.num_rows(), 2); + assert_eq!(quat.num_components(), 1); + + let speed = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/A")) + .expect("should have /A (raw speed)"); + assert_eq!(speed.num_rows(), 2); + assert_eq!(speed.num_components(), 1); +} + +#[test] +fn scalar_suffixes_flat() { + // Columns like sensor_accel_x where after prefix split on '_' the comp_names + // are accel_x, accel_y, accel_z — suffix _x matches accel_x but NOT accel_ax. 
+ let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("frame_index", DataType::Int64, false), + Field::new("sensor_accel_x", DataType::Float64, false), + Field::new("sensor_accel_y", DataType::Float64, false), + Field::new("sensor_accel_z", DataType::Float64, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![0, 1])), + Arc::new(Float64Array::from(vec![1.0, 2.0])), + Arc::new(Float64Array::from(vec![3.0, 4.0])), + Arc::new(Float64Array::from(vec![5.0, 6.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Prefix { + delimiter: '_', + use_structs: false, + }, + index_columns: vec![IndexColumn { + name: "frame_index".into(), + index_type: IndexType::Sequence, + }], + column_rules: vec![ColumnRule { + suffixes: vec!["_x".into(), "_y".into(), "_z".into()], + mapping: ColumnMapping::Scalars { + names: vec!["x".into(), "y".into(), "z".into()], + }, + field_name_override: None, + }], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + // comp_names: accel_x, accel_y, accel_z + // suffix _x matches accel_x → raw_sub "accel" → sub_prefix "accel" + // field_name = "accel" → entity path = /sensor/accel + let scalars_path = EntityPath::from("/sensor/accel"); + + // Data chunk: Scalars component + let data_only: Vec<_> = data.iter().filter(|c| !c.is_static()).collect(); + assert_eq!(data_only.len(), 1); + assert_eq!(data_only[0].entity_path(), &scalars_path); + assert_eq!(data_only[0].num_rows(), 2); + + // Static Name chunk: series labels + let static_chunks: Vec<_> = data.iter().filter(|c| c.is_static()).collect(); + assert_eq!(static_chunks.len(), 1, "should have static Name chunk"); + assert_eq!(static_chunks[0].entity_path(), &scalars_path); + assert_eq!(static_chunks[0].num_components(), 1); +} + +#[test] +fn scalar_suffixes_no_false_match() { + // Suffix _x should NOT match comp_name ending in "ax" (no 
delimiter boundary) + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("frame_index", DataType::Int64, false), + Field::new("sensor_accel_ax", DataType::Float64, false), + Field::new("sensor_accel_ay", DataType::Float64, false), + Field::new("sensor_accel_az", DataType::Float64, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![0, 1])), + Arc::new(Float64Array::from(vec![1.0, 2.0])), + Arc::new(Float64Array::from(vec![3.0, 4.0])), + Arc::new(Float64Array::from(vec![5.0, 6.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Prefix { + delimiter: '_', + use_structs: true, + }, + index_columns: vec![IndexColumn { + name: "frame_index".into(), + index_type: IndexType::Sequence, + }], + column_rules: vec![ColumnRule { + suffixes: vec!["_x".into(), "_y".into(), "_z".into()], + mapping: ColumnMapping::Scalars { + names: vec!["x".into(), "y".into(), "z".into()], + }, + field_name_override: None, + }], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + // accel_ax does NOT end in _x, so no scalar group should be created. + // All three columns should be raw entries in the "sensor" struct. 
+ let sensor = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/sensor")) + .expect("should have /sensor"); + let sensor_list = sensor.components().get_array("data".into()).unwrap(); + let sensor_struct = sensor_list + .values() + .as_any() + .downcast_ref::() + .expect("should be a StructArray"); + // 3 raw fields, not grouped into a scalar + assert_eq!(sensor_struct.num_columns(), 3); + assert!(sensor_struct.column_by_name("accel_ax").is_some()); + assert!(sensor_struct.column_by_name("accel_ay").is_some()); + assert!(sensor_struct.column_by_name("accel_az").is_some()); +} + +// --------------------------------------------------------------------------- +// Explicit prefix grouping +// --------------------------------------------------------------------------- + +#[test] +fn explicit_prefixes_basic() { + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("frame_index", DataType::Int64, false), + Field::new("fooa", DataType::Float64, false), + Field::new("foob", DataType::Float64, false), + Field::new("fooc", DataType::Float64, false), + Field::new("cata", DataType::Float64, false), + Field::new("catb", DataType::Float64, false), + Field::new("catc", DataType::Float64, false), + Field::new("other", DataType::Float64, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![0, 1])), + Arc::new(Float64Array::from(vec![1.0, 2.0])), + Arc::new(Float64Array::from(vec![3.0, 4.0])), + Arc::new(Float64Array::from(vec![5.0, 6.0])), + Arc::new(Float64Array::from(vec![7.0, 8.0])), + Arc::new(Float64Array::from(vec![9.0, 10.0])), + Arc::new(Float64Array::from(vec![11.0, 12.0])), + Arc::new(Float64Array::from(vec![13.0, 14.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::ExplicitPrefixes { + prefixes: vec!["cat".into(), "foo".into()], + use_structs: true, + }, + index_columns: vec![IndexColumn { + name: "frame_index".into(), + index_type: 
IndexType::Sequence, + }], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + // foo group (3 columns), cat group (3 columns), "other" individual + assert_eq!(data.len(), 3); + + let foo = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/foo")) + .expect("should have /foo"); + assert_eq!(foo.num_rows(), 2); + // Multi-column group → struct with 3 fields + let foo_list = foo.components().get_array("data".into()).unwrap(); + let foo_struct = foo_list + .values() + .as_any() + .downcast_ref::() + .expect("should be a StructArray"); + assert_eq!(foo_struct.num_columns(), 3); + assert!(foo_struct.column_by_name("a").is_some()); + assert!(foo_struct.column_by_name("b").is_some()); + assert!(foo_struct.column_by_name("c").is_some()); + + let cat = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/cat")) + .expect("should have /cat"); + assert_eq!(cat.num_rows(), 2); + let cat_list = cat.components().get_array("data".into()).unwrap(); + let cat_struct = cat_list + .values() + .as_any() + .downcast_ref::() + .expect("should be a StructArray"); + assert_eq!(cat_struct.num_columns(), 3); + + let other = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/other")) + .expect("should have /other for unmatched column"); + assert_eq!(other.num_rows(), 2); +} + +#[test] +fn explicit_prefixes_longest_first() { + // "catalog" prefix should match "catalogfoo" before "cat" does + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("catx", DataType::Float64, false), + Field::new("catalogfoo", DataType::Float64, false), + ])), + vec![ + Arc::new(Float64Array::from(vec![1.0])), + Arc::new(Float64Array::from(vec![2.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::ExplicitPrefixes { + prefixes: vec!["cat".into(), "catalog".into()], + use_structs: true, + }, + 
..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + // "catx" → prefix "cat", comp "x" (single-column group, no struct) + // "catalogfoo" → prefix "catalog", comp "foo" (single-column group) + assert!( + data.iter() + .any(|c| c.entity_path() == &EntityPath::from("/cat")), + "should have /cat" + ); + assert!( + data.iter() + .any(|c| c.entity_path() == &EntityPath::from("/catalog")), + "should have /catalog" + ); +} + +#[test] +fn explicit_prefixes_underscore_stripping() { + // prefix "cat" on column "cat_foo" should give comp "foo" (leading _ stripped) + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("cat_foo", DataType::Float64, false), + Field::new("cat_bar", DataType::Float64, false), + ])), + vec![ + Arc::new(Float64Array::from(vec![1.0])), + Arc::new(Float64Array::from(vec![2.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::ExplicitPrefixes { + prefixes: vec!["cat".into()], + use_structs: true, + }, + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + let cat = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/cat")) + .expect("should have /cat"); + let cat_list = cat.components().get_array("data".into()).unwrap(); + let cat_struct = cat_list + .values() + .as_any() + .downcast_ref::() + .expect("should be a StructArray"); + // Comp names should be "foo" and "bar", not "_foo" and "_bar" + assert!( + cat_struct.column_by_name("foo").is_some(), + "should have field 'foo'" + ); + assert!( + cat_struct.column_by_name("bar").is_some(), + "should have field 'bar'" + ); +} + +// --------------------------------------------------------------------------- +// Struct-mode Name emission +// --------------------------------------------------------------------------- + +#[test] +fn scalar_suffixes_struct_names_in_struct() { + let 
batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("frame_index", DataType::Int64, false), + Field::new("sensor_accel_x", DataType::Float64, false), + Field::new("sensor_accel_y", DataType::Float64, false), + Field::new("sensor_accel_z", DataType::Float64, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![0, 1])), + Arc::new(Float64Array::from(vec![1.0, 2.0])), + Arc::new(Float64Array::from(vec![3.0, 4.0])), + Arc::new(Float64Array::from(vec![5.0, 6.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Prefix { + delimiter: '_', + use_structs: true, + }, + index_columns: vec![IndexColumn { + name: "frame_index".into(), + index_type: IndexType::Sequence, + }], + column_rules: vec![ColumnRule { + suffixes: vec!["_x".into(), "_y".into(), "_z".into()], + mapping: ColumnMapping::Scalars { + names: vec!["x".into(), "y".into(), "z".into()], + }, + field_name_override: None, + }], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + let sensor_path = EntityPath::from("/sensor"); + + // In struct mode, names are embedded in the struct — no static Name chunks + let data_only: Vec<_> = data.iter().filter(|c| !c.is_static()).collect(); + assert_eq!(data_only.len(), 1); + assert_eq!(data_only[0].entity_path(), &sensor_path); + + let static_chunks: Vec<_> = data.iter().filter(|c| c.is_static()).collect(); + assert_eq!( + static_chunks.len(), + 0, + "struct mode should NOT emit static Name chunks" + ); + + // Verify the struct has both data and names fields + let sensor_list = data_only[0].components().get_array("data".into()).unwrap(); + let sensor_struct = sensor_list + .values() + .as_any() + .downcast_ref::() + .expect("should be a StructArray"); + + // Should have "accel" (data) and "accel_names" (labels) + assert_eq!(sensor_struct.num_columns(), 2); + assert!( + sensor_struct.column_by_name("accel").is_some(), 
+ "should have 'accel' data field" + ); + let names_col = sensor_struct + .column_by_name("accel_names") + .expect("should have 'accel_names' field"); + assert!( + matches!(names_col.data_type(), DataType::FixedSizeList(_, 3)), + "names should be FixedSizeList(3, _)" + ); +} + +// --------------------------------------------------------------------------- +// Field name override +// --------------------------------------------------------------------------- + +#[test] +fn field_name_override_archetype() { + // Columns Foo_name_pos_x/y/z and Foo_name_quat_x/y/z/w + // Without override both get field_name "name" → collision. + // With override "_pos" and "_quat" → "name_pos" and "name_quat". + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("frame_index", DataType::Int64, false), + Field::new("Foo_name_pos_x", DataType::Float64, false), + Field::new("Foo_name_pos_y", DataType::Float64, false), + Field::new("Foo_name_pos_z", DataType::Float64, false), + Field::new("Foo_name_quat_x", DataType::Float64, false), + Field::new("Foo_name_quat_y", DataType::Float64, false), + Field::new("Foo_name_quat_z", DataType::Float64, false), + Field::new("Foo_name_quat_w", DataType::Float64, false), + ])), + vec![ + Arc::new(Int64Array::from(vec![0, 1])), + Arc::new(Float64Array::from(vec![1.0, 4.0])), + Arc::new(Float64Array::from(vec![2.0, 5.0])), + Arc::new(Float64Array::from(vec![3.0, 6.0])), + Arc::new(Float64Array::from(vec![0.0, 0.0])), + Arc::new(Float64Array::from(vec![0.0, 0.0])), + Arc::new(Float64Array::from(vec![0.0, 0.0])), + Arc::new(Float64Array::from(vec![1.0, 1.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Prefix { + delimiter: '_', + use_structs: true, + }, + index_columns: vec![IndexColumn { + name: "frame_index".into(), + index_type: IndexType::Sequence, + }], + column_rules: vec![ + ColumnRule { + suffixes: vec!["_pos_x".into(), "_pos_y".into(), 
"_pos_z".into()], + mapping: ColumnMapping::translation3d(), + field_name_override: Some("_pos".into()), + }, + ColumnRule { + suffixes: vec![ + "_quat_x".into(), + "_quat_y".into(), + "_quat_z".into(), + "_quat_w".into(), + ], + mapping: ColumnMapping::rotation_quat(), + field_name_override: Some("_quat".into()), + }, + ], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + let foo = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/Foo")) + .expect("should have /Foo"); + let foo_list = foo.components().get_array("data".into()).unwrap(); + let foo_struct = foo_list + .values() + .as_any() + .downcast_ref::() + .expect("should be a StructArray"); + + // Should have name_pos and name_quat, NOT two fields both named "name" + assert_eq!(foo_struct.num_columns(), 2); + assert!( + foo_struct.column_by_name("name_pos").is_some(), + "should have field 'name_pos', got fields: {:?}", + foo_struct + .fields() + .iter() + .map(|f| f.name()) + .collect::>() + ); + assert!( + foo_struct.column_by_name("name_quat").is_some(), + "should have field 'name_quat', got fields: {:?}", + foo_struct + .fields() + .iter() + .map(|f| f.name()) + .collect::>() + ); +} + +#[test] +fn field_name_override_empty_sub_prefix() { + // When sub_prefix is empty, override (stripped of _) is used directly + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("Foo_pos_x", DataType::Float64, false), + Field::new("Foo_pos_y", DataType::Float64, false), + Field::new("Foo_pos_z", DataType::Float64, false), + ])), + vec![ + Arc::new(Float64Array::from(vec![1.0])), + Arc::new(Float64Array::from(vec![2.0])), + Arc::new(Float64Array::from(vec![3.0])), + ], + ) + .unwrap(); + + let path = write_parquet_tmp(&batch); + let config = ParquetConfig { + column_grouping: ColumnGrouping::Prefix { + delimiter: '_', + use_structs: true, + }, + column_rules: vec![ColumnRule { + suffixes: vec!["_pos_x".into(), 
"_pos_y".into(), "_pos_z".into()], + mapping: ColumnMapping::Component { + descriptor: re_sdk_types::archetypes::Transform3D::descriptor_translation(), + }, + field_name_override: Some("_pos".into()), + }], + ..Default::default() + }; + let chunks = load_chunks(&path, &config); + let data = data_chunks(&chunks); + + let foo = data + .iter() + .find(|c| c.entity_path() == &EntityPath::from("/Foo")) + .expect("should have /Foo"); + + // sub_prefix is empty (comp "pos_x" strip_suffix "pos_x" → ""), + // override "_pos" → field_name "pos" + let foo_list = foo.components().get_array("data".into()); + // Single-entry group: no struct wrapping, component is the archetype directly. + // The field_name is used for flat_entity_path but not for struct field when single entry. + assert!(foo_list.is_some() || foo.num_components() == 1); +} diff --git a/crates/store/re_protos/Cargo.toml b/crates/store/re_protos/Cargo.toml index 90cd4628cf9a..d58610313178 100644 --- a/crates/store/re_protos/Cargo.toml +++ b/crates/store/re_protos/Cargo.toml @@ -36,6 +36,7 @@ arrow.workspace = true http.workspace = true jiff.workspace = true lz4_flex.workspace = true +opentelemetry.workspace = true pin-project-lite.workspace = true prost-types.workspace = true prost.workspace = true diff --git a/crates/store/re_protos/proto/rerun/v1alpha1/cloud.proto b/crates/store/re_protos/proto/rerun/v1alpha1/cloud.proto index 604dfa3db39d..a4b57b9c52c7 100644 --- a/crates/store/re_protos/proto/rerun/v1alpha1/cloud.proto +++ b/crates/store/re_protos/proto/rerun/v1alpha1/cloud.proto @@ -23,6 +23,12 @@ import "rerun/v1alpha1/log_msg.proto"; service RerunCloudService { rpc Version(VersionRequest) returns (VersionResponse) {} + // Returns information about the currently authenticated user. + // + // This is a lightweight endpoint that can be used to verify that authentication + // is successful and to retrieve the user's identity. 
+ rpc WhoAmI(WhoAmIRequest) returns (WhoAmIResponse) {} + // --- Catalog --- rpc FindEntries(FindEntriesRequest) returns (FindEntriesResponse) {} @@ -198,6 +204,9 @@ service RerunCloudService { // Query the status of submitted tasks as soon as they are no longer pending rpc QueryTasksOnCompletion(QueryTasksOnCompletionRequest) returns (stream QueryTasksOnCompletionResponse) {} + // Cancel existing tasks + rpc CancelTasks(CancelTasksRequest) returns (CancelTasksResponse); + // --- Utilities --- // Rerun Manifests maintenance operations: scalar index creation, compaction, etc. @@ -219,6 +228,25 @@ message VersionResponse { // A single version string representing the version of the whole stack. string version = 2; + + // Cloud provider hosting this instance (e.g. "aws", "azure"). Null if not deployed on cloud. + optional string cloud_provider = 3; + + // Cloud region where this instance is deployed (e.g. "us-west-2", "eastus"). Null if not deployed on cloud. + optional string cloud_region = 4; +} + +message WhoAmIRequest {} + +message WhoAmIResponse { + // The user ID of the authenticated user, if any. + optional string user_id = 1; + + // Whether the user has read access. + bool can_read = 2; + + // Whether the user has write access. + bool can_write = 3; } // Application level error - used as `details` in the `google.rpc.Status` message @@ -590,6 +618,10 @@ message QueryDatasetRequest { // all segments will be queried. repeated rerun.common.v1alpha1.SegmentId segment_ids = 11; + // Will ask the server to generate direct URLs for the requested segments. + // It is not guaranteed that the server will return all (or any) of them. + bool generate_direct_urls = 12; + // Client can specify specific chunk ids to include. If left unspecified (empty list), // all chunks that match other query parameters will be included. 
repeated rerun.common.v1alpha1.Tuid chunk_ids = 3; @@ -849,6 +881,15 @@ message QueryTasksOnCompletionResponse { rerun.common.v1alpha1.DataframePart data = 1; } +// `CancelTasksRequest` is the request message for cancelling a number of tasks +message CancelTasksRequest { + // Unique identifiers for the tasks + repeated rerun.common.v1alpha1.TaskId ids = 1; +} + +// `CancelTasksResponse` is the response message for cancelling a number of tasks +message CancelTasksResponse {} + // --- Catalog --- // FindEntries diff --git a/crates/store/re_protos/proto/rerun/v1alpha1/common.proto b/crates/store/re_protos/proto/rerun/v1alpha1/common.proto index f3c9baf5bb17..3ca44256ae12 100644 --- a/crates/store/re_protos/proto/rerun/v1alpha1/common.proto +++ b/crates/store/re_protos/proto/rerun/v1alpha1/common.proto @@ -64,7 +64,7 @@ message IndexRange { } // The unique identifier of an entity, e.g. `camera/3/points` -// See for more on entity paths. +// See for more on entity paths. message EntityPath { string path = 1; } diff --git a/crates/store/re_protos/src/headers.rs b/crates/store/re_protos/src/headers.rs index a645764fa46d..ebc5b28b354c 100644 --- a/crates/store/re_protos/src/headers.rs +++ b/crates/store/re_protos/src/headers.rs @@ -30,12 +30,14 @@ pub const HTTP_HEADER_AUTHORIZATION: &str = "authorization"; /// Example: /// ``` /// # use re_protos::headers::RerunHeadersInjectorExt as _; -/// let mut req = tonic::Request::new(()).with_entry_name("droid:sample2k").unwrap(); +/// # use re_log_types::EntryName; +/// let entry_name = EntryName::new("my_entry").unwrap(); +/// let mut req = tonic::Request::new(()).with_entry_name(entry_name).unwrap(); /// ``` pub trait RerunHeadersInjectorExt: Sized { fn with_entry_id(self, entry_id: re_log_types::EntryId) -> tonic::Result; - fn with_entry_name(self, entry_name: impl AsRef) -> tonic::Result; + fn with_entry_name(self, entry_name: EntryName) -> tonic::Result; fn with_metadata(self, md: &tonic::metadata::MetadataMap) -> Self; } @@ 
-56,11 +58,11 @@ impl RerunHeadersInjectorExt for tonic::Request { Ok(self) } - fn with_entry_name(mut self, entry_name: impl AsRef) -> tonic::Result { + fn with_entry_name(mut self, entry_name: EntryName) -> tonic::Result { const HEADER: &str = RERUN_HTTP_HEADER_ENTRY_NAME; - let entry_name = entry_name.as_ref(); - let entry_name = tonic::metadata::BinaryMetadataValue::from_bytes(entry_name.as_bytes()); + let entry_name = + tonic::metadata::BinaryMetadataValue::from_bytes(entry_name.as_str().as_bytes()); self.metadata_mut().insert_bin(HEADER, entry_name); @@ -97,7 +99,7 @@ impl RerunHeadersInjectorExt for tonic::Request { pub trait RerunHeadersExtractorExt { fn entry_id(&self) -> tonic::Result>; - fn entry_name(&self) -> tonic::Result>; + fn entry_name(&self) -> tonic::Result>; } impl RerunHeadersExtractorExt for tonic::Request { @@ -122,7 +124,7 @@ impl RerunHeadersExtractorExt for tonic::Request { Ok(Some(entry_id)) } - fn entry_name(&self) -> tonic::Result> { + fn entry_name(&self) -> tonic::Result> { const HEADER: &str = RERUN_HTTP_HEADER_ENTRY_NAME; let Some(entry_name) = self.metadata().get_bin(HEADER) else { @@ -139,6 +141,8 @@ impl RerunHeadersExtractorExt for tonic::Request { "'{entry_name:?}' is not a valid value for '{HEADER}': {err:#}" )) })?; + let entry_name = EntryName::new(&entry_name) + .map_err(|err| tonic::Status::invalid_argument(err.to_string()))?; Ok(Some(entry_name)) } @@ -312,6 +316,8 @@ use pin_project_lite::pin_project; use tower::Service; use tower::layer::Layer; +use crate::EntryName; + /// Layer that applies [`PropagateHeaders`] which propagates multiple headers at once from requests to responses. /// /// If the headers are present on the request they'll be applied to the response as well. 
This could diff --git a/crates/store/re_protos/src/lib.rs b/crates/store/re_protos/src/lib.rs index fb8b327a6ceb..a2827074a5fb 100644 --- a/crates/store/re_protos/src/lib.rs +++ b/crates/store/re_protos/src/lib.rs @@ -10,6 +10,9 @@ pub mod external { } pub mod headers; +pub mod trace_id_layer; + +pub use re_log_types::{EntryName, InvalidEntryNameError}; // This extra module is needed, because of how imports from different packages are resolved. // For example, `rerun.remote_store.v1alpha1.EncoderVersion` is resolved to `super::super::remote_store::v1alpha1::EncoderVersion`. @@ -111,6 +114,9 @@ pub enum TypeConversionError { type_name: &'static str, }, + #[error("invalid entry name: {0}")] + InvalidEntryName(#[from] InvalidEntryNameError), + #[error("failed to parse timestamp: {0}")] InvalidTime(#[from] jiff::Error), diff --git a/crates/store/re_protos/src/trace_id_layer.rs b/crates/store/re_protos/src/trace_id_layer.rs new file mode 100644 index 000000000000..5e83df51d2d5 --- /dev/null +++ b/crates/store/re_protos/src/trace_id_layer.rs @@ -0,0 +1,101 @@ +use std::future::Future; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use opentelemetry::TraceId; +use tower::Service; +use tower::layer::Layer; + +/// The HTTP header key for the request trace ID, used to correlate responses with +/// distributed traces for debugging and support. +pub const RERUN_HTTP_HEADER_REQUEST_TRACE_ID: &str = "x-request-trace-id"; + +/// A function that returns the current trace ID, if any. +pub type TraceIdProvider = Arc Option + Send + Sync>; + +/// A [`tower::Layer`] that injects a trace ID into all responses +/// via the [`RERUN_HTTP_HEADER_REQUEST_TRACE_ID`] header. +/// +/// The trace ID is obtained by calling the provided [`TraceIdProvider`]. +/// +/// See [`TraceIdService`]. 
+#[derive(Clone)] +pub struct TraceIdLayer { + trace_id_provider: TraceIdProvider, +} + +impl TraceIdLayer { + pub fn new(trace_id_provider: TraceIdProvider) -> Self { + Self { trace_id_provider } + } +} + +impl Layer for TraceIdLayer { + type Service = TraceIdService; + + fn layer(&self, inner: S) -> Self::Service { + TraceIdService { + inner, + trace_id_provider: Arc::clone(&self.trace_id_provider), + } + } +} + +/// A [`tower::Service`] that injects a trace ID into all responses +/// via the [`RERUN_HTTP_HEADER_REQUEST_TRACE_ID`] header. +/// +/// See [`TraceIdLayer`]. +#[derive(Clone)] +pub struct TraceIdService { + inner: S, + trace_id_provider: TraceIdProvider, +} + +type BoxFuture<'a, T> = Pin + Send + 'a>>; + +impl Service> for TraceIdService +where + S: Service, Response = http::Response> + Clone + Send + 'static, + S::Future: Send + 'static, + ReqBody: Send + 'static, +{ + type Response = S::Response; + type Error = S::Error; + type Future = BoxFuture<'static, Result>; + + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.inner.poll_ready(cx) + } + + fn call(&mut self, req: http::Request) -> Self::Future { + // See: https://docs.rs/tower/latest/tower/trait.Service.html#be-careful-when-cloning-inner-services + let clone = self.inner.clone(); + let mut inner = std::mem::replace(&mut self.inner, clone); + let trace_id_provider = Arc::clone(&self.trace_id_provider); + + Box::pin(async move { + let mut response = inner.call(req).await?; + + if let Some(trace_id) = (trace_id_provider)() { + let trace_id = trace_id.to_string(); + match http::HeaderValue::from_str(&trace_id) { + Ok(header_value) => { + response + .headers_mut() + .insert(RERUN_HTTP_HEADER_REQUEST_TRACE_ID, header_value); + } + Err(err) => { + tracing::warn!( + trace_id, + %err, + "failed to convert trace ID to header value" + ); + } + } + } + + Ok(response) + }) + } +} diff --git a/crates/store/re_protos/src/v1alpha1/rerun.cloud.v1alpha1.ext.rs 
b/crates/store/re_protos/src/v1alpha1/rerun.cloud.v1alpha1.ext.rs index d8bdd977da80..9975d2f246e8 100644 --- a/crates/store/re_protos/src/v1alpha1/rerun.cloud.v1alpha1.ext.rs +++ b/crates/store/re_protos/src/v1alpha1/rerun.cloud.v1alpha1.ext.rs @@ -1,11 +1,11 @@ use std::sync::Arc; use arrow::array::{ - Array, ArrayRef, BinaryArray, BooleanArray, FixedSizeBinaryBuilder, ListBuilder, RecordBatch, - RecordBatchOptions, StringArray, StringBuilder, TimestampNanosecondArray, UInt8Array, - UInt64Array, + Array, ArrayRef, BinaryArray, BooleanArray, DictionaryArray, FixedSizeBinaryBuilder, + Int64Array, ListBuilder, PrimitiveDictionaryBuilder, RecordBatch, RecordBatchOptions, + StringArray, StringBuilder, TimestampNanosecondArray, UInt8Array, UInt64Array, }; -use arrow::datatypes::{DataType, Field, FieldRef, Schema, TimeUnit}; +use arrow::datatypes::{DataType, Field, FieldRef, Int32Type, Int64Type, Schema, TimeUnit}; use arrow::error::ArrowError; use prost::Name as _; use re_arrow_util::ArrowArrayDowncastRef as _; @@ -242,6 +242,7 @@ impl crate::cloud::v1alpha1::UnregisterFromDatasetRequest { #[derive(Debug, Clone)] pub struct QueryDatasetRequest { pub segment_ids: Vec, + pub generate_direct_urls: bool, pub chunk_ids: Vec, pub entity_paths: Vec, pub select_all_entity_paths: bool, @@ -264,6 +265,7 @@ impl Default for QueryDatasetRequest { exclude_temporal_data: false, scan_parameters: None, query: None, + generate_direct_urls: false, } } } @@ -284,6 +286,7 @@ impl From for crate::cloud::v1alpha1::QueryDatasetRequest { exclude_temporal_data: value.exclude_temporal_data, scan_parameters: value.scan_parameters.map(Into::into), query: value.query.map(Into::into), + generate_direct_urls: value.generate_direct_urls, } } } @@ -334,6 +337,8 @@ impl TryFrom for QueryDatasetReques .transpose()?, query: value.query.map(|q| q.try_into()).transpose()?, + + generate_direct_urls: value.generate_direct_urls, }) } } @@ -349,7 +354,11 @@ impl QueryDatasetResponse { pub const 
FIELD_CHUNK_KEY: &str = "chunk_key"; pub const FIELD_CHUNK_ENTITY_PATH: &str = "chunk_entity_path"; pub const FIELD_CHUNK_IS_STATIC: &str = "chunk_is_static"; + pub const FIELD_CHUNK_BYTE_OFFSET: &str = "chunk_byte_offset"; pub const FIELD_CHUNK_BYTE_LENGTH: &str = "chunk_byte_len"; + pub const FIELD_CHUNK_BYTE_LENGTH_UNCOMPRESSED: &str = "chunk_byte_size_uncompressed"; + pub const FIELD_DIRECT_URL: &str = "rerun_layer_direct_url"; + pub const FIELD_DIRECT_URL_EXPIRES_AT: &str = "rerun_layer_direct_url_expires_at"; pub fn field_chunk_id() -> FieldRef { lazy_field_ref!( @@ -423,6 +432,52 @@ impl QueryDatasetResponse { )) } + pub fn field_chunk_byte_len_uncompressed() -> FieldRef { + lazy_field_ref!(Field::new( + Self::FIELD_CHUNK_BYTE_LENGTH_UNCOMPRESSED, + DataType::UInt64, + true + )) + } + + pub fn field_direct_url() -> FieldRef { + lazy_field_ref!(Field::new( + Self::FIELD_DIRECT_URL, + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + true + )) + } + + pub fn field_direct_url_expires_at() -> FieldRef { + lazy_field_ref!(Field::new( + Self::FIELD_DIRECT_URL_EXPIRES_AT, + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int64)), + true + )) + } + + /// Per-timeline `{timeline_name}:start` column that carries `time_min` for each chunk. + /// + /// Consumed by the client's `build_segment_manifests` to compute the per-segment safe + /// horizon; no other downstream consumer reads the time range, so there is no matching + /// `:end` column. + /// + /// The column type is `Int64` because all rerun time types store `i64` internally. 
+ pub fn field_timeline_start(timeline_name: &str) -> FieldRef { + let metadata = std::collections::HashMap::from([ + ("rerun:index".to_owned(), timeline_name.to_owned()), + ( + re_sorbet::metadata::RERUN_KIND.to_owned(), + "index".to_owned(), + ), + ("rerun:index_marker".to_owned(), "start".to_owned()), + ]); + Arc::new( + Field::new(format!("{timeline_name}:start"), DataType::Int64, true) + .with_metadata(metadata), + ) + } + pub fn fields() -> Vec { vec![ Self::field_chunk_id(), @@ -432,6 +487,9 @@ impl QueryDatasetResponse { Self::field_chunk_entity_path(), Self::field_chunk_is_static(), Self::field_chunk_byte_len(), + Self::field_chunk_byte_len_uncompressed(), + Self::field_direct_url(), + Self::field_direct_url_expires_at(), ] } @@ -452,10 +510,47 @@ impl QueryDatasetResponse { chunk_entity_paths: Vec, chunk_is_static: Vec, chunk_byte_lengths: Vec, + chunk_byte_lengths_uncompressed: Vec>, + chunk_direct_urls: Vec>, + chunk_direct_urls_expiry: Vec>, ) -> arrow::error::Result { - let schema = Arc::new(Self::schema()); + Self::create_dataframe_with_timelines( + chunk_ids, + chunk_segment_ids, + chunk_layer_names, + chunk_keys, + chunk_entity_paths, + chunk_is_static, + chunk_byte_lengths, + chunk_byte_lengths_uncompressed, + chunk_direct_urls, + chunk_direct_urls_expiry, + &Default::default(), + ) + } - let columns: Vec = vec![ + #[expect(clippy::too_many_arguments)] + pub fn create_dataframe_with_timelines( + chunk_ids: Vec, + chunk_segment_ids: Vec, + chunk_layer_names: Vec, + chunk_keys: Vec<&[u8]>, + chunk_entity_paths: Vec, + chunk_is_static: Vec, + chunk_byte_lengths: Vec, + chunk_byte_lengths_uncompressed: Vec>, + chunk_direct_urls: Vec>, + chunk_direct_urls_expiry: Vec>, + timelines: &std::collections::BTreeMap>)>, + ) -> arrow::error::Result { + let num_rows = chunk_ids.len(); + + let mut chunk_direct_url_expiry_builder = + PrimitiveDictionaryBuilder::::new(); + chunk_direct_url_expiry_builder.extend(chunk_direct_urls_expiry); + + let mut fields: Vec = 
Self::fields(); + let mut columns: Vec = vec![ chunk_ids .to_arrow() .expect("to_arrow for ChunkIds never fails"), @@ -465,12 +560,28 @@ impl QueryDatasetResponse { Arc::new(StringArray::from(chunk_entity_paths)), Arc::new(BooleanArray::from(chunk_is_static)), Arc::new(UInt64Array::from(chunk_byte_lengths)), + Arc::new(UInt64Array::from(chunk_byte_lengths_uncompressed)), + Arc::new( + chunk_direct_urls + .iter() + .map(|s| s.as_deref()) + .collect::>(), + ), + Arc::new(chunk_direct_url_expiry_builder.finish()), ]; + // Caller is responsible for producing the same `timelines` set for every response of a + // single query, so all batches share a schema and the client can concatenate them. + for (timeline_name, (_data_type, mins)) in timelines { + fields.push(Self::field_timeline_start(timeline_name)); + columns.push(Arc::new(Int64Array::from(mins.clone())) as ArrayRef); + } + + let schema = Arc::new(Schema::new(fields)); RecordBatch::try_new_with_options( schema, columns, - &RecordBatchOptions::default().with_row_count(Some(chunk_ids.len())), + &RecordBatchOptions::default().with_row_count(Some(num_rows)), ) } } @@ -682,46 +793,14 @@ impl crate::cloud::v1alpha1::EntryFilter { } } -// --- EntryDetails --- - -/// Maximum length of an entry name. -const MAX_ENTRY_NAME_LENGTH: usize = 180; - -/// Validate an entry name. -/// -/// Entry names must: -/// - Be at most 180 characters long -/// - Only contain ASCII alphanumeric characters, underscores, hyphens, dots, and spaces -/// -// TODO(RR-3718): Entry names should support a broader set of characters. -pub fn validate_entry_name(name: &str) -> Result<(), String> { - if name.len() > MAX_ENTRY_NAME_LENGTH { - return Err(format!( - "name '{name}' exceeds maximum length of {MAX_ENTRY_NAME_LENGTH} characters (got {})", - name.len() - )); - } - - if let Some(ch) = name.chars().find(|c| { - !c.is_ascii_alphanumeric() - && *c != '_' - && *c != '-' - && *c != '.' 
- && *c != ' ' - && *c != '[' - && *c != ']' - && *c != ':' - }) { - return Err(format!("name '{name}' contains invalid character '{ch}'")); - } +pub use crate::EntryName; - Ok(()) -} +// --- EntryDetails --- #[derive(Debug, Clone)] pub struct EntryDetails { pub id: re_log_types::EntryId, - pub name: String, + pub name: EntryName, pub kind: crate::cloud::v1alpha1::EntryKind, pub created_at: jiff::Timestamp, pub updated_at: jiff::Timestamp, @@ -736,9 +815,11 @@ impl TryFrom for EntryDetails { .id .ok_or(missing_field!(crate::cloud::v1alpha1::EntryDetails, "id"))? .try_into()?, - name: value - .name - .ok_or(missing_field!(crate::cloud::v1alpha1::EntryDetails, "name"))?, + name: EntryName::new( + value + .name + .ok_or(missing_field!(crate::cloud::v1alpha1::EntryDetails, "name"))?, + )?, kind: value.entry_kind.try_into()?, created_at: { let ts = value.created_at.ok_or(missing_field!( @@ -762,7 +843,7 @@ impl From for crate::cloud::v1alpha1::EntryDetails { fn from(value: EntryDetails) -> Self { Self { id: Some(value.id.into()), - name: Some(value.name), + name: Some(value.name.to_string()), entry_kind: value.kind as _, created_at: { let ts = value.created_at; @@ -881,7 +962,7 @@ impl From for crate::cloud::v1alpha1::DatasetEntry { #[derive(Debug, Clone)] pub struct CreateDatasetEntryRequest { /// Entry name (must be unique in catalog). - pub name: String, + pub name: EntryName, /// Override, use at your own risk. 
pub id: Option, @@ -890,7 +971,7 @@ pub struct CreateDatasetEntryRequest { impl From for crate::cloud::v1alpha1::CreateDatasetEntryRequest { fn from(value: CreateDatasetEntryRequest) -> Self { Self { - name: Some(value.name), + name: Some(value.name.to_string()), id: value.id.map(Into::into), } } @@ -902,12 +983,12 @@ impl TryFrom for CreateDatase fn try_from( value: crate::cloud::v1alpha1::CreateDatasetEntryRequest, ) -> Result { + let name_str = value.name.ok_or(missing_field!( + crate::cloud::v1alpha1::CreateDatasetEntryRequest, + "name" + ))?; Ok(Self { - name: value.name.ok_or(missing_field!( - crate::cloud::v1alpha1::CreateDatasetEntryRequest, - "name" - ))?, - + name: EntryName::new(name_str).map_err(TypeConversionError::InvalidEntryName)?, id: value.id.map(TryInto::try_into).transpose()?, }) } @@ -950,7 +1031,7 @@ impl TryFrom for CreateDatas #[derive(Debug, Clone)] pub struct CreateTableEntryRequest { - pub name: String, + pub name: EntryName, pub schema: Schema, pub provider_details: Option, } @@ -959,7 +1040,7 @@ impl TryFrom for crate::cloud::v1alpha1::CreateTableEnt type Error = TypeConversionError; fn try_from(value: CreateTableEntryRequest) -> Result { Ok(Self { - name: value.name, + name: value.name.to_string(), schema: Some((&value.schema).try_into()?), provider_details: value .provider_details @@ -975,7 +1056,8 @@ impl TryFrom<&crate::cloud::v1alpha1::CreateTableEntryRequest> for CreateTableEn value: &crate::cloud::v1alpha1::CreateTableEntryRequest, ) -> Result { Ok(Self { - name: value.name.clone(), + name: EntryName::new(value.name.clone()) + .map_err(TypeConversionError::InvalidEntryName)?, schema: value .schema .as_ref() @@ -1164,20 +1246,28 @@ impl TryFrom for re_log_types::Entry #[derive(Debug, Clone, Default)] pub struct EntryDetailsUpdate { - pub name: Option, + pub name: Option, } impl TryFrom for EntryDetailsUpdate { type Error = TypeConversionError; fn try_from(value: crate::cloud::v1alpha1::EntryDetailsUpdate) -> Result { - Ok(Self { 
name: value.name }) + Ok(Self { + name: value + .name + .map(EntryName::new) + .transpose() + .map_err(TypeConversionError::InvalidEntryName)?, + }) } } impl From for crate::cloud::v1alpha1::EntryDetailsUpdate { fn from(value: EntryDetailsUpdate) -> Self { - Self { name: value.name } + Self { + name: value.name.map(|name| name.to_string()), + } } } @@ -1306,7 +1396,7 @@ impl TryFrom for ReadTableEntryR #[derive(Debug, Clone)] pub struct RegisterTableRequest { - pub name: String, + pub name: EntryName, pub provider_details: ProviderDetails, } @@ -1314,7 +1404,7 @@ impl TryFrom for crate::cloud::v1alpha1::RegisterTableRequ type Error = TypeConversionError; fn try_from(value: RegisterTableRequest) -> Result { Ok(Self { - name: value.name, + name: value.name.to_string(), provider_details: Some((&value.provider_details).try_into()?), }) } @@ -1325,7 +1415,7 @@ impl TryFrom for RegisterTableRequ fn try_from(value: crate::cloud::v1alpha1::RegisterTableRequest) -> Result { Ok(Self { - name: value.name, + name: EntryName::new(value.name).map_err(TypeConversionError::InvalidEntryName)?, provider_details: ProviderDetails::try_from(&value.provider_details.ok_or( missing_field!( crate::cloud::v1alpha1::RegisterTableRequest, @@ -2727,6 +2817,27 @@ impl From for crate::cloud::v1alpha1::TableInsertMode { // --- +/// Ergonomic counterpart to the codegen'd [`crate::cloud::v1alpha1::VersionResponse`]. 
+pub struct VersionResponse { + pub build_info: Option, + pub version: String, + pub cloud_provider: Option, + pub cloud_region: Option, +} + +impl From for VersionResponse { + fn from(value: crate::cloud::v1alpha1::VersionResponse) -> Self { + Self { + build_info: value.build_info.map(Into::into), + version: value.version, + cloud_provider: value.cloud_provider, + cloud_region: value.cloud_region, + } + } +} + +// --- + #[cfg(test)] mod tests { use arrow::datatypes::ToByteSlice as _; @@ -2742,6 +2853,10 @@ mod tests { let chunk_entity_paths = vec!["/".to_owned(), "/".to_owned()]; let chunk_is_static = vec![true, false]; let chunk_byte_lengths = vec![1024u64, 2048u64]; + let direct_urls = vec![None, None]; + let direct_urls_expiry = vec![None, None]; + + let chunk_byte_lengths_uncompressed = vec![Some(2048u64), Some(4096u64)]; QueryDatasetResponse::create_dataframe( chunk_ids, @@ -2751,6 +2866,9 @@ mod tests { chunk_entity_paths, chunk_is_static, chunk_byte_lengths, + chunk_byte_lengths_uncompressed, + direct_urls, + direct_urls_expiry, ) .unwrap(); } diff --git a/crates/store/re_protos/src/v1alpha1/rerun.cloud.v1alpha1.rs b/crates/store/re_protos/src/v1alpha1/rerun.cloud.v1alpha1.rs index 77dd103b46b5..0c1762e64dfb 100644 --- a/crates/store/re_protos/src/v1alpha1/rerun.cloud.v1alpha1.rs +++ b/crates/store/re_protos/src/v1alpha1/rerun.cloud.v1alpha1.rs @@ -18,6 +18,12 @@ pub struct VersionResponse { /// A single version string representing the version of the whole stack. #[prost(string, tag = "2")] pub version: ::prost::alloc::string::String, + /// Cloud provider hosting this instance (e.g. "aws", "azure"). Null if not deployed on cloud. + #[prost(string, optional, tag = "3")] + pub cloud_provider: ::core::option::Option<::prost::alloc::string::String>, + /// Cloud region where this instance is deployed (e.g. "us-west-2", "eastus"). Null if not deployed on cloud. 
+ #[prost(string, optional, tag = "4")] + pub cloud_region: ::core::option::Option<::prost::alloc::string::String>, } impl ::prost::Name for VersionResponse { const NAME: &'static str = "VersionResponse"; @@ -29,6 +35,40 @@ impl ::prost::Name for VersionResponse { "/rerun.cloud.v1alpha1.VersionResponse".into() } } +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct WhoAmIRequest {} +impl ::prost::Name for WhoAmIRequest { + const NAME: &'static str = "WhoAmIRequest"; + const PACKAGE: &'static str = "rerun.cloud.v1alpha1"; + fn full_name() -> ::prost::alloc::string::String { + "rerun.cloud.v1alpha1.WhoAmIRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/rerun.cloud.v1alpha1.WhoAmIRequest".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct WhoAmIResponse { + /// The user ID of the authenticated user, if any. + #[prost(string, optional, tag = "1")] + pub user_id: ::core::option::Option<::prost::alloc::string::String>, + /// Whether the user has read access. + #[prost(bool, tag = "2")] + pub can_read: bool, + /// Whether the user has write access. + #[prost(bool, tag = "3")] + pub can_write: bool, +} +impl ::prost::Name for WhoAmIResponse { + const NAME: &'static str = "WhoAmIResponse"; + const PACKAGE: &'static str = "rerun.cloud.v1alpha1"; + fn full_name() -> ::prost::alloc::string::String { + "rerun.cloud.v1alpha1.WhoAmIResponse".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/rerun.cloud.v1alpha1.WhoAmIResponse".into() + } +} /// Application level error - used as `details` in the `google.rpc.Status` message #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] pub struct Error { @@ -762,6 +802,10 @@ pub struct QueryDatasetRequest { /// all segments will be queried. #[prost(message, repeated, tag = "11")] pub segment_ids: ::prost::alloc::vec::Vec, + /// Will ask the server to generate direct URLs for the requested segments. 
+ /// It is not guaranteed that the server will return all (or any) of them. + #[prost(bool, tag = "12")] + pub generate_direct_urls: bool, /// Client can specify specific chunk ids to include. If left unspecified (empty list), /// all chunks that match other query parameters will be included. #[prost(message, repeated, tag = "3")] @@ -1213,6 +1257,36 @@ impl ::prost::Name for QueryTasksOnCompletionResponse { "/rerun.cloud.v1alpha1.QueryTasksOnCompletionResponse".into() } } +/// `CancelTasksRequest` is the request message for cancelling a number of tasks +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CancelTasksRequest { + /// Unique identifiers for the tasks + #[prost(message, repeated, tag = "1")] + pub ids: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for CancelTasksRequest { + const NAME: &'static str = "CancelTasksRequest"; + const PACKAGE: &'static str = "rerun.cloud.v1alpha1"; + fn full_name() -> ::prost::alloc::string::String { + "rerun.cloud.v1alpha1.CancelTasksRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/rerun.cloud.v1alpha1.CancelTasksRequest".into() + } +} +/// `CancelTasksResponse` is the response message for cancelling a number of tasks +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct CancelTasksResponse {} +impl ::prost::Name for CancelTasksResponse { + const NAME: &'static str = "CancelTasksResponse"; + const PACKAGE: &'static str = "rerun.cloud.v1alpha1"; + fn full_name() -> ::prost::alloc::string::String { + "rerun.cloud.v1alpha1.CancelTasksResponse".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/rerun.cloud.v1alpha1.CancelTasksResponse".into() + } +} #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] pub struct FindEntriesRequest { #[prost(message, optional, tag = "1")] @@ -2020,6 +2094,28 @@ pub mod rerun_cloud_service_client { )); self.inner.unary(req, path, codec).await } + /// Returns information about the currently authenticated user. 
+ /// + /// This is a lightweight endpoint that can be used to verify that authentication + /// is successful and to retrieve the user's identity. + pub async fn who_am_i( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result, tonic::Status> { + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/rerun.cloud.v1alpha1.RerunCloudService/WhoAmI", + ); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new( + "rerun.cloud.v1alpha1.RerunCloudService", + "WhoAmI", + )); + self.inner.unary(req, path, codec).await + } pub async fn find_entries( &mut self, request: impl tonic::IntoRequest, @@ -2691,6 +2787,26 @@ pub mod rerun_cloud_service_client { )); self.inner.server_streaming(req, path, codec).await } + /// Cancel existing tasks + pub async fn cancel_tasks( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result, tonic::Status> + { + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/rerun.cloud.v1alpha1.RerunCloudService/CancelTasks", + ); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new( + "rerun.cloud.v1alpha1.RerunCloudService", + "CancelTasks", + )); + self.inner.unary(req, path, codec).await + } /// Rerun Manifests maintenance operations: scalar index creation, compaction, etc. /// /// This endpoint requires the standard dataset headers. @@ -2753,6 +2869,14 @@ pub mod rerun_cloud_service_server { &self, request: tonic::Request, ) -> std::result::Result, tonic::Status>; + /// Returns information about the currently authenticated user. 
+ /// + /// This is a lightweight endpoint that can be used to verify that authentication + /// is successful and to retrieve the user's identity. + async fn who_am_i( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; async fn find_entries( &self, request: tonic::Request, @@ -3017,6 +3141,11 @@ pub mod rerun_cloud_service_server { &self, request: tonic::Request, ) -> std::result::Result, tonic::Status>; + /// Cancel existing tasks + async fn cancel_tasks( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; /// Rerun Manifests maintenance operations: scalar index creation, compaction, etc. /// /// This endpoint requires the standard dataset headers. @@ -3155,6 +3284,45 @@ pub mod rerun_cloud_service_server { }; Box::pin(fut) } + "/rerun.cloud.v1alpha1.RerunCloudService/WhoAmI" => { + #[allow(non_camel_case_types)] + struct WhoAmISvc(pub Arc); + impl tonic::server::UnaryService for WhoAmISvc { + type Response = super::WhoAmIResponse; + type Future = BoxFuture, tonic::Status>; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::who_am_i(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = WhoAmISvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } 
"/rerun.cloud.v1alpha1.RerunCloudService/FindEntries" => { #[allow(non_camel_case_types)] struct FindEntriesSvc(pub Arc); @@ -4400,6 +4568,48 @@ pub mod rerun_cloud_service_server { }; Box::pin(fut) } + "/rerun.cloud.v1alpha1.RerunCloudService/CancelTasks" => { + #[allow(non_camel_case_types)] + struct CancelTasksSvc(pub Arc); + impl + tonic::server::UnaryService + for CancelTasksSvc + { + type Response = super::CancelTasksResponse; + type Future = BoxFuture, tonic::Status>; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::cancel_tasks(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = CancelTasksSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } "/rerun.cloud.v1alpha1.RerunCloudService/DoMaintenance" => { #[allow(non_camel_case_types)] struct DoMaintenanceSvc(pub Arc); diff --git a/crates/store/re_protos/src/v1alpha1/rerun.common.v1alpha1.rs b/crates/store/re_protos/src/v1alpha1/rerun.common.v1alpha1.rs index db0b72b20f7b..a727c4514d78 100644 --- a/crates/store/re_protos/src/v1alpha1/rerun.common.v1alpha1.rs +++ b/crates/store/re_protos/src/v1alpha1/rerun.common.v1alpha1.rs @@ -121,7 +121,7 @@ impl ::prost::Name for IndexRange { } } /// The unique identifier of an entity, e.g. 
`camera/3/points` -/// See <> for more on entity paths. +/// See <> for more on entity paths. #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] pub struct EntityPath { #[prost(string, tag = "1")] diff --git a/crates/store/re_query/Cargo.toml b/crates/store/re_query/Cargo.toml index 0a80bf2d4a1e..f230e334877f 100644 --- a/crates/store/re_query/Cargo.toml +++ b/crates/store/re_query/Cargo.toml @@ -28,7 +28,6 @@ codegen = [] [dependencies] # Rerun dependencies: -re_arrow_util.workspace = true re_byte_size.workspace = true re_chunk.workspace = true re_chunk_store.workspace = true @@ -53,6 +52,7 @@ thiserror.workspace = true [dev-dependencies] +re_arrow_util.workspace = true re_log_encoding.workspace = true re_sdk_types.workspace = true diff --git a/crates/store/re_query/src/bin/clamped_zip.rs b/crates/store/re_query/src/bin/clamped_zip.rs index 7b110f4b49d1..dd385c3903c7 100644 --- a/crates/store/re_query/src/bin/clamped_zip.rs +++ b/crates/store/re_query/src/bin/clamped_zip.rs @@ -257,16 +257,15 @@ fn generate_impl(params: &Params) -> String { .collect_vec() .join(", "); - let next = - params - .to_required_names() - .into_iter() - .map(|r| format!("let {r}_next = self.{r}.next()?;")) - .chain(params.to_optional_names().into_iter().map(|o| { - format!("let {o}_next = self.{o}.next().or(self.{o}_latest_value.take());") - })) - .collect_vec() - .join("\n"); + let next = params + .to_required_names() + .into_iter() + .map(|r| format!("let {r}_next = self.{r}.next()?;")) + .chain(params.to_optional_names().into_iter().map(|o| { + format!("let {o}_next = self.{o}.next().or_else(|| self.{o}_latest_value.take());") + })) + .collect_vec() + .join("\n"); let update_latest = params .to_optional_names() @@ -314,8 +313,8 @@ fn generate_impl(params: &Params) -> String { } fn main() { - let num_required = 1..3; - let num_optional = 1..10; + let num_required = 1..=2; + let num_optional = 1..=5; let output = num_required .flat_map(|num_required| { @@ -340,7 +339,7 @@ fn main() 
{ println!( " - // This file was generated using `cargo r -p re_query --all-features --bin clamped_zip`. + // This file was generated using `cargo r -p re_query --all-features --bin clamped_zip > crates/store/re_query/src/clamped_zip/generated.rs && cargo fmt`. // DO NOT EDIT. // --- diff --git a/crates/store/re_query/src/bin/range_zip.rs b/crates/store/re_query/src/bin/range_zip.rs index 964f94d708e2..71718717e6a3 100644 --- a/crates/store/re_query/src/bin/range_zip.rs +++ b/crates/store/re_query/src/bin/range_zip.rs @@ -320,14 +320,14 @@ fn generate_struct(params: &Params) -> String { /// while let Some((_, data)) = o0.next_if(|(index, _)| index <= &max_index) { /// o0_data = Some(data); /// } -/// let o0_data = o0_data.or(o0_data_latest.take()); +/// let o0_data = o0_data.or_else(|| o0_data_latest.take()); /// o0_data_latest.clone_from(&o0_data); /// /// let mut o1_data = None; /// while let Some((_, data)) = o1.next_if(|(index, _)| index <= &max_index) { /// o1_data = Some(data); /// } -/// let o1_data = o1_data.or(o1_data_latest.take()); +/// let o1_data = o1_data.or_else(|| o1_data_latest.take()); /// o1_data_latest.clone_from(&o1_data); /// /// Some((max_index, r0_data, r1_data, o0_data, o1_data)) @@ -419,7 +419,7 @@ fn generate_impl(params: &Params) -> String { while let Some((_, data)) = {o}.next_if(|(index, _)| index <= &max_index) {{ {o}_data = Some(data); }} - let {o}_data = {o}_data.or({o}_data_latest.take()); + let {o}_data = {o}_data.or_else(|| {o}_data_latest.take()); {o}_data_latest.clone_from(&{o}_data); " ) @@ -456,8 +456,8 @@ fn generate_impl(params: &Params) -> String { } fn main() { - let num_required = 1..3; - let num_optional = 1..10; + let num_required = 1..=2; + let num_optional = 1..=10; let output = num_required .flat_map(|num_required| { @@ -482,7 +482,7 @@ fn main() { println!( " - // This file was generated using `cargo r -p re_query --all-features --bin range_zip`. 
+ // This file was generated using `cargo r -p re_query --all-features --bin range_zip > crates/store/re_query/src/range_zip/generated.rs && cargo fmt`. // DO NOT EDIT. // --- diff --git a/crates/store/re_query/src/cache.rs b/crates/store/re_query/src/cache.rs index ef44391525e6..806803e82a62 100644 --- a/crates/store/re_query/src/cache.rs +++ b/crates/store/re_query/src/cache.rs @@ -246,8 +246,10 @@ impl std::fmt::Debug for QueryCache { " [{cache_key:?} (pending_invalidation_min={:?})]", cache.pending_invalidations.first().map(|&t| { let range = AbsoluteTimeRange::new(t, TimeInt::MAX); - if let Some(time_type) = - store.read().time_column_type(&cache_key.timeline_name) + if let Some(time_type) = store + .read() + .schema() + .time_column_type(&cache_key.timeline_name) { time_type.format_range_utc(range) } else { @@ -549,6 +551,7 @@ impl ChunkStoreSubscriber for QueryCache { } } } + ChunkStoreDiff::SchemaAddition(_) => {} // Nothing to do here. } } diff --git a/crates/store/re_query/src/clamped_zip/generated.rs b/crates/store/re_query/src/clamped_zip/generated.rs index b1aa5e54fc56..9b2fa42a4d9b 100644 --- a/crates/store/re_query/src/clamped_zip/generated.rs +++ b/crates/store/re_query/src/clamped_zip/generated.rs @@ -1,4 +1,4 @@ -// This file was generated using `cargo r -p re_query --all-features --bin clamped_zip`. +// This file was generated using `cargo r -p re_query --all-features --bin clamped_zip > crates/store/re_query/src/clamped_zip/generated.rs && cargo fmt`. // DO NOT EDIT. // --- @@ -577,1581 +577,102 @@ where } } -/// Returns a new [`ClampedZip1x6`] iterator. -/// -/// The number of elements in a clamped zip iterator corresponds to the number of elements in the -/// shortest of its required iterators (`r0`). -/// -/// Optional iterators (`o0`, `o1`, `o2`, `o3`, `o4`, `o5`) will repeat their latest values if they happen to be too short -/// to be zipped with the shortest of the required iterators. 
-/// -/// If an optional iterator is not only too short but actually empty, its associated default function -/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`, `o3_default_fn`, `o4_default_fn`, `o5_default_fn`) will be executed and the resulting value repeated as necessary. -pub fn clamped_zip_1x6( - r0: R0, - o0: O0, - o0_default_fn: D0, - o1: O1, - o1_default_fn: D1, - o2: O2, - o2_default_fn: D2, - o3: O3, - o3_default_fn: D3, - o4: O4, - o4_default_fn: D4, - o5: O5, - o5_default_fn: D5, -) -> ClampedZip1x6< - R0::IntoIter, - O0::IntoIter, - O1::IntoIter, - O2::IntoIter, - O3::IntoIter, - O4::IntoIter, - O5::IntoIter, - D0, - D1, - D2, - D3, - D4, - D5, -> -where - R0: IntoIterator, - O0: IntoIterator, - O0::Item: Clone, - O1: IntoIterator, - O1::Item: Clone, - O2: IntoIterator, - O2::Item: Clone, - O3: IntoIterator, - O3::Item: Clone, - O4: IntoIterator, - O4::Item: Clone, - O5: IntoIterator, - O5::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, -{ - ClampedZip1x6 { - r0: r0.into_iter(), - o0: o0.into_iter(), - o1: o1.into_iter(), - o2: o2.into_iter(), - o3: o3.into_iter(), - o4: o4.into_iter(), - o5: o5.into_iter(), - o0_default_fn, - o1_default_fn, - o2_default_fn, - o3_default_fn, - o4_default_fn, - o5_default_fn, - o0_latest_value: None, - o1_latest_value: None, - o2_latest_value: None, - o3_latest_value: None, - o4_latest_value: None, - o5_latest_value: None, - } -} - -/// Implements a clamped zip iterator combinator with 2 required iterators and 2 optional -/// iterators. -/// -/// See [`clamped_zip_1x6`] for more information. 
-pub struct ClampedZip1x6 -where - R0: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, -{ - r0: R0, - o0: O0, - o1: O1, - o2: O2, - o3: O3, - o4: O4, - o5: O5, - o0_default_fn: D0, - o1_default_fn: D1, - o2_default_fn: D2, - o3_default_fn: D3, - o4_default_fn: D4, - o5_default_fn: D5, - - o0_latest_value: Option, - o1_latest_value: Option, - o2_latest_value: Option, - o3_latest_value: Option, - o4_latest_value: Option, - o5_latest_value: Option, -} - -impl Iterator - for ClampedZip1x6 -where - R0: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, -{ - type Item = ( - R0::Item, - O0::Item, - O1::Item, - O2::Item, - O3::Item, - O4::Item, - O5::Item, - ); - - #[inline] - fn next(&mut self) -> Option { - let r0_next = self.r0.next()?; - let o0_next = self.o0.next().or_else(|| self.o0_latest_value.take()); - let o1_next = self.o1.next().or_else(|| self.o1_latest_value.take()); - let o2_next = self.o2.next().or_else(|| self.o2_latest_value.take()); - let o3_next = self.o3.next().or_else(|| self.o3_latest_value.take()); - let o4_next = self.o4.next().or_else(|| self.o4_latest_value.take()); - let o5_next = self.o5.next().or_else(|| self.o5_latest_value.take()); - - self.o0_latest_value.clone_from(&o0_next); - self.o1_latest_value.clone_from(&o1_next); - self.o2_latest_value.clone_from(&o2_next); - self.o3_latest_value.clone_from(&o3_next); - 
self.o4_latest_value.clone_from(&o4_next); - self.o5_latest_value.clone_from(&o5_next); - - Some(( - r0_next, - o0_next.unwrap_or_else(|| (self.o0_default_fn)()), - o1_next.unwrap_or_else(|| (self.o1_default_fn)()), - o2_next.unwrap_or_else(|| (self.o2_default_fn)()), - o3_next.unwrap_or_else(|| (self.o3_default_fn)()), - o4_next.unwrap_or_else(|| (self.o4_default_fn)()), - o5_next.unwrap_or_else(|| (self.o5_default_fn)()), - )) - } -} - -/// Returns a new [`ClampedZip1x7`] iterator. -/// -/// The number of elements in a clamped zip iterator corresponds to the number of elements in the -/// shortest of its required iterators (`r0`). -/// -/// Optional iterators (`o0`, `o1`, `o2`, `o3`, `o4`, `o5`, `o6`) will repeat their latest values if they happen to be too short -/// to be zipped with the shortest of the required iterators. -/// -/// If an optional iterator is not only too short but actually empty, its associated default function -/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`, `o3_default_fn`, `o4_default_fn`, `o5_default_fn`, `o6_default_fn`) will be executed and the resulting value repeated as necessary. 
-pub fn clamped_zip_1x7( - r0: R0, - o0: O0, - o0_default_fn: D0, - o1: O1, - o1_default_fn: D1, - o2: O2, - o2_default_fn: D2, - o3: O3, - o3_default_fn: D3, - o4: O4, - o4_default_fn: D4, - o5: O5, - o5_default_fn: D5, - o6: O6, - o6_default_fn: D6, -) -> ClampedZip1x7< - R0::IntoIter, - O0::IntoIter, - O1::IntoIter, - O2::IntoIter, - O3::IntoIter, - O4::IntoIter, - O5::IntoIter, - O6::IntoIter, - D0, - D1, - D2, - D3, - D4, - D5, - D6, -> -where - R0: IntoIterator, - O0: IntoIterator, - O0::Item: Clone, - O1: IntoIterator, - O1::Item: Clone, - O2: IntoIterator, - O2::Item: Clone, - O3: IntoIterator, - O3::Item: Clone, - O4: IntoIterator, - O4::Item: Clone, - O5: IntoIterator, - O5::Item: Clone, - O6: IntoIterator, - O6::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, -{ - ClampedZip1x7 { - r0: r0.into_iter(), - o0: o0.into_iter(), - o1: o1.into_iter(), - o2: o2.into_iter(), - o3: o3.into_iter(), - o4: o4.into_iter(), - o5: o5.into_iter(), - o6: o6.into_iter(), - o0_default_fn, - o1_default_fn, - o2_default_fn, - o3_default_fn, - o4_default_fn, - o5_default_fn, - o6_default_fn, - o0_latest_value: None, - o1_latest_value: None, - o2_latest_value: None, - o3_latest_value: None, - o4_latest_value: None, - o5_latest_value: None, - o6_latest_value: None, - } -} - -/// Implements a clamped zip iterator combinator with 2 required iterators and 2 optional -/// iterators. -/// -/// See [`clamped_zip_1x7`] for more information. 
-pub struct ClampedZip1x7 -where - R0: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - O6: Iterator, - O6::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, -{ - r0: R0, - o0: O0, - o1: O1, - o2: O2, - o3: O3, - o4: O4, - o5: O5, - o6: O6, - o0_default_fn: D0, - o1_default_fn: D1, - o2_default_fn: D2, - o3_default_fn: D3, - o4_default_fn: D4, - o5_default_fn: D5, - o6_default_fn: D6, - - o0_latest_value: Option, - o1_latest_value: Option, - o2_latest_value: Option, - o3_latest_value: Option, - o4_latest_value: Option, - o5_latest_value: Option, - o6_latest_value: Option, -} - -impl Iterator - for ClampedZip1x7 -where - R0: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - O6: Iterator, - O6::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, -{ - type Item = ( - R0::Item, - O0::Item, - O1::Item, - O2::Item, - O3::Item, - O4::Item, - O5::Item, - O6::Item, - ); - - #[inline] - fn next(&mut self) -> Option { - let r0_next = self.r0.next()?; - let o0_next = self.o0.next().or_else(|| self.o0_latest_value.take()); - let o1_next = self.o1.next().or_else(|| self.o1_latest_value.take()); - let o2_next = self.o2.next().or_else(|| self.o2_latest_value.take()); - let o3_next = self.o3.next().or_else(|| self.o3_latest_value.take()); - let o4_next = self.o4.next().or_else(|| self.o4_latest_value.take()); - let o5_next = self.o5.next().or_else(|| self.o5_latest_value.take()); - let 
o6_next = self.o6.next().or_else(|| self.o6_latest_value.take()); - - self.o0_latest_value.clone_from(&o0_next); - self.o1_latest_value.clone_from(&o1_next); - self.o2_latest_value.clone_from(&o2_next); - self.o3_latest_value.clone_from(&o3_next); - self.o4_latest_value.clone_from(&o4_next); - self.o5_latest_value.clone_from(&o5_next); - self.o6_latest_value.clone_from(&o6_next); - - Some(( - r0_next, - o0_next.unwrap_or_else(|| (self.o0_default_fn)()), - o1_next.unwrap_or_else(|| (self.o1_default_fn)()), - o2_next.unwrap_or_else(|| (self.o2_default_fn)()), - o3_next.unwrap_or_else(|| (self.o3_default_fn)()), - o4_next.unwrap_or_else(|| (self.o4_default_fn)()), - o5_next.unwrap_or_else(|| (self.o5_default_fn)()), - o6_next.unwrap_or_else(|| (self.o6_default_fn)()), - )) - } -} - -/// Returns a new [`ClampedZip1x8`] iterator. -/// -/// The number of elements in a clamped zip iterator corresponds to the number of elements in the -/// shortest of its required iterators (`r0`). -/// -/// Optional iterators (`o0`, `o1`, `o2`, `o3`, `o4`, `o5`, `o6`, `o7`) will repeat their latest values if they happen to be too short -/// to be zipped with the shortest of the required iterators. -/// -/// If an optional iterator is not only too short but actually empty, its associated default function -/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`, `o3_default_fn`, `o4_default_fn`, `o5_default_fn`, `o6_default_fn`, `o7_default_fn`) will be executed and the resulting value repeated as necessary. 
-pub fn clamped_zip_1x8( - r0: R0, - o0: O0, - o0_default_fn: D0, - o1: O1, - o1_default_fn: D1, - o2: O2, - o2_default_fn: D2, - o3: O3, - o3_default_fn: D3, - o4: O4, - o4_default_fn: D4, - o5: O5, - o5_default_fn: D5, - o6: O6, - o6_default_fn: D6, - o7: O7, - o7_default_fn: D7, -) -> ClampedZip1x8< - R0::IntoIter, - O0::IntoIter, - O1::IntoIter, - O2::IntoIter, - O3::IntoIter, - O4::IntoIter, - O5::IntoIter, - O6::IntoIter, - O7::IntoIter, - D0, - D1, - D2, - D3, - D4, - D5, - D6, - D7, -> -where - R0: IntoIterator, - O0: IntoIterator, - O0::Item: Clone, - O1: IntoIterator, - O1::Item: Clone, - O2: IntoIterator, - O2::Item: Clone, - O3: IntoIterator, - O3::Item: Clone, - O4: IntoIterator, - O4::Item: Clone, - O5: IntoIterator, - O5::Item: Clone, - O6: IntoIterator, - O6::Item: Clone, - O7: IntoIterator, - O7::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, - D7: Fn() -> O7::Item, -{ - ClampedZip1x8 { - r0: r0.into_iter(), - o0: o0.into_iter(), - o1: o1.into_iter(), - o2: o2.into_iter(), - o3: o3.into_iter(), - o4: o4.into_iter(), - o5: o5.into_iter(), - o6: o6.into_iter(), - o7: o7.into_iter(), - o0_default_fn, - o1_default_fn, - o2_default_fn, - o3_default_fn, - o4_default_fn, - o5_default_fn, - o6_default_fn, - o7_default_fn, - o0_latest_value: None, - o1_latest_value: None, - o2_latest_value: None, - o3_latest_value: None, - o4_latest_value: None, - o5_latest_value: None, - o6_latest_value: None, - o7_latest_value: None, - } -} - -/// Implements a clamped zip iterator combinator with 2 required iterators and 2 optional -/// iterators. -/// -/// See [`clamped_zip_1x8`] for more information. 
-pub struct ClampedZip1x8 -where - R0: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - O6: Iterator, - O6::Item: Clone, - O7: Iterator, - O7::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, - D7: Fn() -> O7::Item, -{ - r0: R0, - o0: O0, - o1: O1, - o2: O2, - o3: O3, - o4: O4, - o5: O5, - o6: O6, - o7: O7, - o0_default_fn: D0, - o1_default_fn: D1, - o2_default_fn: D2, - o3_default_fn: D3, - o4_default_fn: D4, - o5_default_fn: D5, - o6_default_fn: D6, - o7_default_fn: D7, - - o0_latest_value: Option, - o1_latest_value: Option, - o2_latest_value: Option, - o3_latest_value: Option, - o4_latest_value: Option, - o5_latest_value: Option, - o6_latest_value: Option, - o7_latest_value: Option, -} - -impl Iterator - for ClampedZip1x8 -where - R0: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - O6: Iterator, - O6::Item: Clone, - O7: Iterator, - O7::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, - D7: Fn() -> O7::Item, -{ - type Item = ( - R0::Item, - O0::Item, - O1::Item, - O2::Item, - O3::Item, - O4::Item, - O5::Item, - O6::Item, - O7::Item, - ); - - #[inline] - fn next(&mut self) -> Option { - let r0_next = self.r0.next()?; - let o0_next = self.o0.next().or_else(|| self.o0_latest_value.take()); - let o1_next = self.o1.next().or_else(|| self.o1_latest_value.take()); - let o2_next = self.o2.next().or_else(|| self.o2_latest_value.take()); - let o3_next = 
self.o3.next().or_else(|| self.o3_latest_value.take()); - let o4_next = self.o4.next().or_else(|| self.o4_latest_value.take()); - let o5_next = self.o5.next().or_else(|| self.o5_latest_value.take()); - let o6_next = self.o6.next().or_else(|| self.o6_latest_value.take()); - let o7_next = self.o7.next().or_else(|| self.o7_latest_value.take()); - - self.o0_latest_value.clone_from(&o0_next); - self.o1_latest_value.clone_from(&o1_next); - self.o2_latest_value.clone_from(&o2_next); - self.o3_latest_value.clone_from(&o3_next); - self.o4_latest_value.clone_from(&o4_next); - self.o5_latest_value.clone_from(&o5_next); - self.o6_latest_value.clone_from(&o6_next); - self.o7_latest_value.clone_from(&o7_next); - - Some(( - r0_next, - o0_next.unwrap_or_else(|| (self.o0_default_fn)()), - o1_next.unwrap_or_else(|| (self.o1_default_fn)()), - o2_next.unwrap_or_else(|| (self.o2_default_fn)()), - o3_next.unwrap_or_else(|| (self.o3_default_fn)()), - o4_next.unwrap_or_else(|| (self.o4_default_fn)()), - o5_next.unwrap_or_else(|| (self.o5_default_fn)()), - o6_next.unwrap_or_else(|| (self.o6_default_fn)()), - o7_next.unwrap_or_else(|| (self.o7_default_fn)()), - )) - } -} - -/// Returns a new [`ClampedZip1x9`] iterator. -/// -/// The number of elements in a clamped zip iterator corresponds to the number of elements in the -/// shortest of its required iterators (`r0`). -/// -/// Optional iterators (`o0`, `o1`, `o2`, `o3`, `o4`, `o5`, `o6`, `o7`, `o8`) will repeat their latest values if they happen to be too short -/// to be zipped with the shortest of the required iterators. -/// -/// If an optional iterator is not only too short but actually empty, its associated default function -/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`, `o3_default_fn`, `o4_default_fn`, `o5_default_fn`, `o6_default_fn`, `o7_default_fn`, `o8_default_fn`) will be executed and the resulting value repeated as necessary. 
-pub fn clamped_zip_1x9( - r0: R0, - o0: O0, - o0_default_fn: D0, - o1: O1, - o1_default_fn: D1, - o2: O2, - o2_default_fn: D2, - o3: O3, - o3_default_fn: D3, - o4: O4, - o4_default_fn: D4, - o5: O5, - o5_default_fn: D5, - o6: O6, - o6_default_fn: D6, - o7: O7, - o7_default_fn: D7, - o8: O8, - o8_default_fn: D8, -) -> ClampedZip1x9< - R0::IntoIter, - O0::IntoIter, - O1::IntoIter, - O2::IntoIter, - O3::IntoIter, - O4::IntoIter, - O5::IntoIter, - O6::IntoIter, - O7::IntoIter, - O8::IntoIter, - D0, - D1, - D2, - D3, - D4, - D5, - D6, - D7, - D8, -> -where - R0: IntoIterator, - O0: IntoIterator, - O0::Item: Clone, - O1: IntoIterator, - O1::Item: Clone, - O2: IntoIterator, - O2::Item: Clone, - O3: IntoIterator, - O3::Item: Clone, - O4: IntoIterator, - O4::Item: Clone, - O5: IntoIterator, - O5::Item: Clone, - O6: IntoIterator, - O6::Item: Clone, - O7: IntoIterator, - O7::Item: Clone, - O8: IntoIterator, - O8::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, - D7: Fn() -> O7::Item, - D8: Fn() -> O8::Item, -{ - ClampedZip1x9 { - r0: r0.into_iter(), - o0: o0.into_iter(), - o1: o1.into_iter(), - o2: o2.into_iter(), - o3: o3.into_iter(), - o4: o4.into_iter(), - o5: o5.into_iter(), - o6: o6.into_iter(), - o7: o7.into_iter(), - o8: o8.into_iter(), - o0_default_fn, - o1_default_fn, - o2_default_fn, - o3_default_fn, - o4_default_fn, - o5_default_fn, - o6_default_fn, - o7_default_fn, - o8_default_fn, - o0_latest_value: None, - o1_latest_value: None, - o2_latest_value: None, - o3_latest_value: None, - o4_latest_value: None, - o5_latest_value: None, - o6_latest_value: None, - o7_latest_value: None, - o8_latest_value: None, - } -} - -/// Implements a clamped zip iterator combinator with 2 required iterators and 2 optional -/// iterators. -/// -/// See [`clamped_zip_1x9`] for more information. 
-pub struct ClampedZip1x9 -where - R0: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - O6: Iterator, - O6::Item: Clone, - O7: Iterator, - O7::Item: Clone, - O8: Iterator, - O8::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, - D7: Fn() -> O7::Item, - D8: Fn() -> O8::Item, -{ - r0: R0, - o0: O0, - o1: O1, - o2: O2, - o3: O3, - o4: O4, - o5: O5, - o6: O6, - o7: O7, - o8: O8, - o0_default_fn: D0, - o1_default_fn: D1, - o2_default_fn: D2, - o3_default_fn: D3, - o4_default_fn: D4, - o5_default_fn: D5, - o6_default_fn: D6, - o7_default_fn: D7, - o8_default_fn: D8, - - o0_latest_value: Option, - o1_latest_value: Option, - o2_latest_value: Option, - o3_latest_value: Option, - o4_latest_value: Option, - o5_latest_value: Option, - o6_latest_value: Option, - o7_latest_value: Option, - o8_latest_value: Option, -} - -impl Iterator - for ClampedZip1x9 -where - R0: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - O6: Iterator, - O6::Item: Clone, - O7: Iterator, - O7::Item: Clone, - O8: Iterator, - O8::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, - D7: Fn() -> O7::Item, - D8: Fn() -> O8::Item, -{ - type Item = ( - R0::Item, - O0::Item, - O1::Item, - O2::Item, - O3::Item, - O4::Item, - O5::Item, - O6::Item, - O7::Item, - O8::Item, - ); - - #[inline] - fn next(&mut self) -> Option { - let r0_next = self.r0.next()?; - let o0_next = self.o0.next().or_else(|| 
self.o0_latest_value.take()); - let o1_next = self.o1.next().or_else(|| self.o1_latest_value.take()); - let o2_next = self.o2.next().or_else(|| self.o2_latest_value.take()); - let o3_next = self.o3.next().or_else(|| self.o3_latest_value.take()); - let o4_next = self.o4.next().or_else(|| self.o4_latest_value.take()); - let o5_next = self.o5.next().or_else(|| self.o5_latest_value.take()); - let o6_next = self.o6.next().or_else(|| self.o6_latest_value.take()); - let o7_next = self.o7.next().or_else(|| self.o7_latest_value.take()); - let o8_next = self.o8.next().or_else(|| self.o8_latest_value.take()); - - self.o0_latest_value.clone_from(&o0_next); - self.o1_latest_value.clone_from(&o1_next); - self.o2_latest_value.clone_from(&o2_next); - self.o3_latest_value.clone_from(&o3_next); - self.o4_latest_value.clone_from(&o4_next); - self.o5_latest_value.clone_from(&o5_next); - self.o6_latest_value.clone_from(&o6_next); - self.o7_latest_value.clone_from(&o7_next); - self.o8_latest_value.clone_from(&o8_next); - - Some(( - r0_next, - o0_next.unwrap_or_else(|| (self.o0_default_fn)()), - o1_next.unwrap_or_else(|| (self.o1_default_fn)()), - o2_next.unwrap_or_else(|| (self.o2_default_fn)()), - o3_next.unwrap_or_else(|| (self.o3_default_fn)()), - o4_next.unwrap_or_else(|| (self.o4_default_fn)()), - o5_next.unwrap_or_else(|| (self.o5_default_fn)()), - o6_next.unwrap_or_else(|| (self.o6_default_fn)()), - o7_next.unwrap_or_else(|| (self.o7_default_fn)()), - o8_next.unwrap_or_else(|| (self.o8_default_fn)()), - )) - } -} - -/// Returns a new [`ClampedZip2x1`] iterator. -/// -/// The number of elements in a clamped zip iterator corresponds to the number of elements in the -/// shortest of its required iterators (`r0`, `r1`). -/// -/// Optional iterators (`o0`) will repeat their latest values if they happen to be too short -/// to be zipped with the shortest of the required iterators. 
-/// -/// If an optional iterator is not only too short but actually empty, its associated default function -/// (`o0_default_fn`) will be executed and the resulting value repeated as necessary. -pub fn clamped_zip_2x1( - r0: R0, - r1: R1, - o0: O0, - o0_default_fn: D0, -) -> ClampedZip2x1 -where - R0: IntoIterator, - R1: IntoIterator, - O0: IntoIterator, - O0::Item: Clone, - D0: Fn() -> O0::Item, -{ - ClampedZip2x1 { - r0: r0.into_iter(), - r1: r1.into_iter(), - o0: o0.into_iter(), - o0_default_fn, - o0_latest_value: None, - } -} - -/// Implements a clamped zip iterator combinator with 2 required iterators and 2 optional -/// iterators. -/// -/// See [`clamped_zip_2x1`] for more information. -pub struct ClampedZip2x1 -where - R0: Iterator, - R1: Iterator, - O0: Iterator, - O0::Item: Clone, - D0: Fn() -> O0::Item, -{ - r0: R0, - r1: R1, - o0: O0, - o0_default_fn: D0, - - o0_latest_value: Option, -} - -impl Iterator for ClampedZip2x1 -where - R0: Iterator, - R1: Iterator, - O0: Iterator, - O0::Item: Clone, - D0: Fn() -> O0::Item, -{ - type Item = (R0::Item, R1::Item, O0::Item); - - #[inline] - fn next(&mut self) -> Option { - let r0_next = self.r0.next()?; - let r1_next = self.r1.next()?; - let o0_next = self.o0.next().or_else(|| self.o0_latest_value.take()); - - self.o0_latest_value.clone_from(&o0_next); - - Some(( - r0_next, - r1_next, - o0_next.unwrap_or_else(|| (self.o0_default_fn)()), - )) - } -} - -/// Returns a new [`ClampedZip2x2`] iterator. -/// -/// The number of elements in a clamped zip iterator corresponds to the number of elements in the -/// shortest of its required iterators (`r0`, `r1`). -/// -/// Optional iterators (`o0`, `o1`) will repeat their latest values if they happen to be too short -/// to be zipped with the shortest of the required iterators. 
-/// -/// If an optional iterator is not only too short but actually empty, its associated default function -/// (`o0_default_fn`, `o1_default_fn`) will be executed and the resulting value repeated as necessary. -pub fn clamped_zip_2x2( - r0: R0, - r1: R1, - o0: O0, - o0_default_fn: D0, - o1: O1, - o1_default_fn: D1, -) -> ClampedZip2x2 -where - R0: IntoIterator, - R1: IntoIterator, - O0: IntoIterator, - O0::Item: Clone, - O1: IntoIterator, - O1::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, -{ - ClampedZip2x2 { - r0: r0.into_iter(), - r1: r1.into_iter(), - o0: o0.into_iter(), - o1: o1.into_iter(), - o0_default_fn, - o1_default_fn, - o0_latest_value: None, - o1_latest_value: None, - } -} - -/// Implements a clamped zip iterator combinator with 2 required iterators and 2 optional -/// iterators. -/// -/// See [`clamped_zip_2x2`] for more information. -pub struct ClampedZip2x2 -where - R0: Iterator, - R1: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, -{ - r0: R0, - r1: R1, - o0: O0, - o1: O1, - o0_default_fn: D0, - o1_default_fn: D1, - - o0_latest_value: Option, - o1_latest_value: Option, -} - -impl Iterator for ClampedZip2x2 -where - R0: Iterator, - R1: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, -{ - type Item = (R0::Item, R1::Item, O0::Item, O1::Item); - - #[inline] - fn next(&mut self) -> Option { - let r0_next = self.r0.next()?; - let r1_next = self.r1.next()?; - let o0_next = self.o0.next().or_else(|| self.o0_latest_value.take()); - let o1_next = self.o1.next().or_else(|| self.o1_latest_value.take()); - - self.o0_latest_value.clone_from(&o0_next); - self.o1_latest_value.clone_from(&o1_next); - - Some(( - r0_next, - r1_next, - o0_next.unwrap_or_else(|| (self.o0_default_fn)()), - o1_next.unwrap_or_else(|| (self.o1_default_fn)()), - )) - } -} - -/// Returns a new [`ClampedZip2x3`] 
iterator. -/// -/// The number of elements in a clamped zip iterator corresponds to the number of elements in the -/// shortest of its required iterators (`r0`, `r1`). -/// -/// Optional iterators (`o0`, `o1`, `o2`) will repeat their latest values if they happen to be too short -/// to be zipped with the shortest of the required iterators. -/// -/// If an optional iterator is not only too short but actually empty, its associated default function -/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`) will be executed and the resulting value repeated as necessary. -pub fn clamped_zip_2x3( - r0: R0, - r1: R1, - o0: O0, - o0_default_fn: D0, - o1: O1, - o1_default_fn: D1, - o2: O2, - o2_default_fn: D2, -) -> ClampedZip2x3 -where - R0: IntoIterator, - R1: IntoIterator, - O0: IntoIterator, - O0::Item: Clone, - O1: IntoIterator, - O1::Item: Clone, - O2: IntoIterator, - O2::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, -{ - ClampedZip2x3 { - r0: r0.into_iter(), - r1: r1.into_iter(), - o0: o0.into_iter(), - o1: o1.into_iter(), - o2: o2.into_iter(), - o0_default_fn, - o1_default_fn, - o2_default_fn, - o0_latest_value: None, - o1_latest_value: None, - o2_latest_value: None, - } -} - -/// Implements a clamped zip iterator combinator with 2 required iterators and 2 optional -/// iterators. -/// -/// See [`clamped_zip_2x3`] for more information. 
-pub struct ClampedZip2x3 -where - R0: Iterator, - R1: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, -{ - r0: R0, - r1: R1, - o0: O0, - o1: O1, - o2: O2, - o0_default_fn: D0, - o1_default_fn: D1, - o2_default_fn: D2, - - o0_latest_value: Option, - o1_latest_value: Option, - o2_latest_value: Option, -} - -impl Iterator for ClampedZip2x3 -where - R0: Iterator, - R1: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, -{ - type Item = (R0::Item, R1::Item, O0::Item, O1::Item, O2::Item); - - #[inline] - fn next(&mut self) -> Option { - let r0_next = self.r0.next()?; - let r1_next = self.r1.next()?; - let o0_next = self.o0.next().or_else(|| self.o0_latest_value.take()); - let o1_next = self.o1.next().or_else(|| self.o1_latest_value.take()); - let o2_next = self.o2.next().or_else(|| self.o2_latest_value.take()); - - self.o0_latest_value.clone_from(&o0_next); - self.o1_latest_value.clone_from(&o1_next); - self.o2_latest_value.clone_from(&o2_next); - - Some(( - r0_next, - r1_next, - o0_next.unwrap_or_else(|| (self.o0_default_fn)()), - o1_next.unwrap_or_else(|| (self.o1_default_fn)()), - o2_next.unwrap_or_else(|| (self.o2_default_fn)()), - )) - } -} - -/// Returns a new [`ClampedZip2x4`] iterator. -/// -/// The number of elements in a clamped zip iterator corresponds to the number of elements in the -/// shortest of its required iterators (`r0`, `r1`). -/// -/// Optional iterators (`o0`, `o1`, `o2`, `o3`) will repeat their latest values if they happen to be too short -/// to be zipped with the shortest of the required iterators. 
-/// -/// If an optional iterator is not only too short but actually empty, its associated default function -/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`, `o3_default_fn`) will be executed and the resulting value repeated as necessary. -pub fn clamped_zip_2x4( - r0: R0, - r1: R1, - o0: O0, - o0_default_fn: D0, - o1: O1, - o1_default_fn: D1, - o2: O2, - o2_default_fn: D2, - o3: O3, - o3_default_fn: D3, -) -> ClampedZip2x4< - R0::IntoIter, - R1::IntoIter, - O0::IntoIter, - O1::IntoIter, - O2::IntoIter, - O3::IntoIter, - D0, - D1, - D2, - D3, -> -where - R0: IntoIterator, - R1: IntoIterator, - O0: IntoIterator, - O0::Item: Clone, - O1: IntoIterator, - O1::Item: Clone, - O2: IntoIterator, - O2::Item: Clone, - O3: IntoIterator, - O3::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, -{ - ClampedZip2x4 { - r0: r0.into_iter(), - r1: r1.into_iter(), - o0: o0.into_iter(), - o1: o1.into_iter(), - o2: o2.into_iter(), - o3: o3.into_iter(), - o0_default_fn, - o1_default_fn, - o2_default_fn, - o3_default_fn, - o0_latest_value: None, - o1_latest_value: None, - o2_latest_value: None, - o3_latest_value: None, - } -} - -/// Implements a clamped zip iterator combinator with 2 required iterators and 2 optional -/// iterators. -/// -/// See [`clamped_zip_2x4`] for more information. 
-pub struct ClampedZip2x4 -where - R0: Iterator, - R1: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, -{ - r0: R0, - r1: R1, - o0: O0, - o1: O1, - o2: O2, - o3: O3, - o0_default_fn: D0, - o1_default_fn: D1, - o2_default_fn: D2, - o3_default_fn: D3, - - o0_latest_value: Option, - o1_latest_value: Option, - o2_latest_value: Option, - o3_latest_value: Option, -} - -impl Iterator - for ClampedZip2x4 -where - R0: Iterator, - R1: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, -{ - type Item = (R0::Item, R1::Item, O0::Item, O1::Item, O2::Item, O3::Item); - - #[inline] - fn next(&mut self) -> Option { - let r0_next = self.r0.next()?; - let r1_next = self.r1.next()?; - let o0_next = self.o0.next().or_else(|| self.o0_latest_value.take()); - let o1_next = self.o1.next().or_else(|| self.o1_latest_value.take()); - let o2_next = self.o2.next().or_else(|| self.o2_latest_value.take()); - let o3_next = self.o3.next().or_else(|| self.o3_latest_value.take()); - - self.o0_latest_value.clone_from(&o0_next); - self.o1_latest_value.clone_from(&o1_next); - self.o2_latest_value.clone_from(&o2_next); - self.o3_latest_value.clone_from(&o3_next); - - Some(( - r0_next, - r1_next, - o0_next.unwrap_or_else(|| (self.o0_default_fn)()), - o1_next.unwrap_or_else(|| (self.o1_default_fn)()), - o2_next.unwrap_or_else(|| (self.o2_default_fn)()), - o3_next.unwrap_or_else(|| (self.o3_default_fn)()), - )) - } -} - -/// Returns a new [`ClampedZip2x5`] iterator. +/// Returns a new [`ClampedZip2x1`] iterator. 
/// /// The number of elements in a clamped zip iterator corresponds to the number of elements in the /// shortest of its required iterators (`r0`, `r1`). /// -/// Optional iterators (`o0`, `o1`, `o2`, `o3`, `o4`) will repeat their latest values if they happen to be too short +/// Optional iterators (`o0`) will repeat their latest values if they happen to be too short /// to be zipped with the shortest of the required iterators. /// /// If an optional iterator is not only too short but actually empty, its associated default function -/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`, `o3_default_fn`, `o4_default_fn`) will be executed and the resulting value repeated as necessary. -pub fn clamped_zip_2x5( +/// (`o0_default_fn`) will be executed and the resulting value repeated as necessary. +pub fn clamped_zip_2x1( r0: R0, r1: R1, o0: O0, o0_default_fn: D0, - o1: O1, - o1_default_fn: D1, - o2: O2, - o2_default_fn: D2, - o3: O3, - o3_default_fn: D3, - o4: O4, - o4_default_fn: D4, -) -> ClampedZip2x5< - R0::IntoIter, - R1::IntoIter, - O0::IntoIter, - O1::IntoIter, - O2::IntoIter, - O3::IntoIter, - O4::IntoIter, - D0, - D1, - D2, - D3, - D4, -> +) -> ClampedZip2x1 where R0: IntoIterator, R1: IntoIterator, O0: IntoIterator, O0::Item: Clone, - O1: IntoIterator, - O1::Item: Clone, - O2: IntoIterator, - O2::Item: Clone, - O3: IntoIterator, - O3::Item: Clone, - O4: IntoIterator, - O4::Item: Clone, D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, { - ClampedZip2x5 { + ClampedZip2x1 { r0: r0.into_iter(), r1: r1.into_iter(), o0: o0.into_iter(), - o1: o1.into_iter(), - o2: o2.into_iter(), - o3: o3.into_iter(), - o4: o4.into_iter(), o0_default_fn, - o1_default_fn, - o2_default_fn, - o3_default_fn, - o4_default_fn, o0_latest_value: None, - o1_latest_value: None, - o2_latest_value: None, - o3_latest_value: None, - o4_latest_value: None, } } /// Implements a clamped zip iterator combinator with 2 required 
iterators and 2 optional /// iterators. /// -/// See [`clamped_zip_2x5`] for more information. -pub struct ClampedZip2x5 +/// See [`clamped_zip_2x1`] for more information. +pub struct ClampedZip2x1 where R0: Iterator, R1: Iterator, O0: Iterator, O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, { r0: R0, r1: R1, o0: O0, - o1: O1, - o2: O2, - o3: O3, - o4: O4, o0_default_fn: D0, - o1_default_fn: D1, - o2_default_fn: D2, - o3_default_fn: D3, - o4_default_fn: D4, o0_latest_value: Option, - o1_latest_value: Option, - o2_latest_value: Option, - o3_latest_value: Option, - o4_latest_value: Option, } -impl Iterator - for ClampedZip2x5 +impl Iterator for ClampedZip2x1 where R0: Iterator, - R1: Iterator, - O0: Iterator, - O0::Item: Clone, - O1: Iterator, - O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, + R1: Iterator, + O0: Iterator, + O0::Item: Clone, D0: Fn() -> O0::Item, - D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, { - type Item = ( - R0::Item, - R1::Item, - O0::Item, - O1::Item, - O2::Item, - O3::Item, - O4::Item, - ); + type Item = (R0::Item, R1::Item, O0::Item); #[inline] fn next(&mut self) -> Option { let r0_next = self.r0.next()?; let r1_next = self.r1.next()?; let o0_next = self.o0.next().or_else(|| self.o0_latest_value.take()); - let o1_next = self.o1.next().or_else(|| self.o1_latest_value.take()); - let o2_next = self.o2.next().or_else(|| self.o2_latest_value.take()); - let o3_next = self.o3.next().or_else(|| self.o3_latest_value.take()); - let o4_next = self.o4.next().or_else(|| self.o4_latest_value.take()); self.o0_latest_value.clone_from(&o0_next); - self.o1_latest_value.clone_from(&o1_next); - 
self.o2_latest_value.clone_from(&o2_next); - self.o3_latest_value.clone_from(&o3_next); - self.o4_latest_value.clone_from(&o4_next); Some(( r0_next, r1_next, o0_next.unwrap_or_else(|| (self.o0_default_fn)()), - o1_next.unwrap_or_else(|| (self.o1_default_fn)()), - o2_next.unwrap_or_else(|| (self.o2_default_fn)()), - o3_next.unwrap_or_else(|| (self.o3_default_fn)()), - o4_next.unwrap_or_else(|| (self.o4_default_fn)()), )) } } -/// Returns a new [`ClampedZip2x6`] iterator. +/// Returns a new [`ClampedZip2x2`] iterator. /// /// The number of elements in a clamped zip iterator corresponds to the number of elements in the /// shortest of its required iterators (`r0`, `r1`). /// -/// Optional iterators (`o0`, `o1`, `o2`, `o3`, `o4`, `o5`) will repeat their latest values if they happen to be too short +/// Optional iterators (`o0`, `o1`) will repeat their latest values if they happen to be too short /// to be zipped with the shortest of the required iterators. /// /// If an optional iterator is not only too short but actually empty, its associated default function -/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`, `o3_default_fn`, `o4_default_fn`, `o5_default_fn`) will be executed and the resulting value repeated as necessary. -pub fn clamped_zip_2x6( +/// (`o0_default_fn`, `o1_default_fn`) will be executed and the resulting value repeated as necessary. 
+pub fn clamped_zip_2x2( r0: R0, r1: R1, o0: O0, o0_default_fn: D0, o1: O1, o1_default_fn: D1, - o2: O2, - o2_default_fn: D2, - o3: O3, - o3_default_fn: D3, - o4: O4, - o4_default_fn: D4, - o5: O5, - o5_default_fn: D5, -) -> ClampedZip2x6< - R0::IntoIter, - R1::IntoIter, - O0::IntoIter, - O1::IntoIter, - O2::IntoIter, - O3::IntoIter, - O4::IntoIter, - O5::IntoIter, - D0, - D1, - D2, - D3, - D4, - D5, -> +) -> ClampedZip2x2 where R0: IntoIterator, R1: IntoIterator, @@ -2159,50 +680,26 @@ where O0::Item: Clone, O1: IntoIterator, O1::Item: Clone, - O2: IntoIterator, - O2::Item: Clone, - O3: IntoIterator, - O3::Item: Clone, - O4: IntoIterator, - O4::Item: Clone, - O5: IntoIterator, - O5::Item: Clone, D0: Fn() -> O0::Item, D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, { - ClampedZip2x6 { + ClampedZip2x2 { r0: r0.into_iter(), r1: r1.into_iter(), o0: o0.into_iter(), o1: o1.into_iter(), - o2: o2.into_iter(), - o3: o3.into_iter(), - o4: o4.into_iter(), - o5: o5.into_iter(), o0_default_fn, o1_default_fn, - o2_default_fn, - o3_default_fn, - o4_default_fn, - o5_default_fn, o0_latest_value: None, o1_latest_value: None, - o2_latest_value: None, - o3_latest_value: None, - o4_latest_value: None, - o5_latest_value: None, } } /// Implements a clamped zip iterator combinator with 2 required iterators and 2 optional /// iterators. /// -/// See [`clamped_zip_2x6`] for more information. -pub struct ClampedZip2x6 +/// See [`clamped_zip_2x2`] for more information. 
+pub struct ClampedZip2x2 where R0: Iterator, R1: Iterator, @@ -2210,46 +707,21 @@ where O0::Item: Clone, O1: Iterator, O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, D0: Fn() -> O0::Item, D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, { r0: R0, r1: R1, o0: O0, o1: O1, - o2: O2, - o3: O3, - o4: O4, - o5: O5, o0_default_fn: D0, o1_default_fn: D1, - o2_default_fn: D2, - o3_default_fn: D3, - o4_default_fn: D4, - o5_default_fn: D5, o0_latest_value: Option, o1_latest_value: Option, - o2_latest_value: Option, - o3_latest_value: Option, - o4_latest_value: Option, - o5_latest_value: Option, } -impl Iterator - for ClampedZip2x6 +impl Iterator for ClampedZip2x2 where R0: Iterator, R1: Iterator, @@ -2257,31 +729,10 @@ where O0::Item: Clone, O1: Iterator, O1::Item: Clone, - O2: Iterator, - O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, D0: Fn() -> O0::Item, D1: Fn() -> O1::Item, - D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, { - type Item = ( - R0::Item, - R1::Item, - O0::Item, - O1::Item, - O2::Item, - O3::Item, - O4::Item, - O5::Item, - ); + type Item = (R0::Item, R1::Item, O0::Item, O1::Item); #[inline] fn next(&mut self) -> Option { @@ -2289,42 +740,30 @@ where let r1_next = self.r1.next()?; let o0_next = self.o0.next().or_else(|| self.o0_latest_value.take()); let o1_next = self.o1.next().or_else(|| self.o1_latest_value.take()); - let o2_next = self.o2.next().or_else(|| self.o2_latest_value.take()); - let o3_next = self.o3.next().or_else(|| self.o3_latest_value.take()); - let o4_next = self.o4.next().or_else(|| self.o4_latest_value.take()); - let o5_next = self.o5.next().or_else(|| self.o5_latest_value.take()); self.o0_latest_value.clone_from(&o0_next); 
self.o1_latest_value.clone_from(&o1_next); - self.o2_latest_value.clone_from(&o2_next); - self.o3_latest_value.clone_from(&o3_next); - self.o4_latest_value.clone_from(&o4_next); - self.o5_latest_value.clone_from(&o5_next); Some(( r0_next, r1_next, o0_next.unwrap_or_else(|| (self.o0_default_fn)()), o1_next.unwrap_or_else(|| (self.o1_default_fn)()), - o2_next.unwrap_or_else(|| (self.o2_default_fn)()), - o3_next.unwrap_or_else(|| (self.o3_default_fn)()), - o4_next.unwrap_or_else(|| (self.o4_default_fn)()), - o5_next.unwrap_or_else(|| (self.o5_default_fn)()), )) } } -/// Returns a new [`ClampedZip2x7`] iterator. +/// Returns a new [`ClampedZip2x3`] iterator. /// /// The number of elements in a clamped zip iterator corresponds to the number of elements in the /// shortest of its required iterators (`r0`, `r1`). /// -/// Optional iterators (`o0`, `o1`, `o2`, `o3`, `o4`, `o5`, `o6`) will repeat their latest values if they happen to be too short +/// Optional iterators (`o0`, `o1`, `o2`) will repeat their latest values if they happen to be too short /// to be zipped with the shortest of the required iterators. /// /// If an optional iterator is not only too short but actually empty, its associated default function -/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`, `o3_default_fn`, `o4_default_fn`, `o5_default_fn`, `o6_default_fn`) will be executed and the resulting value repeated as necessary. -pub fn clamped_zip_2x7( +/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`) will be executed and the resulting value repeated as necessary. 
+pub fn clamped_zip_2x3( r0: R0, r1: R1, o0: O0, @@ -2333,32 +772,7 @@ pub fn clamped_zip_2x7 ClampedZip2x7< - R0::IntoIter, - R1::IntoIter, - O0::IntoIter, - O1::IntoIter, - O2::IntoIter, - O3::IntoIter, - O4::IntoIter, - O5::IntoIter, - O6::IntoIter, - D0, - D1, - D2, - D3, - D4, - D5, - D6, -> +) -> ClampedZip2x3 where R0: IntoIterator, R1: IntoIterator, @@ -2368,54 +782,30 @@ where O1::Item: Clone, O2: IntoIterator, O2::Item: Clone, - O3: IntoIterator, - O3::Item: Clone, - O4: IntoIterator, - O4::Item: Clone, - O5: IntoIterator, - O5::Item: Clone, - O6: IntoIterator, - O6::Item: Clone, D0: Fn() -> O0::Item, D1: Fn() -> O1::Item, D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, { - ClampedZip2x7 { + ClampedZip2x3 { r0: r0.into_iter(), r1: r1.into_iter(), o0: o0.into_iter(), o1: o1.into_iter(), o2: o2.into_iter(), - o3: o3.into_iter(), - o4: o4.into_iter(), - o5: o5.into_iter(), - o6: o6.into_iter(), o0_default_fn, o1_default_fn, o2_default_fn, - o3_default_fn, - o4_default_fn, - o5_default_fn, - o6_default_fn, o0_latest_value: None, o1_latest_value: None, o2_latest_value: None, - o3_latest_value: None, - o4_latest_value: None, - o5_latest_value: None, - o6_latest_value: None, } } /// Implements a clamped zip iterator combinator with 2 required iterators and 2 optional /// iterators. /// -/// See [`clamped_zip_2x7`] for more information. -pub struct ClampedZip2x7 +/// See [`clamped_zip_2x3`] for more information. 
+pub struct ClampedZip2x3 where R0: Iterator, R1: Iterator, @@ -2425,50 +815,25 @@ where O1::Item: Clone, O2: Iterator, O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - O6: Iterator, - O6::Item: Clone, D0: Fn() -> O0::Item, D1: Fn() -> O1::Item, D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, { r0: R0, r1: R1, o0: O0, o1: O1, o2: O2, - o3: O3, - o4: O4, - o5: O5, - o6: O6, o0_default_fn: D0, o1_default_fn: D1, o2_default_fn: D2, - o3_default_fn: D3, - o4_default_fn: D4, - o5_default_fn: D5, - o6_default_fn: D6, o0_latest_value: Option, o1_latest_value: Option, o2_latest_value: Option, - o3_latest_value: Option, - o4_latest_value: Option, - o5_latest_value: Option, - o6_latest_value: Option, } -impl Iterator - for ClampedZip2x7 +impl Iterator for ClampedZip2x3 where R0: Iterator, R1: Iterator, @@ -2478,33 +843,11 @@ where O1::Item: Clone, O2: Iterator, O2::Item: Clone, - O3: Iterator, - O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - O6: Iterator, - O6::Item: Clone, D0: Fn() -> O0::Item, D1: Fn() -> O1::Item, D2: Fn() -> O2::Item, - D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, { - type Item = ( - R0::Item, - R1::Item, - O0::Item, - O1::Item, - O2::Item, - O3::Item, - O4::Item, - O5::Item, - O6::Item, - ); + type Item = (R0::Item, R1::Item, O0::Item, O1::Item, O2::Item); #[inline] fn next(&mut self) -> Option { @@ -2513,18 +856,10 @@ where let o0_next = self.o0.next().or_else(|| self.o0_latest_value.take()); let o1_next = self.o1.next().or_else(|| self.o1_latest_value.take()); let o2_next = self.o2.next().or_else(|| self.o2_latest_value.take()); - let o3_next = self.o3.next().or_else(|| self.o3_latest_value.take()); - let o4_next = self.o4.next().or_else(|| self.o4_latest_value.take()); - let o5_next = self.o5.next().or_else(|| 
self.o5_latest_value.take()); - let o6_next = self.o6.next().or_else(|| self.o6_latest_value.take()); self.o0_latest_value.clone_from(&o0_next); self.o1_latest_value.clone_from(&o1_next); self.o2_latest_value.clone_from(&o2_next); - self.o3_latest_value.clone_from(&o3_next); - self.o4_latest_value.clone_from(&o4_next); - self.o5_latest_value.clone_from(&o5_next); - self.o6_latest_value.clone_from(&o6_next); Some(( r0_next, @@ -2532,25 +867,21 @@ where o0_next.unwrap_or_else(|| (self.o0_default_fn)()), o1_next.unwrap_or_else(|| (self.o1_default_fn)()), o2_next.unwrap_or_else(|| (self.o2_default_fn)()), - o3_next.unwrap_or_else(|| (self.o3_default_fn)()), - o4_next.unwrap_or_else(|| (self.o4_default_fn)()), - o5_next.unwrap_or_else(|| (self.o5_default_fn)()), - o6_next.unwrap_or_else(|| (self.o6_default_fn)()), )) } } -/// Returns a new [`ClampedZip2x8`] iterator. +/// Returns a new [`ClampedZip2x4`] iterator. /// /// The number of elements in a clamped zip iterator corresponds to the number of elements in the /// shortest of its required iterators (`r0`, `r1`). /// -/// Optional iterators (`o0`, `o1`, `o2`, `o3`, `o4`, `o5`, `o6`, `o7`) will repeat their latest values if they happen to be too short +/// Optional iterators (`o0`, `o1`, `o2`, `o3`) will repeat their latest values if they happen to be too short /// to be zipped with the shortest of the required iterators. /// /// If an optional iterator is not only too short but actually empty, its associated default function -/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`, `o3_default_fn`, `o4_default_fn`, `o5_default_fn`, `o6_default_fn`, `o7_default_fn`) will be executed and the resulting value repeated as necessary. -pub fn clamped_zip_2x8( +/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`, `o3_default_fn`) will be executed and the resulting value repeated as necessary. 
+pub fn clamped_zip_2x4( r0: R0, r1: R1, o0: O0, @@ -2561,33 +892,17 @@ pub fn clamped_zip_2x8 ClampedZip2x8< +) -> ClampedZip2x4< R0::IntoIter, R1::IntoIter, O0::IntoIter, O1::IntoIter, O2::IntoIter, O3::IntoIter, - O4::IntoIter, - O5::IntoIter, - O6::IntoIter, - O7::IntoIter, D0, D1, D2, D3, - D4, - D5, - D6, - D7, > where R0: IntoIterator, @@ -2600,58 +915,34 @@ where O2::Item: Clone, O3: IntoIterator, O3::Item: Clone, - O4: IntoIterator, - O4::Item: Clone, - O5: IntoIterator, - O5::Item: Clone, - O6: IntoIterator, - O6::Item: Clone, - O7: IntoIterator, - O7::Item: Clone, D0: Fn() -> O0::Item, D1: Fn() -> O1::Item, D2: Fn() -> O2::Item, D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, - D7: Fn() -> O7::Item, { - ClampedZip2x8 { + ClampedZip2x4 { r0: r0.into_iter(), r1: r1.into_iter(), o0: o0.into_iter(), o1: o1.into_iter(), o2: o2.into_iter(), o3: o3.into_iter(), - o4: o4.into_iter(), - o5: o5.into_iter(), - o6: o6.into_iter(), - o7: o7.into_iter(), o0_default_fn, o1_default_fn, o2_default_fn, o3_default_fn, - o4_default_fn, - o5_default_fn, - o6_default_fn, - o7_default_fn, o0_latest_value: None, o1_latest_value: None, o2_latest_value: None, o3_latest_value: None, - o4_latest_value: None, - o5_latest_value: None, - o6_latest_value: None, - o7_latest_value: None, } } /// Implements a clamped zip iterator combinator with 2 required iterators and 2 optional /// iterators. /// -/// See [`clamped_zip_2x8`] for more information. -pub struct ClampedZip2x8 +/// See [`clamped_zip_2x4`] for more information. 
+pub struct ClampedZip2x4 where R0: Iterator, R1: Iterator, @@ -2663,22 +954,10 @@ where O2::Item: Clone, O3: Iterator, O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - O6: Iterator, - O6::Item: Clone, - O7: Iterator, - O7::Item: Clone, D0: Fn() -> O0::Item, D1: Fn() -> O1::Item, D2: Fn() -> O2::Item, D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, - D7: Fn() -> O7::Item, { r0: R0, r1: R1, @@ -2686,31 +965,19 @@ where o1: O1, o2: O2, o3: O3, - o4: O4, - o5: O5, - o6: O6, - o7: O7, o0_default_fn: D0, o1_default_fn: D1, o2_default_fn: D2, o3_default_fn: D3, - o4_default_fn: D4, - o5_default_fn: D5, - o6_default_fn: D6, - o7_default_fn: D7, o0_latest_value: Option, o1_latest_value: Option, o2_latest_value: Option, o3_latest_value: Option, - o4_latest_value: Option, - o5_latest_value: Option, - o6_latest_value: Option, - o7_latest_value: Option, } -impl Iterator - for ClampedZip2x8 +impl Iterator + for ClampedZip2x4 where R0: Iterator, R1: Iterator, @@ -2722,35 +989,12 @@ where O2::Item: Clone, O3: Iterator, O3::Item: Clone, - O4: Iterator, - O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - O6: Iterator, - O6::Item: Clone, - O7: Iterator, - O7::Item: Clone, D0: Fn() -> O0::Item, D1: Fn() -> O1::Item, D2: Fn() -> O2::Item, D3: Fn() -> O3::Item, - D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, - D7: Fn() -> O7::Item, { - type Item = ( - R0::Item, - R1::Item, - O0::Item, - O1::Item, - O2::Item, - O3::Item, - O4::Item, - O5::Item, - O6::Item, - O7::Item, - ); + type Item = (R0::Item, R1::Item, O0::Item, O1::Item, O2::Item, O3::Item); #[inline] fn next(&mut self) -> Option { @@ -2760,19 +1004,11 @@ where let o1_next = self.o1.next().or_else(|| self.o1_latest_value.take()); let o2_next = self.o2.next().or_else(|| self.o2_latest_value.take()); let o3_next = self.o3.next().or_else(|| self.o3_latest_value.take()); - let o4_next = self.o4.next().or_else(|| 
self.o4_latest_value.take()); - let o5_next = self.o5.next().or_else(|| self.o5_latest_value.take()); - let o6_next = self.o6.next().or_else(|| self.o6_latest_value.take()); - let o7_next = self.o7.next().or_else(|| self.o7_latest_value.take()); self.o0_latest_value.clone_from(&o0_next); self.o1_latest_value.clone_from(&o1_next); self.o2_latest_value.clone_from(&o2_next); self.o3_latest_value.clone_from(&o3_next); - self.o4_latest_value.clone_from(&o4_next); - self.o5_latest_value.clone_from(&o5_next); - self.o6_latest_value.clone_from(&o6_next); - self.o7_latest_value.clone_from(&o7_next); Some(( r0_next, @@ -2781,46 +1017,21 @@ where o1_next.unwrap_or_else(|| (self.o1_default_fn)()), o2_next.unwrap_or_else(|| (self.o2_default_fn)()), o3_next.unwrap_or_else(|| (self.o3_default_fn)()), - o4_next.unwrap_or_else(|| (self.o4_default_fn)()), - o5_next.unwrap_or_else(|| (self.o5_default_fn)()), - o6_next.unwrap_or_else(|| (self.o6_default_fn)()), - o7_next.unwrap_or_else(|| (self.o7_default_fn)()), )) } } -/// Returns a new [`ClampedZip2x9`] iterator. +/// Returns a new [`ClampedZip2x5`] iterator. /// /// The number of elements in a clamped zip iterator corresponds to the number of elements in the /// shortest of its required iterators (`r0`, `r1`). /// -/// Optional iterators (`o0`, `o1`, `o2`, `o3`, `o4`, `o5`, `o6`, `o7`, `o8`) will repeat their latest values if they happen to be too short +/// Optional iterators (`o0`, `o1`, `o2`, `o3`, `o4`) will repeat their latest values if they happen to be too short /// to be zipped with the shortest of the required iterators. /// /// If an optional iterator is not only too short but actually empty, its associated default function -/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`, `o3_default_fn`, `o4_default_fn`, `o5_default_fn`, `o6_default_fn`, `o7_default_fn`, `o8_default_fn`) will be executed and the resulting value repeated as necessary. 
-pub fn clamped_zip_2x9< - R0, - R1, - O0, - O1, - O2, - O3, - O4, - O5, - O6, - O7, - O8, - D0, - D1, - D2, - D3, - D4, - D5, - D6, - D7, - D8, ->( +/// (`o0_default_fn`, `o1_default_fn`, `o2_default_fn`, `o3_default_fn`, `o4_default_fn`) will be executed and the resulting value repeated as necessary. +pub fn clamped_zip_2x5( r0: R0, r1: R1, o0: O0, @@ -2833,15 +1044,7 @@ pub fn clamped_zip_2x9< o3_default_fn: D3, o4: O4, o4_default_fn: D4, - o5: O5, - o5_default_fn: D5, - o6: O6, - o6_default_fn: D6, - o7: O7, - o7_default_fn: D7, - o8: O8, - o8_default_fn: D8, -) -> ClampedZip2x9< +) -> ClampedZip2x5< R0::IntoIter, R1::IntoIter, O0::IntoIter, @@ -2849,19 +1052,11 @@ pub fn clamped_zip_2x9< O2::IntoIter, O3::IntoIter, O4::IntoIter, - O5::IntoIter, - O6::IntoIter, - O7::IntoIter, - O8::IntoIter, D0, D1, D2, D3, D4, - D5, - D6, - D7, - D8, > where R0: IntoIterator, @@ -2876,25 +1071,13 @@ where O3::Item: Clone, O4: IntoIterator, O4::Item: Clone, - O5: IntoIterator, - O5::Item: Clone, - O6: IntoIterator, - O6::Item: Clone, - O7: IntoIterator, - O7::Item: Clone, - O8: IntoIterator, - O8::Item: Clone, D0: Fn() -> O0::Item, D1: Fn() -> O1::Item, D2: Fn() -> O2::Item, D3: Fn() -> O3::Item, D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, - D7: Fn() -> O7::Item, - D8: Fn() -> O8::Item, { - ClampedZip2x9 { + ClampedZip2x5 { r0: r0.into_iter(), r1: r1.into_iter(), o0: o0.into_iter(), @@ -2902,57 +1085,25 @@ where o2: o2.into_iter(), o3: o3.into_iter(), o4: o4.into_iter(), - o5: o5.into_iter(), - o6: o6.into_iter(), - o7: o7.into_iter(), - o8: o8.into_iter(), o0_default_fn, o1_default_fn, o2_default_fn, o3_default_fn, o4_default_fn, - o5_default_fn, - o6_default_fn, - o7_default_fn, - o8_default_fn, o0_latest_value: None, o1_latest_value: None, o2_latest_value: None, o3_latest_value: None, o4_latest_value: None, - o5_latest_value: None, - o6_latest_value: None, - o7_latest_value: None, - o8_latest_value: None, } } /// Implements a clamped zip iterator 
combinator with 2 required iterators and 2 optional /// iterators. /// -/// See [`clamped_zip_2x9`] for more information. -pub struct ClampedZip2x9< - R0, - R1, - O0, - O1, - O2, - O3, - O4, - O5, - O6, - O7, - O8, - D0, - D1, - D2, - D3, - D4, - D5, - D6, - D7, - D8, -> where +/// See [`clamped_zip_2x5`] for more information. +pub struct ClampedZip2x5 +where R0: Iterator, R1: Iterator, O0: Iterator, @@ -2965,23 +1116,11 @@ pub struct ClampedZip2x9< O3::Item: Clone, O4: Iterator, O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - O6: Iterator, - O6::Item: Clone, - O7: Iterator, - O7::Item: Clone, - O8: Iterator, - O8::Item: Clone, D0: Fn() -> O0::Item, D1: Fn() -> O1::Item, D2: Fn() -> O2::Item, D3: Fn() -> O3::Item, D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, - D7: Fn() -> O7::Item, - D8: Fn() -> O8::Item, { r0: R0, r1: R1, @@ -2990,54 +1129,21 @@ pub struct ClampedZip2x9< o2: O2, o3: O3, o4: O4, - o5: O5, - o6: O6, - o7: O7, - o8: O8, o0_default_fn: D0, o1_default_fn: D1, o2_default_fn: D2, o3_default_fn: D3, o4_default_fn: D4, - o5_default_fn: D5, - o6_default_fn: D6, - o7_default_fn: D7, - o8_default_fn: D8, o0_latest_value: Option, o1_latest_value: Option, o2_latest_value: Option, o3_latest_value: Option, o4_latest_value: Option, - o5_latest_value: Option, - o6_latest_value: Option, - o7_latest_value: Option, - o8_latest_value: Option, } -impl Iterator - for ClampedZip2x9< - R0, - R1, - O0, - O1, - O2, - O3, - O4, - O5, - O6, - O7, - O8, - D0, - D1, - D2, - D3, - D4, - D5, - D6, - D7, - D8, - > +impl Iterator + for ClampedZip2x5 where R0: Iterator, R1: Iterator, @@ -3051,23 +1157,11 @@ where O3::Item: Clone, O4: Iterator, O4::Item: Clone, - O5: Iterator, - O5::Item: Clone, - O6: Iterator, - O6::Item: Clone, - O7: Iterator, - O7::Item: Clone, - O8: Iterator, - O8::Item: Clone, D0: Fn() -> O0::Item, D1: Fn() -> O1::Item, D2: Fn() -> O2::Item, D3: Fn() -> O3::Item, D4: Fn() -> O4::Item, - D5: Fn() -> O5::Item, - D6: Fn() -> O6::Item, 
- D7: Fn() -> O7::Item, - D8: Fn() -> O8::Item, { type Item = ( R0::Item, @@ -3077,10 +1171,6 @@ where O2::Item, O3::Item, O4::Item, - O5::Item, - O6::Item, - O7::Item, - O8::Item, ); #[inline] @@ -3092,20 +1182,12 @@ where let o2_next = self.o2.next().or_else(|| self.o2_latest_value.take()); let o3_next = self.o3.next().or_else(|| self.o3_latest_value.take()); let o4_next = self.o4.next().or_else(|| self.o4_latest_value.take()); - let o5_next = self.o5.next().or_else(|| self.o5_latest_value.take()); - let o6_next = self.o6.next().or_else(|| self.o6_latest_value.take()); - let o7_next = self.o7.next().or_else(|| self.o7_latest_value.take()); - let o8_next = self.o8.next().or_else(|| self.o8_latest_value.take()); self.o0_latest_value.clone_from(&o0_next); self.o1_latest_value.clone_from(&o1_next); self.o2_latest_value.clone_from(&o2_next); self.o3_latest_value.clone_from(&o3_next); self.o4_latest_value.clone_from(&o4_next); - self.o5_latest_value.clone_from(&o5_next); - self.o6_latest_value.clone_from(&o6_next); - self.o7_latest_value.clone_from(&o7_next); - self.o8_latest_value.clone_from(&o8_next); Some(( r0_next, @@ -3115,10 +1197,6 @@ where o2_next.unwrap_or_else(|| (self.o2_default_fn)()), o3_next.unwrap_or_else(|| (self.o3_default_fn)()), o4_next.unwrap_or_else(|| (self.o4_default_fn)()), - o5_next.unwrap_or_else(|| (self.o5_default_fn)()), - o6_next.unwrap_or_else(|| (self.o6_default_fn)()), - o7_next.unwrap_or_else(|| (self.o7_default_fn)()), - o8_next.unwrap_or_else(|| (self.o8_default_fn)()), )) } } diff --git a/crates/store/re_query/src/latest_at.rs b/crates/store/re_query/src/latest_at.rs index beaaacd88654..bab125efd1ac 100644 --- a/crates/store/re_query/src/latest_at.rs +++ b/crates/store/re_query/src/latest_at.rs @@ -80,79 +80,84 @@ impl QueryCache { { let potential_clears = self.might_require_clearing.read(); - let mut clear_entity_path = entity_path.clone(); - loop { - if !potential_clears.contains(&clear_entity_path) { - // This entity does not 
contain any `Clear`-related data at all, there's no - // point in running actual queries. - - let Some(parent_entity_path) = clear_entity_path.parent() else { - break; - }; - clear_entity_path = parent_entity_path; - - continue; - } + // Fast path: most stores have no `Clear` components at all, e.g. the + // blueprint store. + if !potential_clears.is_empty() { + let mut clear_entity_path = entity_path.clone(); + loop { + if !potential_clears.contains(&clear_entity_path) { + // This entity does not contain any `Clear`-related data at all, there's no + // point in running actual queries. + + let Some(parent_entity_path) = clear_entity_path.parent() else { + break; + }; + clear_entity_path = parent_entity_path; + + continue; + } - let component = archetypes::Clear::descriptor_is_recursive().component; - let key = - QueryCacheKey::new(clear_entity_path.clone(), query.timeline(), component); + let component = archetypes::Clear::descriptor_is_recursive().component; + let key = + QueryCacheKey::new(clear_entity_path.clone(), query.timeline(), component); - let cache = Arc::clone( - self.latest_at_per_cache_key - .write() - .entry(key.clone()) - .or_insert_with(|| Arc::new(RwLock::new(LatestAtCache::new(key)))), - ); + let cache = Arc::clone( + self.latest_at_per_cache_key + .write() + .entry(key.clone()) + .or_insert_with(|| Arc::new(RwLock::new(LatestAtCache::new(key)))), + ); - let mut cache = cache.write(); - cache.handle_pending_invalidation(); + let mut cache = cache.write(); + cache.handle_pending_invalidation(); - let (cached, missing) = - cache.latest_at(&store, query, &clear_entity_path, component); - if cfg!(debug_assertions) && !missing.is_empty() { - debug_assert!( - cached.is_none(), - "should never receive partial latest-at results" - ); - } + let (cached, missing) = + cache.latest_at(&store, query, &clear_entity_path, component); + if cfg!(debug_assertions) && !missing.is_empty() { + debug_assert!( + cached.is_none(), + "should never receive partial 
latest-at results" + ); + } - if let Some(cached) = cached { - // TODO(andreas): Should clear also work if the component is not fully tagged? - let found_recursive_clear = cached - .component_mono::(component) - .and_then(Result::ok) - == Some(ClearIsRecursive(true.into())); - // When checking the entity itself, any kind of `Clear` component - // (i.e. recursive or not) will do. - // - // For (recursive) parents, we need to deserialize the data to make sure the - // recursive flag is set. - if (clear_entity_path == *entity_path || found_recursive_clear) - && let Some(index) = cached.index(&query.timeline()) - && compare_indices(index, max_clear_index) == std::cmp::Ordering::Greater - { - max_clear_index = index; + if let Some(cached) = cached { + // TODO(andreas): Should clear also work if the component is not fully tagged? + let found_recursive_clear = cached + .component_mono::(component) + .and_then(Result::ok) + == Some(ClearIsRecursive(true.into())); + // When checking the entity itself, any kind of `Clear` component + // (i.e. recursive or not) will do. + // + // For (recursive) parents, we need to deserialize the data to make sure the + // recursive flag is set. + if (clear_entity_path == *entity_path || found_recursive_clear) + && let Some(index) = cached.index(&query.timeline()) + && compare_indices(index, max_clear_index) + == std::cmp::Ordering::Greater + { + max_clear_index = index; + } + } else if !missing.is_empty() { + // The query engine did find a relevant chunk that contains some kind of tombstone. + // + // We don't know anything else about this tombstone, since we don't have access to its data. + // In particular, we don't know whether its index shadows the one of the data we're looking for, + // nor if it is recursive or not. + // + // Because we don't know, we must assume the worst: it's both recursive and shadowing. + // Indicate that we're missing this tombstone, and treat the data as incomplete until we know more. 
+ + max_clear_index = (TimeInt::MAX, RowId::MAX); + results.missing_virtual.extend(missing); } - } else if !missing.is_empty() { - // The query engine did find a relevant chunk that contains some kind of tombstone. - // - // We don't know anything else about this tombstone, since we don't have access to its data. - // In particular, we don't know whether its index shadows the one of the data we're looking for, - // nor if it is recursive or not. - // - // Because we don't know, we must assume the worst: it's both recursive and shadowing. - // Indicate that we're missing this tombstone, and treat the data as incomplete until we know more. - - max_clear_index = (TimeInt::MAX, RowId::MAX); - results.missing_virtual.extend(missing); - } - let Some(parent_entity_path) = clear_entity_path.parent() else { - break; - }; + let Some(parent_entity_path) = clear_entity_path.parent() else { + break; + }; - clear_entity_path = parent_entity_path; + clear_entity_path = parent_entity_path; + } } } @@ -724,7 +729,7 @@ impl LatestAtCache { if let Some(cached) = per_query_time.get(&query.at()) { // Report to the store that we used this chunk to signal that // it should stay in memory. 
- store.report_used_physical_chunk_id(cached.unit.id()); + store.report_used_physical_chunk_id(cached.unit.original_chunk_id()); return (Some(cached.unit.clone()), vec![]); } @@ -745,8 +750,8 @@ impl LatestAtCache { .chunks .into_iter() .filter_map(|chunk| { - let chunk = chunk.latest_at(query, component).into_unit()?; - chunk.index(&query.timeline()).map(|index| (index, chunk)) + let unit = chunk.latest_at(query, component)?; + unit.index(&query.timeline()).map(|index| (index, unit)) }) .max_by_key(|(index, _chunk)| *index) else { @@ -816,7 +821,8 @@ mod tests { use itertools::Itertools as _; use re_chunk::{Chunk, ChunkId, RowId}; use re_chunk_store::{ - ChunkStore, ChunkStoreConfig, ChunkStoreEvent, ChunkStoreHandle, ChunkStoreSubscriber as _, + ChunkDeletionReason, ChunkStore, ChunkStoreConfig, ChunkStoreEvent, ChunkStoreHandle, + ChunkStoreSubscriber as _, }; use re_log_encoding::RrdManifest; use re_log_types::example_components::{MyPoint, MyPoints}; @@ -907,6 +913,7 @@ mod tests { let dels = store.write().remove_chunks_shallow( vec![Arc::new(chunk1.clone()), Arc::new(chunk3.clone())], None, + ChunkDeletionReason::ExplicitDrop, ); cache.on_events( &dels @@ -934,9 +941,11 @@ mod tests { assert_eq!(expected, results); } - let dels = store - .write() - .remove_chunks_shallow(vec![Arc::new(chunk2.clone())], None); + let dels = store.write().remove_chunks_shallow( + vec![Arc::new(chunk2.clone())], + None, + ChunkDeletionReason::ExplicitDrop, + ); cache.on_events( &dels .into_iter() @@ -1099,9 +1108,11 @@ mod tests { assert_eq!(expected, results); } - let dels = store - .write() - .remove_chunks_shallow(vec![Arc::new(tombstone.clone())], None); + let dels = store.write().remove_chunks_shallow( + vec![Arc::new(tombstone.clone())], + None, + ChunkDeletionReason::ExplicitDrop, + ); cache.on_events( &dels .into_iter() @@ -1129,9 +1140,11 @@ mod tests { assert_eq!(expected, results); } - let dels = store - .write() - .remove_chunks_deep(vec![Arc::new(tombstone.clone())], 
None); + let dels = store.write().remove_chunks_deep( + vec![Arc::new(tombstone.clone())], + None, + ChunkDeletionReason::GarbageCollection, + ); cache.on_events( &dels .into_iter() @@ -1205,7 +1218,7 @@ mod tests { let mut cache = QueryCache::new(store.clone()); // The store is now aware that there is a virtual tombstone pending somewhere, and so should be the cache. - cache.on_events(&[store.write().insert_rrd_manifest(rrd_manifest).unwrap()]); + cache.on_events(&store.write().insert_rrd_manifest(rrd_manifest)); // Load the physical data into the store, but not the tombstone. cache.on_events( diff --git a/crates/store/re_query/src/range.rs b/crates/store/re_query/src/range.rs index 282ed2c6e810..de473fc84201 100644 --- a/crates/store/re_query/src/range.rs +++ b/crates/store/re_query/src/range.rs @@ -19,6 +19,9 @@ impl QueryCache { /// See [`RangeResults`] for more information about how to handle the results. /// /// This is a cached API -- data will be lazily cached upon access. + /// The returned chunks preserve the original [`ChunkId`]s from the source [`ChunkStore`], + /// even though [`re_chunk::Chunk::range`] internally creates new IDs when slicing. + /// This is important so that `VideoStreamCache` can track which physical chunks are in use. pub fn range( &self, query: &RangeQuery, @@ -98,6 +101,9 @@ pub struct RangeResults { pub missing_virtual: Vec, /// Results for each individual component. + /// + /// The chunks preserve the original [`ChunkId`]s from the [`ChunkStore`], + /// so that `VideoStreamCache` can track which physical chunks are in use. pub components: IntMap>, } @@ -292,6 +298,10 @@ impl RangeCache { /// them, load them, and then try the query again. /// /// Returns `(cached_chunks, missing_chunk_ids)`. + /// + /// The returned chunks preserve the original [`ChunkId`]s from the [`ChunkStore`], + /// even though [`Chunk::range`] internally creates new IDs when slicing. 
+ /// This is done by calling [`Chunk::with_id`] to restore the original ID after slicing. fn range( &mut self, store: &ChunkStore, @@ -318,6 +328,9 @@ impl RangeCache { // Therefore, we do not even check for partial results here. for raw_chunk in &results.chunks { self.chunks.entry(raw_chunk.id()).or_insert_with(|| { + // Preserve the original chunk ID so the store can track which physical chunks are in use. + let original_chunk_id = raw_chunk.id(); + // Densify the cached chunk according to the cache key's component, which // will speed up future arrow operations on this chunk. let (chunk, densified) = raw_chunk.densified(component); @@ -325,7 +338,9 @@ impl RangeCache { // Pre-sort the cached chunk according to the cache key's timeline. // // TODO(#7008): avoid unnecessary sorting on the unhappy path - let chunk = chunk.sorted_by_timeline_if_unsorted(&self.cache_key.timeline_name); + let chunk = chunk + .sorted_by_timeline_if_unsorted(&self.cache_key.timeline_name) + .with_id(original_chunk_id); let reallocated = densified || !raw_chunk.is_timeline_sorted(&self.cache_key.timeline_name); @@ -355,7 +370,8 @@ impl RangeCache { let chunk = &cached_sorted_chunk.chunk; - chunk.range(query, component) + let original_chunk_id = chunk.id(); + chunk.range(query, component).with_id(original_chunk_id) }) .filter(|chunk| !chunk.is_empty()) .collect(); @@ -382,7 +398,7 @@ mod tests { use std::sync::Arc; use re_chunk::{Chunk, ChunkId, RowId}; - use re_chunk_store::{ChunkStore, ChunkStoreConfig, ChunkStoreHandle}; + use re_chunk_store::{ChunkDeletionReason, ChunkStore, ChunkStoreConfig, ChunkStoreHandle}; use re_log_types::example_components::{MyPoint, MyPoints}; use re_log_types::external::re_tuid::Tuid; use re_log_types::{EntityPath, TimePoint, Timeline}; @@ -463,6 +479,7 @@ mod tests { store.write().remove_chunks_shallow( vec![Arc::new(chunk1.clone()), Arc::new(chunk3.clone())], None, + ChunkDeletionReason::ExplicitDrop, ); // We've removed the first and last chunks from the 
store: results should now be partial. @@ -480,9 +497,11 @@ mod tests { // Reminder: the store events are irrelevant here, since the range cache still always unconditionally // performs the underlying query regardless (only the sorting/slicing is cached). - store - .write() - .remove_chunks_shallow(vec![Arc::new(chunk2.clone())], None); + store.write().remove_chunks_shallow( + vec![Arc::new(chunk2.clone())], + None, + ChunkDeletionReason::ExplicitDrop, + ); // Now we've removed absolutely everything: we should only get partial results. { @@ -527,6 +546,60 @@ mod tests { } } + /// The cache must preserve original `ChunkId`s so that `VideoStreamCache` can track physical chunk usage. + #[test] + fn range_preserves_chunk_ids() { + let store = ChunkStore::new( + re_log_types::StoreId::random(re_log_types::StoreKind::Recording, "test_app"), + ChunkStoreConfig::ALL_DISABLED, + ); + let store = ChunkStoreHandle::new(store); + + let entity_path: EntityPath = "some_entity".into(); + let timeline_frame = Timeline::new_sequence("frame"); + let component = MyPoints::descriptor_points().component; + + let mut next_chunk_id = next_chunk_id_generator(0x42); + + let chunk1 = create_chunk_with_point( + next_chunk_id(), + entity_path.clone(), + TimePoint::from_iter([(timeline_frame, 1)]), + MyPoint::new(1.0, 1.0), + ); + let chunk2 = create_chunk_with_point( + next_chunk_id(), + entity_path.clone(), + TimePoint::from_iter([(timeline_frame, 2)]), + MyPoint::new(2.0, 2.0), + ); + + let chunk1_id = chunk1.id(); + let chunk2_id = chunk2.id(); + + store.write().insert_chunk(&Arc::new(chunk1)).unwrap(); + store.write().insert_chunk(&Arc::new(chunk2)).unwrap(); + + let cache = QueryCache::new(store); + + let query = RangeQuery::new(*timeline_frame.name(), AbsoluteTimeRange::new(0, 3)); + let results = cache.range(&query, &entity_path, [component]); + + let result_chunks = results.get(component).expect("should have results"); + assert_eq!(result_chunks.len(), 2); + + let result_ids: 
std::collections::BTreeSet<_> = + result_chunks.iter().map(|c| c.id()).collect(); + assert!( + result_ids.contains(&chunk1_id), + "result should preserve chunk1's original ID" + ); + assert!( + result_ids.contains(&chunk2_id), + "result should preserve chunk2's original ID" + ); + } + fn next_chunk_id_generator(prefix: u64) -> impl FnMut() -> re_chunk::ChunkId { let mut chunk_id = re_chunk::ChunkId::from_tuid(Tuid::from_nanos_and_inc(prefix, 0)); move || { diff --git a/crates/store/re_query/src/range_zip/generated.rs b/crates/store/re_query/src/range_zip/generated.rs index 178d9ddbd0b5..98770c038be0 100644 --- a/crates/store/re_query/src/range_zip/generated.rs +++ b/crates/store/re_query/src/range_zip/generated.rs @@ -1,4 +1,4 @@ -// This file was generated using `cargo r -p re_query --all-features --bin range_zip`. +// This file was generated using `cargo r -p re_query --all-features --bin range_zip > crates/store/re_query/src/range_zip/generated.rs && cargo fmt`. // DO NOT EDIT. // --- @@ -1628,6 +1628,379 @@ where } } +/// Returns a new [`RangeZip1x10`] iterator. +/// +/// The number of elements in a range zip iterator corresponds to the number of elements in the +/// shortest of its required iterators (`r0`). +/// +/// Each call to `next` is guaranteed to yield the next value for each required iterator, +/// as well as the most recent index amongst all of them. +/// +/// Optional iterators accumulate their state and yield their most recent value (if any), +/// each time the required iterators fire. 
+pub fn range_zip_1x10< + Idx, + IR0, + R0, + IO0, + O0, + IO1, + O1, + IO2, + O2, + IO3, + O3, + IO4, + O4, + IO5, + O5, + IO6, + O6, + IO7, + O7, + IO8, + O8, + IO9, + O9, +>( + r0: IR0, + o0: IO0, + o1: IO1, + o2: IO2, + o3: IO3, + o4: IO4, + o5: IO5, + o6: IO6, + o7: IO7, + o8: IO8, + o9: IO9, +) -> RangeZip1x10< + Idx, + IR0::IntoIter, + R0, + IO0::IntoIter, + O0, + IO1::IntoIter, + O1, + IO2::IntoIter, + O2, + IO3::IntoIter, + O3, + IO4::IntoIter, + O4, + IO5::IntoIter, + O5, + IO6::IntoIter, + O6, + IO7::IntoIter, + O7, + IO8::IntoIter, + O8, + IO9::IntoIter, + O9, +> +where + Idx: std::cmp::Ord, + IR0: IntoIterator, + IO0: IntoIterator, + IO1: IntoIterator, + IO2: IntoIterator, + IO3: IntoIterator, + IO4: IntoIterator, + IO5: IntoIterator, + IO6: IntoIterator, + IO7: IntoIterator, + IO8: IntoIterator, + IO9: IntoIterator, +{ + RangeZip1x10 { + r0: r0.into_iter(), + o0: o0.into_iter().peekable(), + o1: o1.into_iter().peekable(), + o2: o2.into_iter().peekable(), + o3: o3.into_iter().peekable(), + o4: o4.into_iter().peekable(), + o5: o5.into_iter().peekable(), + o6: o6.into_iter().peekable(), + o7: o7.into_iter().peekable(), + o8: o8.into_iter().peekable(), + o9: o9.into_iter().peekable(), + + o0_data_latest: None, + o1_data_latest: None, + o2_data_latest: None, + o3_data_latest: None, + o4_data_latest: None, + o5_data_latest: None, + o6_data_latest: None, + o7_data_latest: None, + o8_data_latest: None, + o9_data_latest: None, + } +} + +/// Implements a range zip iterator combinator with 2 required iterators and 2 optional +/// iterators. +/// +/// See [`range_zip_1x10`] for more information. 
+pub struct RangeZip1x10< + Idx, + IR0, + R0, + IO0, + O0, + IO1, + O1, + IO2, + O2, + IO3, + O3, + IO4, + O4, + IO5, + O5, + IO6, + O6, + IO7, + O7, + IO8, + O8, + IO9, + O9, +> where + Idx: std::cmp::Ord, + IR0: Iterator, + IO0: Iterator, + IO1: Iterator, + IO2: Iterator, + IO3: Iterator, + IO4: Iterator, + IO5: Iterator, + IO6: Iterator, + IO7: Iterator, + IO8: Iterator, + IO9: Iterator, +{ + r0: IR0, + o0: Peekable, + o1: Peekable, + o2: Peekable, + o3: Peekable, + o4: Peekable, + o5: Peekable, + o6: Peekable, + o7: Peekable, + o8: Peekable, + o9: Peekable, + + o0_data_latest: Option, + o1_data_latest: Option, + o2_data_latest: Option, + o3_data_latest: Option, + o4_data_latest: Option, + o5_data_latest: Option, + o6_data_latest: Option, + o7_data_latest: Option, + o8_data_latest: Option, + o9_data_latest: Option, +} + +impl< + Idx, + IR0, + R0, + IO0, + O0, + IO1, + O1, + IO2, + O2, + IO3, + O3, + IO4, + O4, + IO5, + O5, + IO6, + O6, + IO7, + O7, + IO8, + O8, + IO9, + O9, +> Iterator + for RangeZip1x10< + Idx, + IR0, + R0, + IO0, + O0, + IO1, + O1, + IO2, + O2, + IO3, + O3, + IO4, + O4, + IO5, + O5, + IO6, + O6, + IO7, + O7, + IO8, + O8, + IO9, + O9, + > +where + Idx: std::cmp::Ord, + IR0: Iterator, + IO0: Iterator, + IO1: Iterator, + IO2: Iterator, + IO3: Iterator, + IO4: Iterator, + IO5: Iterator, + IO6: Iterator, + IO7: Iterator, + IO8: Iterator, + IO9: Iterator, + O0: Clone, + O1: Clone, + O2: Clone, + O3: Clone, + O4: Clone, + O5: Clone, + O6: Clone, + O7: Clone, + O8: Clone, + O9: Clone, +{ + type Item = ( + Idx, + R0, + Option, + Option, + Option, + Option, + Option, + Option, + Option, + Option, + Option, + Option, + ); + + #[inline] + fn next(&mut self) -> Option { + let Self { + r0, + o0, + o1, + o2, + o3, + o4, + o5, + o6, + o7, + o8, + o9, + o0_data_latest, + o1_data_latest, + o2_data_latest, + o3_data_latest, + o4_data_latest, + o5_data_latest, + o6_data_latest, + o7_data_latest, + o8_data_latest, + o9_data_latest, + } = self; + + let (r0_index, 
r0_data) = r0.next()?; + + let max_index = [r0_index].into_iter().max()?; + + let mut o0_data = None; + while let Some((_, data)) = o0.next_if(|(index, _)| index <= &max_index) { + o0_data = Some(data); + } + let o0_data = o0_data.or_else(|| o0_data_latest.take()); + o0_data_latest.clone_from(&o0_data); + + let mut o1_data = None; + while let Some((_, data)) = o1.next_if(|(index, _)| index <= &max_index) { + o1_data = Some(data); + } + let o1_data = o1_data.or_else(|| o1_data_latest.take()); + o1_data_latest.clone_from(&o1_data); + + let mut o2_data = None; + while let Some((_, data)) = o2.next_if(|(index, _)| index <= &max_index) { + o2_data = Some(data); + } + let o2_data = o2_data.or_else(|| o2_data_latest.take()); + o2_data_latest.clone_from(&o2_data); + + let mut o3_data = None; + while let Some((_, data)) = o3.next_if(|(index, _)| index <= &max_index) { + o3_data = Some(data); + } + let o3_data = o3_data.or_else(|| o3_data_latest.take()); + o3_data_latest.clone_from(&o3_data); + + let mut o4_data = None; + while let Some((_, data)) = o4.next_if(|(index, _)| index <= &max_index) { + o4_data = Some(data); + } + let o4_data = o4_data.or_else(|| o4_data_latest.take()); + o4_data_latest.clone_from(&o4_data); + + let mut o5_data = None; + while let Some((_, data)) = o5.next_if(|(index, _)| index <= &max_index) { + o5_data = Some(data); + } + let o5_data = o5_data.or_else(|| o5_data_latest.take()); + o5_data_latest.clone_from(&o5_data); + + let mut o6_data = None; + while let Some((_, data)) = o6.next_if(|(index, _)| index <= &max_index) { + o6_data = Some(data); + } + let o6_data = o6_data.or_else(|| o6_data_latest.take()); + o6_data_latest.clone_from(&o6_data); + + let mut o7_data = None; + while let Some((_, data)) = o7.next_if(|(index, _)| index <= &max_index) { + o7_data = Some(data); + } + let o7_data = o7_data.or_else(|| o7_data_latest.take()); + o7_data_latest.clone_from(&o7_data); + + let mut o8_data = None; + while let Some((_, data)) = o8.next_if(|(index, 
_)| index <= &max_index) { + o8_data = Some(data); + } + let o8_data = o8_data.or_else(|| o8_data_latest.take()); + o8_data_latest.clone_from(&o8_data); + + let mut o9_data = None; + while let Some((_, data)) = o9.next_if(|(index, _)| index <= &max_index) { + o9_data = Some(data); + } + let o9_data = o9_data.or_else(|| o9_data_latest.take()); + o9_data_latest.clone_from(&o9_data); + + Some(( + max_index, r0_data, o0_data, o1_data, o2_data, o3_data, o4_data, o5_data, o6_data, + o7_data, o8_data, o9_data, + )) + } +} + /// Returns a new [`RangeZip2x1`] iterator. /// /// The number of elements in a range zip iterator corresponds to the number of elements in the @@ -2144,7 +2517,194 @@ where IO3: IntoIterator, IO4: IntoIterator, { - RangeZip2x5 { + RangeZip2x5 { + r0: r0.into_iter(), + r1: r1.into_iter(), + o0: o0.into_iter().peekable(), + o1: o1.into_iter().peekable(), + o2: o2.into_iter().peekable(), + o3: o3.into_iter().peekable(), + o4: o4.into_iter().peekable(), + + o0_data_latest: None, + o1_data_latest: None, + o2_data_latest: None, + o3_data_latest: None, + o4_data_latest: None, + } +} + +/// Implements a range zip iterator combinator with 2 required iterators and 2 optional +/// iterators. +/// +/// See [`range_zip_2x5`] for more information. 
+pub struct RangeZip2x5 +where + Idx: std::cmp::Ord, + IR0: Iterator, + IR1: Iterator, + IO0: Iterator, + IO1: Iterator, + IO2: Iterator, + IO3: Iterator, + IO4: Iterator, +{ + r0: IR0, + r1: IR1, + o0: Peekable, + o1: Peekable, + o2: Peekable, + o3: Peekable, + o4: Peekable, + + o0_data_latest: Option, + o1_data_latest: Option, + o2_data_latest: Option, + o3_data_latest: Option, + o4_data_latest: Option, +} + +impl Iterator + for RangeZip2x5 +where + Idx: std::cmp::Ord, + IR0: Iterator, + IR1: Iterator, + IO0: Iterator, + IO1: Iterator, + IO2: Iterator, + IO3: Iterator, + IO4: Iterator, + O0: Clone, + O1: Clone, + O2: Clone, + O3: Clone, + O4: Clone, +{ + type Item = ( + Idx, + R0, + R1, + Option, + Option, + Option, + Option, + Option, + ); + + #[inline] + fn next(&mut self) -> Option { + let Self { + r0, + r1, + o0, + o1, + o2, + o3, + o4, + o0_data_latest, + o1_data_latest, + o2_data_latest, + o3_data_latest, + o4_data_latest, + } = self; + + let (r0_index, r0_data) = r0.next()?; + let (r1_index, r1_data) = r1.next()?; + + let max_index = [r0_index, r1_index].into_iter().max()?; + + let mut o0_data = None; + while let Some((_, data)) = o0.next_if(|(index, _)| index <= &max_index) { + o0_data = Some(data); + } + let o0_data = o0_data.or_else(|| o0_data_latest.take()); + o0_data_latest.clone_from(&o0_data); + + let mut o1_data = None; + while let Some((_, data)) = o1.next_if(|(index, _)| index <= &max_index) { + o1_data = Some(data); + } + let o1_data = o1_data.or_else(|| o1_data_latest.take()); + o1_data_latest.clone_from(&o1_data); + + let mut o2_data = None; + while let Some((_, data)) = o2.next_if(|(index, _)| index <= &max_index) { + o2_data = Some(data); + } + let o2_data = o2_data.or_else(|| o2_data_latest.take()); + o2_data_latest.clone_from(&o2_data); + + let mut o3_data = None; + while let Some((_, data)) = o3.next_if(|(index, _)| index <= &max_index) { + o3_data = Some(data); + } + let o3_data = o3_data.or_else(|| o3_data_latest.take()); + 
o3_data_latest.clone_from(&o3_data); + + let mut o4_data = None; + while let Some((_, data)) = o4.next_if(|(index, _)| index <= &max_index) { + o4_data = Some(data); + } + let o4_data = o4_data.or_else(|| o4_data_latest.take()); + o4_data_latest.clone_from(&o4_data); + + Some(( + max_index, r0_data, r1_data, o0_data, o1_data, o2_data, o3_data, o4_data, + )) + } +} + +/// Returns a new [`RangeZip2x6`] iterator. +/// +/// The number of elements in a range zip iterator corresponds to the number of elements in the +/// shortest of its required iterators (`r0`, `r1`). +/// +/// Each call to `next` is guaranteed to yield the next value for each required iterator, +/// as well as the most recent index amongst all of them. +/// +/// Optional iterators accumulate their state and yield their most recent value (if any), +/// each time the required iterators fire. +pub fn range_zip_2x6( + r0: IR0, + r1: IR1, + o0: IO0, + o1: IO1, + o2: IO2, + o3: IO3, + o4: IO4, + o5: IO5, +) -> RangeZip2x6< + Idx, + IR0::IntoIter, + R0, + IR1::IntoIter, + R1, + IO0::IntoIter, + O0, + IO1::IntoIter, + O1, + IO2::IntoIter, + O2, + IO3::IntoIter, + O3, + IO4::IntoIter, + O4, + IO5::IntoIter, + O5, +> +where + Idx: std::cmp::Ord, + IR0: IntoIterator, + IR1: IntoIterator, + IO0: IntoIterator, + IO1: IntoIterator, + IO2: IntoIterator, + IO3: IntoIterator, + IO4: IntoIterator, + IO5: IntoIterator, +{ + RangeZip2x6 { r0: r0.into_iter(), r1: r1.into_iter(), o0: o0.into_iter().peekable(), @@ -2152,20 +2712,22 @@ where o2: o2.into_iter().peekable(), o3: o3.into_iter().peekable(), o4: o4.into_iter().peekable(), + o5: o5.into_iter().peekable(), o0_data_latest: None, o1_data_latest: None, o2_data_latest: None, o3_data_latest: None, o4_data_latest: None, + o5_data_latest: None, } } /// Implements a range zip iterator combinator with 2 required iterators and 2 optional /// iterators. /// -/// See [`range_zip_2x5`] for more information. -pub struct RangeZip2x5 +/// See [`range_zip_2x6`] for more information. 
+pub struct RangeZip2x6 where Idx: std::cmp::Ord, IR0: Iterator, @@ -2175,6 +2737,7 @@ where IO2: Iterator, IO3: Iterator, IO4: Iterator, + IO5: Iterator, { r0: IR0, r1: IR1, @@ -2183,16 +2746,18 @@ where o2: Peekable, o3: Peekable, o4: Peekable, + o5: Peekable, o0_data_latest: Option, o1_data_latest: Option, o2_data_latest: Option, o3_data_latest: Option, o4_data_latest: Option, + o5_data_latest: Option, } -impl Iterator - for RangeZip2x5 +impl Iterator + for RangeZip2x6 where Idx: std::cmp::Ord, IR0: Iterator, @@ -2202,11 +2767,13 @@ where IO2: Iterator, IO3: Iterator, IO4: Iterator, + IO5: Iterator, O0: Clone, O1: Clone, O2: Clone, O3: Clone, O4: Clone, + O5: Clone, { type Item = ( Idx, @@ -2217,6 +2784,7 @@ where Option, Option, Option, + Option, ); #[inline] @@ -2229,11 +2797,13 @@ where o2, o3, o4, + o5, o0_data_latest, o1_data_latest, o2_data_latest, o3_data_latest, o4_data_latest, + o5_data_latest, } = self; let (r0_index, r0_data) = r0.next()?; @@ -2276,13 +2846,20 @@ where let o4_data = o4_data.or_else(|| o4_data_latest.take()); o4_data_latest.clone_from(&o4_data); + let mut o5_data = None; + while let Some((_, data)) = o5.next_if(|(index, _)| index <= &max_index) { + o5_data = Some(data); + } + let o5_data = o5_data.or_else(|| o5_data_latest.take()); + o5_data_latest.clone_from(&o5_data); + Some(( - max_index, r0_data, r1_data, o0_data, o1_data, o2_data, o3_data, o4_data, + max_index, r0_data, r1_data, o0_data, o1_data, o2_data, o3_data, o4_data, o5_data, )) } } -/// Returns a new [`RangeZip2x6`] iterator. +/// Returns a new [`RangeZip2x7`] iterator. /// /// The number of elements in a range zip iterator corresponds to the number of elements in the /// shortest of its required iterators (`r0`, `r1`). @@ -2292,7 +2869,27 @@ where /// /// Optional iterators accumulate their state and yield their most recent value (if any), /// each time the required iterators fire. 
-pub fn range_zip_2x6( +pub fn range_zip_2x7< + Idx, + IR0, + R0, + IR1, + R1, + IO0, + O0, + IO1, + O1, + IO2, + O2, + IO3, + O3, + IO4, + O4, + IO5, + O5, + IO6, + O6, +>( r0: IR0, r1: IR1, o0: IO0, @@ -2301,7 +2898,8 @@ pub fn range_zip_2x6 RangeZip2x6< + o6: IO6, +) -> RangeZip2x7< Idx, IR0::IntoIter, R0, @@ -2319,6 +2917,8 @@ pub fn range_zip_2x6 where Idx: std::cmp::Ord, @@ -2330,8 +2930,9 @@ where IO3: IntoIterator, IO4: IntoIterator, IO5: IntoIterator, + IO6: IntoIterator, { - RangeZip2x6 { + RangeZip2x7 { r0: r0.into_iter(), r1: r1.into_iter(), o0: o0.into_iter().peekable(), @@ -2340,6 +2941,7 @@ where o3: o3.into_iter().peekable(), o4: o4.into_iter().peekable(), o5: o5.into_iter().peekable(), + o6: o6.into_iter().peekable(), o0_data_latest: None, o1_data_latest: None, @@ -2347,15 +2949,35 @@ where o3_data_latest: None, o4_data_latest: None, o5_data_latest: None, + o6_data_latest: None, } } /// Implements a range zip iterator combinator with 2 required iterators and 2 optional /// iterators. /// -/// See [`range_zip_2x6`] for more information. -pub struct RangeZip2x6 -where +/// See [`range_zip_2x7`] for more information. 
+pub struct RangeZip2x7< + Idx, + IR0, + R0, + IR1, + R1, + IO0, + O0, + IO1, + O1, + IO2, + O2, + IO3, + O3, + IO4, + O4, + IO5, + O5, + IO6, + O6, +> where Idx: std::cmp::Ord, IR0: Iterator, IR1: Iterator, @@ -2365,6 +2987,7 @@ where IO3: Iterator, IO4: Iterator, IO5: Iterator, + IO6: Iterator, { r0: IR0, r1: IR1, @@ -2374,6 +2997,7 @@ where o3: Peekable, o4: Peekable, o5: Peekable, + o6: Peekable, o0_data_latest: Option, o1_data_latest: Option, @@ -2381,10 +3005,31 @@ where o3_data_latest: Option, o4_data_latest: Option, o5_data_latest: Option, + o6_data_latest: Option, } -impl Iterator - for RangeZip2x6 +impl Iterator + for RangeZip2x7< + Idx, + IR0, + R0, + IR1, + R1, + IO0, + O0, + IO1, + O1, + IO2, + O2, + IO3, + O3, + IO4, + O4, + IO5, + O5, + IO6, + O6, + > where Idx: std::cmp::Ord, IR0: Iterator, @@ -2395,12 +3040,14 @@ where IO3: Iterator, IO4: Iterator, IO5: Iterator, + IO6: Iterator, O0: Clone, O1: Clone, O2: Clone, O3: Clone, O4: Clone, O5: Clone, + O6: Clone, { type Item = ( Idx, @@ -2412,6 +3059,7 @@ where Option, Option, Option, + Option, ); #[inline] @@ -2425,12 +3073,14 @@ where o3, o4, o5, + o6, o0_data_latest, o1_data_latest, o2_data_latest, o3_data_latest, o4_data_latest, o5_data_latest, + o6_data_latest, } = self; let (r0_index, r0_data) = r0.next()?; @@ -2480,13 +3130,21 @@ where let o5_data = o5_data.or_else(|| o5_data_latest.take()); o5_data_latest.clone_from(&o5_data); + let mut o6_data = None; + while let Some((_, data)) = o6.next_if(|(index, _)| index <= &max_index) { + o6_data = Some(data); + } + let o6_data = o6_data.or_else(|| o6_data_latest.take()); + o6_data_latest.clone_from(&o6_data); + Some(( max_index, r0_data, r1_data, o0_data, o1_data, o2_data, o3_data, o4_data, o5_data, + o6_data, )) } } -/// Returns a new [`RangeZip2x7`] iterator. +/// Returns a new [`RangeZip2x8`] iterator. 
/// /// The number of elements in a range zip iterator corresponds to the number of elements in the /// shortest of its required iterators (`r0`, `r1`). @@ -2496,7 +3154,7 @@ where /// /// Optional iterators accumulate their state and yield their most recent value (if any), /// each time the required iterators fire. -pub fn range_zip_2x7< +pub fn range_zip_2x8< Idx, IR0, R0, @@ -2516,6 +3174,8 @@ pub fn range_zip_2x7< O5, IO6, O6, + IO7, + O7, >( r0: IR0, r1: IR1, @@ -2526,7 +3186,8 @@ pub fn range_zip_2x7< o4: IO4, o5: IO5, o6: IO6, -) -> RangeZip2x7< + o7: IO7, +) -> RangeZip2x8< Idx, IR0::IntoIter, R0, @@ -2546,6 +3207,8 @@ pub fn range_zip_2x7< O5, IO6::IntoIter, O6, + IO7::IntoIter, + O7, > where Idx: std::cmp::Ord, @@ -2558,8 +3221,9 @@ where IO4: IntoIterator, IO5: IntoIterator, IO6: IntoIterator, + IO7: IntoIterator, { - RangeZip2x7 { + RangeZip2x8 { r0: r0.into_iter(), r1: r1.into_iter(), o0: o0.into_iter().peekable(), @@ -2569,6 +3233,7 @@ where o4: o4.into_iter().peekable(), o5: o5.into_iter().peekable(), o6: o6.into_iter().peekable(), + o7: o7.into_iter().peekable(), o0_data_latest: None, o1_data_latest: None, @@ -2577,14 +3242,15 @@ where o4_data_latest: None, o5_data_latest: None, o6_data_latest: None, + o7_data_latest: None, } } /// Implements a range zip iterator combinator with 2 required iterators and 2 optional /// iterators. /// -/// See [`range_zip_2x7`] for more information. -pub struct RangeZip2x7< +/// See [`range_zip_2x8`] for more information. 
+pub struct RangeZip2x8< Idx, IR0, R0, @@ -2604,6 +3270,8 @@ pub struct RangeZip2x7< O5, IO6, O6, + IO7, + O7, > where Idx: std::cmp::Ord, IR0: Iterator, @@ -2615,6 +3283,7 @@ pub struct RangeZip2x7< IO4: Iterator, IO5: Iterator, IO6: Iterator, + IO7: Iterator, { r0: IR0, r1: IR1, @@ -2625,6 +3294,7 @@ pub struct RangeZip2x7< o4: Peekable, o5: Peekable, o6: Peekable, + o7: Peekable, o0_data_latest: Option, o1_data_latest: Option, @@ -2633,10 +3303,12 @@ pub struct RangeZip2x7< o4_data_latest: Option, o5_data_latest: Option, o6_data_latest: Option, + o7_data_latest: Option, } -impl Iterator - for RangeZip2x7< +impl + Iterator + for RangeZip2x8< Idx, IR0, R0, @@ -2656,6 +3328,8 @@ impl where Idx: std::cmp::Ord, @@ -2668,6 +3342,7 @@ where IO4: Iterator, IO5: Iterator, IO6: Iterator, + IO7: Iterator, O0: Clone, O1: Clone, O2: Clone, @@ -2675,6 +3350,7 @@ where O4: Clone, O5: Clone, O6: Clone, + O7: Clone, { type Item = ( Idx, @@ -2687,6 +3363,7 @@ where Option, Option, Option, + Option, ); #[inline] @@ -2701,6 +3378,7 @@ where o4, o5, o6, + o7, o0_data_latest, o1_data_latest, o2_data_latest, @@ -2708,6 +3386,7 @@ where o4_data_latest, o5_data_latest, o6_data_latest, + o7_data_latest, } = self; let (r0_index, r0_data) = r0.next()?; @@ -2764,14 +3443,21 @@ where let o6_data = o6_data.or_else(|| o6_data_latest.take()); o6_data_latest.clone_from(&o6_data); + let mut o7_data = None; + while let Some((_, data)) = o7.next_if(|(index, _)| index <= &max_index) { + o7_data = Some(data); + } + let o7_data = o7_data.or_else(|| o7_data_latest.take()); + o7_data_latest.clone_from(&o7_data); + Some(( max_index, r0_data, r1_data, o0_data, o1_data, o2_data, o3_data, o4_data, o5_data, - o6_data, + o6_data, o7_data, )) } } -/// Returns a new [`RangeZip2x8`] iterator. +/// Returns a new [`RangeZip2x9`] iterator. /// /// The number of elements in a range zip iterator corresponds to the number of elements in the /// shortest of its required iterators (`r0`, `r1`). 
@@ -2781,7 +3467,7 @@ where /// /// Optional iterators accumulate their state and yield their most recent value (if any), /// each time the required iterators fire. -pub fn range_zip_2x8< +pub fn range_zip_2x9< Idx, IR0, R0, @@ -2803,6 +3489,8 @@ pub fn range_zip_2x8< O6, IO7, O7, + IO8, + O8, >( r0: IR0, r1: IR1, @@ -2814,7 +3502,8 @@ pub fn range_zip_2x8< o5: IO5, o6: IO6, o7: IO7, -) -> RangeZip2x8< + o8: IO8, +) -> RangeZip2x9< Idx, IR0::IntoIter, R0, @@ -2836,6 +3525,8 @@ pub fn range_zip_2x8< O6, IO7::IntoIter, O7, + IO8::IntoIter, + O8, > where Idx: std::cmp::Ord, @@ -2849,8 +3540,9 @@ where IO5: IntoIterator, IO6: IntoIterator, IO7: IntoIterator, + IO8: IntoIterator, { - RangeZip2x8 { + RangeZip2x9 { r0: r0.into_iter(), r1: r1.into_iter(), o0: o0.into_iter().peekable(), @@ -2861,6 +3553,7 @@ where o5: o5.into_iter().peekable(), o6: o6.into_iter().peekable(), o7: o7.into_iter().peekable(), + o8: o8.into_iter().peekable(), o0_data_latest: None, o1_data_latest: None, @@ -2870,14 +3563,15 @@ where o5_data_latest: None, o6_data_latest: None, o7_data_latest: None, + o8_data_latest: None, } } /// Implements a range zip iterator combinator with 2 required iterators and 2 optional /// iterators. /// -/// See [`range_zip_2x8`] for more information. -pub struct RangeZip2x8< +/// See [`range_zip_2x9`] for more information. 
+pub struct RangeZip2x9< Idx, IR0, R0, @@ -2899,6 +3593,8 @@ pub struct RangeZip2x8< O6, IO7, O7, + IO8, + O8, > where Idx: std::cmp::Ord, IR0: Iterator, @@ -2911,6 +3607,7 @@ pub struct RangeZip2x8< IO5: Iterator, IO6: Iterator, IO7: Iterator, + IO8: Iterator, { r0: IR0, r1: IR1, @@ -2922,6 +3619,7 @@ pub struct RangeZip2x8< o5: Peekable, o6: Peekable, o7: Peekable, + o8: Peekable, o0_data_latest: Option, o1_data_latest: Option, @@ -2931,11 +3629,35 @@ pub struct RangeZip2x8< o5_data_latest: Option, o6_data_latest: Option, o7_data_latest: Option, + o8_data_latest: Option, } -impl - Iterator - for RangeZip2x8< +impl< + Idx, + IR0, + R0, + IR1, + R1, + IO0, + O0, + IO1, + O1, + IO2, + O2, + IO3, + O3, + IO4, + O4, + IO5, + O5, + IO6, + O6, + IO7, + O7, + IO8, + O8, +> Iterator + for RangeZip2x9< Idx, IR0, R0, @@ -2957,6 +3679,8 @@ impl where Idx: std::cmp::Ord, @@ -2970,6 +3694,7 @@ where IO5: Iterator, IO6: Iterator, IO7: Iterator, + IO8: Iterator, O0: Clone, O1: Clone, O2: Clone, @@ -2978,6 +3703,7 @@ where O5: Clone, O6: Clone, O7: Clone, + O8: Clone, { type Item = ( Idx, @@ -2991,6 +3717,7 @@ where Option, Option, Option, + Option, ); #[inline] @@ -3006,6 +3733,7 @@ where o5, o6, o7, + o8, o0_data_latest, o1_data_latest, o2_data_latest, @@ -3014,6 +3742,7 @@ where o5_data_latest, o6_data_latest, o7_data_latest, + o8_data_latest, } = self; let (r0_index, r0_data) = r0.next()?; @@ -3077,14 +3806,21 @@ where let o7_data = o7_data.or_else(|| o7_data_latest.take()); o7_data_latest.clone_from(&o7_data); + let mut o8_data = None; + while let Some((_, data)) = o8.next_if(|(index, _)| index <= &max_index) { + o8_data = Some(data); + } + let o8_data = o8_data.or_else(|| o8_data_latest.take()); + o8_data_latest.clone_from(&o8_data); + Some(( max_index, r0_data, r1_data, o0_data, o1_data, o2_data, o3_data, o4_data, o5_data, - o6_data, o7_data, + o6_data, o7_data, o8_data, )) } } -/// Returns a new [`RangeZip2x9`] iterator. +/// Returns a new [`RangeZip2x10`] iterator. 
/// /// The number of elements in a range zip iterator corresponds to the number of elements in the /// shortest of its required iterators (`r0`, `r1`). @@ -3094,7 +3830,7 @@ where /// /// Optional iterators accumulate their state and yield their most recent value (if any), /// each time the required iterators fire. -pub fn range_zip_2x9< +pub fn range_zip_2x10< Idx, IR0, R0, @@ -3118,6 +3854,8 @@ pub fn range_zip_2x9< O7, IO8, O8, + IO9, + O9, >( r0: IR0, r1: IR1, @@ -3130,7 +3868,8 @@ pub fn range_zip_2x9< o6: IO6, o7: IO7, o8: IO8, -) -> RangeZip2x9< + o9: IO9, +) -> RangeZip2x10< Idx, IR0::IntoIter, R0, @@ -3154,6 +3893,8 @@ pub fn range_zip_2x9< O7, IO8::IntoIter, O8, + IO9::IntoIter, + O9, > where Idx: std::cmp::Ord, @@ -3168,8 +3909,9 @@ where IO6: IntoIterator, IO7: IntoIterator, IO8: IntoIterator, + IO9: IntoIterator, { - RangeZip2x9 { + RangeZip2x10 { r0: r0.into_iter(), r1: r1.into_iter(), o0: o0.into_iter().peekable(), @@ -3181,6 +3923,7 @@ where o6: o6.into_iter().peekable(), o7: o7.into_iter().peekable(), o8: o8.into_iter().peekable(), + o9: o9.into_iter().peekable(), o0_data_latest: None, o1_data_latest: None, @@ -3191,14 +3934,15 @@ where o6_data_latest: None, o7_data_latest: None, o8_data_latest: None, + o9_data_latest: None, } } /// Implements a range zip iterator combinator with 2 required iterators and 2 optional /// iterators. /// -/// See [`range_zip_2x9`] for more information. -pub struct RangeZip2x9< +/// See [`range_zip_2x10`] for more information. 
+pub struct RangeZip2x10< Idx, IR0, R0, @@ -3222,6 +3966,8 @@ pub struct RangeZip2x9< O7, IO8, O8, + IO9, + O9, > where Idx: std::cmp::Ord, IR0: Iterator, @@ -3235,6 +3981,7 @@ pub struct RangeZip2x9< IO6: Iterator, IO7: Iterator, IO8: Iterator, + IO9: Iterator, { r0: IR0, r1: IR1, @@ -3247,6 +3994,7 @@ pub struct RangeZip2x9< o6: Peekable, o7: Peekable, o8: Peekable, + o9: Peekable, o0_data_latest: Option, o1_data_latest: Option, @@ -3257,6 +4005,7 @@ pub struct RangeZip2x9< o6_data_latest: Option, o7_data_latest: Option, o8_data_latest: Option, + o9_data_latest: Option, } impl< @@ -3283,8 +4032,10 @@ impl< O7, IO8, O8, + IO9, + O9, > Iterator - for RangeZip2x9< + for RangeZip2x10< Idx, IR0, R0, @@ -3308,6 +4059,8 @@ impl< O7, IO8, O8, + IO9, + O9, > where Idx: std::cmp::Ord, @@ -3322,6 +4075,7 @@ where IO6: Iterator, IO7: Iterator, IO8: Iterator, + IO9: Iterator, O0: Clone, O1: Clone, O2: Clone, @@ -3331,6 +4085,7 @@ where O6: Clone, O7: Clone, O8: Clone, + O9: Clone, { type Item = ( Idx, @@ -3345,6 +4100,7 @@ where Option, Option, Option, + Option, ); #[inline] @@ -3361,6 +4117,7 @@ where o6, o7, o8, + o9, o0_data_latest, o1_data_latest, o2_data_latest, @@ -3370,6 +4127,7 @@ where o6_data_latest, o7_data_latest, o8_data_latest, + o9_data_latest, } = self; let (r0_index, r0_data) = r0.next()?; @@ -3440,9 +4198,16 @@ where let o8_data = o8_data.or_else(|| o8_data_latest.take()); o8_data_latest.clone_from(&o8_data); + let mut o9_data = None; + while let Some((_, data)) = o9.next_if(|(index, _)| index <= &max_index) { + o9_data = Some(data); + } + let o9_data = o9_data.or_else(|| o9_data_latest.take()); + o9_data_latest.clone_from(&o9_data); + Some(( max_index, r0_data, r1_data, o0_data, o1_data, o2_data, o3_data, o4_data, o5_data, - o6_data, o7_data, o8_data, + o6_data, o7_data, o8_data, o9_data, )) } } diff --git a/crates/store/re_query/src/storage_engine.rs b/crates/store/re_query/src/storage_engine.rs index 12f342b7cff7..88927eed238a 100644 --- 
a/crates/store/re_query/src/storage_engine.rs +++ b/crates/store/re_query/src/storage_engine.rs @@ -157,6 +157,11 @@ impl StorageEngineReadGuard<'_> { &self.store } + #[inline] + pub fn schema(&self) -> &re_chunk_store::StoreSchema { + self.store.schema() + } + #[inline] pub fn cache(&self) -> &QueryCache { &self.cache @@ -204,6 +209,11 @@ impl StorageEngineArcReadGuard { &self.store } + #[inline] + pub fn schema(&self) -> &re_chunk_store::StoreSchema { + self.store.schema() + } + #[inline] pub fn cache(&self) -> &QueryCache { &self.cache diff --git a/crates/store/re_query/tests/latest_at.rs b/crates/store/re_query/tests/latest_at.rs index 98a3d1cb93e7..96bfde707845 100644 --- a/crates/store/re_query/tests/latest_at.rs +++ b/crates/store/re_query/tests/latest_at.rs @@ -640,6 +640,51 @@ fn static_invalidation() { ); } +#[test] +fn same_row_id_across_chunks() { + let store = ChunkStore::new_handle( + re_log_types::StoreId::random(re_log_types::StoreKind::Recording, "test_app"), + Default::default(), + ); + let mut caches = QueryCache::new(store.clone()); + + let entity_path = "point"; + let timepoint = [build_frame_nr(123)]; + + // Two separate chunks share a single RowId, each carrying a different component. 
+ let row_id = RowId::new(); + let points = vec![MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)]; + let colors = vec![MyColor::from_rgb(255, 0, 0)]; + + let chunk_points = Chunk::builder(entity_path) + .with_archetype(row_id, timepoint, &MyPoints::new(points.clone())) + .build() + .unwrap(); + insert_and_react(&mut store.write(), &mut caches, &Arc::new(chunk_points)); + + let chunk_colors = Chunk::builder(entity_path) + .with_archetype( + row_id, + timepoint, + &MyPoints::update_fields().with_colors(colors.clone()), + ) + .build() + .unwrap(); + insert_and_react(&mut store.write(), &mut caches, &Arc::new(chunk_colors)); + + let query = re_chunk_store::LatestAtQuery::new(*timepoint[0].0.name(), timepoint[0].1); + let expected_compound_index = (TimeInt::new_temporal(123), row_id); + query_and_compare( + &caches, + &store.read(), + &query, + &entity_path.into(), + expected_compound_index, + &points, + &colors, + ); +} + // --- fn insert_and_react(store: &mut ChunkStore, caches: &mut QueryCache, chunk: &Arc) { diff --git a/crates/store/re_query/tests/range.rs b/crates/store/re_query/tests/range.rs index 4e97dd838a16..f2a848c5e284 100644 --- a/crates/store/re_query/tests/range.rs +++ b/crates/store/re_query/tests/range.rs @@ -1228,6 +1228,54 @@ fn concurrent_multitenant_edge_case2() { ); } +#[test] +fn same_row_id_across_chunks() -> anyhow::Result<()> { + let store = ChunkStore::new_handle( + re_log_types::StoreId::random(re_log_types::StoreKind::Recording, "test_app"), + Default::default(), + ); + let mut caches = QueryCache::new(store.clone()); + + let entity_path: EntityPath = "point".into(); + + // Two separate chunks at different times share a single RowId. 
+ let row_id = RowId::new(); + + let timepoint1 = [build_frame_nr(123)]; + let points1 = vec![MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)]; + let chunk1 = Chunk::builder(entity_path.clone()) + .with_archetype(row_id, timepoint1, &MyPoints::new(points1.clone())) + .build()?; + insert_and_react(&mut store.write(), &mut caches, &Arc::new(chunk1)); + + let timepoint2 = [build_frame_nr(223)]; + let points2 = vec![MyPoint::new(10.0, 20.0), MyPoint::new(30.0, 40.0)]; + let chunk2 = Chunk::builder(entity_path.clone()) + .with_archetype(row_id, timepoint2, &MyPoints::new(points2.clone())) + .build()?; + insert_and_react(&mut store.write(), &mut caches, &Arc::new(chunk2)); + + let query = RangeQuery::new( + *timepoint1[0].0.name(), + AbsoluteTimeRange::new(timepoint1[0].1, timepoint2[0].1), + ); + + let expected_points = &[ + ((TimeInt::new_temporal(123), row_id), points1.as_slice()), + ((TimeInt::new_temporal(223), row_id), points2.as_slice()), + ]; + query_and_compare( + &caches, + &store.read(), + &query, + &entity_path, + expected_points, + &[], + ); + + Ok(()) +} + // // --- fn insert_and_react(store: &mut ChunkStore, caches: &mut QueryCache, chunk: &Arc) { diff --git a/crates/store/re_redap_client/Cargo.toml b/crates/store/re_redap_client/Cargo.toml index ea011ad2a450..8f007fb2ac31 100644 --- a/crates/store/re_redap_client/Cargo.toml +++ b/crates/store/re_redap_client/Cargo.toml @@ -36,6 +36,7 @@ re_auth = { workspace = true, features = ["oauth"] } re_backoff.workspace = true re_byte_size.workspace = true re_chunk.workspace = true +re_error.workspace = true re_format.workspace = true re_log_channel.workspace = true re_log_encoding = { workspace = true, features = ["encoder", "decoder"] } @@ -50,6 +51,7 @@ ehttp.workspace = true futures.workspace = true itertools.workspace = true jiff.workspace = true +opentelemetry.workspace = true serde.workspace = true thiserror.workspace = true tokio.workspace = true diff --git a/crates/store/re_redap_client/src/api_error.rs 
b/crates/store/re_redap_client/src/api_error.rs new file mode 100644 index 000000000000..e33342d56629 --- /dev/null +++ b/crates/store/re_redap_client/src/api_error.rs @@ -0,0 +1,376 @@ +use std::sync::Arc; + +use crate::connection_registry::ClientCredentialsError; +use crate::extract_trace_id; + +#[derive(Clone, Debug)] +pub struct ApiError { + /// A message that does NOT include the contents of [`Self::source`]. + pub message: String, + + pub kind: ApiErrorKind, + + pub source: Option>, + + /// When the error comes from the server returning a trace id, we include it in the client + /// error for easier reporting. + trace_id: Option, +} + +/// Convenience for `Result` +pub type ApiResult = Result; + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum ApiErrorKind { + NotFound, + AlreadyExists, + PermissionDenied, + Unauthenticated, + + /// The gRPC endpoint has not been implemented. + Unimplemented, + Connection, + Timeout, + Internal, + InvalidArguments, + ResourcesExhausted, + + /// Failed to decode data received from the server (e.g. protobuf → Arrow conversion). + Deserialization, + + /// Failed to encode data for sending to the server. + Serialization, + + InvalidServer, +} + +impl From for ApiErrorKind { + fn from(code: tonic::Code) -> Self { + match code { + tonic::Code::NotFound => Self::NotFound, + tonic::Code::AlreadyExists => Self::AlreadyExists, + tonic::Code::PermissionDenied => Self::PermissionDenied, + tonic::Code::ResourceExhausted => Self::ResourcesExhausted, + tonic::Code::Unauthenticated => Self::Unauthenticated, + tonic::Code::Unimplemented => Self::Unimplemented, + tonic::Code::Unavailable => Self::Connection, + tonic::Code::InvalidArgument => Self::InvalidArguments, + tonic::Code::DeadlineExceeded => Self::Timeout, + _ => Self::Internal, + } + } +} + +impl ApiErrorKind { + /// Transient errors that may succeed on retry (with backoff). 
+ pub fn is_retryable(self) -> bool { + match self { + Self::Connection | Self::Timeout | Self::Internal | Self::ResourcesExhausted => true, + + Self::NotFound + | Self::AlreadyExists + | Self::PermissionDenied + | Self::Unauthenticated + | Self::Unimplemented + | Self::InvalidArguments + | Self::Deserialization + | Self::Serialization + | Self::InvalidServer => false, + } + } +} + +impl std::fmt::Display for ApiErrorKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NotFound => write!(f, "NotFound"), + Self::AlreadyExists => write!(f, "AlreadyExists"), + Self::PermissionDenied => write!(f, "PermissionDenied"), + Self::Unauthenticated => write!(f, "Unauthenticated"), + Self::Unimplemented => write!(f, "Unimplemented"), + Self::Connection => write!(f, "Connection"), + Self::Internal => write!(f, "Internal"), + Self::InvalidArguments => write!(f, "InvalidArguments"), + Self::ResourcesExhausted => write!(f, "ResourcesExhausted"), + Self::Deserialization => write!(f, "Deserialization"), + Self::Serialization => write!(f, "Serialization"), + Self::Timeout => write!(f, "Timeout"), + Self::InvalidServer => write!(f, "InvalidServer"), + } + } +} + +impl ApiError { + #[inline] + fn new(kind: ApiErrorKind, message: impl Into) -> Self { + Self { + message: message.into(), + kind, + source: None, + trace_id: None, + } + } + + /// Do NOT include `err` in the `message` - it will be added for you. + #[inline] + fn new_with_source( + err: impl std::error::Error + Send + Sync + 'static, + kind: ApiErrorKind, + message: impl Into, + ) -> Self { + Self { + message: message.into(), + kind, + source: Some(Arc::new(err)), + trace_id: None, + } + } + + /// Do NOT include `err` in the `message` - it will be added for you. 
+ #[inline] + fn new_with_source_and_trace_id( + err: impl std::error::Error + Send + Sync + 'static, + kind: ApiErrorKind, + message: impl Into, + trace_id: opentelemetry::TraceId, + ) -> Self { + Self { + message: message.into(), + kind, + source: Some(Arc::new(err)), + trace_id: Some(trace_id), + } + } + + /// Construct an [`ApiError`] with an explicit `kind` and an optional `trace_id`. + /// + /// Do NOT include `err` in the `message` - it will be added for you. + pub fn with_kind_and_source( + kind: ApiErrorKind, + trace_id: Option, + err: impl std::error::Error + Send + Sync + 'static, + message: impl Into, + ) -> Self { + Self { + message: message.into(), + kind, + source: Some(Arc::new(err)), + trace_id, + } + } + + /// Do NOT include `err` in the `message` - it will be added for you. + pub fn tonic(err: tonic::Status, message: impl Into) -> Self { + let message = message.into(); + let kind = ApiErrorKind::from(err.code()); + let trace_id = extract_trace_id(err.metadata()); + let err = crate::TonicStatusError::from(err); // Wrap in TonicStatusError so we get our nice Display formatting + if let Some(trace_id) = trace_id { + Self::new_with_source_and_trace_id(err, kind, message, trace_id) + } else { + Self::new_with_source(err, kind, message) + } + } + + /// Sets the trace-id if not already set. + #[must_use] + pub fn with_trace_id(mut self, trace_id: Option) -> Self { + if self.trace_id.is_none() { + self.trace_id = trace_id; + } + self + } + + /// Failed to decode data received from the server. + pub fn deserialization( + trace_id: Option, + message: impl Into, + ) -> Self { + Self { + message: message.into(), + kind: ApiErrorKind::Deserialization, + source: None, + trace_id, + } + } + + /// Failed to decode data received from the server. + /// + /// Do NOT include `err` in the `message` - it will be added for you. 
+ pub fn deserialization_with_source( + trace_id: Option, + err: impl std::error::Error + Send + Sync + 'static, + message: impl Into, + ) -> Self { + Self { + message: message.into(), + kind: ApiErrorKind::Deserialization, + source: Some(Arc::new(err)), + trace_id, + } + } + + /// Failed to encode data for sending to the server. + pub fn serialization(message: impl Into) -> Self { + Self::new(ApiErrorKind::Serialization, message) + } + + /// Failed to encode data for sending to the server. + /// + /// Do NOT include `err` in the `message` - it will be added for you. + pub fn serialization_with_source( + err: impl std::error::Error + Send + Sync + 'static, + message: impl Into, + ) -> Self { + Self::new_with_source(err, ApiErrorKind::Serialization, message) + } + + /// Do NOT include `err` in the `message` - it will be added for you. + pub fn invalid_arguments_with_source( + trace_id: Option, + err: impl std::error::Error + Send + Sync + 'static, + message: impl Into, + ) -> Self { + Self { + message: message.into(), + kind: ApiErrorKind::InvalidArguments, + source: Some(Arc::new(err)), + trace_id, + } + } + + pub fn internal(message: impl Into) -> Self { + Self::new(ApiErrorKind::Internal, message) + } + + /// Do NOT include `err` in the `message` - it will be added for you. + pub fn internal_with_source( + trace_id: Option, + err: impl std::error::Error + Send + Sync + 'static, + message: impl Into, + ) -> Self { + Self { + message: message.into(), + kind: ApiErrorKind::Internal, + source: Some(Arc::new(err)), + trace_id, + } + } + + /// Do NOT include `err` in the `message` - it will be added for you. 
+ pub fn connection_with_source( + trace_id: Option, + err: impl std::error::Error + Send + Sync + 'static, + message: impl Into, + ) -> Self { + Self { + message: message.into(), + kind: ApiErrorKind::Connection, + source: Some(Arc::new(err)), + trace_id, + } + } + + pub fn connection(message: impl Into) -> Self { + Self::new(ApiErrorKind::Connection, message) + } + + pub fn permission_denied( + trace_id: Option, + message: impl Into, + ) -> Self { + Self { + message: message.into(), + kind: ApiErrorKind::PermissionDenied, + source: None, + trace_id, + } + } + + /// Do NOT include `err` in the `message` - it will be added for you. + pub fn credentials_with_source( + trace_id: Option, + err: ClientCredentialsError, + message: impl Into, + ) -> Self { + Self { + message: message.into(), + kind: ApiErrorKind::Unauthenticated, + source: Some(Arc::new(err)), + trace_id, + } + } + + /// Raised when `GET /version` against the requested origin returns a non-2xx response. + /// + /// The included status line and body snippet usually tell the user whether the path is + /// wrong (404 from a non-Rerun HTTP server), the server is down (5xx), or they hit a + /// reverse proxy that redirected somewhere unexpected. Connection-refused (wrong port + /// or server not running) hits a different error path above. + #[expect(clippy::needless_pass_by_value)] + pub fn invalid_server_with_response( + origin: re_uri::Origin, + status: u16, + status_text: &str, + body_snippet: Option<&str>, + hint: Option<&str>, + ) -> Self { + let mut msg = format!( + "{origin} is not a valid Rerun server (GET /version returned HTTP {status} {status_text})" + ); + if let Some(body) = body_snippet.filter(|s| !s.is_empty()) { + msg.push_str(": "); + msg.push_str(body); + } + if let Some(hint) = hint { + msg.push_str(". "); + msg.push_str(hint); + } + Self::new(ApiErrorKind::InvalidServer, msg) + } + + /// Helper method to downcast the source error to a `ClientCredentialsError` if possible. 
+ #[inline] + pub fn as_client_credentials_error(&self) -> Option<&ClientCredentialsError> { + self.source + .as_ref()? + .downcast_ref::() + } + + #[inline] + pub fn is_client_credentials_error(&self) -> bool { + self.kind == ApiErrorKind::Unauthenticated + && matches!(self.source.as_ref(), Some(e) if e.is::()) + } +} + +impl std::fmt::Display for ApiError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { + message, + kind, + source, + trace_id, + } = self; + + write!(f, "{message} ({kind})")?; + + if let Some(trace_id) = trace_id { + write!(f, " (trace-id: {trace_id})")?; + } + + if let Some(err) = source { + write!(f, ", {err}")?; + } + + Ok(()) + } +} + +impl std::error::Error for ApiError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + self.source + .as_ref() + .map(|e| e.as_ref() as &(dyn std::error::Error + 'static)) + } +} diff --git a/crates/store/re_redap_client/src/api_response_stream.rs b/crates/store/re_redap_client/src/api_response_stream.rs new file mode 100644 index 000000000000..3f71fd6182d6 --- /dev/null +++ b/crates/store/re_redap_client/src/api_response_stream.rs @@ -0,0 +1,61 @@ +use tokio_stream::{Stream, StreamExt as _}; + +use crate::{ApiError, ApiResult, extract_trace_id}; + +/// A stream that optionally carries a server-assigned trace-id +/// from the initial gRPC response metadata. +/// +/// Functions consuming the stream should attach the trace-id to any errors they produce, +/// and pass it along to any [`ApiResponseStream`] they return. 
+pub struct ApiResponseStream { + inner: std::pin::Pin> + Send>>, + trace_id: Option, +} + +impl ApiResponseStream { + pub fn new( + inner: impl Stream> + Send + 'static, + trace_id: Option, + ) -> Self { + Self { + inner: Box::pin(inner), + trace_id, + } + } + + pub fn trace_id(&self) -> Option { + self.trace_id + } +} + +impl Stream for ApiResponseStream { + type Item = ApiResult; + + fn poll_next( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + self.inner.as_mut().poll_next(cx) + } +} + +impl ApiResponseStream { + /// Creates an [`ApiResponseStream`] from a streaming [`tonic::Response`], + /// extracting the trace-id from the response metadata and converting + /// tonic stream errors to [`ApiError`]s. + pub fn from_tonic_response( + response: tonic::Response>, + endpoint: &'static str, + ) -> Self { + let trace_id = extract_trace_id(response.metadata()); + let stream = response.into_inner().map(move |item| { + item.map_err(|err| { + ApiError::tonic(err, format!("{endpoint} stream failed")).with_trace_id(trace_id) + }) + }); + Self { + inner: Box::pin(stream), + trace_id, + } + } +} diff --git a/crates/store/re_redap_client/src/connection_client.rs b/crates/store/re_redap_client/src/connection_client.rs index b33c982bb97b..270a95928732 100644 --- a/crates/store/re_redap_client/src/connection_client.rs +++ b/crates/store/re_redap_client/src/connection_client.rs @@ -4,6 +4,7 @@ use itertools::Itertools as _; use re_arrow_util::ArrowArrayDowncastRef as _; use re_log_encoding::{RawRrdManifest, ToApplication as _}; use re_log_types::EntryId; +use re_protos::EntryName; use re_protos::cloud::v1alpha1::ext::{ CreateDatasetEntryResponse, CreateTableEntryRequest, DataSource, DataSourceKind, DatasetDetails, DatasetEntry, EntryDetails, EntryDetailsUpdate, LanceTable, ProviderDetails, @@ -11,17 +12,17 @@ use re_protos::cloud::v1alpha1::ext::{ ReadDatasetEntryResponse, ReadTableEntryResponse, RegisterTableResponse, 
RegisterWithDatasetRequest, RegisterWithDatasetTaskDescriptor, TableEntry, TableInsertMode, UnregisterFromDatasetRequest, UpdateDatasetEntryRequest, UpdateDatasetEntryResponse, - UpdateEntryRequest, UpdateEntryResponse, + UpdateEntryRequest, UpdateEntryResponse, VersionResponse, }; use re_protos::cloud::v1alpha1::rerun_cloud_service_client::RerunCloudServiceClient; use re_protos::cloud::v1alpha1::{ - CreateDatasetEntryRequest, DeleteEntryRequest, EntryFilter, EntryKind, FetchChunksRequest, - FindEntriesRequest, GetDatasetManifestSchemaRequest, GetDatasetManifestSchemaResponse, - GetDatasetSchemaRequest, GetRrdManifestResponse, GetSegmentTableSchemaRequest, - GetSegmentTableSchemaResponse, QueryDatasetResponse, QueryTasksOnCompletionResponse, - QueryTasksResponse, ReadDatasetEntryRequest, ReadTableEntryRequest, - RegisterWithDatasetResponse, ScanSegmentTableRequest, ScanSegmentTableResponse, - UnregisterFromDatasetResponse, VersionRequest, WriteTableRequest, + CancelTasksRequest, CreateDatasetEntryRequest, DeleteEntryRequest, EntryFilter, EntryKind, + FetchChunksRequest, FindEntriesRequest, GetDatasetManifestSchemaRequest, + GetDatasetManifestSchemaResponse, GetDatasetSchemaRequest, GetRrdManifestResponse, + GetSegmentTableSchemaRequest, GetSegmentTableSchemaResponse, QueryDatasetResponse, + QueryTasksOnCompletionResponse, QueryTasksResponse, ReadDatasetEntryRequest, + ReadTableEntryRequest, RegisterWithDatasetResponse, ScanSegmentTableRequest, + ScanSegmentTableResponse, UnregisterFromDatasetResponse, VersionRequest, WriteTableRequest, }; use re_protos::common::v1alpha1::ext::{IfDuplicateBehavior, ScanParameters, SegmentId}; use re_protos::common::v1alpha1::{DataframePart, TaskId}; @@ -29,25 +30,37 @@ use re_protos::external::prost::bytes::Bytes; use re_protos::headers::RerunHeadersInjectorExt as _; use re_protos::{TypeConversionError, invalid_schema, missing_column, missing_field}; use tokio_stream::{Stream, StreamExt as _}; +use tonic::IntoStreamingRequest as _; 
use tonic::codegen::{Body, StdError}; -use tonic::{IntoStreamingRequest as _, Status}; use url::Url; -use crate::{ApiError, ApiResult}; +use crate::{ApiError, ApiResponseStream, ApiResult, extract_trace_id}; -pub type ResponseStream = std::pin::Pin> + Send>>; +/// Extension trait for [`tonic::Response`] that extracts both the inner value +/// and the server's trace-id in one step. +trait TonicResponseExt { + fn into_inner_and_trace_id(self) -> (T, Option); +} + +impl TonicResponseExt for tonic::Response { + fn into_inner_and_trace_id(self) -> (T, Option) { + let trace_id = extract_trace_id(self.metadata()); + (self.into_inner(), trace_id) + } +} pub type FetchChunksResponseStream = - ResponseStream; + ApiResponseStream; pub type QueryDatasetResponseStream = - ResponseStream; + ApiResponseStream; pub struct SegmentQueryParams { pub dataset_id: EntryId, pub segment_id: SegmentId, pub include_static_data: bool, pub include_temporal_data: bool, + pub generate_direct_urls: bool, pub query: Option, } @@ -96,24 +109,49 @@ where .map(|_| ()) } - /// Find all entries matching the given filter. - pub async fn find_entries(&mut self, filter: EntryFilter) -> ApiResult> { - let result = self + /// Returns version and deployment information from the server. + pub async fn version_info(&mut self) -> ApiResult { + let response = self .inner() - .find_entries(FindEntriesRequest { - filter: Some(filter), - }) + .version(VersionRequest {}) + .await + .map_err(|err| ApiError::tonic(err, "/Version failed"))? + .into_inner(); + Ok(response.into()) + } + + /// Calls the `/WhoAmI` endpoint to verify authentication and retrieve the user's identity + /// and permissions. + pub async fn who_am_i(&mut self) -> ApiResult { + self.inner() + .who_am_i(re_protos::cloud::v1alpha1::WhoAmIRequest {}) .await - .map_err(|err| ApiError::tonic(err, "/FindEntries failed"))? 
- .into_inner() - .entries; + .map(|resp| resp.into_inner()) + .map_err(|err| ApiError::tonic(err, "/WhoAmI failed")) + } + + /// Find all entries matching the given filter. + pub async fn find_entries(&mut self, filter: EntryFilter) -> ApiResult> { + let (response, trace_id) = TonicResponseExt::into_inner_and_trace_id( + self.inner() + .find_entries(FindEntriesRequest { + filter: Some(filter), + }) + .await + .map_err(|err| ApiError::tonic(err, "/FindEntries failed"))?, + ); - result + response + .entries .into_iter() .map(TryInto::try_into) .collect::, _>>() .map_err(|err| { - ApiError::serialization_with_source(err, "failed parsing /FindEntries response") + ApiError::deserialization_with_source( + trace_id, + err, + "failed parsing /FindEntries response", + ) }) } @@ -135,46 +173,51 @@ where entry_id: EntryId, entry_details_update: EntryDetailsUpdate, ) -> ApiResult { - let response: UpdateEntryResponse = self - .inner() - .update_entry(tonic::Request::new( - UpdateEntryRequest { - id: entry_id, - entry_details_update, - } - .into(), - )) - .await - .map_err(|err| ApiError::tonic(err, "/UpdateEntry failed"))? - .into_inner() - .try_into() - .map_err(|err| { - ApiError::serialization_with_source(err, "failed parsing /UpdateEntry response") - })?; + let (inner, trace_id) = TonicResponseExt::into_inner_and_trace_id( + self.inner() + .update_entry(tonic::Request::new( + UpdateEntryRequest { + id: entry_id, + entry_details_update, + } + .into(), + )) + .await + .map_err(|err| ApiError::tonic(err, "/UpdateEntry failed"))?, + ); + let response: UpdateEntryResponse = inner.try_into().map_err(|err| { + ApiError::deserialization_with_source( + trace_id, + err, + "failed parsing /UpdateEntry response", + ) + })?; Ok(response.entry_details) } /// Get the Arrow schema for a dataset entry. 
+ #[tracing::instrument(level = "info", skip_all)] pub async fn get_dataset_schema(&mut self, entry_id: EntryId) -> ApiResult { - self.inner() - .get_dataset_schema( - tonic::Request::new(GetDatasetSchemaRequest {}) - .with_entry_id(entry_id) - .map_err(|err| { - ApiError::tonic(err, "failed building /GetDatasetSchema request") - })?, - ) - .await - .map_err(|err| ApiError::tonic(err, "/GetDatasetSchema failed"))? - .into_inner() - .schema() - .map_err(|err| { - ApiError::serialization_with_source( - err, - "failed parsing /GetDatasetSchema response", + let (inner, trace_id) = TonicResponseExt::into_inner_and_trace_id( + self.inner() + .get_dataset_schema( + tonic::Request::new(GetDatasetSchemaRequest {}) + .with_entry_id(entry_id) + .map_err(|err| { + ApiError::tonic(err, "failed building /GetDatasetSchema request") + })?, ) - }) + .await + .map_err(|err| ApiError::tonic(err, "/GetDatasetSchema failed"))?, + ); + inner.schema().map_err(|err| { + ApiError::deserialization_with_source( + trace_id, + err, + "failed parsing /GetDatasetSchema response", + ) + }) } /// Create a new dataset entry. @@ -183,47 +226,47 @@ where name: String, entry_id: Option, ) -> ApiResult { - let response: CreateDatasetEntryResponse = self - .inner() - .create_dataset_entry(CreateDatasetEntryRequest { - name: Some(name), - id: entry_id.map(Into::into), - }) - .await - .map_err(|err| ApiError::tonic(err, "/CreateDatasetEntry failed"))? 
- .into_inner() - .try_into() - .map_err(|err| { - ApiError::serialization_with_source( - err, - "failed parsing /CreateDatasetEntry response", - ) - })?; + let (inner, trace_id) = TonicResponseExt::into_inner_and_trace_id( + self.inner() + .create_dataset_entry(CreateDatasetEntryRequest { + name: Some(name), + id: entry_id.map(Into::into), + }) + .await + .map_err(|err| ApiError::tonic(err, "/CreateDatasetEntry failed"))?, + ); + let response: CreateDatasetEntryResponse = inner.try_into().map_err(|err| { + ApiError::deserialization_with_source( + trace_id, + err, + "failed parsing /CreateDatasetEntry response", + ) + })?; Ok(response.dataset) } /// Get information on a dataset entry. pub async fn read_dataset_entry(&mut self, entry_id: EntryId) -> ApiResult { - let response: ReadDatasetEntryResponse = self - .inner() - .read_dataset_entry( - tonic::Request::new(ReadDatasetEntryRequest {}) - .with_entry_id(entry_id) - .map_err(|err| { - ApiError::tonic(err, "failed building /ReadDatasetEntry request") - })?, - ) - .await - .map_err(|err| ApiError::tonic(err, "/ReadDatasetEntry failed"))? 
- .into_inner() - .try_into() - .map_err(|err| { - ApiError::serialization_with_source( - err, - "failed parsing /ReadDatasetEntry response", + let (inner, trace_id) = TonicResponseExt::into_inner_and_trace_id( + self.inner() + .read_dataset_entry( + tonic::Request::new(ReadDatasetEntryRequest {}) + .with_entry_id(entry_id) + .map_err(|err| { + ApiError::tonic(err, "failed building /ReadDatasetEntry request") + })?, ) - })?; + .await + .map_err(|err| ApiError::tonic(err, "/ReadDatasetEntry failed"))?, + ); + let response: ReadDatasetEntryResponse = inner.try_into().map_err(|err| { + ApiError::deserialization_with_source( + trace_id, + err, + "failed parsing /ReadDatasetEntry response", + ) + })?; Ok(response.dataset_entry) } @@ -234,71 +277,78 @@ where entry_id: EntryId, dataset_details: DatasetDetails, ) -> ApiResult { - let response: UpdateDatasetEntryResponse = self - .inner() - .update_dataset_entry(tonic::Request::new( - UpdateDatasetEntryRequest { - id: entry_id, - dataset_details, - } - .into(), - )) - .await - .map_err(|err| ApiError::tonic(err, "/UpdateDatasetEntry failed"))? - .into_inner() - .try_into() - .map_err(|err| { - ApiError::serialization_with_source( - err, - "failed parsing /UpdateDatasetEntry response", - ) - })?; + let (inner, trace_id) = TonicResponseExt::into_inner_and_trace_id( + self.inner() + .update_dataset_entry(tonic::Request::new( + UpdateDatasetEntryRequest { + id: entry_id, + dataset_details, + } + .into(), + )) + .await + .map_err(|err| ApiError::tonic(err, "/UpdateDatasetEntry failed"))?, + ); + let response: UpdateDatasetEntryResponse = inner.try_into().map_err(|err| { + ApiError::deserialization_with_source( + trace_id, + err, + "failed parsing /UpdateDatasetEntry response", + ) + })?; Ok(response.dataset_entry) } /// Get information on a table entry. 
pub async fn read_table_entry(&mut self, entry_id: EntryId) -> ApiResult { - let response: ReadTableEntryResponse = self - .inner() - .read_table_entry(ReadTableEntryRequest { - id: Some(entry_id.into()), - }) - .await - .map_err(|err| ApiError::tonic(err, "/ReadTableEntry failed"))? - .into_inner() - .try_into() - .map_err(|err| { - ApiError::serialization_with_source(err, "failed parsing /ReadTableEntry response") - })?; + let (inner, trace_id) = TonicResponseExt::into_inner_and_trace_id( + self.inner() + .read_table_entry(ReadTableEntryRequest { + id: Some(entry_id.into()), + }) + .await + .map_err(|err| ApiError::tonic(err, "/ReadTableEntry failed"))?, + ); + let response: ReadTableEntryResponse = inner.try_into().map_err(|err| { + ApiError::deserialization_with_source( + trace_id, + err, + "failed parsing /ReadTableEntry response", + ) + })?; Ok(response.table_entry) } //TODO(ab): accept entry name pub async fn get_segment_table_schema(&mut self, entry_id: EntryId) -> ApiResult { - self.inner() - .get_segment_table_schema( - tonic::Request::new(GetSegmentTableSchemaRequest {}) - .with_entry_id(entry_id) - .map_err(|err| { - ApiError::tonic(err, "failed building /GetSegmentTableSchema request") - })?, - ) - .await - .map_err(|err| ApiError::tonic(err, "GetSegmentTableSchema failed"))? - .into_inner() + let (inner, trace_id) = TonicResponseExt::into_inner_and_trace_id( + self.inner() + .get_segment_table_schema( + tonic::Request::new(GetSegmentTableSchemaRequest {}) + .with_entry_id(entry_id) + .map_err(|err| { + ApiError::tonic(err, "failed building /GetSegmentTableSchema request") + })?, + ) + .await + .map_err(|err| ApiError::tonic(err, "GetSegmentTableSchema failed"))?, + ); + inner .schema .ok_or_else(|| { let err = missing_field!(GetSegmentTableSchemaResponse, "schema"); - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "missing field in /GetSegmentTableSchema response", ) })? 
.try_into() .map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "failed parsing /GetSegmentTableSchema response", ) @@ -313,7 +363,7 @@ where ) -> ApiResult> { const COLUMN_NAME: &str = ScanSegmentTableResponse::FIELD_SEGMENT_ID; - let mut stream = self + let response = self .inner() .scan_segment_table( tonic::Request::new(ScanSegmentTableRequest { @@ -323,26 +373,27 @@ where .map_err(|err| ApiError::tonic(err, "failed building /ScanSegmentTable request"))?, ) .await - .map_err(|err| ApiError::tonic(err, "/ScanSegmentTable failed"))? - .into_inner(); + .map_err(|err| ApiError::tonic(err, "/ScanSegmentTable failed"))?; + + let mut stream = ApiResponseStream::from_tonic_response(response, "/ScanSegmentTable"); + let trace_id = stream.trace_id(); let mut segment_ids = Vec::new(); while let Some(resp) = stream.next().await { - let record_batch: RecordBatch = resp - .map_err(|err| { - ApiError::tonic(err, "failed receiving item from /ScanSegmentTable stream") - })? + let record_batch: RecordBatch = resp? .data() .map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "failed parsing item from /ScanSegmentTable stream", ) })? 
.try_into() .map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "failed decoding item from /ScanSegmentTable stream", ) @@ -350,7 +401,8 @@ where let segment_id_col = record_batch.column_by_name(COLUMN_NAME).ok_or_else(|| { let err = missing_column!(ScanSegmentTableResponse, COLUMN_NAME); - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "missing column from item in /ScanSegmentTable stream", ) @@ -359,7 +411,8 @@ where let segment_id_array = segment_id_col .try_downcast_array_ref::() .map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "unexpected types in item in /ScanSegmentTable stream", ) @@ -380,45 +433,51 @@ where &mut self, entry_id: EntryId, ) -> ApiResult { - self.inner() - .get_dataset_manifest_schema( - tonic::Request::new(GetDatasetManifestSchemaRequest {}) - .with_entry_id(entry_id) - .map_err(|err| { - ApiError::tonic(err, "failed building /GetDatasetManifestSchema request") - })?, - ) - .await - .map_err(|err| ApiError::tonic(err, "/GetDatasetManifestSchema failed"))? - .into_inner() + let (inner, trace_id) = TonicResponseExt::into_inner_and_trace_id( + self.inner() + .get_dataset_manifest_schema( + tonic::Request::new(GetDatasetManifestSchemaRequest {}) + .with_entry_id(entry_id) + .map_err(|err| { + ApiError::tonic( + err, + "failed building /GetDatasetManifestSchema request", + ) + })?, + ) + .await + .map_err(|err| ApiError::tonic(err, "/GetDatasetManifestSchema failed"))?, + ); + inner .schema .ok_or_else(|| { let err = missing_field!(GetDatasetManifestSchemaResponse, "schema"); - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "missing field in /GetDatasetManifestSchema response", ) })? 
.try_into() .map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "failed parsing /GetDatasetManifestSchema response", ) }) } - /// Get the full [`RawRrdManifest`] of a recording. - pub async fn get_rrd_manifest( + /// Stream the [`RawRrdManifest`] parts of a recording as they arrive from the server. + /// + /// Each item in the returned stream is a manifest part (a slice of the full manifest). + /// Use [`RawRrdManifest::concat`] to combine parts if needed. + pub async fn get_rrd_manifest_stream( &mut self, dataset_id: EntryId, segment_id: SegmentId, - ) -> ApiResult { - // TODO(cmc): at some point we should probably continue the stream all the way down, but - // for now we simplify downstream's life by concatenating everything in here. - let mut rrd_manifest_parts = Vec::new(); - - let responses = self + ) -> ApiResult> { + let response = self .inner() .get_rrd_manifest( tonic::Request::new(re_protos::cloud::v1alpha1::GetRrdManifestRequest { @@ -428,39 +487,52 @@ where .map_err(|err| ApiError::tonic(err, "failed building /GetRrdManifest request"))?, ) .await - .map_err(|err| ApiError::tonic(err, "/GetRrdManifest failed"))? - .into_inner(); + .map_err(|err| ApiError::tonic(err, "/GetRrdManifest failed"))?; - futures::pin_mut!(responses); - while let Some(resp) = responses.next().await { - let rrd_manifest_part = resp - .map_err(|err| { - ApiError::connection_with_source( - err, - "failed fetching /GetRrdManifest response part", - ) - })? + let stream = ApiResponseStream::from_tonic_response(response, "/GetRrdManifest"); + let trace_id = stream.trace_id(); + let stream = stream.map(move |resp| { + resp? .rrd_manifest .ok_or_else(|| { let err = missing_field!(GetRrdManifestResponse, "rrd_manifest"); - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "missing field in /GetRrdManifest response", ) })? 
.to_application(()) .map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "failed parsing /GetRrdManifest response", ) - })?; + }) + }); + Ok(ApiResponseStream::new(stream, trace_id)) + } + + /// Get the full [`RawRrdManifest`] of a recording, concatenated from all stream parts. + pub async fn get_rrd_manifest( + &mut self, + dataset_id: EntryId, + segment_id: SegmentId, + ) -> ApiResult { + let stream = self.get_rrd_manifest_stream(dataset_id, segment_id).await?; + let trace_id = stream.trace_id(); - rrd_manifest_parts.push(rrd_manifest_part); + futures::pin_mut!(stream); + + let mut rrd_manifest_parts = Vec::new(); + while let Some(part) = stream.next().await { + rrd_manifest_parts.push(part?); } let Some(mut rrd_manifest) = rrd_manifest_parts.first().cloned() else { - return Err(ApiError::serialization( + return Err(ApiError::deserialization( + trace_id, "failed to parse the response for /GetRrdManifest (no data)", )); }; @@ -468,7 +540,8 @@ where let data_parts = rrd_manifest_parts.into_iter().map(|p| p.data).collect_vec(); rrd_manifest.data = re_arrow_util::concat_polymorphic_batches(&data_parts).map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "failed concatenating /GetRrdManifest response parts", ) @@ -491,6 +564,7 @@ where include_static_data, include_temporal_data, query, + generate_direct_urls, } = params; let query_request = QueryDatasetRequest { @@ -509,20 +583,22 @@ where columns: FetchChunksRequest::required_column_names(), ..Default::default() }), + generate_direct_urls, }; - Ok(Box::pin( - self.inner() - .query_dataset( - tonic::Request::new(query_request.into()) - .with_entry_id(dataset_id) - .map_err(|err| { - ApiError::tonic(err, "failed building /QueryDataset request") - })?, - ) - .await - .map_err(|err| ApiError::tonic(err, "/QueryDataset failed"))? 
- .into_inner(), + let response = self + .inner() + .query_dataset( + tonic::Request::new(query_request.into()) + .with_entry_id(dataset_id) + .map_err(|err| ApiError::tonic(err, "failed building /QueryDataset request"))?, + ) + .await + .map_err(|err| ApiError::tonic(err, "/QueryDataset failed"))?; + + Ok(ApiResponseStream::from_tonic_response( + response, + "/QueryDataset", )) } @@ -536,23 +612,19 @@ where &mut self, params: SegmentQueryParams, ) -> ApiResult> { - self.query_dataset_raw(params) - .await? + let stream = self.query_dataset_raw(params).await?; + let trace_id = stream.trace_id(); + stream .collect::>() .await .into_iter() - .collect::, _>>() - .map_err(|err| { - ApiError::tonic( - err, - "failed receiving items in /QueryDataset response stream", - ) - })? + .collect::, _>>()? .into_iter() .map(|resp| { resp.data.ok_or_else(|| { let err = missing_field!(QueryDatasetResponse, "data"); - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "missing field in item in /QueryDataset response stream", ) @@ -560,7 +632,11 @@ where }) .map(|batch| { arrow::array::RecordBatch::try_from(batch?).map_err(|err| { - ApiError::serialization_with_source(err, "failed converting to RecordBatch") + ApiError::deserialization_with_source( + trace_id, + err, + "failed converting to RecordBatch", + ) }) }) .collect() @@ -575,14 +651,17 @@ where chunk_infos: vec![DataframePart::from(record_batch)], }; - let fetch_chunks_response_stream = self + let response = self .inner() .fetch_chunks(fetch_chunks_request) .await - .map_err(|err| ApiError::tonic(err, "/FetchChunks failed"))? - .into_inner(); + // NOTE: `ApiError::tonic` already extracts the trace-id from the error metadata. + .map_err(|err| ApiError::tonic(err, "/FetchChunks failed"))?; - Ok(Box::pin(fetch_chunks_response_stream)) + Ok(ApiResponseStream::from_tonic_response( + response, + "/FetchChunks", + )) } /// Fetches chunks for a specified partition and query. 
@@ -592,24 +671,19 @@ where &mut self, params: SegmentQueryParams, ) -> ApiResult { - let chunk_info_batches = self - .query_dataset_raw(params) - .await? + let stream = self.query_dataset_raw(params).await?; + let query_trace_id = stream.trace_id(); + let chunk_info_batches = stream .collect::>() .await .into_iter() - .collect::, _>>() - .map_err(|err| { - ApiError::tonic( - err, - "failed receiving items in /QueryDataset response stream", - ) - })? + .collect::, _>>()? .into_iter() .map(|resp| { resp.data.ok_or_else(|| { let err = missing_field!(QueryDatasetResponse, "data"); - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + query_trace_id, err, "missing field in item in /QueryDataset response stream", ) @@ -618,22 +692,26 @@ where .collect::, _>>()?; if chunk_info_batches.is_empty() { - let empty_stream = tokio_stream::empty(); - return Ok(Box::pin(empty_stream)); + return Ok(ApiResponseStream::new( + tokio_stream::empty::>(), + None, + )); } let fetch_chunks_request = FetchChunksRequest { chunk_infos: chunk_info_batches, }; - let fetch_chunks_response_stream = self + let response = self .inner() .fetch_chunks(fetch_chunks_request) .await - .map_err(|err| ApiError::tonic(err, "/FetchChunks failed"))? - .into_inner(); + .map_err(|err| ApiError::tonic(err, "/FetchChunks failed"))?; - Ok(Box::pin(fetch_chunks_response_stream)) + Ok(ApiResponseStream::from_tonic_response( + response, + "/FetchChunks", + )) } /// Initiate registration of the provided recording URIs with a dataset and return the @@ -654,23 +732,26 @@ where .with_entry_id(dataset_id) .map_err(|err| ApiError::tonic(err, "failed building /RegisterWithDataset request"))?; - let response: RecordBatch = self - .inner() - .register_with_dataset(req.map(Into::into)) - .await - .map_err(|err| ApiError::tonic(err, "/RegisterWithDataset failed"))? 
- .into_inner() + let (inner, trace_id) = TonicResponseExt::into_inner_and_trace_id( + self.inner() + .register_with_dataset(req.map(Into::into)) + .await + .map_err(|err| ApiError::tonic(err, "/RegisterWithDataset failed"))?, + ); + let response: RecordBatch = inner .data .ok_or_else(|| { let err = missing_field!(RegisterWithDatasetResponse, "data"); - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "missing field in /RegisterWithDataset response", ) })? .try_into() .map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "failed decoding /RegisterWithDataset response", ) @@ -684,7 +765,8 @@ where .contains(&RegisterWithDatasetResponse::schema()) { let err = invalid_schema!(RegisterWithDatasetResponse); - return Err(ApiError::serialization_with_source( + return Err(ApiError::deserialization_with_source( + trace_id, err, "invalid schema in /RegisterWithDataset response", )); @@ -700,7 +782,8 @@ where }) .ok_or_else(|| { let err = missing_column!(RegisterWithDatasetResponse, column_name); - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "missing column in /RegisterWithDataset response", ) @@ -716,14 +799,19 @@ where RegisterWithDatasetResponse, RegisterWithDatasetResponse::FIELD_SEGMENT_TYPE ); - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "missing column in /RegisterWithDataset response", ) })?, ) .map_err(|err| { - ApiError::serialization_with_source(err, "failed parsing /RegisterWithDataset response") + ApiError::deserialization_with_source( + trace_id, + err, + "failed parsing /RegisterWithDataset response", + ) })?; let storage_url_column = get_string_array(RegisterWithDatasetResponse::FIELD_STORAGE_URL)?; let task_id_column = get_string_array(RegisterWithDatasetResponse::FIELD_TASK_ID)?; @@ -740,7 +828,8 @@ where segment_id .ok_or_else(|| { let err = 
missing_field!(RegisterWithDatasetResponse, "segment_id"); - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "missing field in /RegisterWithDataset response", ) @@ -750,13 +839,15 @@ where segment_type, storage_url: url::Url::parse(storage_url.ok_or_else(|| { let err = missing_field!(RegisterWithDatasetResponse, "storage_url"); - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "missing field in /RegisterWithDataset response", ) })?) .map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, TypeConversionError::UrlParseError(err), "failed to parse /RegisterWithDataset response", ) @@ -765,7 +856,8 @@ where id: task_id .ok_or_else(|| { let err = missing_field!(RegisterWithDatasetResponse, "task_id"); - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "missing field in /RegisterWithDataset response", ) @@ -814,30 +906,32 @@ where .map_err(|err| ApiError::tonic(err, "failed building /UnregisterFromDataset request"))?; use futures::TryStreamExt as _; - let responses: Vec<_> = self + let response = self .inner() .unregister_from_dataset(req) .await - .map_err(|err| ApiError::tonic(err, "/UnregisterFromDataset failed"))? - .into_inner() - .try_collect() - .await .map_err(|err| ApiError::tonic(err, "/UnregisterFromDataset failed"))?; + let stream = ApiResponseStream::from_tonic_response(response, "/UnregisterFromDataset"); + let trace_id = stream.trace_id(); + let responses: Vec<_> = stream.try_collect().await?; + let batches: ApiResult> = responses .into_iter() .map(|resp| { resp.data .ok_or_else(|| { let err = missing_field!(UnregisterFromDatasetResponse, "data"); - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "missing field in /UnregisterFromDataset response", ) })? 
.try_into() .map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "failed decoding /UnregisterFromDataset response", ) @@ -850,24 +944,34 @@ where /// Register a foreign Lance table to a new table entry in the catalog. //TODO(ab): in the future, we will probably support my types of tables (parquet on S3, etc.) - pub async fn register_table(&mut self, name: String, url: url::Url) -> ApiResult { + pub async fn register_table( + &mut self, + name: EntryName, + url: url::Url, + ) -> ApiResult { let request = re_protos::cloud::v1alpha1::ext::RegisterTableRequest { name, provider_details: ProviderDetails::LanceTable(LanceTable { table_url: url }), }; - let response: RegisterTableResponse = self - .inner() - .register_table(tonic::Request::new(request.try_into().map_err(|err| { - ApiError::serialization_with_source(err, "failed building /RegisterTable request") - })?)) - .await - .map_err(|err| ApiError::tonic(err, "/RegisterTable failed"))? 
- .into_inner() - .try_into() - .map_err(|err| { - ApiError::serialization_with_source(err, "failed parsing /RegisterTable response") - })?; + let (inner, trace_id) = TonicResponseExt::into_inner_and_trace_id( + self.inner() + .register_table(tonic::Request::new(request.try_into().map_err(|err| { + ApiError::serialization_with_source( + err, + "failed building /RegisterTable request", + ) + })?)) + .await + .map_err(|err| ApiError::tonic(err, "/RegisterTable failed"))?, + ); + let response: RegisterTableResponse = inner.try_into().map_err(|err| { + ApiError::deserialization_with_source( + trace_id, + err, + "failed parsing /RegisterTable response", + ) + })?; Ok(response.table_entry) } @@ -914,7 +1018,7 @@ where Ok(()) } - pub async fn get_table_names(&mut self) -> ApiResult> { + pub async fn get_table_names(&mut self) -> ApiResult> { Ok(self .find_entries(re_protos::cloud::v1alpha1::EntryFilter { entry_kind: Some(EntryKind::Table.into()), @@ -931,7 +1035,7 @@ where &mut self, task_ids: Vec, timeout: std::time::Duration, - ) -> ApiResult> { + ) -> ApiResult> { let q = QueryTasksOnCompletionRequest { task_ids, timeout }; let response = self .inner() @@ -942,9 +1046,20 @@ where ) })?)) .await - .map_err(|err| ApiError::tonic(err, "/QueryTasksOnCompletion failed"))? 
- .into_inner(); - Ok(response) + .map_err(|err| ApiError::tonic(err, "/QueryTasksOnCompletion failed"))?; + Ok(ApiResponseStream::from_tonic_response( + response, + "/QueryTasksOnCompletion", + )) + } + + pub async fn cancel_tasks(&mut self, task_ids: Vec) -> ApiResult { + self.inner() + .cancel_tasks(CancelTasksRequest { ids: task_ids }) + .await + .map_err(|err| ApiError::tonic(err, "/CancelTasks failed"))?; + + Ok(()) } pub async fn query_tasks(&mut self, task_ids: Vec) -> ApiResult { @@ -962,26 +1077,29 @@ where pub async fn get_entry_id( &mut self, - entry_name: &str, + entry_name: &EntryName, entry_kind: Option, ) -> ApiResult> { - self.inner() - .find_entries(FindEntriesRequest { - filter: Some(EntryFilter { - id: None, - name: Some(entry_name.to_owned()), - entry_kind: entry_kind.map(|kind| kind.into()), - }), - }) - .await - .map_err(|err| ApiError::tonic(err, "/FindEntries failed"))? - .into_inner() + let (inner, trace_id) = TonicResponseExt::into_inner_and_trace_id( + self.inner() + .find_entries(FindEntriesRequest { + filter: Some(EntryFilter { + id: None, + name: Some(entry_name.to_string()), + entry_kind: entry_kind.map(|kind| kind.into()), + }), + }) + .await + .map_err(|err| ApiError::tonic(err, "/FindEntries failed"))?, + ); + inner .entries .first() .and_then(|entry| entry.id) .map(|id| { - EntryId::try_from(id) - .map_err(|err| ApiError::serialization_with_source(err, "/FindEntries failed")) + EntryId::try_from(id).map_err(|err| { + ApiError::deserialization_with_source(trace_id, err, "/FindEntries failed") + }) }) .transpose() } @@ -1014,35 +1132,35 @@ where /// NOTE: if `url` is provided, the caller must ensure that it is writable and yet unused. 
pub async fn create_table_entry( &mut self, - name: &str, + name: EntryName, url: Option, schema: SchemaRef, ) -> ApiResult { let provider_details = url.map(|url| ProviderDetails::LanceTable(LanceTable { table_url: url })); let request = CreateTableEntryRequest { - name: name.to_owned(), + name, schema: schema.as_ref().clone(), provider_details, }; - let resp = self + let (resp, trace_id) = self .inner() .create_table_entry(tonic::Request::new(request.try_into().map_err(|err| { - ApiError::internal_with_source(err, "/CreateTableEntry failed") + ApiError::internal_with_source(None, err, "/CreateTableEntry failed") })?)) .await .map_err(|err| ApiError::tonic(err, "failed to create table"))? - .into_inner(); + .into_inner_and_trace_id(); resp.table .ok_or_else(|| { - ApiError::tonic( - Status::invalid_argument("entry ID not set in response"), - "/CreateTable failed", + ApiError::deserialization( + trace_id, + "/CreateTable failed: entry ID not set in response", ) })? .try_into() - .map_err(|err| ApiError::internal_with_source(err, "/CreateTable failed")) + .map_err(|err| ApiError::internal_with_source(trace_id, err, "/CreateTable failed")) } } diff --git a/crates/store/re_redap_client/src/connection_registry.rs b/crates/store/re_redap_client/src/connection_registry.rs index d5c5c17970b0..4d9f363365ae 100644 --- a/crates/store/re_redap_client/src/connection_registry.rs +++ b/crates/store/re_redap_client/src/connection_registry.rs @@ -1,11 +1,12 @@ use std::collections::HashMap; use std::collections::hash_map::Entry; use std::error::Error as _; +use std::fmt::Write as _; use std::sync::Arc; use re_auth::Jwt; use re_auth::credentials::CredentialsProviderError; -use re_protos::cloud::v1alpha1::{EntryFilter, FindEntriesRequest}; +use re_protos::cloud::v1alpha1::{EntryFilter, FindEntriesRequest, WhoAmIRequest}; use re_uri::Origin; use tokio::sync::RwLock; use tonic::Code; @@ -108,6 +109,9 @@ pub enum ClientCredentialsError { #[error("{0}")] 
HostMismatch(re_auth::HostMismatchError), + + #[error("the server requires authentication for read access")] + NotAuthorized, } /// Registry of all tokens and connections to the redap servers. @@ -132,6 +136,22 @@ pub enum Credentials { Stored, } +impl Credentials { + /// Whether these credentials have write permission. + /// + /// Returns `None` for [`Credentials::Stored`] (resolved at connect time, + /// permissions unknown) or if the token claims cannot be decoded. + pub fn has_write_permission(&self) -> Option { + match self { + Self::Token(jwt) => { + let claims: re_auth::Claims = jwt.decode_claims().ok()?; + Some(claims.has_write_permission()) + } + Self::Stored => None, + } + } +} + #[derive(Clone, Debug)] pub enum CredentialSource { PerOrigin, @@ -203,6 +223,7 @@ impl ConnectionRegistryHandle { /// - Local credentials for Rerun Cloud /// /// Failing that, no token will be used. + #[tracing::instrument(level = "info", skip_all)] pub async fn client(&self, origin: re_uri::Origin) -> ApiResult { // happy path { @@ -286,6 +307,7 @@ impl ConnectionRegistryHandle { /// Try creating (and validating) a raw client using whatever token we might have available. /// /// If successful, returns both the client and the working token. + #[tracing::instrument(level = "info", skip_all)] async fn try_create_raw_client( origin: re_uri::Origin, possible_credentials: impl Iterator, @@ -331,6 +353,57 @@ impl ConnectionRegistryHandle { } } + /// Probe `{origin}/version` to detect non-Rerun endpoints (e.g. user typed + /// `asdf.rerun.io` instead of `api.asdf.rerun.io`). + /// + /// Returns `Ok(())` if the probe is inconclusive (server responded as expected), + /// `Err(_)` with a friendly diagnostic if the origin is clearly not a Rerun server. 
+ #[tracing::instrument(level = "info", skip_all)] + async fn ensure_is_rerun_server(origin: &re_uri::Origin) -> ApiResult<()> { + let res = crate::with_retry("http_version_fetch", || async { + ehttp::fetch_async(ehttp::Request::get(format!("{}/version", origin.as_url()))) + .await + .map_err(|err| { + let mut msg = format!("failed to connect to server '{origin}': {err}"); + if let Some(suggested) = suggest_api_prefix(origin) { + write!(msg, ". Did you mean '{suggested}'?").ok(); + } + ApiError::connection(msg) + }) + }) + .await?; + + if res.ok { + // Server responded as expected — probe is inconclusive about why gRPC failed. + return Ok(()); + } + + let hint = suggest_api_prefix(origin).map(|suggested| { + format!("Did you mean '{suggested}'? Rerun Cloud endpoints require the 'api.' prefix") + }); + // Truncate the body so we don't dump an entire HTML error page into the error. + let body_snippet = std::str::from_utf8(&res.bytes) + .ok() + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(|s| { + const MAX: usize = 200; + if s.len() > MAX { + format!("{}…", &s[..s.floor_char_boundary(MAX)]) + } else { + s.to_owned() + } + }); + Err(ApiError::invalid_server_with_response( + origin.clone(), + res.status, + &res.status_text, + body_snippet.as_deref(), + hint.as_deref(), + )) + } + + #[tracing::instrument(level = "info", skip_all)] async fn create_and_validate_raw_client_with_token( origin: re_uri::Origin, credentials: Option, @@ -345,6 +418,7 @@ impl ConnectionRegistryHandle { Some(Credentials::Token(token)) => { token.for_host(&host).map_err(|err| { ApiError::credentials_with_source( + None, ClientCredentialsError::HostMismatch(err), format!("token not allowed for host '{host}'"), ) @@ -359,6 +433,7 @@ impl ConnectionRegistryHandle { if let Ok(Some(c)) = re_auth::oauth::load_credentials() { c.access_token().jwt().for_host(&host).map_err(|err| { ApiError::credentials_with_source( + None, ClientCredentialsError::HostMismatch(err), format!("stored token not 
allowed for host '{host}'"), ) @@ -369,69 +444,92 @@ impl ConnectionRegistryHandle { None => None, }; - // It's a common mistake to connect to `asdf.rerun.io` instead of `api.asdf.rerun.io`, - // so if what we're trying to connect to is not a valid Rerun server, then cut out - // a layer of noise: - { - let res = crate::with_retry("http_version_fetch", || async { - match ehttp::fetch_async(ehttp::Request::get(format!( - "{}/version", - origin.as_url() - ))) - .await - { - Ok(res) => Ok(res), - Err(err) => { - let mut msg = format!("failed to connect to server '{origin}': {err}"); - if let Some(suggested) = suggest_api_prefix(&origin) { - msg.push_str(&format!(". Did you mean '{suggested}'?")); - } - Err(ApiError::connection(msg)) - } - } - }) - .await?; + let mut raw_client = match crate::grpc::client(origin.clone(), provider).await { + Ok(client) => client, + Err(grpc_err) => { + // It's a common mistake to connect to `asdf.rerun.io` instead of `api.asdf.rerun.io`, + // so if what we're trying to connect to is not a valid Rerun server, then cut out + // a layer of noise: + Self::ensure_is_rerun_server(&origin).await?; - if !res.ok { - let hint = suggest_api_prefix(&origin).map(|suggested| { - format!( - "Did you mean '{suggested}'? Rerun Cloud endpoints require the 'api.' prefix" - ) - }); - return Err(ApiError::invalid_server(origin.clone(), hint.as_deref())); + return Err(grpc_err); } - } + }; - let mut raw_client = crate::grpc::client(origin.clone(), provider).await?; - - // Call the version endpoint to check that authentication is successful. It's ok to do this - // since we're caching the client, so we're not spamming such a request unnecessarily. - // TODO(rerun-io/dataplatform#1069): use the `whoami` endpoint instead when it exists. 
- let request_result = raw_client - .find_entries(FindEntriesRequest { - filter: Some(EntryFilter { - id: None, - name: None, - entry_kind: None, - }), - }) - .await; + // Call the WhoAmI endpoint to check that authentication is successful. It's ok to do + // this since we're caching the client, so we're not spamming such a request unnecessarily. + let request_result = raw_client.who_am_i(WhoAmIRequest {}).await; + + // TODO(rerun-io/dataplatform#1069): remove the `FindEntries` fallback once all servers + // have been updated to support the `WhoAmI` endpoint. + let request_result = match request_result { + Err(err) if err.code() == Code::Unimplemented => { + re_log::debug_once!( + "Server at {origin} does not support WhoAmI, falling back to FindEntries" + ); + raw_client + .find_entries(FindEntriesRequest { + filter: Some(EntryFilter { + id: None, + name: None, + entry_kind: None, + }), + }) + .await + .map(drop) + } + Ok(resp) => { + let trace_id = crate::extract_trace_id(resp.metadata()); + let re_protos::cloud::v1alpha1::WhoAmIResponse { + user_id, + can_read, + can_write, + } = resp.into_inner(); + let is_anonymous = user_id.is_none(); + let user_id = user_id.as_deref().unwrap_or(""); + re_log::debug_once!( + "Connected to {origin}: {user_id}, can_read={can_read}, can_write={can_write}", + ); + if !can_read { + if is_anonymous { + // Anonymous user without read access — treat as a credentials error + // so the auth flow is triggered. 
+ return Err(ApiError::credentials_with_source( + trace_id, + ClientCredentialsError::NotAuthorized, + "the server requires authentication for read access", + )); + } + return Err(ApiError::permission_denied( + trace_id, + "the server reports that you do not have read access", + )); + } + Ok(()) + } + Err(err) => Err(err), + }; match request_result { + Ok(()) => Ok(raw_client), + // catch unauthenticated errors and forget the token if they happen Err(err) if err.code() == Code::Unauthenticated => { + let trace_id = crate::extract_trace_id(err.metadata()); if let Some(credentials) = credentials { Err(ApiError::credentials_with_source( + trace_id, ClientCredentialsError::UnauthenticatedBadToken { status: err.into(), credentials, }, - "verifying connection to server", + "unauthenticated: bad token", )) } else { Err(ApiError::credentials_with_source( + trace_id, ClientCredentialsError::UnauthenticatedMissingToken(err.into()), - "verifying connection to server", + "unauthenticated: missing token", )) } } @@ -443,12 +541,14 @@ impl ConnectionRegistryHandle { match cred_error { CredentialsProviderError::SessionExpired => { Err(ApiError::credentials_with_source( + None, ClientCredentialsError::SessionExpired, "session expired", )) } CredentialsProviderError::Custom(_) => { Err(ApiError::credentials_with_source( + None, ClientCredentialsError::RefreshError(err.into()), "refreshing credentials", )) @@ -458,8 +558,6 @@ impl ConnectionRegistryHandle { Err(ApiError::tonic(err, "verifying connection to server")) } } - - Ok(_) => Ok(raw_client), } } diff --git a/crates/store/re_redap_client/src/grpc.rs b/crates/store/re_redap_client/src/grpc.rs index 177d000a54ea..b1a0f88d34af 100644 --- a/crates/store/re_redap_client/src/grpc.rs +++ b/crates/store/re_redap_client/src/grpc.rs @@ -14,7 +14,7 @@ use re_log_types::{ use re_protos::cloud::v1alpha1::rerun_cloud_service_client::RerunCloudServiceClient; use re_protos::common::v1alpha1::ext::SegmentId; use re_uri::Origin; -use 
tokio_stream::{Stream, StreamExt as _}; +use tokio_stream::StreamExt as _; use crate::{ ApiError, ApiErrorKind, ApiResult, ConnectionClient, MAX_DECODING_MESSAGE_SIZE, @@ -46,6 +46,7 @@ pub async fn channel(origin: Origin) -> ApiResult { let ca_cert = tokio::fs::read_to_string(&cert_path).await.map_err(|err| { ApiError::internal_with_source( + None, err, format!("couldn't load local cert at {cert_path:?}"), ) @@ -67,7 +68,7 @@ pub async fn channel(origin: Origin) -> ApiResult { let endpoint = { let mut endpoint = Endpoint::new(http_url) .and_then(|ep| ep.tls_config(tls_config)) - .map_err(|err| ApiError::connection_with_source(err, "connecting to server"))? + .map_err(|err| ApiError::connection_with_source(None, err, "connecting to server"))? .http2_adaptive_window(true) // Optimize for throughput .connect_timeout(std::time::Duration::from_secs(10)); @@ -79,6 +80,7 @@ pub async fn channel(origin: Origin) -> ApiResult { endpoint.connect().await.map_err(|err| { ApiError::connection_with_source( + None, err, format!("failed to connect to server at {origin}"), ) @@ -87,21 +89,21 @@ pub async fn channel(origin: Origin) -> ApiResult { match endpoint { Ok(channel) => Ok(channel), - Err(original_error) => { + Err(original_err) => { if ![ url::Host::Domain("localhost".to_owned()), url::Host::Ipv4(Ipv4Addr::LOCALHOST), ] .contains(&origin.host) { - return Err(original_error); + return Err(original_err); } // If we can't establish a connection, we probe if the server is // expecting unencrypted traffic. If that is the case, we return // a more meaningful error message. 
let Ok(endpoint) = Endpoint::new(origin.coerce_http_url()) else { - return Err(original_error); + return Err(original_err); }; let endpoint = endpoint.http2_adaptive_window(true); // Optimize for throughput @@ -111,7 +113,7 @@ pub async fn channel(origin: Origin) -> ApiResult { "the server is expecting an unencrypted connection (try `rerun+http://` if you are sure)", )) } else { - Err(original_error) + Err(original_err) } } } @@ -194,7 +196,7 @@ pub(crate) async fn client( .layer(AuthDecorator::new(credentials)) .layer({ let name = None; - let version = None; + let version = std::env::var("RERUN_CLIENT_VERSION_OVERRIDE").ok(); let is_client = true; re_protos::headers::new_rerun_headers_layer(name, version, is_client) }); @@ -215,22 +217,21 @@ pub(crate) async fn client( // TODO(cmc): we should compute contiguous runs of the same segment here, and return a `(String, Vec)` // instead. Because of how the server performs the computation, this will very likely work out well // in practice. +pub type ChunksWithSegment = Vec<(Chunk, Option)>; + #[cfg(not(target_arch = "wasm32"))] -pub fn fetch_chunks_response_to_chunk_and_segment_id( - response: S, -) -> impl Stream)>>> -where - S: Stream>, -{ - response - .then(|resp| { +pub fn fetch_chunks_response_to_chunk_and_segment_id( + response: crate::FetchChunksResponseStream, +) -> crate::ApiResponseStream { + let trace_id = response.trace_id(); + let stream = response + .then(move |resp| { + let trace_id = trace_id; // We want to make sure to offload that compute-heavy work to the compute worker pool: it's // not going to make this one single pipeline any faster, but it will prevent starvation of // the Tokio runtime (which would slow down every other futures currently scheduled!). 
tokio::task::spawn_blocking(move || { - let r = resp.map_err(|err| { - ApiError::tonic(err, "failed to get item in /FetchChunks response stream") - })?; + let r = resp?; let _span = tracing::trace_span!("fetch_chunks::batch_decode", num_chunks = r.chunks.len()) .entered(); @@ -242,7 +243,8 @@ where use re_log_encoding::ToApplication as _; let arrow_msg = arrow_msg.to_application(()).map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "failed to get arrow data for item in /FetchChunks response stream", ) @@ -250,7 +252,8 @@ where let chunk = re_chunk::Chunk::from_record_batch(&arrow_msg.batch).map_err( |err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "failed to parse item in /FetchChunks response stream", ) @@ -262,29 +265,27 @@ where .collect::, _>>() }) }) - .map(|res| { + .map(move |res| { res.map_err(|err| { ApiError::internal_with_source( + trace_id, err, "failed to sync on /FetchChunks response stream", ) }) - .and_then(std::convert::identity) - }) + .flatten() + }); + crate::ApiResponseStream::new(stream, trace_id) } // This code path happens to be shared between native and web, but we don't have a Tokio runtime on web! 
#[cfg(target_arch = "wasm32")] -pub fn fetch_chunks_response_to_chunk_and_segment_id( - response: S, -) -> impl Stream)>>> -where - S: Stream>, -{ - response.map(|resp| { - let resp = resp.map_err(|err| { - ApiError::tonic(err, "failed to get item in /FetchChunks response stream") - })?; +pub fn fetch_chunks_response_to_chunk_and_segment_id( + response: crate::FetchChunksResponseStream, +) -> crate::ApiResponseStream { + let trace_id = response.trace_id(); + let stream = response.map(move |resp| { + let resp = resp?; let _span = tracing::trace_span!("fetch_chunks::batch_decode", num_chunks = resp.chunks.len()) @@ -297,7 +298,8 @@ where use re_log_encoding::ToApplication as _; let arrow_msg = arrow_msg.to_application(()).map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "failed to get arrow data for item in /FetchChunks response stream", ) @@ -305,7 +307,8 @@ where let chunk = re_chunk::Chunk::from_record_batch(&arrow_msg.batch).map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "failed to parse item in /FetchChunks response stream", ) @@ -314,7 +317,36 @@ where Ok((chunk, segment_id)) }) .collect::, _>>() - }) + }); + crate::ApiResponseStream::new(stream, trace_id) +} + +/// Callback invoked as chunks are downloaded. +/// +/// Arguments: `(total_bytes_downloaded, total_bytes_expected)`. +/// `total_bytes_expected` may be `None` if the total size is not known. +pub type ProgressCallback = std::sync::Arc) + Send + Sync>; + +/// Options that control how segment data is streamed from the server. +#[derive(Clone, Default)] +pub struct StreamingOptions { + /// If `true`, download all chunks eagerly instead of relying on + /// on-demand streaming via the RRD manifest. + /// + /// This is useful for downloading a full recording to disk. + pub force_full_download: bool, + + /// Optional callback invoked as chunks are downloaded. 
+ pub on_progress: Option, +} + +impl std::fmt::Debug for StreamingOptions { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("StreamingOptions") + .field("force_full_download", &self.force_full_download) + .field("on_progress", &self.on_progress.as_ref().map(|_| "…")) + .finish() + } } /// Canonical way to ingest segment data from a Rerun Data Platform server, dealing with @@ -330,6 +362,7 @@ pub async fn stream_blueprint_and_segment_from_server( mut client: ConnectionClient, tx: re_log_channel::LogSender, uri: re_uri::DatasetSegmentUri, + options: StreamingOptions, ) -> ApiResult { re_log::debug!("Loading {uri}…"); @@ -355,6 +388,7 @@ pub async fn stream_blueprint_and_segment_from_server( store_version: None, }; + // Blueprints are always fully downloaded regardless of streaming options. if stream_segment_from_server( &mut client, blueprint_store_info, @@ -362,6 +396,7 @@ pub async fn stream_blueprint_and_segment_from_server( blueprint_dataset, blueprint_segment, re_uri::Fragment::default(), + &StreamingOptions::default(), ) .await? .is_break() @@ -408,6 +443,7 @@ pub async fn stream_blueprint_and_segment_from_server( dataset_id.into(), segment_id.into(), fragment, + &options, ) .await? .is_break() @@ -426,6 +462,7 @@ async fn stream_segment_from_server( dataset_id: EntryId, segment_id: SegmentId, fragment: re_uri::Fragment, + options: &StreamingOptions, ) -> ApiResult> { let store_id = store_info.store_id.clone(); @@ -469,29 +506,70 @@ async fn stream_segment_from_server( // See the attached issues for more information. let start_time = web_time::Instant::now(); - let manifest_result = client - .get_rrd_manifest(dataset_id, segment_id.clone()) + let manifest_stream_result = client + .get_rrd_manifest_stream(dataset_id, segment_id.clone()) .await; - match manifest_result { - Ok(raw_rrd_manifest) => { - re_log::debug_once!( - "The server supports larger-than-RAM. 
RRD manifest ({} deflated) loaded in {:.1}s", - re_format::format_bytes(raw_rrd_manifest.total_size_bytes() as _), - start_time.elapsed().as_secs_f32(), - ); + let trace_id = manifest_stream_result + .as_ref() + .ok() + .and_then(|s| s.trace_id()); + match manifest_stream_result { + Ok(manifest_stream) => { + let mut manifest_stream = std::pin::pin!(manifest_stream); + + let mut rrd_manifest_parts: Vec> = Vec::new(); + + while let Some(part_result) = manifest_stream.next().await { + let raw_rrd_manifest_part = part_result?; + + let part_nr = rrd_manifest_parts.len() + 1; + re_log::debug!( + "Received RRD manifest part #{part_nr}/? ({} deflated, {:.1}s elapsed)", + re_format::format_bytes(raw_rrd_manifest_part.total_size_bytes() as _), + start_time.elapsed().as_secs_f32(), + ); - let rrd_manifest = - re_log_encoding::RrdManifest::try_new(raw_rrd_manifest).map_err(|err| { - ApiError::invalid_arguments_with_source(err, "Invalid RRD manifest") - })?; + let rrd_manifest = re_log_encoding::RrdManifest::try_new(&raw_rrd_manifest_part) + .map_err(|err| { + ApiError::invalid_arguments_with_source( + trace_id, + err, + "Invalid RRD manifest part", + ) + })?; + + let rrd_manifest = Arc::new(rrd_manifest); + + if tx + .send(DataSourceMessage::RrdManifest( + store_id.clone(), + rrd_manifest.clone(), + )) + .is_err() + { + re_log::debug!("Receiver disconnected"); + return Ok(ControlFlow::Break(())); + } + + rrd_manifest_parts.push(rrd_manifest); + } - let rrd_manifest = Arc::new(rrd_manifest); + if rrd_manifest_parts.is_empty() { + return Err(ApiError::deserialization( + trace_id, + "failed to parse the response for /GetRrdManifest (no data)", + )); + } + + let part_nr = rrd_manifest_parts.len(); + re_log::debug!( + "Full RRD manifest loaded in {:.1}s in {}", + start_time.elapsed().as_secs_f32(), + re_format::format_plural_s(part_nr, "part") + ); if tx - .send(DataSourceMessage::RrdManifest( - store_id.clone(), - rrd_manifest.clone(), - )) + 
.send(DataSourceMessage::RrdManifestComplete(store_id.clone())) .is_err() { re_log::debug!("Receiver disconnected"); @@ -499,22 +577,35 @@ async fn stream_segment_from_server( } match store_id.kind() { - StoreKind::Recording => { + StoreKind::Recording if !options.force_full_download => { re_log::debug!("Letting the viewer load chunks on-demand"); return Ok(ControlFlow::Continue(())); } - StoreKind::Blueprint => { - // Load all of the chunks in one go; most important first: - let batch = sort_batch(rrd_manifest.data()).map_err(|err| { - ApiError::invalid_arguments_with_source(err, "Failed to sort chunk index") + StoreKind::Recording | StoreKind::Blueprint => { + re_log::debug!("Loading all of the chunks in one go; most important first"); + let refs: Vec<&re_log_encoding::RrdManifest> = + rrd_manifest_parts.iter().map(|m| m.as_ref()).collect(); + let combined = re_log_encoding::RrdManifest::concat(&refs).map_err(|err| { + ApiError::invalid_arguments_with_source( + trace_id, + err, + "Failed to concatenate RRD manifest parts", + ) + })?; + let batch = sort_batch(combined.chunk_fetcher_rb()).map_err(|err| { + ApiError::invalid_arguments_with_source( + trace_id, + err, + "Failed to sort chunk index", + ) })?; - return load_chunks(client, tx, &store_id, batch).await; + return load_chunks(client, tx, &store_id, batch, options).await; } } } Err(err) => { if err.kind == ApiErrorKind::Unimplemented { - re_log::debug_once!("The server does not support larger-than-RAM"); // Legacy server + re_log::debug_once!("The server does not support on-demand streaming"); // Legacy server } else { re_log::warn!("Failed to load RRD manifest: {err}"); } @@ -540,6 +631,7 @@ async fn stream_segment_from_server( ) .into(), ), + generate_direct_urls: false, }) .await?; @@ -555,12 +647,16 @@ async fn stream_segment_from_server( &time_selection_batches, ) .map_err(|err| { - ApiError::invalid_arguments_with_source(err, "Failed to concat chunk index batches") + 
ApiError::invalid_arguments_with_source( + None, + err, + "Failed to concat chunk index batches", + ) })?; // Prioritize the chunks: let batch = sort_batch(&batch).map_err(|err| { - ApiError::invalid_arguments_with_source(err, "Failed to sort chunk index") + ApiError::invalid_arguments_with_source(trace_id, err, "Failed to sort chunk index") })?; if let Some(chunk_ids) = chunk_id_column(&batch) { @@ -572,7 +668,10 @@ async fn stream_segment_from_server( ); } - if load_chunks(client, tx, &store_id, batch).await?.is_break() { + if load_chunks(client, tx, &store_id, batch, options) + .await? + .is_break() + { return Ok(ControlFlow::Break(())); } } @@ -587,6 +686,7 @@ async fn stream_segment_from_server( include_static_data: true, include_temporal_data: true, query: None, // everything + generate_direct_urls: false, }) .await?; @@ -596,12 +696,16 @@ async fn stream_segment_from_server( } let batch = arrow::compute::concat_batches(&batches[0].schema(), &batches).map_err(|err| { - ApiError::invalid_arguments_with_source(err, "Failed to concat chunk index batches") + ApiError::invalid_arguments_with_source( + trace_id, + err, + "Failed to concat chunk index batches", + ) })?; // Prioritize the chunks: let batch = sort_batch(&batch).map_err(|err| { - ApiError::invalid_arguments_with_source(err, "Failed to sort chunk index") + ApiError::invalid_arguments_with_source(trace_id, err, "Failed to sort chunk index") })?; if let Some(chunk_ids) = chunk_id_column(&batch) @@ -620,20 +724,22 @@ async fn stream_segment_from_server( }) .collect(); - let filtered_batch = re_arrow_util::take_record_batch(&batch, &filtered_indices) - .map_err(|err| ApiError::invalid_arguments_with_source(err, "take_record_batch"))?; + let filtered_batch = + re_arrow_util::take_record_batch(&batch, &filtered_indices).map_err(|err| { + ApiError::invalid_arguments_with_source(trace_id, err, "take_record_batch") + })?; - load_chunks(client, tx, &store_id, filtered_batch).await + load_chunks(client, tx, 
&store_id, filtered_batch, options).await } else { - load_chunks(client, tx, &store_id, batch).await + load_chunks(client, tx, &store_id, batch, options).await } } fn chunk_id_column(batch: &RecordBatch) -> Option<&[ChunkId]> { - batch - .column_by_name("chunk_id") - .and_then(|array| array.as_fixed_size_binary_opt()) - .and_then(|array| ChunkId::try_slice_from_arrow(array).ok()) + let array = batch + .column_by_name(re_log_encoding::RawRrdManifest::FIELD_CHUNK_ID) + .and_then(|array| array.as_fixed_size_binary_opt())?; + ChunkId::try_slice_from_arrow(array).ok() } /// Takes a dataframe that looks like an [`re_log_encoding::RrdManifest`] (has a `chunk_key` column). @@ -642,18 +748,30 @@ async fn load_chunks( tx: &re_log_channel::LogSender, store_id: &StoreId, full_batch: RecordBatch, + options: &StreamingOptions, ) -> ApiResult> { - re_log::trace!("Requesting {} chunks from server…", full_batch.num_rows()); + let num_chunks = full_batch.num_rows(); + + re_log::debug!( + "Downloading {} chunks from server…", + re_format::format_uint(num_chunks) + ); + if 25_000 < num_chunks { + re_log::debug_warn!( + "There are {} chunks in this recording. Consider running `rerun rrd optimize` on it!", + re_format::format_uint(num_chunks) + ); + } use futures::stream::FuturesUnordered; // Batch requests in groups of N=32 rows. 
const BATCH_SIZE: usize = 32; - let num_rows = full_batch.num_rows(); + let total_size_bytes = total_size_bytes_from_batch(&full_batch); let mut futures = FuturesUnordered::new(); - for start in (0..num_rows).step_by(BATCH_SIZE) { - let end = usize::min(start + BATCH_SIZE, num_rows); + for start in (0..num_chunks).step_by(BATCH_SIZE) { + let end = usize::min(start + BATCH_SIZE, num_chunks); let small_batch = full_batch.slice(start, end - start); let mut client = client.clone(); @@ -665,37 +783,62 @@ async fn load_chunks( }); } + let mut downloaded_bytes: u64 = 0; + while let Some(res) = futures::stream::StreamExt::next(&mut futures).await { - let result = res?; + let (result, batch_bytes) = res?; + + downloaded_bytes += batch_bytes; + if let Some(on_progress) = &options.on_progress { + on_progress(downloaded_bytes, total_size_bytes); + } + if result.is_break() { return Ok(ControlFlow::Break(())); } } - re_log::trace!("Finished downloading {} chunks.", num_rows); + re_log::trace!( + "Finished downloading {} chunks.", + re_format::format_uint(num_chunks) + ); Ok(ControlFlow::Continue(())) } +/// Try to extract total deflated size from the batch's `chunk_byte_size` column. +fn total_size_bytes_from_batch(batch: &RecordBatch) -> Option { + let col = batch.column_by_name(re_log_encoding::RawRrdManifest::FIELD_CHUNK_BYTE_SIZE)?; + let array = col.as_primitive_opt::()?; + Some(array.iter().map(|v| v.unwrap_or(0)).sum()) +} + +/// Returns `(control_flow, bytes_downloaded)`. async fn load_small_chunk_batch( client: &mut ConnectionClient, tx: &re_log_channel::LogSender, store_id: &StoreId, batch: &RecordBatch, -) -> ApiResult> { +) -> ApiResult<(ControlFlow<()>, u64)> { // TODO(RR-3323): FetchChunks should expose a proper bidirectional streaming path on native. 
let chunk_stream = client.fetch_segment_chunks_by_id(batch).await?; let mut chunk_stream = fetch_chunks_response_to_chunk_and_segment_id(chunk_stream); + let trace_id = chunk_stream.trace_id(); + + let mut batch_bytes: u64 = 0; while let Some(chunks) = chunk_stream.next().await { for (chunk, _partition_id) in chunks? { + batch_bytes += chunk.heap_size_bytes(); + if tx .send( LogMsg::ArrowMsg( store_id.clone(), // TODO(#10229): this looks to be converting back and forth? chunk.to_arrow_msg().map_err(|err| { - ApiError::serialization_with_source( + ApiError::deserialization_with_source( + trace_id, err, "failed to parse chunk in /FetchChunks response stream", ) @@ -706,12 +849,12 @@ async fn load_small_chunk_batch( .is_err() { re_log::debug!("Receiver disconnected"); - return Ok(ControlFlow::Break(())); + return Ok((ControlFlow::Break(()), batch_bytes)); } } } - Ok(ControlFlow::Continue(())) + Ok((ControlFlow::Continue(()), batch_bytes)) } fn sort_batch(batch: &RecordBatch) -> Result { @@ -719,9 +862,9 @@ fn sort_batch(batch: &RecordBatch) -> Result { let schema = batch.schema(); - // Get column indices: - let chunk_is_static = schema.index_of("chunk_is_static")?; - let chunk_id = schema.index_of("chunk_id")?; + // Get column indices (these are guaranteed to exist in the pruned batch): + let chunk_is_static = schema.index_of(re_log_encoding::RrdManifest::FIELD_CHUNK_IS_STATIC)?; + let chunk_id = schema.index_of(re_log_encoding::RrdManifest::FIELD_CHUNK_ID)?; let sort_keys = vec![ // Static first: diff --git a/crates/store/re_redap_client/src/lib.rs b/crates/store/re_redap_client/src/lib.rs index 87d5026ba1ac..0521cd2a54e7 100644 --- a/crates/store/re_redap_client/src/lib.rs +++ b/crates/store/re_redap_client/src/lib.rs @@ -1,24 +1,41 @@ //! Official gRPC client for the Rerun Data Protocol. 
+mod api_error; +mod api_response_stream; mod connection_client; mod connection_registry; mod grpc; -pub use self::connection_client::{GenericConnectionClient, SegmentQueryParams}; +pub use self::api_error::{ApiError, ApiErrorKind, ApiResult}; + +pub use self::api_response_stream::ApiResponseStream; +pub use self::connection_client::{ + FetchChunksResponseStream, GenericConnectionClient, SegmentQueryParams, +}; pub use self::connection_registry::{ ClientCredentialsError, ConnectionClient, ConnectionRegistry, ConnectionRegistryHandle, CredentialSource, Credentials, SourcedCredentials, }; pub use self::grpc::{ - RedapClient, channel, fetch_chunks_response_to_chunk_and_segment_id, - stream_blueprint_and_segment_from_server, + ChunksWithSegment, RedapClient, StreamingOptions, channel, + fetch_chunks_response_to_chunk_and_segment_id, stream_blueprint_and_segment_from_server, }; +/// Re-export of [`opentelemetry::TraceId`] for callers constructing +/// [`ApiError`]s without taking a direct dependency on `opentelemetry`. +pub use opentelemetry::TraceId; + const MAX_DECODING_MESSAGE_SIZE: usize = u32::MAX as usize; /// Responses from the Data Platform can optionally include this header to communicate back the trace id of the request. const GRPC_RESPONSE_TRACEID_HEADER: &str = "x-request-trace-id"; +/// Extract the server's trace-id from gRPC response metadata, if present. 
+pub fn extract_trace_id(metadata: &tonic::metadata::MetadataMap) -> Option { + let s = metadata.get(GRPC_RESPONSE_TRACEID_HEADER)?.to_str().ok()?; + opentelemetry::TraceId::from_hex(s).ok() +} + /// Wrapper with a nicer error message #[derive(Debug)] pub struct TonicStatusError(Box); @@ -44,26 +61,31 @@ impl TonicStatusError { impl std::fmt::Display for TonicStatusError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // TODO(emilk): duplicated in `re_grpc_server` - let status = &self.0; + // NOTE: duplicated in `re_grpc_server` and `re_grpc_client` + fmt_tonic_status(f, &self.0) + } +} +fn fmt_tonic_status(f: &mut std::fmt::Formatter<'_>, status: &tonic::Status) -> std::fmt::Result { + if status.message().is_empty() { write!(f, "gRPC error")?; + } else { + write!(f, "{}", status.message())?; + } - if status.code() != tonic::Code::Unknown { - write!(f, ", code: '{}'", status.code())?; - } - if !status.message().is_empty() { - write!(f, ", message: {:?}", status.message())?; - } - // Binary data - not useful. - // if !status.details().is_empty() { - // write!(f, ", details: {:?}", status.details())?; - // } - if !status.metadata().is_empty() { - write!(f, ", metadata: {:?}", status.metadata().as_ref())?; - } - Ok(()) + if status.code() != tonic::Code::Unknown { + write!(f, " ({})", status.code())?; + } + + if !status.metadata().is_empty() { + write!( + f, + "{} metadata: {:?}", + re_error::DETAILS_SEPARATOR, + status.metadata().as_ref() + )?; } + Ok(()) } impl From for TonicStatusError { @@ -78,216 +100,8 @@ impl std::error::Error for TonicStatusError { } } -#[derive(Debug)] -pub struct ApiError { - pub message: String, - pub kind: ApiErrorKind, - pub source: Option>, - // when the error comes from the server returning a trace id, we include it in the client - // error for easier reporting. 
- trace_id: Option, -} - -/// Convenience for `Result` -pub type ApiResult = Result; - -#[derive(Debug, PartialEq, Eq)] -pub enum ApiErrorKind { - NotFound, - AlreadyExists, - PermissionDenied, - Unauthenticated, - - /// The gRPC endpoint has not been implemented - Unimplemented, - Connection, - Timeout, - Internal, - InvalidArguments, - ResourcesExhausted, - Serialization, - InvalidServer, -} - -impl From for ApiErrorKind { - fn from(code: tonic::Code) -> Self { - match code { - tonic::Code::NotFound => Self::NotFound, - tonic::Code::AlreadyExists => Self::AlreadyExists, - tonic::Code::PermissionDenied => Self::PermissionDenied, - tonic::Code::ResourceExhausted => Self::ResourcesExhausted, - tonic::Code::Unauthenticated => Self::Unauthenticated, - tonic::Code::Unimplemented => Self::Unimplemented, - tonic::Code::Unavailable => Self::Connection, - tonic::Code::InvalidArgument => Self::InvalidArguments, - tonic::Code::DeadlineExceeded => Self::Timeout, - _ => Self::Internal, - } - } -} - -impl std::fmt::Display for ApiErrorKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::NotFound => write!(f, "NotFound"), - Self::AlreadyExists => write!(f, "AlreadyExists"), - Self::PermissionDenied => write!(f, "PermissionDenied"), - Self::Unauthenticated => write!(f, "Unauthenticated"), - Self::Unimplemented => write!(f, "Unimplemented"), - Self::Connection => write!(f, "Connection"), - Self::Internal => write!(f, "Internal"), - Self::InvalidArguments => write!(f, "InvalidArguments"), - Self::ResourcesExhausted => write!(f, "ResourcesExhausted"), - Self::Serialization => write!(f, "Serialization"), - Self::Timeout => write!(f, "Timeout"), - Self::InvalidServer => write!(f, "InvalidServer"), - } - } -} - -impl ApiError { - #[inline] - fn new(kind: ApiErrorKind, message: impl Into) -> Self { - Self { - message: message.into(), - kind, - source: None, - trace_id: None, - } - } - - #[inline] - fn new_with_source( - err: impl 
std::error::Error + Send + Sync + 'static, - kind: ApiErrorKind, - message: impl Into, - ) -> Self { - Self { - message: message.into(), - kind, - source: Some(Box::new(err)), - trace_id: None, - } - } - - #[inline] - fn new_with_source_and_trace( - err: impl std::error::Error + Send + Sync + 'static, - kind: ApiErrorKind, - message: impl Into, - trace_id: impl Into, - ) -> Self { - Self { - message: message.into(), - kind, - source: Some(Box::new(err)), - trace_id: Some(trace_id.into()), - } - } - - pub fn tonic(err: tonic::Status, message: impl Into) -> Self { - let message = format!("{}: {}", message.into(), err.message()); - let kind = ApiErrorKind::from(err.code()); - let trace_id = err - .metadata() - .get(GRPC_RESPONSE_TRACEID_HEADER) - .and_then(|v| v.to_str().ok()) - .map(|s| s.to_owned()); - if let Some(trace_id) = trace_id { - Self::new_with_source_and_trace(err, kind, message, trace_id) - } else { - Self::new_with_source(err, kind, message) - } - } - - pub fn serialization(message: impl Into) -> Self { - Self::new(ApiErrorKind::Serialization, message) - } - - pub fn serialization_with_source( - err: impl std::error::Error + Send + Sync + 'static, - message: impl Into, - ) -> Self { - Self::new_with_source(err, ApiErrorKind::Serialization, message) - } - - pub fn invalid_arguments_with_source( - err: impl std::error::Error + Send + Sync + 'static, - message: impl Into, - ) -> Self { - Self::new_with_source(err, ApiErrorKind::InvalidArguments, message) - } - - pub fn internal_with_source( - err: impl std::error::Error + Send + Sync + 'static, - message: impl Into, - ) -> Self { - Self::new_with_source(err, ApiErrorKind::Internal, message) - } - - pub fn connection_with_source( - err: impl std::error::Error + Send + Sync + 'static, - message: impl Into, - ) -> Self { - Self::new_with_source(err, ApiErrorKind::Connection, message) - } - - pub fn connection(message: impl Into) -> Self { - Self::new(ApiErrorKind::Connection, message) - } - - pub fn 
credentials_with_source( - err: ClientCredentialsError, - message: impl Into, - ) -> Self { - Self::new_with_source(err, ApiErrorKind::Unauthenticated, message) - } - - #[expect(clippy::needless_pass_by_value)] - pub fn invalid_server(origin: re_uri::Origin, hint: Option<&str>) -> Self { - let mut msg = format!("{origin} is not a valid Rerun server"); - if let Some(hint) = hint { - msg.push_str(". "); - msg.push_str(hint); - } - Self::new(ApiErrorKind::InvalidServer, msg) - } - - /// Helper method to downcast the source error to a `ClientCredentialsError` if possible. - #[inline] - pub fn as_client_credentials_error(&self) -> Option<&ClientCredentialsError> { - self.source - .as_deref()? - .downcast_ref::() - } - - #[inline] - pub fn is_client_credentials_error(&self) -> bool { - self.kind == ApiErrorKind::Unauthenticated - && matches!(self.source.as_deref(), Some(e) if e.is::()) - } -} - -impl std::fmt::Display for ApiError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.message)?; - if let Some(ref trace_id) = self.trace_id { - write!(f, " (trace-id: {trace_id})")?; - } - Ok(()) - } -} - -impl std::error::Error for ApiError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - self.source - .as_deref() - .map(|e| e as &(dyn std::error::Error + 'static)) - } -} - /// Helper function for executing requests or connection attempts with retries. 
-#[tracing::instrument(skip(f), level = "debug")] +#[tracing::instrument(skip(f), level = "trace")] pub async fn with_retry(req_name: &str, f: F) -> ApiResult where F: Fn() -> Fut, @@ -310,19 +124,11 @@ where let res = f().await; match res { - Err(err) - if matches!( - err.kind, - ApiErrorKind::Connection - | ApiErrorKind::Timeout - | ApiErrorKind::Internal - | ApiErrorKind::ResourcesExhausted - ) => - { + Err(err) if err.kind.is_retryable() => { last_retryable_err = Some(err); let backoff = backoff_gen.gen_next(); - tracing::debug!( + tracing::trace!( attempts, max_attempts = MAX_ATTEMPTS, ?backoff, @@ -332,8 +138,8 @@ where backoff.sleep().await; } Err(err) => { - // logging at the debug level to avoid having these spam in the viewer - tracing::debug!( + // logging at the trace level to avoid having these spam in debug builds of the viewer + tracing::trace!( attempts, "{req_name} failed with non-retryable error: {err}" ); @@ -341,7 +147,7 @@ where } Ok(value) => { - tracing::debug!(attempts, "{req_name} succeeded"); + tracing::trace!(attempts, "{req_name} succeeded"); return Ok(value); } } @@ -349,7 +155,7 @@ where attempts += 1; } - tracing::debug!( + tracing::trace!( attempts, max_attempts = MAX_ATTEMPTS, "{req_name} failed after max retries, giving up" diff --git a/crates/store/re_redap_tests/Cargo.toml b/crates/store/re_redap_tests/Cargo.toml index caa6ef46c7c6..1ad98d9ba308 100644 --- a/crates/store/re_redap_tests/Cargo.toml +++ b/crates/store/re_redap_tests/Cargo.toml @@ -44,6 +44,3 @@ tempfile.workspace = true tokio.workspace = true tonic.workspace = true url.workspace = true - -# TODO(lancedb/lance#5075): lance doesn't activate chrono/serde itself. Fixed in lance 0.39. 
-chrono = { workspace = true, features = ["serde"] } diff --git a/crates/store/re_redap_tests/src/lib.rs b/crates/store/re_redap_tests/src/lib.rs index d401452b1f5c..ba25502f3577 100644 --- a/crates/store/re_redap_tests/src/lib.rs +++ b/crates/store/re_redap_tests/src/lib.rs @@ -23,13 +23,13 @@ mod utils; pub use ::re_arrow_util::{FieldsTestExt, RecordBatchTestExt, SchemaTestExt}; pub use self::tests::common::{ - DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt, register_and_wait, + DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt, entry_name, register_and_wait, }; pub use self::tests::*; pub use self::utils::path::TempPath; pub use self::utils::rerun::{ - TuidPrefix, create_minimal_binary_recording_in, create_nasty_recording, - create_recording_with_embeddings, create_recording_with_properties, + TuidPrefix, create_divergent_component_ranges_recording, create_minimal_binary_recording_in, + create_nasty_recording, create_recording_with_embeddings, create_recording_with_properties, create_recording_with_scalars, create_recording_with_static_components, create_recording_with_text, create_simple_blueprint, create_simple_recording, create_simple_recording_in, multi_chunked_entities_recording, diff --git a/crates/store/re_redap_tests/src/tests/column_projection.rs b/crates/store/re_redap_tests/src/tests/column_projection.rs index f53c12d99dd9..bc17e5c0e8eb 100644 --- a/crates/store/re_redap_tests/src/tests/column_projection.rs +++ b/crates/store/re_redap_tests/src/tests/column_projection.rs @@ -8,7 +8,7 @@ use re_protos::cloud::v1alpha1::{ use re_protos::headers::RerunHeadersInjectorExt as _; use crate::tests::common::{ - DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _, prop, + DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _, entry_name, prop, }; pub async fn test_segment_table_column_projections(service: impl RerunCloudService) { @@ -124,16 +124,16 @@ async fn test_column_projections( 
tonic::Request::new(ScanSegmentTableRequest { columns: vec!["unknown_column".to_owned()], }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await; match result { - Err(status) => { - assert_eq!(status.code(), tonic::Code::InvalidArgument); - assert!(status.message().contains("unknown_column")); - assert!(status.message().contains("not found")); + Err(err) => { + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!(err.message().contains("unknown_column")); + assert!(err.message().contains("not found")); } Ok(_) => panic!("expected InvalidArgument error for unknown column"), } @@ -150,20 +150,19 @@ async fn test_column_projections( ScanSegmentTableResponse::FIELD_SEGMENT_ID.to_owned(), ], }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await; match result { - Err(status) => { - assert_eq!(status.code(), tonic::Code::InvalidArgument); + Err(err) => { + assert_eq!(err.code(), tonic::Code::InvalidArgument); assert!( - status - .message() + err.message() .contains(ScanSegmentTableResponse::FIELD_SEGMENT_ID) ); - assert!(status.message().contains("twice") || status.message().contains("duplicate")); + assert!(err.message().contains("twice") || err.message().contains("duplicate")); } Ok(_) => panic!("expected InvalidArgument error for duplicate column"), } @@ -179,7 +178,7 @@ async fn projected_segment_table_batch( tonic::Request::new(ScanSegmentTableRequest { columns: column_projection, }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -221,7 +220,7 @@ async fn projected_dataset_manifest_batch( tonic::Request::new(ScanDatasetManifestRequest { columns: column_projection, }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await diff --git a/crates/store/re_redap_tests/src/tests/common.rs b/crates/store/re_redap_tests/src/tests/common.rs index 7d3726eaaa0e..31d97ff555f5 100644 
--- a/crates/store/re_redap_tests/src/tests/common.rs +++ b/crates/store/re_redap_tests/src/tests/common.rs @@ -4,6 +4,7 @@ use arrow::array::RecordBatch; use futures::StreamExt as _; use itertools::Itertools as _; use re_log_types::{EntityPath, TimeType}; +use re_protos::EntryName; use re_protos::cloud::v1alpha1::ext::DatasetEntry; use re_protos::cloud::v1alpha1::rerun_cloud_service_server::RerunCloudService; use re_protos::cloud::v1alpha1::{ @@ -13,6 +14,11 @@ use re_protos::cloud::v1alpha1::{ use re_protos::common::v1alpha1::TaskId; use re_protos::common::v1alpha1::ext::IfDuplicateBehavior; use re_protos::headers::RerunHeadersInjectorExt as _; + +/// Test helper: parse a string into an `EntryName`, panicking on invalid names. +pub fn entry_name(name: &str) -> EntryName { + EntryName::new(name).unwrap() +} use re_types_core::AsComponents; use tonic::async_trait; use url::Url; @@ -94,7 +100,7 @@ impl RerunCloudServiceExt for T { on_duplicate: re_protos::common::v1alpha1::IfDuplicateBehavior::from(on_duplicate) as i32, }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .expect("Failed to create a request"); register_with_dataset_blocking(self, request).await; @@ -122,7 +128,7 @@ impl RerunCloudServiceExt for T { }; let request = tonic::Request::new(request.into()) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .expect("Failed to create a request"); use futures::TryStreamExt as _; @@ -161,7 +167,7 @@ impl RerunCloudServiceExt for T { re_protos::cloud::v1alpha1::ext::LanceTable { table_url }, ); let request = re_protos::cloud::v1alpha1::ext::RegisterTableRequest { - name: table_name.to_owned(), + name: entry_name(table_name), provider_details, }; let request = tonic::Request::new(request.try_into().expect("Failed to convert request")); diff --git a/crates/store/re_redap_tests/src/tests/create_dataset.rs b/crates/store/re_redap_tests/src/tests/create_dataset.rs index ae6ba2d08c66..1db4749327f4 100644 --- 
a/crates/store/re_redap_tests/src/tests/create_dataset.rs +++ b/crates/store/re_redap_tests/src/tests/create_dataset.rs @@ -10,6 +10,8 @@ use re_protos::cloud::v1alpha1::{ }; use re_protos::headers::RerunHeadersInjectorExt as _; +use super::common::entry_name; + pub async fn create_dataset_tests(service: impl RerunCloudService) { // // Create a dataset with just a name @@ -20,7 +22,7 @@ pub async fn create_dataset_tests(service: impl RerunCloudService) { create_dataset_entry( &service, CreateDatasetEntryRequest { - name: dataset1_name.to_owned(), + name: entry_name(dataset1_name), id: None, }, ) @@ -64,7 +66,7 @@ pub async fn create_dataset_tests(service: impl RerunCloudService) { let status = create_dataset_entry( &service, CreateDatasetEntryRequest { - name: dataset1_name.to_owned(), + name: entry_name(dataset1_name), id: None, }, ) @@ -84,7 +86,7 @@ pub async fn create_dataset_tests(service: impl RerunCloudService) { let status = create_dataset_entry( &service, CreateDatasetEntryRequest { - name: "this name is for sure not used but the id might".to_owned(), + name: entry_name("this name is for sure not used but the id might"), id: Some(entry_details.id), }, ) @@ -107,7 +109,7 @@ pub async fn create_dataset_tests(service: impl RerunCloudService) { create_dataset_entry( &service, CreateDatasetEntryRequest { - name: dataset2_name.to_owned(), + name: entry_name(dataset2_name), id: Some(dataset2_id), }, ) @@ -142,7 +144,7 @@ pub async fn create_dataset_tests(service: impl RerunCloudService) { let provider_details = ProviderDetails::LanceTable(LanceTable { table_url }); let create_table_request = CreateTableEntryRequest { - name: table_name.to_owned(), + name: entry_name(table_name), schema: schema.clone(), provider_details: Some(provider_details), } @@ -161,7 +163,7 @@ pub async fn create_dataset_tests(service: impl RerunCloudService) { let status = create_dataset_entry( &service, CreateDatasetEntryRequest { - name: table_name.to_owned(), + name: 
entry_name(table_name), id: None, }, ) diff --git a/crates/store/re_redap_tests/src/tests/create_table.rs b/crates/store/re_redap_tests/src/tests/create_table.rs index 416337006b36..6f3086ba5533 100644 --- a/crates/store/re_redap_tests/src/tests/create_table.rs +++ b/crates/store/re_redap_tests/src/tests/create_table.rs @@ -9,6 +9,7 @@ use re_protos::cloud::v1alpha1::ext::{ }; use re_protos::cloud::v1alpha1::rerun_cloud_service_server::RerunCloudService; +use super::common::entry_name; use crate::SchemaTestExt as _; pub async fn create_table_entry(service: impl RerunCloudService) { @@ -28,7 +29,7 @@ pub async fn create_table_entry(service: impl RerunCloudService) { let provider_details = ProviderDetails::LanceTable(LanceTable { table_url }); let create_table_request = CreateTableEntryRequest { - name: table_name.to_owned(), + name: entry_name(table_name), schema: schema.clone(), provider_details: Some(provider_details), } @@ -48,7 +49,7 @@ pub async fn create_table_entry(service: impl RerunCloudService) { .expect("table entry details missing"); let entry: EntryDetails = response.try_into().expect("convert into entry details"); - assert_eq!(entry.name, table_name); + assert_eq!(entry.name, entry_name(table_name)); let schema_response = service .get_table_schema(tonic::Request::new(GetTableSchemaRequest { @@ -84,7 +85,7 @@ pub async fn create_table_entry_duplicate_url(service: impl RerunCloudService) { }); let create_table_request = CreateTableEntryRequest { - name: "table_1".to_owned(), + name: entry_name("table_1"), schema: schema.clone(), provider_details: Some(provider_details.clone()), } @@ -98,7 +99,7 @@ pub async fn create_table_entry_duplicate_url(service: impl RerunCloudService) { // Second call with the same URL but a different name should fail with AlreadyExists. 
let create_table_request_2 = CreateTableEntryRequest { - name: "table_2".to_owned(), + name: entry_name("table_2"), schema, provider_details: Some(provider_details), } @@ -126,7 +127,7 @@ pub async fn create_table_entry_failed_does_not_leak_name(service: impl RerunClo }); let create_table_request = CreateTableEntryRequest { - name: table_name.to_owned(), + name: entry_name(table_name), schema: schema.clone(), provider_details: Some(bad_provider), } @@ -147,7 +148,7 @@ pub async fn create_table_entry_failed_does_not_leak_name(service: impl RerunClo }); let create_table_request = CreateTableEntryRequest { - name: table_name.to_owned(), + name: entry_name(table_name), schema, provider_details: Some(good_provider), } diff --git a/crates/store/re_redap_tests/src/tests/dataset_schema.rs b/crates/store/re_redap_tests/src/tests/dataset_schema.rs index a8cd8677c922..cfb96a8ef79f 100644 --- a/crates/store/re_redap_tests/src/tests/dataset_schema.rs +++ b/crates/store/re_redap_tests/src/tests/dataset_schema.rs @@ -2,7 +2,9 @@ use re_protos::cloud::v1alpha1::GetDatasetSchemaRequest; use re_protos::cloud::v1alpha1::rerun_cloud_service_server::RerunCloudService; use re_protos::headers::RerunHeadersInjectorExt as _; -use super::common::{DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _}; +use super::common::{ + DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _, entry_name, +}; use crate::SchemaTestExt as _; pub async fn simple_dataset_schema(service: impl RerunCloudService) { @@ -44,7 +46,7 @@ async fn dataset_schema_snapshot( let schema = service .get_dataset_schema( tonic::Request::new(GetDatasetSchemaRequest {}) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await diff --git a/crates/store/re_redap_tests/src/tests/entries_table.rs b/crates/store/re_redap_tests/src/tests/entries_table.rs index 7d4f56724cba..665bdf516e4b 100644 --- a/crates/store/re_redap_tests/src/tests/entries_table.rs +++ 
b/crates/store/re_redap_tests/src/tests/entries_table.rs @@ -1,11 +1,11 @@ use futures::TryStreamExt as _; use itertools::Itertools as _; use re_log_types::EntryId; -use re_protos::cloud::v1alpha1::ext::EntryDetails; use re_protos::cloud::v1alpha1::rerun_cloud_service_server::RerunCloudService; use re_protos::cloud::v1alpha1::{ DeleteEntryRequest, FindEntriesRequest, GetTableSchemaRequest, ScanTableRequest, }; +use re_protos::{EntryName, cloud::v1alpha1::ext::EntryDetails}; use crate::tests::common::RerunCloudServiceExt as _; use crate::{RecordBatchTestExt as _, SchemaTestExt as _}; @@ -101,7 +101,7 @@ async fn entries_table_id(service: &impl RerunCloudService) -> EntryId { .try_into() .expect("Failed to convert to EntryDetails"); - assert_eq!(entries.name, "__entries"); + assert_eq!(entries.name, EntryName::entries_table()); entries.id } diff --git a/crates/store/re_redap_tests/src/tests/fetch_chunks.rs b/crates/store/re_redap_tests/src/tests/fetch_chunks.rs index 29dee90e95f2..9269917ee1ec 100644 --- a/crates/store/re_redap_tests/src/tests/fetch_chunks.rs +++ b/crates/store/re_redap_tests/src/tests/fetch_chunks.rs @@ -14,6 +14,7 @@ use re_types_core::Loggable as _; use crate::RecordBatchTestExt as _; use crate::tests::common::{ DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _, concat_record_batches, + entry_name, }; /// This test makes a snapshot of all the chunks returned for a simple dataset. 
@@ -44,7 +45,7 @@ pub async fn simple_dataset_fetch_chunk_snapshot(service: impl RerunCloudService let chunk_info = service .query_dataset( tonic::Request::new(QueryDatasetRequest::default().into()) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -148,7 +149,7 @@ pub async fn multi_dataset_fetch_chunk_completeness(service: impl RerunCloudServ } .into(), ) - .with_entry_name(dataset_name_1) + .with_entry_name(entry_name(dataset_name_1)) .unwrap(), ) .await @@ -177,7 +178,7 @@ pub async fn multi_dataset_fetch_chunk_completeness(service: impl RerunCloudServ } .into(), ) - .with_entry_name(dataset_name_1) + .with_entry_name(entry_name(dataset_name_1)) .unwrap(), ) .await diff --git a/crates/store/re_redap_tests/src/tests/indexes.rs b/crates/store/re_redap_tests/src/tests/indexes.rs index a9712db3e434..8be0264dc559 100644 --- a/crates/store/re_redap_tests/src/tests/indexes.rs +++ b/crates/store/re_redap_tests/src/tests/indexes.rs @@ -12,7 +12,9 @@ use re_protos::cloud::v1alpha1::{ use re_protos::common::v1alpha1::{ComponentDescriptor, EntityPath, IndexColumnSelector, Timeline}; use re_protos::headers::RerunHeadersInjectorExt as _; -use super::common::{DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _}; +use super::common::{ + DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _, entry_name, +}; // --- Tests --- @@ -40,7 +42,7 @@ pub async fn index_lifecycle(service: impl RerunCloudService) { let code = service .search_dataset( tonic::Request::new(req) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -61,7 +63,7 @@ pub async fn index_lifecycle(service: impl RerunCloudService) { let code = service .create_index( tonic::Request::new(req) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -110,7 +112,7 @@ pub async fn index_lifecycle(service: impl RerunCloudService) { let code = service 
.search_dataset( tonic::Request::new(search_dataset_requests.remove(&column).unwrap()) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -191,7 +193,7 @@ pub async fn index_incremental(service: impl RerunCloudService) { } .into(), ) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -216,7 +218,7 @@ pub async fn index_incremental(service: impl RerunCloudService) { } .into(), ) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -244,7 +246,7 @@ pub async fn dataset_doesnt_exist(service: impl RerunCloudService) { let code = service .list_indexes( tonic::Request::new(ListIndexesRequest {}) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -256,7 +258,7 @@ pub async fn dataset_doesnt_exist(service: impl RerunCloudService) { let code = service .search_dataset( tonic::Request::new(search_dataset_request) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -268,7 +270,7 @@ pub async fn dataset_doesnt_exist(service: impl RerunCloudService) { let code = service .create_index( tonic::Request::new(create_index_request.clone()) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -281,7 +283,7 @@ pub async fn dataset_doesnt_exist(service: impl RerunCloudService) { tonic::Request::new(DeleteIndexesRequest { column: create_index_request.config.unwrap().column, }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -344,7 +346,7 @@ pub async fn column_doesnt_exist(service: impl RerunCloudService) { let code = service .search_dataset( tonic::Request::new(req) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -374,7 +376,7 @@ pub async fn column_doesnt_exist(service: impl RerunCloudService) 
{ let code = service .create_index( tonic::Request::new(req.clone()) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -561,7 +563,7 @@ async fn create_index( req: CreateIndexRequest, ) -> tonic::Result<()> { let _res = service - .create_index(tonic::Request::new(req).with_entry_name(dataset_name)?) + .create_index(tonic::Request::new(req).with_entry_name(entry_name(dataset_name))?) .await?; Ok(()) @@ -573,7 +575,7 @@ async fn search_dataset( req: SearchDatasetRequest, ) -> tonic::Result { let res = service - .search_dataset(tonic::Request::new(req).with_entry_name(dataset_name)?) + .search_dataset(tonic::Request::new(req).with_entry_name(entry_name(dataset_name))?) .await?; use futures::StreamExt as _; @@ -593,7 +595,9 @@ async fn list_indexes( dataset_name: &str, ) -> tonic::Result> { let res = service - .list_indexes(tonic::Request::new(ListIndexesRequest {}).with_entry_name(dataset_name)?) + .list_indexes( + tonic::Request::new(ListIndexesRequest {}).with_entry_name(entry_name(dataset_name))?, + ) .await?; let indexes: HashMap = res @@ -612,7 +616,7 @@ async fn delete_indexes( req: DeleteIndexesRequest, ) -> tonic::Result> { let res = service - .delete_indexes(tonic::Request::new(req).with_entry_name(dataset_name)?) + .delete_indexes(tonic::Request::new(req).with_entry_name(entry_name(dataset_name))?) .await?; let indexes: HashMap = res diff --git a/crates/store/re_redap_tests/src/tests/mod.rs b/crates/store/re_redap_tests/src/tests/mod.rs index 7f652c701bb0..ad623a58b672 100644 --- a/crates/store/re_redap_tests/src/tests/mod.rs +++ b/crates/store/re_redap_tests/src/tests/mod.rs @@ -71,11 +71,14 @@ define_redap_tests! 
{ indexes::index_incremental, indexes::index_lifecycle, query_dataset::query_dataset_should_fail, + query_dataset::query_dataset_consistent_schema_across_timelines, + query_dataset::query_dataset_has_uncompressed_sizes, query_dataset::query_dataset_with_various_queries, query_dataset::query_empty_dataset, query_dataset::query_simple_dataset, query_dataset::query_simple_dataset_with_layers, query_filter::query_dataset_simple_filter, + query_filter::query_dataset_with_limit, query_index_values::query_dataset_index_values, register_segment::register_and_scan_blueprint_dataset, register_segment::register_and_scan_empty_dataset, diff --git a/crates/store/re_redap_tests/src/tests/query_dataset.rs b/crates/store/re_redap_tests/src/tests/query_dataset.rs index 3a4b1d9312c7..fab77e12192d 100644 --- a/crates/store/re_redap_tests/src/tests/query_dataset.rs +++ b/crates/store/re_redap_tests/src/tests/query_dataset.rs @@ -1,6 +1,8 @@ -use arrow::array::{FixedSizeBinaryArray, RecordBatch, RecordBatchOptions, UInt32Array}; +use arrow::array::{ + Array as _, FixedSizeBinaryArray, RecordBatch, RecordBatchOptions, UInt32Array, UInt64Array, +}; use futures::StreamExt as _; -use re_log_types::{AbsoluteTimeRange, TimeInt}; +use re_log_types::{AbsoluteTimeRange, TimeInt, TimeType}; use re_protos::cloud::v1alpha1::QueryDatasetResponse; use re_protos::cloud::v1alpha1::ext::{ DataSource, DataSourceKind, Query, QueryDatasetRequest, QueryLatestAt, QueryRange, @@ -11,6 +13,7 @@ use re_types_core::ChunkId; use crate::tests::common::{ DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _, concat_record_batches, + entry_name, }; use crate::{FieldsTestExt as _, RecordBatchTestExt as _, TempPath}; @@ -304,6 +307,7 @@ pub async fn query_dataset_with_various_queries(service: impl RerunCloudService) exclude_temporal_data: false, scan_parameters: None, query, + generate_direct_urls: false, }, &chunk_ids_to_remove, dataset_name, @@ -313,6 +317,142 @@ pub async fn 
query_dataset_with_various_queries(service: impl RerunCloudService) } } +/// Verify that `chunk_byte_size_uncompressed` is present and populated with +/// non-zero values for every chunk in the query response. +pub async fn query_dataset_has_uncompressed_sizes(service: impl RerunCloudService) { + let data_sources_def = DataSourcesDefinition::new_with_tuid_prefix( + 1, + [LayerDefinition::simple("segment", &["my/entity"])], + ); + + let dataset_name = "dataset"; + service.create_dataset_entry_with_name(dataset_name).await; + service + .register_with_dataset_name_blocking(dataset_name, data_sources_def.to_data_sources()) + .await; + + let chunk_info: Vec = service + .query_dataset( + tonic::Request::new(QueryDatasetRequest::default().into()) + .with_entry_name(entry_name(dataset_name)) + .unwrap(), + ) + .await + .unwrap() + .into_inner() + .flat_map(|resp| futures::stream::iter(resp.unwrap().data)) + .map(|dfp| dfp.try_into().unwrap()) + .collect() + .await; + + let merged = concat_record_batches(&chunk_info); + assert!( + merged.num_rows() > 0, + "query should return at least one chunk" + ); + + let uncompressed_col = merged + .column_by_name(QueryDatasetResponse::FIELD_CHUNK_BYTE_LENGTH_UNCOMPRESSED) + .expect("chunk_byte_size_uncompressed column must be present"); + + let uncompressed = uncompressed_col + .as_any() + .downcast_ref::() + .expect("chunk_byte_size_uncompressed must be UInt64Array"); + + for i in 0..merged.num_rows() { + assert!( + !uncompressed.is_null(i), + "row {i}: uncompressed size must not be null" + ); + assert!( + uncompressed.value(i) > 0, + "row {i}: uncompressed size must be > 0" + ); + } +} + +/// Verify that every response in the `query_dataset` stream has the same schema, even when +/// different segments use different timelines. 
Regression test for a server bug where each +/// (segment, layer) response only emitted `:start` columns for timelines present in its own +/// chunks, producing mismatched schemas that broke client-side concatenation. +pub async fn query_dataset_consistent_schema_across_timelines(service: impl RerunCloudService) { + let data_sources_def = DataSourcesDefinition::new_with_tuid_prefix( + 1, + [ + LayerDefinition::simple_with_time( + "segment_sequence", + &["my/entity"], + 0, + TimeType::Sequence, + ), + LayerDefinition::simple_with_time( + "segment_timestamp", + &["my/entity"], + 0, + TimeType::TimestampNs, + ), + ], + ); + + let dataset_name = "dataset_mixed_timelines"; + service.create_dataset_entry_with_name(dataset_name).await; + service + .register_with_dataset_name_blocking(dataset_name, data_sources_def.to_data_sources()) + .await; + + let request = QueryDatasetRequest { + query: Some(Query { + columns_always_include_global_indexes: true, + ..Default::default() + }), + ..Default::default() + }; + + let responses: Vec = service + .query_dataset( + tonic::Request::new(request.into()) + .with_entry_name(entry_name(dataset_name)) + .unwrap(), + ) + .await + .unwrap() + .into_inner() + .flat_map(|resp| futures::stream::iter(resp.unwrap().data)) + .map(|dfp| dfp.try_into().unwrap()) + .collect() + .await; + + // Backends are free to split a query across any number of responses (the OSS test server + // emits one per `(segment, layer)`; other backends may fuse them into a single batch). The + // only invariants we care about here are that we got data back and that every response + // shares a schema covering both timelines. 
+ assert!( + !responses.is_empty(), + "expected at least one response, got none", + ); + + let first_schema = responses[0].schema(); + for (idx, rb) in responses.iter().enumerate() { + assert_eq!( + rb.schema(), + first_schema, + "response {idx} has a different schema than response 0 — client-side concatenation would fail", + ); + } + + for expected_col in ["frame_nr:start", "timestamp:start"] { + assert!( + first_schema.field_with_name(expected_col).is_ok(), + "expected `{expected_col}` in response schema, got: {:#?}", + first_schema.fields(), + ); + } + + // concat_batches should succeed now that all responses share a schema. + let _ = concat_record_batches(&responses); +} + // --- // TODO(rerun-io/dataplatform#2228) remove the `chunk_ids_to_remove` parameter @@ -326,7 +466,7 @@ async fn query_dataset_snapshot( let chunk_info = service .query_dataset( tonic::Request::new(query_dataset_request.into()) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -370,6 +510,7 @@ async fn query_dataset_snapshot( .remove_columns(&[ QueryDatasetResponse::FIELD_CHUNK_KEY, QueryDatasetResponse::FIELD_CHUNK_BYTE_LENGTH, + QueryDatasetResponse::FIELD_CHUNK_BYTE_LENGTH_UNCOMPRESSED, ]) .auto_sort_rows() .unwrap(); @@ -390,7 +531,7 @@ fn remove_rows_containing_chunk_id( chunk_ids: &[re_types_core::ChunkId], ) -> RecordBatch { let chunk_id_col = rb - .column_by_name("chunk_id") + .column_by_name(QueryDatasetResponse::FIELD_CHUNK_ID) .expect("Missing column chunk_id"); let chunk_id_array = chunk_id_col diff --git a/crates/store/re_redap_tests/src/tests/query_filter.rs b/crates/store/re_redap_tests/src/tests/query_filter.rs index 6461635948dc..625f4715d7c6 100644 --- a/crates/store/re_redap_tests/src/tests/query_filter.rs +++ b/crates/store/re_redap_tests/src/tests/query_filter.rs @@ -7,7 +7,7 @@ use arrow::array::RecordBatch; use datafusion::datasource::TableProvider as _; use datafusion::execution::SessionState; use 
datafusion::physical_plan::ExecutionPlanProperties as _; -use datafusion::prelude::{Expr, SessionContext, col, lit}; +use datafusion::prelude::{Expr, SessionConfig, SessionContext, col, lit}; use futures::{StreamExt as _, TryStreamExt as _}; use re_datafusion::{DataframeClientAPI, DataframeQueryTableProvider}; use re_log_types::EntityPath; @@ -55,7 +55,8 @@ pub async fn query_dataset_simple_filter(service: impl RerunCloudService) { &query, &[] as &[&str], None, - None, + None, // arrow_schema — let the provider fetch it + None, // trace_headers ) .await .unwrap(); @@ -93,6 +94,130 @@ pub async fn query_dataset_simple_filter(service: impl RerunCloudService) { } } +pub async fn query_dataset_with_limit(service: impl RerunCloudService) { + #![expect(unsafe_code)] + let original_env = std::env::var("RERUN_CHUNK_MAX_ROWS_IF_UNSORTED").ok(); + + // SAFETY: + // This is simply a test + unsafe { std::env::set_var("RERUN_CHUNK_MAX_ROWS_IF_UNSORTED", "3") }; + + let data_sources_def = DataSourcesDefinition::new_with_tuid_prefix( + 1, + [ + LayerDefinition::multi_chunked_entities( + "my_segment_id1", + &["my/entity", "my/other/entity"], + ), + LayerDefinition::multi_chunked_entities("my_segment_id2", &["my/entity"]), + LayerDefinition::multi_chunked_entities( + "my_segment_id3", + &["my/entity", "another/one", "yet/another/one"], + ), + ], + ); + + let dataset_name = "dataset"; + let dataset_entry = service.create_dataset_entry_with_name(dataset_name).await; + service + .register_with_dataset_name_blocking(dataset_name, data_sources_def.to_data_sources()) + .await; + + let client = create_test_client(service).await; + let query = re_chunk_store::QueryExpression { + view_contents: Some(std::iter::once((EntityPath::from("my/entity"), None)).collect()), + filtered_index: Some("frame_nr".into()), + ..Default::default() + }; + + let table_provider = DataframeQueryTableProvider::new_from_client( + client, + dataset_entry.details.id, + &query, + &[] as &[&str], + None, + None, // 
arrow_schema — let the provider fetch it + None, // trace_headers + ) + .await + .unwrap(); + + // We need to set 1 target partition, otherwise we will not get exactly the limit we expect + let config = SessionConfig::default().with_target_partitions(1); + let ctx = SessionContext::new_with_config(config); + let state = ctx.state(); + + // First, get the total row count without any limit + let total_rows = execute_with_limit(&table_provider, &ctx, &state, None).await; + assert!( + total_rows > 1, + "Test requires more than 1 row to be meaningful, got {total_rows}" + ); + + // Limit of 1 should return exactly 1 row + let rows = execute_with_limit(&table_provider, &ctx, &state, Some(1)).await; + assert_eq!(rows, 1, "limit=1 should return exactly 1 row"); + + // Limit smaller than total should return exactly that many rows + let small_limit = total_rows / 2; + let rows = execute_with_limit(&table_provider, &ctx, &state, Some(small_limit)).await; + assert_eq!( + rows, small_limit, + "limit={small_limit} should return exactly {small_limit} rows" + ); + + // Limit equal to total should return all rows + let rows = execute_with_limit(&table_provider, &ctx, &state, Some(total_rows)).await; + assert_eq!( + rows, total_rows, + "limit={total_rows} should return all {total_rows} rows" + ); + + // Limit larger than total should return all rows + let rows = execute_with_limit(&table_provider, &ctx, &state, Some(total_rows + 100)).await; + assert_eq!( + rows, total_rows, + "limit larger than total should return all {total_rows} rows" + ); + + // SAFETY: + // This is simply a test + unsafe { + match original_env { + Some(val) => std::env::set_var("RERUN_CHUNK_MAX_ROWS_IF_UNSORTED", val), + None => std::env::remove_var("RERUN_CHUNK_MAX_ROWS_IF_UNSORTED"), + } + } +} + +async fn execute_with_limit( + table_provider: &DataframeQueryTableProvider, + ctx: &SessionContext, + state: &SessionState, + limit: Option, +) -> usize { + let plan = table_provider + .scan(state, None, 
&[lit(true)], limit) + .await + .unwrap(); + + let num_partitions = plan.output_partitioning().partition_count(); + let results = (0..num_partitions) + .map(|partition| plan.execute(partition, ctx.task_ctx())) + .collect::, _>>() + .unwrap(); + + let stream = futures::stream::iter(results); + + let results: Vec = stream + .flat_map(|stream| stream) + .try_collect() + .await + .unwrap(); + + results.iter().map(|batch| batch.num_rows()).sum() +} + // --- async fn query_dataset_snapshot( diff --git a/crates/store/re_redap_tests/src/tests/query_index_values.rs b/crates/store/re_redap_tests/src/tests/query_index_values.rs index 74a8131b4e60..9562e1da6cb1 100644 --- a/crates/store/re_redap_tests/src/tests/query_index_values.rs +++ b/crates/store/re_redap_tests/src/tests/query_index_values.rs @@ -141,7 +141,8 @@ async fn query_dataset_snapshot( &query, &[] as &[&str], Some(Arc::new(index_values)), - None, + None, // arrow_schema — let the provider fetch it + None, // trace_headers ) .await .unwrap(); diff --git a/crates/store/re_redap_tests/src/tests/register_segment.rs b/crates/store/re_redap_tests/src/tests/register_segment.rs index 826b99e498ef..21d663815fd8 100644 --- a/crates/store/re_redap_tests/src/tests/register_segment.rs +++ b/crates/store/re_redap_tests/src/tests/register_segment.rs @@ -25,7 +25,9 @@ use re_sdk_types::AnyValues; use re_types_core::AsComponents; use url::Url; -use super::common::{DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _, prop}; +use super::common::{ + DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _, entry_name, prop, +}; use crate::{ FieldsTestExt as _, RecordBatchTestExt as _, SchemaTestExt as _, create_simple_recording_in, }; @@ -66,7 +68,7 @@ pub async fn register_and_scan_blueprint_dataset(service: impl RerunCloudService let dataset_details: DatasetDetails = service .read_dataset_entry( tonic::Request::new(ReadDatasetEntryRequest {}) - .with_entry_name(dataset_name) + 
.with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -416,7 +418,7 @@ pub async fn register_bad_file_uri_should_error(service: impl RerunCloudService) let result = service .register_with_dataset( tonic::Request::new(request.into()) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await; @@ -441,7 +443,7 @@ pub async fn register_segment_bumps_timestamp(service: impl RerunCloudService) { service .read_dataset_entry( tonic::Request::new(ReadDatasetEntryRequest {}) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -544,7 +546,7 @@ pub async fn register_with_dataset_if_duplicate_behavior_error(service: impl Rer let result = service .register_with_dataset( tonic::Request::new(request.into()) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await; @@ -738,7 +740,7 @@ pub async fn register_intra_request_duplicates(service: impl RerunCloudService) let result = service .register_with_dataset( tonic::Request::new(request.into()) - .with_entry_name(&dataset_name) + .with_entry_name(entry_name(&dataset_name)) .unwrap(), ) .await; @@ -794,7 +796,7 @@ pub async fn register_empty_request(service: impl RerunCloudService) { let result = service .register_with_dataset( tonic::Request::new(request.into()) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await; @@ -845,7 +847,7 @@ pub async fn register_fully_skipped(service: impl RerunCloudService) { let result = service .register_with_dataset( tonic::Request::new(request.into()) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await; @@ -867,7 +869,7 @@ async fn scan_dataset_manifest( tonic::Request::new(ScanDatasetManifestRequest { columns: vec![], // all of them }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -1068,7 +1070,7 @@ pub async fn 
register_conflicting_schema_filters_segment_table(service: impl Rer data_sources: second_def.to_data_sources(), on_duplicate: IfDuplicateBehavior::Error as i32, }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(); // This registration should fail due to schema conflict @@ -1150,7 +1152,7 @@ pub async fn register_conflicting_schema_same_segment_filters_layer( data_sources: second_def.to_data_sources(), on_duplicate: IfDuplicateBehavior::Error as i32, }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(); // This registration should fail due to schema conflict @@ -1253,7 +1255,7 @@ async fn scan_segment_table(service: &impl RerunCloudService, dataset_name: &str tonic::Request::new(ScanSegmentTableRequest { columns: vec![], // all of them }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -1309,7 +1311,7 @@ async fn register_and_wait_for_task_result( data_sources: data_sources_def.to_data_sources(), on_duplicate: IfDuplicateBehavior::Error as i32, }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(); let resp = service @@ -1441,7 +1443,7 @@ async fn scan_segment_table_and_snapshot( tonic::Request::new(ScanSegmentTableRequest { columns: vec![], // all of them }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -1469,7 +1471,7 @@ async fn scan_segment_table_and_snapshot( let alleged_schema: Schema = service .get_segment_table_schema( tonic::Request::new(GetSegmentTableSchemaRequest {}) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -1531,7 +1533,7 @@ async fn scan_dataset_manifest_and_snapshot( tonic::Request::new(ScanDatasetManifestRequest { columns: vec![], // all of them }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -1559,7 +1561,7 @@ async 
fn scan_dataset_manifest_and_snapshot( let alleged_schema: Schema = service .get_dataset_manifest_schema( tonic::Request::new(GetDatasetManifestSchemaRequest {}) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await diff --git a/crates/store/re_redap_tests/src/tests/rrd_manifest.rs b/crates/store/re_redap_tests/src/tests/rrd_manifest.rs index 058d514db305..5c333880d909 100644 --- a/crates/store/re_redap_tests/src/tests/rrd_manifest.rs +++ b/crates/store/re_redap_tests/src/tests/rrd_manifest.rs @@ -13,7 +13,9 @@ use re_sdk::AsComponents; use re_sdk::external::re_log_encoding::{RawRrdManifest, ToApplication as _}; use re_sdk_types::AnyValues; -use super::common::{DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _}; +use super::common::{ + DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _, entry_name, +}; pub async fn simple_dataset_rrd_manifest(service: impl RerunCloudService) { let data_sources_def = DataSourcesDefinition::new_with_tuid_prefix( @@ -121,7 +123,7 @@ pub async fn layered_segment(service: impl RerunCloudService) { tonic::Request::new(GetRrdManifestRequest { segment_id: Some(segment_name.into()), }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await; @@ -178,7 +180,7 @@ pub async fn layered_segment_stress(service: impl RerunCloudService) { tonic::Request::new(GetRrdManifestRequest { segment_id: Some(segment_name.into()), }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -198,7 +200,7 @@ pub async fn layered_segment_stress(service: impl RerunCloudService) { let responses: Vec<_> = service .scan_segment_table( tonic::Request::new(ScanSegmentTableRequest { columns: vec![] }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -230,7 +232,7 @@ pub async fn layered_segment_stress(service: impl RerunCloudService) { 
tonic::Request::new(GetRrdManifestRequest { segment_id: Some(segment_name.into()), }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await; @@ -268,7 +270,7 @@ pub async fn unregistered_segment(service: impl RerunCloudService) { tonic::Request::new(GetRrdManifestRequest { segment_id: Some("my_segment_id".into()), }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await; @@ -285,7 +287,7 @@ pub async fn segment_id_not_found(service: impl RerunCloudService) { tonic::Request::new(GetRrdManifestRequest { segment_id: Some(segment_id.into()), }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await; @@ -306,7 +308,7 @@ async fn dataset_rrd_manifest_snapshot( tonic::Request::new(GetRrdManifestRequest { segment_id: Some(segment_id.into()), }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await? diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__create_table__create_table_data.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__create_table__create_table_data.snap index f6829318eee1..396ed1e1a500 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__create_table__create_table_data.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__create_table__create_table_data.snap @@ -2,7 +2,7 @@ source: crates/store/re_redap_tests/src/tests/create_table.rs expression: returned_schema.format_snapshot() --- -column_a: Utf8 -column_b: nullable i64 -column_c: f64 -column_d: nullable bool +column_a: non-null Utf8 +column_b: Int64 +column_c: non-null Float64 +column_d: Boolean diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__dataset_schema__simple_dataset_schema.snap 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__dataset_schema__simple_dataset_schema.snap index cf1e86154887..c9bb1bebbf9f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__dataset_schema__simple_dataset_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__dataset_schema__simple_dataset_schema.snap @@ -5,14 +5,14 @@ expression: schema.format_snapshot() top-level metadata: [ sorbet:version: "0.1.3" ] -/another/one:example.MyPoints:colors: nullable List[nullable u32] [ +/another/one:example.MyPoints:colors: List(UInt32) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:entity_path: "/another/one" rerun:kind: "data" ] -/another/one:example.MyPoints:labels: nullable List[nullable Utf8] [ +/another/one:example.MyPoints:labels: List(Utf8) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: "example.MyLabel" @@ -20,21 +20,21 @@ top-level metadata: [ rerun:is_static: "true" rerun:kind: "data" ] -/another/one:example.MyPoints:points: nullable List[nullable Struct[2]] [ +/another/one:example.MyPoints:points: List(Struct("x": non-null Float32, "y": non-null Float32)) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:entity_path: "/another/one" rerun:kind: "data" ] -/my/entity:example.MyPoints:colors: nullable List[nullable u32] [ +/my/entity:example.MyPoints:colors: List(UInt32) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -/my/entity:example.MyPoints:labels: nullable List[nullable Utf8] [ +/my/entity:example.MyPoints:labels: List(Utf8) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: 
"example.MyLabel" @@ -42,21 +42,21 @@ top-level metadata: [ rerun:is_static: "true" rerun:kind: "data" ] -/my/entity:example.MyPoints:points: nullable List[nullable Struct[2]] [ +/my/entity:example.MyPoints:points: List(Struct("x": non-null Float32, "y": non-null Float32)) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -/my/other/entity:example.MyPoints:colors: nullable List[nullable u32] [ +/my/other/entity:example.MyPoints:colors: List(UInt32) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:entity_path: "/my/other/entity" rerun:kind: "data" ] -/my/other/entity:example.MyPoints:labels: nullable List[nullable Utf8] [ +/my/other/entity:example.MyPoints:labels: List(Utf8) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: "example.MyLabel" @@ -64,21 +64,21 @@ top-level metadata: [ rerun:is_static: "true" rerun:kind: "data" ] -/my/other/entity:example.MyPoints:points: nullable List[nullable Struct[2]] [ +/my/other/entity:example.MyPoints:points: List(Struct("x": non-null Float32, "y": non-null Float32)) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:entity_path: "/my/other/entity" rerun:kind: "data" ] -/yet/another/one:example.MyPoints:colors: nullable List[nullable u32] [ +/yet/another/one:example.MyPoints:colors: List(UInt32) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:entity_path: "/yet/another/one" rerun:kind: "data" ] -/yet/another/one:example.MyPoints:labels: nullable List[nullable Utf8] [ +/yet/another/one:example.MyPoints:labels: List(Utf8) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: 
"example.MyLabel" @@ -86,18 +86,18 @@ top-level metadata: [ rerun:is_static: "true" rerun:kind: "data" ] -/yet/another/one:example.MyPoints:points: nullable List[nullable Struct[2]] [ +/yet/another/one:example.MyPoints:points: List(Struct("x": non-null Float32, "y": non-null Float32)) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:entity_path: "/yet/another/one" rerun:kind: "data" ] -frame_nr: nullable i64 [ +frame_nr: Int64 [ rerun:index_name: "frame_nr" rerun:kind: "index" ] -rerun.controls.RowId: FixedSizeBinary[16] [ +rerun.controls.RowId: non-null FixedSizeBinary(16) [ ARROW:extension:metadata: "{\"namespace\":\"row\"}" ARROW:extension:name: "rerun.datatypes.TUID" rerun:kind: "control" diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__entries_table__entries_table_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__entries_table__entries_table_schema.snap index 5a386e6149f4..0e6ece31d4a4 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__entries_table__entries_table_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__entries_table__entries_table_schema.snap @@ -5,8 +5,8 @@ expression: schema.format_snapshot() top-level metadata: [ sorbet:version: "0.1.3" ] -created_at: Timestamp(ns) -entry_kind: i32 -id: FixedSizeBinary[16] -name: Utf8 -updated_at: Timestamp(ns) +created_at: non-null Timestamp(ns) +entry_kind: non-null Int32 +id: non-null FixedSizeBinary(16) +name: non-null Utf8 +updated_at: non-null Timestamp(ns) diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__fetch_chunks__simple_dataset_fetch_chunk.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__fetch_chunks__simple_dataset_fetch_chunk.snap index ef22848e4f01..a3e02677b26d 100644 --- 
a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__fetch_chunks__simple_dataset_fetch_chunk.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__fetch_chunks__simple_dataset_fetch_chunk.snap @@ -2,30 +2,31 @@ source: crates/store/re_redap_tests/src/tests/fetch_chunks.rs expression: printed --- -┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /my/entity │ -│ * id: chunk_00000000000000010000000000000001 │ -│ * version: 0.1.3 │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬─────────────────────────────────────────┐ │ -│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Struct[2]] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:points │ │ -│ │ is_sorted: true ┆ kind: index ┆ component_type: example.MyColor ┆ component_type: example.MyPoint │ │ -│ │ kind: control ┆ ┆ kind: data ┆ kind: data │ │ -│ ╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪═════════════════════════════════════════╡ │ -│ │ row_00000000000000010000000000000001 ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ 
row_00000000000000010000000000000002 ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000010000000000000003 ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000010000000000000004 ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ -│ └───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴─────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /my/entity │ +│ * id: chunk_00000000000000010000000000000001 │ +│ * version: 0.1.3 │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ +│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(UInt32) ┆ type: List(Struct("x": non-null Float32, "y": non-null │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ archetype: example.MyPoints ┆ Float32)) │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: example.MyPoints:colors ┆ archetype: example.MyPoints │ │ +│ │ is_sorted: 
true ┆ kind: index ┆ component_type: example.MyColor ┆ component: example.MyPoints:points │ │ +│ │ kind: control ┆ ┆ kind: data ┆ component_type: example.MyPoint │ │ +│ │ ┆ ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ +│ │ row_00000000000000010000000000000001 ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000010000000000000002 ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000010000000000000003 ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000010000000000000004 ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ +│ └───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ┌────────────────────────────────────────────────────────────────────────────────────────┐ │ METADATA: │ │ * entity_path: /my/entity │ @@ -35,7 +36,7 @@ expression: printed │ ┌───────────────────────────────────────────────┬────────────────────────────────────┐ │ │ │ RowId ┆ example.MyPoints:labels │ │ │ │ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable List[nullable Utf8] │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: 
List(Utf8) │ │ │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ archetype: example.MyPoints │ │ │ │ ARROW:extension:name: TUID ┆ component: example.MyPoints:labels │ │ │ │ is_sorted: true ┆ component_type: example.MyLabel │ │ @@ -44,30 +45,31 @@ expression: printed │ │ row_00000000000000010000000000000005 ┆ [simple] │ │ │ └───────────────────────────────────────────────┴────────────────────────────────────┘ │ └────────────────────────────────────────────────────────────────────────────────────────┘ -┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /my/other/entity │ -│ * id: chunk_00000000000000010000000000000003 │ -│ * version: 0.1.3 │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬─────────────────────────────────────────┐ │ -│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Struct[2]] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:points │ │ -│ │ is_sorted: true ┆ kind: index ┆ component_type: example.MyColor ┆ component_type: example.MyPoint │ │ -│ │ kind: control ┆ ┆ kind: data ┆ kind: data │ │ -│ ╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪═════════════════════════════════════════╡ │ -│ │ row_00000000000000010000000000000006 ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] 
│ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000010000000000000007 ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000010000000000000008 ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000010000000000000009 ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ -│ └───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴─────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /my/other/entity │ +│ * id: chunk_00000000000000010000000000000003 │ +│ * version: 0.1.3 │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ +│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(UInt32) ┆ type: List(Struct("x": non-null Float32, "y": non-null │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ 
archetype: example.MyPoints ┆ Float32)) │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: example.MyPoints:colors ┆ archetype: example.MyPoints │ │ +│ │ is_sorted: true ┆ kind: index ┆ component_type: example.MyColor ┆ component: example.MyPoints:points │ │ +│ │ kind: control ┆ ┆ kind: data ┆ component_type: example.MyPoint │ │ +│ │ ┆ ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ +│ │ row_00000000000000010000000000000006 ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000010000000000000007 ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000010000000000000008 ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000010000000000000009 ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ +│ └───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ┌────────────────────────────────────────────────────────────────────────────────────────┐ │ METADATA: │ │ * entity_path: /my/other/entity │ @@ -77,7 +79,7 @@ expression: printed │ ┌───────────────────────────────────────────────┬────────────────────────────────────┐ │ 
│ │ RowId ┆ example.MyPoints:labels │ │ │ │ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable List[nullable Utf8] │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: List(Utf8) │ │ │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ archetype: example.MyPoints │ │ │ │ ARROW:extension:name: TUID ┆ component: example.MyPoints:labels │ │ │ │ is_sorted: true ┆ component_type: example.MyLabel │ │ @@ -86,30 +88,31 @@ expression: printed │ │ row_0000000000000001000000000000000a ┆ [simple] │ │ │ └───────────────────────────────────────────────┴────────────────────────────────────┘ │ └────────────────────────────────────────────────────────────────────────────────────────┘ -┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /my/entity │ -│ * id: chunk_00000000000000020000000000000001 │ -│ * version: 0.1.3 │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬─────────────────────────────────────────┐ │ -│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Struct[2]] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:points │ │ -│ │ is_sorted: true ┆ kind: index ┆ component_type: example.MyColor ┆ component_type: example.MyPoint │ │ -│ │ kind: control ┆ ┆ kind: data ┆ kind: data │ │ -│ 
╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪═════════════════════════════════════════╡ │ -│ │ row_00000000000000020000000000000001 ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000020000000000000002 ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000020000000000000003 ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000020000000000000004 ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ -│ └───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴─────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /my/entity │ +│ * id: chunk_00000000000000020000000000000001 │ +│ * version: 0.1.3 │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ +│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ 
+│ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(UInt32) ┆ type: List(Struct("x": non-null Float32, "y": non-null │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ archetype: example.MyPoints ┆ Float32)) │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: example.MyPoints:colors ┆ archetype: example.MyPoints │ │ +│ │ is_sorted: true ┆ kind: index ┆ component_type: example.MyColor ┆ component: example.MyPoints:points │ │ +│ │ kind: control ┆ ┆ kind: data ┆ component_type: example.MyPoint │ │ +│ │ ┆ ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ +│ │ row_00000000000000020000000000000001 ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000020000000000000002 ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000020000000000000003 ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000020000000000000004 ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ +│ └───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ 
┌────────────────────────────────────────────────────────────────────────────────────────┐ │ METADATA: │ │ * entity_path: /my/entity │ @@ -119,7 +122,7 @@ expression: printed │ ┌───────────────────────────────────────────────┬────────────────────────────────────┐ │ │ │ RowId ┆ example.MyPoints:labels │ │ │ │ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable List[nullable Utf8] │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: List(Utf8) │ │ │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ archetype: example.MyPoints │ │ │ │ ARROW:extension:name: TUID ┆ component: example.MyPoints:labels │ │ │ │ is_sorted: true ┆ component_type: example.MyLabel │ │ @@ -128,30 +131,31 @@ expression: printed │ │ row_00000000000000020000000000000005 ┆ [simple] │ │ │ └───────────────────────────────────────────────┴────────────────────────────────────┘ │ └────────────────────────────────────────────────────────────────────────────────────────┘ -┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /my/entity │ -│ * id: chunk_00000000000000030000000000000001 │ -│ * version: 0.1.3 │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬─────────────────────────────────────────┐ │ -│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Struct[2]] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: 
example.MyPoints:colors ┆ component: example.MyPoints:points │ │ -│ │ is_sorted: true ┆ kind: index ┆ component_type: example.MyColor ┆ component_type: example.MyPoint │ │ -│ │ kind: control ┆ ┆ kind: data ┆ kind: data │ │ -│ ╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪═════════════════════════════════════════╡ │ -│ │ row_00000000000000030000000000000001 ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000030000000000000002 ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000030000000000000003 ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000030000000000000004 ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ -│ └───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴─────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /my/entity │ +│ * id: chunk_00000000000000030000000000000001 │ +│ * version: 0.1.3 │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 
┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ +│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(UInt32) ┆ type: List(Struct("x": non-null Float32, "y": non-null │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ archetype: example.MyPoints ┆ Float32)) │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: example.MyPoints:colors ┆ archetype: example.MyPoints │ │ +│ │ is_sorted: true ┆ kind: index ┆ component_type: example.MyColor ┆ component: example.MyPoints:points │ │ +│ │ kind: control ┆ ┆ kind: data ┆ component_type: example.MyPoint │ │ +│ │ ┆ ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ +│ │ row_00000000000000030000000000000001 ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000030000000000000002 ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000030000000000000003 ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000030000000000000004 ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ +│ 
└───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ┌────────────────────────────────────────────────────────────────────────────────────────┐ │ METADATA: │ │ * entity_path: /my/entity │ @@ -161,7 +165,7 @@ expression: printed │ ┌───────────────────────────────────────────────┬────────────────────────────────────┐ │ │ │ RowId ┆ example.MyPoints:labels │ │ │ │ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable List[nullable Utf8] │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: List(Utf8) │ │ │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ archetype: example.MyPoints │ │ │ │ ARROW:extension:name: TUID ┆ component: example.MyPoints:labels │ │ │ │ is_sorted: true ┆ component_type: example.MyLabel │ │ @@ -170,30 +174,31 @@ expression: printed │ │ row_00000000000000030000000000000005 ┆ [simple] │ │ │ └───────────────────────────────────────────────┴────────────────────────────────────┘ │ └────────────────────────────────────────────────────────────────────────────────────────┘ -┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /another/one │ -│ * id: chunk_00000000000000030000000000000003 │ -│ * version: 0.1.3 │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬─────────────────────────────────────────┐ │ -│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ 
-│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Struct[2]] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:points │ │ -│ │ is_sorted: true ┆ kind: index ┆ component_type: example.MyColor ┆ component_type: example.MyPoint │ │ -│ │ kind: control ┆ ┆ kind: data ┆ kind: data │ │ -│ ╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪═════════════════════════════════════════╡ │ -│ │ row_00000000000000030000000000000006 ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000030000000000000007 ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000030000000000000008 ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000030000000000000009 ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ -│ └───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴─────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: 
│ +│ * entity_path: /another/one │ +│ * id: chunk_00000000000000030000000000000003 │ +│ * version: 0.1.3 │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ +│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(UInt32) ┆ type: List(Struct("x": non-null Float32, "y": non-null │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ archetype: example.MyPoints ┆ Float32)) │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: example.MyPoints:colors ┆ archetype: example.MyPoints │ │ +│ │ is_sorted: true ┆ kind: index ┆ component_type: example.MyColor ┆ component: example.MyPoints:points │ │ +│ │ kind: control ┆ ┆ kind: data ┆ component_type: example.MyPoint │ │ +│ │ ┆ ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ +│ │ row_00000000000000030000000000000006 ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000030000000000000007 ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000030000000000000008 ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ +│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000030000000000000009 ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ +│ └───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ┌────────────────────────────────────────────────────────────────────────────────────────┐ │ METADATA: │ │ * entity_path: /another/one │ @@ -203,7 +208,7 @@ expression: printed │ ┌───────────────────────────────────────────────┬────────────────────────────────────┐ │ │ │ RowId ┆ example.MyPoints:labels │ │ │ │ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable List[nullable Utf8] │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: List(Utf8) │ │ │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ archetype: example.MyPoints │ │ │ │ ARROW:extension:name: TUID ┆ component: example.MyPoints:labels │ │ │ │ is_sorted: true ┆ component_type: example.MyLabel │ │ @@ -212,30 +217,31 @@ expression: printed │ │ row_0000000000000003000000000000000a ┆ [simple] │ │ │ └───────────────────────────────────────────────┴────────────────────────────────────┘ │ └────────────────────────────────────────────────────────────────────────────────────────┘ -┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /yet/another/one │ -│ * id: chunk_00000000000000030000000000000005 │ -│ * version: 0.1.3 │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 
┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬─────────────────────────────────────────┐ │ -│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Struct[2]] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:points │ │ -│ │ is_sorted: true ┆ kind: index ┆ component_type: example.MyColor ┆ component_type: example.MyPoint │ │ -│ │ kind: control ┆ ┆ kind: data ┆ kind: data │ │ -│ ╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪═════════════════════════════════════════╡ │ -│ │ row_0000000000000003000000000000000b ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_0000000000000003000000000000000c ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_0000000000000003000000000000000d ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_0000000000000003000000000000000e ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ -│ └───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴─────────────────────────────────────────┘ │ 
-└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /yet/another/one │ +│ * id: chunk_00000000000000030000000000000005 │ +│ * version: 0.1.3 │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ +│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(UInt32) ┆ type: List(Struct("x": non-null Float32, "y": non-null │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ archetype: example.MyPoints ┆ Float32)) │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: example.MyPoints:colors ┆ archetype: example.MyPoints │ │ +│ │ is_sorted: true ┆ kind: index ┆ component_type: example.MyColor ┆ component: example.MyPoints:points │ │ +│ │ kind: control ┆ ┆ kind: data ┆ component_type: example.MyPoint │ │ +│ │ ┆ ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ +│ │ row_0000000000000003000000000000000b ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_0000000000000003000000000000000c ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ 
│ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_0000000000000003000000000000000d ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_0000000000000003000000000000000e ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ +│ └───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ┌────────────────────────────────────────────────────────────────────────────────────────┐ │ METADATA: │ │ * entity_path: /yet/another/one │ @@ -245,7 +251,7 @@ expression: printed │ ┌───────────────────────────────────────────────┬────────────────────────────────────┐ │ │ │ RowId ┆ example.MyPoints:labels │ │ │ │ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable List[nullable Utf8] │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: List(Utf8) │ │ │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ archetype: example.MyPoints │ │ │ │ ARROW:extension:name: TUID ┆ component: example.MyPoints:labels │ │ │ │ is_sorted: true ┆ component_type: example.MyLabel │ │ diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__empty_dataset_data.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__empty_dataset_data.snap index a532adec2bdf..f56493433b08 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__empty_dataset_data.snap +++ 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__empty_dataset_data.snap @@ -2,7 +2,7 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: filtered_chunk_info.format_snapshot(false) --- -┌──────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┐ -│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static │ -╞══════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╡ -└──────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┘ +┌──────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┬────────────────────────┬───────────────────────────────────┐ +│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static ┆ rerun_layer_direct_url ┆ rerun_layer_direct_url_expires_at │ +╞══════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╪════════════════════════╪═══════════════════════════════════╡ +└──────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┴────────────────────────┴───────────────────────────────────┘ diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__empty_dataset_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__empty_dataset_schema.snap index 782b3f48f76a..10e16e60be2f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__empty_dataset_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__empty_dataset_schema.snap @@ -2,18 +2,21 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: required_chunk_info.format_schema_snapshot() --- -chunk_byte_len: u64 -chunk_entity_path: Utf8 [ +chunk_byte_len: non-null UInt64 
+chunk_byte_size_uncompressed: UInt64 +chunk_entity_path: non-null Utf8 [ rerun:kind: "control" ] -chunk_id: FixedSizeBinary[16] [ +chunk_id: non-null FixedSizeBinary(16) [ rerun:kind: "control" ] -chunk_is_static: bool [ +chunk_is_static: non-null Boolean [ rerun:kind: "control" ] -chunk_key: Binary -chunk_segment_id: Utf8 [ +chunk_key: non-null Binary +chunk_segment_id: non-null Utf8 [ rerun:kind: "control" ] -rerun_segment_layer: Utf8 +rerun_layer_direct_url: Dictionary(Int32, Utf8) +rerun_layer_direct_url_expires_at: Dictionary(Int32, Int64) +rerun_segment_layer: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_default_data.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_default_data.snap index 6b1cd5ad9a29..beb5b340389b 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_default_data.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_default_data.snap @@ -2,30 +2,30 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: filtered_chunk_info.format_snapshot(false) --- -┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┐ -│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static │ -╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╡ -│ 00000000000000010000000000000001 ┆ my_segment_id1 ┆ base ┆ /my/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000010000000000000002 ┆ my_segment_id1 ┆ base ┆ /my/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 
00000000000000010000000000000003 ┆ my_segment_id1 ┆ base ┆ /my/other/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000010000000000000004 ┆ my_segment_id1 ┆ base ┆ /my/other/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000020000000000000001 ┆ my_segment_id2 ┆ base ┆ /my/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000020000000000000002 ┆ my_segment_id2 ┆ base ┆ /my/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000001 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000002 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000003 ┆ my_segment_id3 ┆ base ┆ /another/one ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000004 ┆ my_segment_id3 ┆ base ┆ /another/one ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000005 ┆ my_segment_id3 ┆ base ┆ /yet/another/one ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000006 ┆ my_segment_id3 ┆ base ┆ /yet/another/one ┆ true │ 
-└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┘ +┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┬────────────────────────┬───────────────────────────────────┐ +│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static ┆ rerun_layer_direct_url ┆ rerun_layer_direct_url_expires_at │ +╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╪════════════════════════╪═══════════════════════════════════╡ +│ 00000000000000010000000000000001 ┆ my_segment_id1 ┆ base ┆ /my/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000010000000000000002 ┆ my_segment_id1 ┆ base ┆ /my/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000010000000000000003 ┆ my_segment_id1 ┆ base ┆ /my/other/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000010000000000000004 ┆ my_segment_id1 ┆ base ┆ /my/other/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000020000000000000001 ┆ my_segment_id2 ┆ base ┆ /my/entity ┆ false ┆ null ┆ null │ 
+├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000020000000000000002 ┆ my_segment_id2 ┆ base ┆ /my/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000001 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000002 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000003 ┆ my_segment_id3 ┆ base ┆ /another/one ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000004 ┆ my_segment_id3 ┆ base ┆ /another/one ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000005 ┆ my_segment_id3 ┆ base ┆ /yet/another/one ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000006 ┆ my_segment_id3 ┆ base ┆ /yet/another/one ┆ true ┆ null ┆ null │ 
+└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┴────────────────────────┴───────────────────────────────────┘ diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_default_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_default_schema.snap index 782b3f48f76a..10e16e60be2f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_default_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_default_schema.snap @@ -2,18 +2,21 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: required_chunk_info.format_schema_snapshot() --- -chunk_byte_len: u64 -chunk_entity_path: Utf8 [ +chunk_byte_len: non-null UInt64 +chunk_byte_size_uncompressed: UInt64 +chunk_entity_path: non-null Utf8 [ rerun:kind: "control" ] -chunk_id: FixedSizeBinary[16] [ +chunk_id: non-null FixedSizeBinary(16) [ rerun:kind: "control" ] -chunk_is_static: bool [ +chunk_is_static: non-null Boolean [ rerun:kind: "control" ] -chunk_key: Binary -chunk_segment_id: Utf8 [ +chunk_key: non-null Binary +chunk_segment_id: non-null Utf8 [ rerun:kind: "control" ] -rerun_segment_layer: Utf8 +rerun_layer_direct_url: Dictionary(Int32, Utf8) +rerun_layer_direct_url_expires_at: Dictionary(Int32, Int64) +rerun_segment_layer: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_static_data.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_static_data.snap index 99227d405c5a..adf9081338b6 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_static_data.snap +++ 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_static_data.snap @@ -2,18 +2,18 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: filtered_chunk_info.format_snapshot(false) --- -┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┐ -│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static │ -╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╡ -│ 00000000000000010000000000000001 ┆ my_segment_id1 ┆ base ┆ /my/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000010000000000000003 ┆ my_segment_id1 ┆ base ┆ /my/other/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000020000000000000001 ┆ my_segment_id2 ┆ base ┆ /my/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000001 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000003 ┆ my_segment_id3 ┆ base ┆ /another/one ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000005 ┆ my_segment_id3 ┆ base ┆ /yet/another/one ┆ false │ -└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┘ +┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┬────────────────────────┬───────────────────────────────────┐ +│ chunk_id 
┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static ┆ rerun_layer_direct_url ┆ rerun_layer_direct_url_expires_at │ +╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╪════════════════════════╪═══════════════════════════════════╡ +│ 00000000000000010000000000000001 ┆ my_segment_id1 ┆ base ┆ /my/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000010000000000000003 ┆ my_segment_id1 ┆ base ┆ /my/other/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000020000000000000001 ┆ my_segment_id2 ┆ base ┆ /my/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000001 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000003 ┆ my_segment_id3 ┆ base ┆ /another/one ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000005 ┆ my_segment_id3 ┆ base ┆ /yet/another/one ┆ false ┆ null ┆ null │ +└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┴────────────────────────┴───────────────────────────────────┘ diff --git 
a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_static_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_static_schema.snap index 782b3f48f76a..10e16e60be2f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_static_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_static_schema.snap @@ -2,18 +2,21 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: required_chunk_info.format_schema_snapshot() --- -chunk_byte_len: u64 -chunk_entity_path: Utf8 [ +chunk_byte_len: non-null UInt64 +chunk_byte_size_uncompressed: UInt64 +chunk_entity_path: non-null Utf8 [ rerun:kind: "control" ] -chunk_id: FixedSizeBinary[16] [ +chunk_id: non-null FixedSizeBinary(16) [ rerun:kind: "control" ] -chunk_is_static: bool [ +chunk_is_static: non-null Boolean [ rerun:kind: "control" ] -chunk_key: Binary -chunk_segment_id: Utf8 [ +chunk_key: non-null Binary +chunk_segment_id: non-null Utf8 [ rerun:kind: "control" ] -rerun_segment_layer: Utf8 +rerun_layer_direct_url: Dictionary(Int32, Utf8) +rerun_layer_direct_url_expires_at: Dictionary(Int32, Int64) +rerun_segment_layer: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_temporal_data.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_temporal_data.snap index ec3ce014ff67..3f3a6385e3b3 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_temporal_data.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_temporal_data.snap @@ -2,18 +2,18 @@ source: 
crates/store/re_redap_tests/src/tests/query_dataset.rs expression: filtered_chunk_info.format_snapshot(false) --- -┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┐ -│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static │ -╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╡ -│ 00000000000000010000000000000002 ┆ my_segment_id1 ┆ base ┆ /my/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000010000000000000004 ┆ my_segment_id1 ┆ base ┆ /my/other/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000020000000000000002 ┆ my_segment_id2 ┆ base ┆ /my/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000002 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000004 ┆ my_segment_id3 ┆ base ┆ /another/one ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000006 ┆ my_segment_id3 ┆ base ┆ /yet/another/one ┆ true │ -└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┘ +┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┬────────────────────────┬───────────────────────────────────┐ +│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static ┆ rerun_layer_direct_url ┆ rerun_layer_direct_url_expires_at │ 
+╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╪════════════════════════╪═══════════════════════════════════╡ +│ 00000000000000010000000000000002 ┆ my_segment_id1 ┆ base ┆ /my/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000010000000000000004 ┆ my_segment_id1 ┆ base ┆ /my/other/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000020000000000000002 ┆ my_segment_id2 ┆ base ┆ /my/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000002 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000004 ┆ my_segment_id3 ┆ base ┆ /another/one ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000006 ┆ my_segment_id3 ┆ base ┆ /yet/another/one ┆ true ┆ null ┆ null │ +└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┴────────────────────────┴───────────────────────────────────┘ diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_temporal_schema.snap 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_temporal_schema.snap index 782b3f48f76a..10e16e60be2f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_temporal_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_exclude_temporal_schema.snap @@ -2,18 +2,21 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: required_chunk_info.format_schema_snapshot() --- -chunk_byte_len: u64 -chunk_entity_path: Utf8 [ +chunk_byte_len: non-null UInt64 +chunk_byte_size_uncompressed: UInt64 +chunk_entity_path: non-null Utf8 [ rerun:kind: "control" ] -chunk_id: FixedSizeBinary[16] [ +chunk_id: non-null FixedSizeBinary(16) [ rerun:kind: "control" ] -chunk_is_static: bool [ +chunk_is_static: non-null Boolean [ rerun:kind: "control" ] -chunk_key: Binary -chunk_segment_id: Utf8 [ +chunk_key: non-null Binary +chunk_segment_id: non-null Utf8 [ rerun:kind: "control" ] -rerun_segment_layer: Utf8 +rerun_layer_direct_url: Dictionary(Int32, Utf8) +rerun_layer_direct_url_expires_at: Dictionary(Int32, Int64) +rerun_segment_layer: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_entity_data.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_entity_data.snap index cb423043a1d9..90a947bf9d1d 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_entity_data.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_entity_data.snap @@ -2,18 +2,18 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: filtered_chunk_info.format_snapshot(false) --- 
-┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┐ -│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static │ -╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╡ -│ 00000000000000010000000000000001 ┆ my_segment_id1 ┆ base ┆ /my/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000010000000000000002 ┆ my_segment_id1 ┆ base ┆ /my/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000020000000000000001 ┆ my_segment_id2 ┆ base ┆ /my/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000020000000000000002 ┆ my_segment_id2 ┆ base ┆ /my/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000001 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000002 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ true │ -└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┘ +┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┬────────────────────────┬───────────────────────────────────┐ +│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static ┆ rerun_layer_direct_url ┆ rerun_layer_direct_url_expires_at │ 
+╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╪════════════════════════╪═══════════════════════════════════╡ +│ 00000000000000010000000000000001 ┆ my_segment_id1 ┆ base ┆ /my/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000010000000000000002 ┆ my_segment_id1 ┆ base ┆ /my/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000020000000000000001 ┆ my_segment_id2 ┆ base ┆ /my/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000020000000000000002 ┆ my_segment_id2 ┆ base ┆ /my/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000001 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000002 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ true ┆ null ┆ null │ +└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┴────────────────────────┴───────────────────────────────────┘ diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_entity_schema.snap 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_entity_schema.snap index 782b3f48f76a..10e16e60be2f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_entity_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_entity_schema.snap @@ -2,18 +2,21 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: required_chunk_info.format_schema_snapshot() --- -chunk_byte_len: u64 -chunk_entity_path: Utf8 [ +chunk_byte_len: non-null UInt64 +chunk_byte_size_uncompressed: UInt64 +chunk_entity_path: non-null Utf8 [ rerun:kind: "control" ] -chunk_id: FixedSizeBinary[16] [ +chunk_id: non-null FixedSizeBinary(16) [ rerun:kind: "control" ] -chunk_is_static: bool [ +chunk_is_static: non-null Boolean [ rerun:kind: "control" ] -chunk_key: Binary -chunk_segment_id: Utf8 [ +chunk_key: non-null Binary +chunk_segment_id: non-null Utf8 [ rerun:kind: "control" ] -rerun_segment_layer: Utf8 +rerun_layer_direct_url: Dictionary(Int32, Utf8) +rerun_layer_direct_url_expires_at: Dictionary(Int32, Int64) +rerun_segment_layer: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_segment_data.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_segment_data.snap index cfc0955c1485..28c1aac2ee8e 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_segment_data.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_segment_data.snap @@ -2,18 +2,18 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: filtered_chunk_info.format_snapshot(false) --- 
-┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┐ -│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static │ -╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╡ -│ 00000000000000030000000000000001 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000002 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000003 ┆ my_segment_id3 ┆ base ┆ /another/one ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000004 ┆ my_segment_id3 ┆ base ┆ /another/one ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000005 ┆ my_segment_id3 ┆ base ┆ /yet/another/one ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000006 ┆ my_segment_id3 ┆ base ┆ /yet/another/one ┆ true │ -└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┘ +┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┬────────────────────────┬───────────────────────────────────┐ +│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static ┆ rerun_layer_direct_url ┆ rerun_layer_direct_url_expires_at │ 
+╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╪════════════════════════╪═══════════════════════════════════╡ +│ 00000000000000030000000000000001 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000002 ┆ my_segment_id3 ┆ base ┆ /my/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000003 ┆ my_segment_id3 ┆ base ┆ /another/one ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000004 ┆ my_segment_id3 ┆ base ┆ /another/one ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000005 ┆ my_segment_id3 ┆ base ┆ /yet/another/one ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000006 ┆ my_segment_id3 ┆ base ┆ /yet/another/one ┆ true ┆ null ┆ null │ +└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┴────────────────────────┴───────────────────────────────────┘ diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_segment_schema.snap 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_segment_schema.snap index 782b3f48f76a..10e16e60be2f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_segment_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_dataset_single_segment_schema.snap @@ -2,18 +2,21 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: required_chunk_info.format_schema_snapshot() --- -chunk_byte_len: u64 -chunk_entity_path: Utf8 [ +chunk_byte_len: non-null UInt64 +chunk_byte_size_uncompressed: UInt64 +chunk_entity_path: non-null Utf8 [ rerun:kind: "control" ] -chunk_id: FixedSizeBinary[16] [ +chunk_id: non-null FixedSizeBinary(16) [ rerun:kind: "control" ] -chunk_is_static: bool [ +chunk_is_static: non-null Boolean [ rerun:kind: "control" ] -chunk_key: Binary -chunk_segment_id: Utf8 [ +chunk_key: non-null Binary +chunk_segment_id: non-null Utf8 [ rerun:kind: "control" ] -rerun_segment_layer: Utf8 +rerun_layer_direct_url: Dictionary(Int32, Utf8) +rerun_layer_direct_url_expires_at: Dictionary(Int32, Int64) +rerun_segment_layer: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_with_layer_data.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_with_layer_data.snap index d4c0d19bea21..dc48fbeaf024 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_with_layer_data.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_with_layer_data.snap @@ -2,26 +2,26 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: filtered_chunk_info.format_snapshot(false) --- 
-┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┐ -│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static │ -╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╡ -│ 00000000000000010000000000000001 ┆ partition1 ┆ base ┆ /my/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000010000000000000002 ┆ partition1 ┆ base ┆ /my/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000020000000000000001 ┆ partition1 ┆ extra ┆ /extra/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000020000000000000002 ┆ partition1 ┆ extra ┆ /extra/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000001 ┆ partition2 ┆ base ┆ /another/one ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000030000000000000002 ┆ partition2 ┆ base ┆ /another/one ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000040000000000000001 ┆ partition2 ┆ extra ┆ /extra/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000040000000000000002 ┆ partition2 ┆ extra ┆ /extra/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000050000000000000001 ┆ partition3 ┆ base ┆ /i/am/alone ┆ false │ 
-├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000050000000000000002 ┆ partition3 ┆ base ┆ /i/am/alone ┆ true │ -└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┘ +┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┬────────────────────────┬───────────────────────────────────┐ +│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static ┆ rerun_layer_direct_url ┆ rerun_layer_direct_url_expires_at │ +╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╪════════════════════════╪═══════════════════════════════════╡ +│ 00000000000000010000000000000001 ┆ partition1 ┆ base ┆ /my/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000010000000000000002 ┆ partition1 ┆ base ┆ /my/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000020000000000000001 ┆ partition1 ┆ extra ┆ /extra/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000020000000000000002 ┆ partition1 ┆ extra ┆ /extra/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000001 ┆ partition2 ┆ base ┆ /another/one ┆ false ┆ null 
┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000030000000000000002 ┆ partition2 ┆ base ┆ /another/one ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000040000000000000001 ┆ partition2 ┆ extra ┆ /extra/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000040000000000000002 ┆ partition2 ┆ extra ┆ /extra/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000050000000000000001 ┆ partition3 ┆ base ┆ /i/am/alone ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000050000000000000002 ┆ partition3 ┆ base ┆ /i/am/alone ┆ true ┆ null ┆ null │ +└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┴────────────────────────┴───────────────────────────────────┘ diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_with_layer_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_with_layer_schema.snap index 782b3f48f76a..10e16e60be2f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_with_layer_schema.snap +++ 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__simple_with_layer_schema.snap @@ -2,18 +2,21 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: required_chunk_info.format_schema_snapshot() --- -chunk_byte_len: u64 -chunk_entity_path: Utf8 [ +chunk_byte_len: non-null UInt64 +chunk_byte_size_uncompressed: UInt64 +chunk_entity_path: non-null Utf8 [ rerun:kind: "control" ] -chunk_id: FixedSizeBinary[16] [ +chunk_id: non-null FixedSizeBinary(16) [ rerun:kind: "control" ] -chunk_is_static: bool [ +chunk_is_static: non-null Boolean [ rerun:kind: "control" ] -chunk_key: Binary -chunk_segment_id: Utf8 [ +chunk_key: non-null Binary +chunk_segment_id: non-null Utf8 [ rerun:kind: "control" ] -rerun_segment_layer: Utf8 +rerun_layer_direct_url: Dictionary(Int32, Utf8) +rerun_layer_direct_url_expires_at: Dictionary(Int32, Int64) +rerun_segment_layer: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_default_data.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_default_data.snap index 2cc27a1fb7a9..a090fbceb4bd 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_default_data.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_default_data.snap @@ -2,14 +2,14 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: filtered_chunk_info.format_snapshot(false) --- -┌──────────────────────────────────┬─────────────────────┬─────────────────────┬───────────────────┬─────────────────┐ -│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static │ -╞══════════════════════════════════╪═════════════════════╪═════════════════════╪═══════════════════╪═════════════════╡ -│ 00000000000000640000000000000001 ┆ static_test_segment ┆ base ┆ 
/static_only ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000640000000000000002 ┆ static_test_segment ┆ base ┆ /both ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000640000000000000003 ┆ static_test_segment ┆ base ┆ /both ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000640000000000000004 ┆ static_test_segment ┆ base ┆ /temporal_only ┆ false │ -└──────────────────────────────────┴─────────────────────┴─────────────────────┴───────────────────┴─────────────────┘ +┌──────────────────────────────────┬─────────────────────┬─────────────────────┬───────────────────┬─────────────────┬────────────────────────┬───────────────────────────────────┐ +│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static ┆ rerun_layer_direct_url ┆ rerun_layer_direct_url_expires_at │ +╞══════════════════════════════════╪═════════════════════╪═════════════════════╪═══════════════════╪═════════════════╪════════════════════════╪═══════════════════════════════════╡ +│ 00000000000000640000000000000001 ┆ static_test_segment ┆ base ┆ /static_only ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000640000000000000002 ┆ static_test_segment ┆ base ┆ /both ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000640000000000000003 ┆ static_test_segment ┆ base ┆ /both ┆ false ┆ null ┆ null │ 
+├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000640000000000000004 ┆ static_test_segment ┆ base ┆ /temporal_only ┆ false ┆ null ┆ null │ +└──────────────────────────────────┴─────────────────────┴─────────────────────┴───────────────────┴─────────────────┴────────────────────────┴───────────────────────────────────┘ diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_default_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_default_schema.snap index 782b3f48f76a..10e16e60be2f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_default_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_default_schema.snap @@ -2,18 +2,21 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: required_chunk_info.format_schema_snapshot() --- -chunk_byte_len: u64 -chunk_entity_path: Utf8 [ +chunk_byte_len: non-null UInt64 +chunk_byte_size_uncompressed: UInt64 +chunk_entity_path: non-null Utf8 [ rerun:kind: "control" ] -chunk_id: FixedSizeBinary[16] [ +chunk_id: non-null FixedSizeBinary(16) [ rerun:kind: "control" ] -chunk_is_static: bool [ +chunk_is_static: non-null Boolean [ rerun:kind: "control" ] -chunk_key: Binary -chunk_segment_id: Utf8 [ +chunk_key: non-null Binary +chunk_segment_id: non-null Utf8 [ rerun:kind: "control" ] -rerun_segment_layer: Utf8 +rerun_layer_direct_url: Dictionary(Int32, Utf8) +rerun_layer_direct_url_expires_at: Dictionary(Int32, Int64) +rerun_segment_layer: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_latest_at_end_data.snap 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_latest_at_end_data.snap index 9f29ae895315..0aad6fa22213 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_latest_at_end_data.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_latest_at_end_data.snap @@ -2,12 +2,12 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: filtered_chunk_info.format_snapshot(false) --- -┌──────────────────────────────────┬─────────────────────┬─────────────────────┬───────────────────┬─────────────────┐ -│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static │ -╞══════════════════════════════════╪═════════════════════╪═════════════════════╪═══════════════════╪═════════════════╡ -│ 00000000000000640000000000000001 ┆ static_test_segment ┆ base ┆ /static_only ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000640000000000000002 ┆ static_test_segment ┆ base ┆ /both ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000640000000000000004 ┆ static_test_segment ┆ base ┆ /temporal_only ┆ false │ -└──────────────────────────────────┴─────────────────────┴─────────────────────┴───────────────────┴─────────────────┘ +┌──────────────────────────────────┬─────────────────────┬─────────────────────┬───────────────────┬─────────────────┬────────────────────────┬───────────────────────────────────┐ +│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static ┆ rerun_layer_direct_url ┆ rerun_layer_direct_url_expires_at │ 
+╞══════════════════════════════════╪═════════════════════╪═════════════════════╪═══════════════════╪═════════════════╪════════════════════════╪═══════════════════════════════════╡ +│ 00000000000000640000000000000001 ┆ static_test_segment ┆ base ┆ /static_only ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000640000000000000002 ┆ static_test_segment ┆ base ┆ /both ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000640000000000000004 ┆ static_test_segment ┆ base ┆ /temporal_only ┆ false ┆ null ┆ null │ +└──────────────────────────────────┴─────────────────────┴─────────────────────┴───────────────────┴─────────────────┴────────────────────────┴───────────────────────────────────┘ diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_latest_at_end_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_latest_at_end_schema.snap index 782b3f48f76a..10e16e60be2f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_latest_at_end_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_latest_at_end_schema.snap @@ -2,18 +2,21 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: required_chunk_info.format_schema_snapshot() --- -chunk_byte_len: u64 -chunk_entity_path: Utf8 [ +chunk_byte_len: non-null UInt64 +chunk_byte_size_uncompressed: UInt64 +chunk_entity_path: non-null Utf8 [ rerun:kind: "control" ] -chunk_id: FixedSizeBinary[16] [ +chunk_id: non-null FixedSizeBinary(16) [ rerun:kind: 
"control" ] -chunk_is_static: bool [ +chunk_is_static: non-null Boolean [ rerun:kind: "control" ] -chunk_key: Binary -chunk_segment_id: Utf8 [ +chunk_key: non-null Binary +chunk_segment_id: non-null Utf8 [ rerun:kind: "control" ] -rerun_segment_layer: Utf8 +rerun_layer_direct_url: Dictionary(Int32, Utf8) +rerun_layer_direct_url_expires_at: Dictionary(Int32, Int64) +rerun_segment_layer: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_none_data.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_none_data.snap index 2cc27a1fb7a9..a090fbceb4bd 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_none_data.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_none_data.snap @@ -2,14 +2,14 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: filtered_chunk_info.format_snapshot(false) --- -┌──────────────────────────────────┬─────────────────────┬─────────────────────┬───────────────────┬─────────────────┐ -│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static │ -╞══════════════════════════════════╪═════════════════════╪═════════════════════╪═══════════════════╪═════════════════╡ -│ 00000000000000640000000000000001 ┆ static_test_segment ┆ base ┆ /static_only ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000640000000000000002 ┆ static_test_segment ┆ base ┆ /both ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000640000000000000003 ┆ static_test_segment ┆ base ┆ /both ┆ false │ 
-├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000640000000000000004 ┆ static_test_segment ┆ base ┆ /temporal_only ┆ false │ -└──────────────────────────────────┴─────────────────────┴─────────────────────┴───────────────────┴─────────────────┘ +┌──────────────────────────────────┬─────────────────────┬─────────────────────┬───────────────────┬─────────────────┬────────────────────────┬───────────────────────────────────┐ +│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static ┆ rerun_layer_direct_url ┆ rerun_layer_direct_url_expires_at │ +╞══════════════════════════════════╪═════════════════════╪═════════════════════╪═══════════════════╪═════════════════╪════════════════════════╪═══════════════════════════════════╡ +│ 00000000000000640000000000000001 ┆ static_test_segment ┆ base ┆ /static_only ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000640000000000000002 ┆ static_test_segment ┆ base ┆ /both ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000640000000000000003 ┆ static_test_segment ┆ base ┆ /both ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000640000000000000004 ┆ static_test_segment ┆ base ┆ /temporal_only ┆ false ┆ null ┆ null │ +└──────────────────────────────────┴─────────────────────┴─────────────────────┴───────────────────┴─────────────────┴────────────────────────┴───────────────────────────────────┘ diff --git 
a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_none_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_none_schema.snap index 782b3f48f76a..10e16e60be2f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_none_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_none_schema.snap @@ -2,18 +2,21 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: required_chunk_info.format_schema_snapshot() --- -chunk_byte_len: u64 -chunk_entity_path: Utf8 [ +chunk_byte_len: non-null UInt64 +chunk_byte_size_uncompressed: UInt64 +chunk_entity_path: non-null Utf8 [ rerun:kind: "control" ] -chunk_id: FixedSizeBinary[16] [ +chunk_id: non-null FixedSizeBinary(16) [ rerun:kind: "control" ] -chunk_is_static: bool [ +chunk_is_static: non-null Boolean [ rerun:kind: "control" ] -chunk_key: Binary -chunk_segment_id: Utf8 [ +chunk_key: non-null Binary +chunk_segment_id: non-null Utf8 [ rerun:kind: "control" ] -rerun_segment_layer: Utf8 +rerun_layer_direct_url: Dictionary(Int32, Utf8) +rerun_layer_direct_url_expires_at: Dictionary(Int32, Int64) +rerun_segment_layer: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_range_all_data.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_range_all_data.snap index 9f29ae895315..0aad6fa22213 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_range_all_data.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_range_all_data.snap @@ -2,12 +2,12 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: filtered_chunk_info.format_snapshot(false) --- 
-┌──────────────────────────────────┬─────────────────────┬─────────────────────┬───────────────────┬─────────────────┐ -│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static │ -╞══════════════════════════════════╪═════════════════════╪═════════════════════╪═══════════════════╪═════════════════╡ -│ 00000000000000640000000000000001 ┆ static_test_segment ┆ base ┆ /static_only ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000640000000000000002 ┆ static_test_segment ┆ base ┆ /both ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000640000000000000004 ┆ static_test_segment ┆ base ┆ /temporal_only ┆ false │ -└──────────────────────────────────┴─────────────────────┴─────────────────────┴───────────────────┴─────────────────┘ +┌──────────────────────────────────┬─────────────────────┬─────────────────────┬───────────────────┬─────────────────┬────────────────────────┬───────────────────────────────────┐ +│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static ┆ rerun_layer_direct_url ┆ rerun_layer_direct_url_expires_at │ +╞══════════════════════════════════╪═════════════════════╪═════════════════════╪═══════════════════╪═════════════════╪════════════════════════╪═══════════════════════════════════╡ +│ 00000000000000640000000000000001 ┆ static_test_segment ┆ base ┆ /static_only ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000640000000000000002 ┆ static_test_segment ┆ base ┆ /both ┆ true ┆ null ┆ null │ 
+├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000640000000000000004 ┆ static_test_segment ┆ base ┆ /temporal_only ┆ false ┆ null ┆ null │ +└──────────────────────────────────┴─────────────────────┴─────────────────────┴───────────────────┴─────────────────┴────────────────────────┴───────────────────────────────────┘ diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_range_all_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_range_all_schema.snap index 782b3f48f76a..10e16e60be2f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_range_all_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_dataset__with_query_range_all_schema.snap @@ -2,18 +2,21 @@ source: crates/store/re_redap_tests/src/tests/query_dataset.rs expression: required_chunk_info.format_schema_snapshot() --- -chunk_byte_len: u64 -chunk_entity_path: Utf8 [ +chunk_byte_len: non-null UInt64 +chunk_byte_size_uncompressed: UInt64 +chunk_entity_path: non-null Utf8 [ rerun:kind: "control" ] -chunk_id: FixedSizeBinary[16] [ +chunk_id: non-null FixedSizeBinary(16) [ rerun:kind: "control" ] -chunk_is_static: bool [ +chunk_is_static: non-null Boolean [ rerun:kind: "control" ] -chunk_key: Binary -chunk_segment_id: Utf8 [ +chunk_key: non-null Binary +chunk_segment_id: non-null Utf8 [ rerun:kind: "control" ] -rerun_segment_layer: Utf8 +rerun_layer_direct_url: Dictionary(Int32, Utf8) +rerun_layer_direct_url_expires_at: Dictionary(Int32, Int64) +rerun_segment_layer: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_filter__simple_dataset_default_schema.snap 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_filter__simple_dataset_default_schema.snap index c18ae55289d2..617f8a01ffe8 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_filter__simple_dataset_default_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_filter__simple_dataset_default_schema.snap @@ -5,14 +5,14 @@ expression: results.format_schema_snapshot() top-level metadata: [ sorbet:version: "0.1.3" ] -/my/entity:example.MyPoints:colors: nullable List[nullable u32] [ +/my/entity:example.MyPoints:colors: List(UInt32) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -/my/entity:example.MyPoints:labels: nullable List[nullable Utf8] [ +/my/entity:example.MyPoints:labels: List(Utf8) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: "example.MyLabel" @@ -20,15 +20,15 @@ top-level metadata: [ rerun:is_static: "true" rerun:kind: "data" ] -/my/entity:example.MyPoints:points: nullable List[nullable Struct[2]] [ +/my/entity:example.MyPoints:points: List(Struct("x": non-null Float32, "y": non-null Float32)) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -frame_nr: nullable i64 [ +frame_nr: Int64 [ rerun:index_name: "frame_nr" rerun:kind: "index" ] -rerun_segment_id: Utf8 +rerun_segment_id: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_filter__simple_dataset_frame_nr_eq_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_filter__simple_dataset_frame_nr_eq_schema.snap index c18ae55289d2..617f8a01ffe8 100644 --- 
a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_filter__simple_dataset_frame_nr_eq_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_filter__simple_dataset_frame_nr_eq_schema.snap @@ -5,14 +5,14 @@ expression: results.format_schema_snapshot() top-level metadata: [ sorbet:version: "0.1.3" ] -/my/entity:example.MyPoints:colors: nullable List[nullable u32] [ +/my/entity:example.MyPoints:colors: List(UInt32) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -/my/entity:example.MyPoints:labels: nullable List[nullable Utf8] [ +/my/entity:example.MyPoints:labels: List(Utf8) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: "example.MyLabel" @@ -20,15 +20,15 @@ top-level metadata: [ rerun:is_static: "true" rerun:kind: "data" ] -/my/entity:example.MyPoints:points: nullable List[nullable Struct[2]] [ +/my/entity:example.MyPoints:points: List(Struct("x": non-null Float32, "y": non-null Float32)) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -frame_nr: nullable i64 [ +frame_nr: Int64 [ rerun:index_name: "frame_nr" rerun:kind: "index" ] -rerun_segment_id: Utf8 +rerun_segment_id: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_filter__simple_dataset_seg_id_eq_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_filter__simple_dataset_seg_id_eq_schema.snap index c18ae55289d2..617f8a01ffe8 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_filter__simple_dataset_seg_id_eq_schema.snap +++ 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_filter__simple_dataset_seg_id_eq_schema.snap @@ -5,14 +5,14 @@ expression: results.format_schema_snapshot() top-level metadata: [ sorbet:version: "0.1.3" ] -/my/entity:example.MyPoints:colors: nullable List[nullable u32] [ +/my/entity:example.MyPoints:colors: List(UInt32) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -/my/entity:example.MyPoints:labels: nullable List[nullable Utf8] [ +/my/entity:example.MyPoints:labels: List(Utf8) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: "example.MyLabel" @@ -20,15 +20,15 @@ top-level metadata: [ rerun:is_static: "true" rerun:kind: "data" ] -/my/entity:example.MyPoints:points: nullable List[nullable Struct[2]] [ +/my/entity:example.MyPoints:points: List(Struct("x": non-null Float32, "y": non-null Float32)) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -frame_nr: nullable i64 [ +frame_nr: Int64 [ rerun:index_name: "frame_nr" rerun:kind: "index" ] -rerun_segment_id: Utf8 +rerun_segment_id: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_index_values__query_index_values_duration_all_valid_index_values_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_index_values__query_index_values_duration_all_valid_index_values_schema.snap index 4eff11795cf8..b3cd64b31eab 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_index_values__query_index_values_duration_all_valid_index_values_schema.snap +++ 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_index_values__query_index_values_duration_all_valid_index_values_schema.snap @@ -5,14 +5,14 @@ expression: results.format_schema_snapshot() top-level metadata: [ sorbet:version: "0.1.3" ] -/my/entity:example.MyPoints:colors: nullable List[nullable u32] [ +/my/entity:example.MyPoints:colors: List(UInt32) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -/my/entity:example.MyPoints:labels: nullable List[nullable Utf8] [ +/my/entity:example.MyPoints:labels: List(Utf8) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: "example.MyLabel" @@ -20,15 +20,15 @@ top-level metadata: [ rerun:is_static: "true" rerun:kind: "data" ] -/my/entity:example.MyPoints:points: nullable List[nullable Struct[2]] [ +/my/entity:example.MyPoints:points: List(Struct("x": non-null Float32, "y": non-null Float32)) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -duration: nullable Duration(ns) [ +duration: Duration(ns) [ rerun:index_name: "duration" rerun:kind: "index" ] -rerun_segment_id: Utf8 +rerun_segment_id: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_index_values__query_index_values_sequence_all_valid_index_values_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_index_values__query_index_values_sequence_all_valid_index_values_schema.snap index 0f839438cbe9..30c289175473 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_index_values__query_index_values_sequence_all_valid_index_values_schema.snap +++ 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_index_values__query_index_values_sequence_all_valid_index_values_schema.snap @@ -5,14 +5,14 @@ expression: results.format_schema_snapshot() top-level metadata: [ sorbet:version: "0.1.3" ] -/my/entity:example.MyPoints:colors: nullable List[nullable u32] [ +/my/entity:example.MyPoints:colors: List(UInt32) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -/my/entity:example.MyPoints:labels: nullable List[nullable Utf8] [ +/my/entity:example.MyPoints:labels: List(Utf8) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: "example.MyLabel" @@ -20,15 +20,15 @@ top-level metadata: [ rerun:is_static: "true" rerun:kind: "data" ] -/my/entity:example.MyPoints:points: nullable List[nullable Struct[2]] [ +/my/entity:example.MyPoints:points: List(Struct("x": non-null Float32, "y": non-null Float32)) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -frame_nr: nullable i64 [ +frame_nr: Int64 [ rerun:index_name: "frame_nr" rerun:kind: "index" ] -rerun_segment_id: Utf8 +rerun_segment_id: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_index_values__query_index_values_timestamp_all_valid_index_values_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_index_values__query_index_values_timestamp_all_valid_index_values_schema.snap index 14d20ceee0ce..cf7297836d68 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_index_values__query_index_values_timestamp_all_valid_index_values_schema.snap +++ 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__query_index_values__query_index_values_timestamp_all_valid_index_values_schema.snap @@ -5,14 +5,14 @@ expression: results.format_schema_snapshot() top-level metadata: [ sorbet:version: "0.1.3" ] -/my/entity:example.MyPoints:colors: nullable List[nullable u32] [ +/my/entity:example.MyPoints:colors: List(UInt32) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -/my/entity:example.MyPoints:labels: nullable List[nullable Utf8] [ +/my/entity:example.MyPoints:labels: List(Utf8) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: "example.MyLabel" @@ -20,15 +20,15 @@ top-level metadata: [ rerun:is_static: "true" rerun:kind: "data" ] -/my/entity:example.MyPoints:points: nullable List[nullable Struct[2]] [ +/my/entity:example.MyPoints:points: List(Struct("x": non-null Float32, "y": non-null Float32)) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -rerun_segment_id: Utf8 -timestamp: nullable Timestamp(ns) [ +rerun_segment_id: non-null Utf8 +timestamp: Timestamp(ns) [ rerun:index_name: "timestamp" rerun:kind: "index" ] diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__empty_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__empty_manifest_schema.snap index 0d36538287ba..27c8709620e8 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__empty_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__empty_manifest_schema.snap @@ -2,13 +2,13 @@ source: 
crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__empty_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__empty_segments_schema.snap index ccf338e1df78..21257c126e56 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__empty_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__empty_segments_schema.snap @@ -2,9 +2,9 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git 
a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__out_of_order_properties_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__out_of_order_properties_manifest_schema.snap index 1ea90bb14db0..0483acaf87af 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__out_of_order_properties_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__out_of_order_properties_manifest_schema.snap @@ -2,20 +2,20 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -property:text_log:TextLog:text: nullable List[nullable Utf8] [ +property:text_log:TextLog:text: List(Utf8) [ rerun:archetype: "rerun.archetypes.TextLog" rerun:component: "TextLog:text" rerun:component_type: "rerun.components.Text" rerun:entity_path: "/__properties/text_log" rerun:kind: "data" ] -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__out_of_order_properties_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__out_of_order_properties_segments_schema.snap index 
e58763bfe234..c59f946ddd8b 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__out_of_order_properties_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__out_of_order_properties_segments_schema.snap @@ -2,16 +2,16 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -property:text_log:TextLog:text: nullable List[nullable Utf8] [ +property:text_log:TextLog:text: List(Utf8) [ rerun:archetype: "rerun.archetypes.TextLog" rerun:component: "TextLog:text" rerun:component_type: "rerun.components.Text" rerun:entity_path: "/__properties/text_log" rerun:kind: "data" ] -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__register_prefix_manifest_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__register_prefix_manifest_manifest_schema.snap index 0d36538287ba..27c8709620e8 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__register_prefix_manifest_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__register_prefix_manifest_manifest_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) 
-rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__register_prefix_segments_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__register_prefix_segments_segments_schema.snap index a3b74359e469..9ac7457802af 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__register_prefix_segments_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__register_prefix_segments_segments_schema.snap @@ -2,21 +2,21 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -frame_nr:end: nullable i64 [ +frame_nr:end: Int64 [ rerun:index: "frame_nr" rerun:index_kind: "sequence" rerun:index_marker: "end" rerun:kind: "index" ] -frame_nr:start: nullable i64 [ +frame_nr:start: Int64 [ rerun:index: "frame_nr" rerun:index_kind: "sequence" rerun:index_marker: "start" rerun:kind: "index" ] -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') 
+rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__segment1_props_should_be_there_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__segment1_props_should_be_there_segments_schema.snap index 51895a2cb559..87777b1cea5f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__segment1_props_should_be_there_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__segment1_props_should_be_there_segments_schema.snap @@ -2,14 +2,14 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -property:prop:test: nullable List[nullable f64] [ +property:prop:test: List(Float64) [ rerun:component: "test" rerun:entity_path: "/__properties/prop" rerun:kind: "data" ] -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_blueprint_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_blueprint_manifest_schema.snap index 0d36538287ba..27c8709620e8 100644 --- 
a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_blueprint_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_blueprint_manifest_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_blueprint_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_blueprint_segments_schema.snap index a3b74359e469..9ac7457802af 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_blueprint_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_blueprint_segments_schema.snap @@ -2,21 +2,21 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -frame_nr:end: nullable i64 [ +frame_nr:end: Int64 [ rerun:index: "frame_nr" rerun:index_kind: "sequence" rerun:index_marker: "end" rerun:kind: "index" ] -frame_nr:start: nullable i64 [ +frame_nr:start: Int64 [ 
rerun:index: "frame_nr" rerun:index_kind: "sequence" rerun:index_marker: "start" rerun:kind: "index" ] -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_manifest_schema.snap index 0d36538287ba..27c8709620e8 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_manifest_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git 
a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_segments_schema.snap index a3b74359e469..9ac7457802af 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_segments_schema.snap @@ -2,21 +2,21 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -frame_nr:end: nullable i64 [ +frame_nr:end: Int64 [ rerun:index: "frame_nr" rerun:index_kind: "sequence" rerun:index_marker: "end" rerun:kind: "index" ] -frame_nr:start: nullable i64 [ +frame_nr:start: Int64 [ rerun:index: "frame_nr" rerun:index_kind: "sequence" rerun:index_marker: "start" rerun:kind: "index" ] -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_layers_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_layers_manifest_schema.snap index 0d36538287ba..27c8709620e8 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_layers_manifest_schema.snap +++ 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_layers_manifest_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_layers_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_layers_segments_schema.snap index a3b74359e469..9ac7457802af 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_layers_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_layers_segments_schema.snap @@ -2,21 +2,21 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -frame_nr:end: nullable i64 [ +frame_nr:end: Int64 [ rerun:index: "frame_nr" rerun:index_kind: "sequence" rerun:index_marker: "end" rerun:kind: "index" ] -frame_nr:start: nullable i64 [ +frame_nr:start: Int64 [ rerun:index: "frame_nr" rerun:index_kind: "sequence" rerun:index_marker: "start" rerun:kind: "index" ] 
-rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_multiple_timelines_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_multiple_timelines_manifest_schema.snap index 1ea90bb14db0..0483acaf87af 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_multiple_timelines_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_multiple_timelines_manifest_schema.snap @@ -2,20 +2,20 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -property:text_log:TextLog:text: nullable List[nullable Utf8] [ +property:text_log:TextLog:text: List(Utf8) [ rerun:archetype: "rerun.archetypes.TextLog" rerun:component: "TextLog:text" rerun:component_type: "rerun.components.Text" rerun:entity_path: "/__properties/text_log" rerun:kind: "data" ] -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 
+rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_multiple_timelines_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_multiple_timelines_segments_schema.snap index ca7cea9c2ec5..e77e20ca230c 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_multiple_timelines_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_multiple_timelines_segments_schema.snap @@ -2,50 +2,50 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -duration:end: nullable Duration(ns) [ +duration:end: Duration(ns) [ rerun:index: "duration" rerun:index_kind: "duration" rerun:index_marker: "end" rerun:kind: "index" ] -duration:start: nullable Duration(ns) [ +duration:start: Duration(ns) [ rerun:index: "duration" rerun:index_kind: "duration" rerun:index_marker: "start" rerun:kind: "index" ] -frame_nr:end: nullable i64 [ +frame_nr:end: Int64 [ rerun:index: "frame_nr" rerun:index_kind: "sequence" rerun:index_marker: "end" rerun:kind: "index" ] -frame_nr:start: nullable i64 [ +frame_nr:start: Int64 [ rerun:index: "frame_nr" rerun:index_kind: "sequence" rerun:index_marker: "start" rerun:kind: "index" ] -property:text_log:TextLog:text: nullable List[nullable Utf8] [ +property:text_log:TextLog:text: List(Utf8) [ rerun:archetype: "rerun.archetypes.TextLog" rerun:component: "TextLog:text" rerun:component_type: "rerun.components.Text" rerun:entity_path: "/__properties/text_log" rerun:kind: "data" ] -rerun_last_updated_at: 
Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] -timestamp:end: nullable Timestamp(ns) [ +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') +timestamp:end: Timestamp(ns) [ rerun:index: "timestamp" rerun:index_kind: "timestamp" rerun:index_marker: "end" rerun:kind: "index" ] -timestamp:start: nullable Timestamp(ns) [ +timestamp:start: Timestamp(ns) [ rerun:index: "timestamp" rerun:index_kind: "timestamp" rerun:index_marker: "start" diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_properties_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_properties_manifest_schema.snap index 444de09dfc65..af69ec5b162f 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_properties_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_properties_manifest_schema.snap @@ -2,27 +2,27 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -property:points:Points2D:positions: nullable List[nullable FixedSizeList[nullable f32; 2]] [ +property:points:Points2D:positions: List(FixedSizeList(2 x Float32)) [ rerun:archetype: "rerun.archetypes.Points2D" rerun:component: "Points2D:positions" rerun:component_type: "rerun.components.Position2D" rerun:entity_path: "/__properties/points" rerun:kind: "data" ] -property:text_log:TextLog:text: nullable List[nullable Utf8] [ +property:text_log:TextLog:text: List(Utf8) [ 
rerun:archetype: "rerun.archetypes.TextLog" rerun:component: "TextLog:text" rerun:component_type: "rerun.components.Text" rerun:entity_path: "/__properties/text_log" rerun:kind: "data" ] -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_properties_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_properties_segments_schema.snap index 4405ab59e3f2..65b7cbb1faae 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_properties_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__register_segment__simple_with_properties_segments_schema.snap @@ -2,35 +2,35 @@ source: crates/store/re_redap_tests/src/tests/register_segment.rs expression: batch.format_schema_snapshot() --- -frame_nr:end: nullable i64 [ +frame_nr:end: Int64 [ rerun:index: "frame_nr" rerun:index_kind: "sequence" rerun:index_marker: "end" rerun:kind: "index" ] -frame_nr:start: nullable i64 [ +frame_nr:start: Int64 [ rerun:index: "frame_nr" rerun:index_kind: "sequence" rerun:index_marker: "start" rerun:kind: "index" ] -property:points:Points2D:positions: nullable List[nullable 
FixedSizeList[nullable f32; 2]] [ +property:points:Points2D:positions: List(FixedSizeList(2 x Float32)) [ rerun:archetype: "rerun.archetypes.Points2D" rerun:component: "Points2D:positions" rerun:component_type: "rerun.components.Position2D" rerun:entity_path: "/__properties/points" rerun:kind: "data" ] -property:text_log:TextLog:text: nullable List[nullable Utf8] [ +property:text_log:TextLog:text: List(Utf8) [ rerun:archetype: "rerun.archetypes.TextLog" rerun:component: "TextLog:text" rerun:component_type: "rerun.components.Text" rerun:entity_path: "/__properties/text_log" rerun:kind: "data" ] -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__fetch_chunks_from_rrd_manifest.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__fetch_chunks_from_rrd_manifest.snap index 65c8a0f07699..4f1d55ff2926 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__fetch_chunks_from_rrd_manifest.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__fetch_chunks_from_rrd_manifest.snap @@ -2,30 +2,31 @@ source: crates/store/re_redap_tests/src/tests/rrd_manifest.rs expression: printed --- -┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /my/entity │ -│ * id: chunk_00000000000000010000000000000001 │ -│ * version: 
0.1.3 │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬─────────────────────────────────────────┐ │ -│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable List[nullable u32] ┆ type: nullable List[nullable Struct[2]] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ archetype: example.MyPoints ┆ archetype: example.MyPoints │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: example.MyPoints:colors ┆ component: example.MyPoints:points │ │ -│ │ is_sorted: true ┆ kind: index ┆ component_type: example.MyColor ┆ component_type: example.MyPoint │ │ -│ │ kind: control ┆ ┆ kind: data ┆ kind: data │ │ -│ ╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪═════════════════════════════════════════╡ │ -│ │ row_00000000000000010000000000000001 ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000010000000000000002 ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000010000000000000003 ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_00000000000000010000000000000004 ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ -│ 
└───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴─────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /my/entity │ +│ * id: chunk_00000000000000010000000000000001 │ +│ * version: 0.1.3 │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────────┬────────────────────────────────────┬──────────────────────────────────────────────────────────┐ │ +│ │ RowId ┆ frame_nr ┆ example.MyPoints:colors ┆ example.MyPoints:points │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(UInt32) ┆ type: List(Struct("x": non-null Float32, "y": non-null │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: frame_nr ┆ archetype: example.MyPoints ┆ Float32)) │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: example.MyPoints:colors ┆ archetype: example.MyPoints │ │ +│ │ is_sorted: true ┆ kind: index ┆ component_type: example.MyColor ┆ component: example.MyPoints:points │ │ +│ │ kind: control ┆ ┆ kind: data ┆ component_type: example.MyPoint │ │ +│ │ ┆ ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════════╪════════════════════════════════════╪══════════════════════════════════════════════════════════╡ │ +│ │ row_00000000000000010000000000000001 ┆ 10 ┆ [0] ┆ [{x: 0.0, y: 0.0}] │ │ +│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000010000000000000002 ┆ 20 ┆ [1] ┆ [{x: 1.0, y: 1.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000010000000000000003 ┆ 30 ┆ [2] ┆ [{x: 2.0, y: 2.0}] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_00000000000000010000000000000004 ┆ 40 ┆ [3] ┆ [{x: 3.0, y: 3.0}] │ │ +│ └───────────────────────────────────────────────┴──────────────────────┴────────────────────────────────────┴──────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ┌────────────────────────────────────────────────────────────────────────────────────────┐ │ METADATA: │ │ * entity_path: /my/entity │ @@ -35,7 +36,7 @@ expression: printed │ ┌───────────────────────────────────────────────┬────────────────────────────────────┐ │ │ │ RowId ┆ example.MyPoints:labels │ │ │ │ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable List[nullable Utf8] │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: List(Utf8) │ │ │ │ ARROW:extension:metadata: {"namespace":"row"} ┆ archetype: example.MyPoints │ │ │ │ ARROW:extension:name: TUID ┆ component: example.MyPoints:labels │ │ │ │ is_sorted: true ┆ component_type: example.MyLabel │ │ diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__layered_segment_rrd_manifest_1_all_there_sorbet_schema.snap 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__layered_segment_rrd_manifest_1_all_there_sorbet_schema.snap index 7e808c8d5887..229d5eb9f8b9 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__layered_segment_rrd_manifest_1_all_there_sorbet_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__layered_segment_rrd_manifest_1_all_there_sorbet_schema.snap @@ -5,20 +5,20 @@ expression: rrd_manifest.sorbet_schema.format_snapshot() top-level metadata: [ sorbet:version: "0.1.3" ] -/data:test: nullable List[nullable f32] [ +/data:test: List(Float32) [ rerun:component: "test" rerun:entity_path: "/data" rerun:is_static: "true" rerun:kind: "data" ] -/my/entity:example.MyPoints:colors: nullable List[nullable u32] [ +/my/entity:example.MyPoints:colors: List(UInt32) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -/my/entity:example.MyPoints:labels: nullable List[nullable Utf8] [ +/my/entity:example.MyPoints:labels: List(Utf8) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: "example.MyLabel" @@ -26,18 +26,18 @@ top-level metadata: [ rerun:is_static: "true" rerun:kind: "data" ] -/my/entity:example.MyPoints:points: nullable List[nullable Struct[2]] [ +/my/entity:example.MyPoints:points: List(Struct("x": non-null Float32, "y": non-null Float32)) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -frame_nr: nullable i64 [ +frame_nr: Int64 [ rerun:index_name: "frame_nr" rerun:kind: "index" ] -rerun.controls.RowId: FixedSizeBinary[16] [ +rerun.controls.RowId: non-null FixedSizeBinary(16) [ ARROW:extension:metadata: "{\"namespace\":\"row\"}" 
ARROW:extension:name: "rerun.datatypes.TUID" rerun:kind: "control" diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__layered_segment_rrd_manifest_2_base_removed_sorbet_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__layered_segment_rrd_manifest_2_base_removed_sorbet_schema.snap index 1f5855822556..fa17a25b76b4 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__layered_segment_rrd_manifest_2_base_removed_sorbet_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__layered_segment_rrd_manifest_2_base_removed_sorbet_schema.snap @@ -5,13 +5,13 @@ expression: rrd_manifest.sorbet_schema.format_snapshot() top-level metadata: [ sorbet:version: "0.1.3" ] -/data:test: nullable List[nullable f32] [ +/data:test: List(Float32) [ rerun:component: "test" rerun:entity_path: "/data" rerun:is_static: "true" rerun:kind: "data" ] -rerun.controls.RowId: FixedSizeBinary[16] [ +rerun.controls.RowId: non-null FixedSizeBinary(16) [ ARROW:extension:metadata: "{\"namespace\":\"row\"}" ARROW:extension:name: "rerun.datatypes.TUID" rerun:kind: "control" diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__rrd_manifest_sorbet_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__rrd_manifest_sorbet_schema.snap index c0fe81a91b3b..aace8e6a5d30 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__rrd_manifest_sorbet_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__rrd_manifest__rrd_manifest_sorbet_schema.snap @@ -5,14 +5,14 @@ expression: rrd_manifest.sorbet_schema.format_snapshot() top-level metadata: [ sorbet:version: "0.1.3" ] -/my/entity:example.MyPoints:colors: nullable List[nullable u32] [ +/my/entity:example.MyPoints:colors: 
List(UInt32) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:colors" rerun:component_type: "example.MyColor" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -/my/entity:example.MyPoints:labels: nullable List[nullable Utf8] [ +/my/entity:example.MyPoints:labels: List(Utf8) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:labels" rerun:component_type: "example.MyLabel" @@ -20,18 +20,18 @@ top-level metadata: [ rerun:is_static: "true" rerun:kind: "data" ] -/my/entity:example.MyPoints:points: nullable List[nullable Struct[2]] [ +/my/entity:example.MyPoints:points: List(Struct("x": non-null Float32, "y": non-null Float32)) [ rerun:archetype: "example.MyPoints" rerun:component: "example.MyPoints:points" rerun:component_type: "example.MyPoint" rerun:entity_path: "/my/entity" rerun:kind: "data" ] -frame_nr: nullable i64 [ +frame_nr: Int64 [ rerun:index_name: "frame_nr" rerun:kind: "index" ] -rerun.controls.RowId: FixedSizeBinary[16] [ +rerun.controls.RowId: non-null FixedSizeBinary(16) [ ARROW:extension:metadata: "{\"namespace\":\"row\"}" ARROW:extension:name: "rerun.datatypes.TUID" rerun:kind: "control" diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__missing_1_should_be_empty_response_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__missing_1_should_be_empty_response_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__missing_1_should_be_empty_response_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__missing_1_should_be_empty_response_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 
-rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_1_register_all_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_1_register_all_manifest_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_1_register_all_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_1_register_all_manifest_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 
+rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_1_register_all_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_1_register_all_segments_schema.snap index 08c5f5f30d4c..06216fbdc832 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_1_register_all_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_1_register_all_segments_schema.snap @@ -1,10 +1,10 @@ --- source: crates/store/re_redap_tests/src/tests/unregister_segment.rs -expression: "batch.clone().filter_columns_by(|f|\nf.metadata().get(\"rerun:index_kind\").is_none()).unwrap().format_schema_snapshot()" +expression: filter_out_index_ranges(batch.clone()).format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_2_remove_layers_BD_for_segments_13_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_2_remove_layers_BD_for_segments_13_manifest_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_2_remove_layers_BD_for_segments_13_manifest_schema.snap +++ 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_2_remove_layers_BD_for_segments_13_manifest_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_2_remove_layers_BD_for_segments_13_response_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_2_remove_layers_BD_for_segments_13_response_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_2_remove_layers_BD_for_segments_13_response_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_2_remove_layers_BD_for_segments_13_response_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) 
-rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_2_remove_layers_BD_for_segments_13_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_2_remove_layers_BD_for_segments_13_segments_schema.snap index 08c5f5f30d4c..06216fbdc832 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_2_remove_layers_BD_for_segments_13_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_2_remove_layers_BD_for_segments_13_segments_schema.snap @@ -1,10 +1,10 @@ --- source: crates/store/re_redap_tests/src/tests/unregister_segment.rs -expression: "batch.clone().filter_columns_by(|f|\nf.metadata().get(\"rerun:index_kind\").is_none()).unwrap().format_schema_snapshot()" +expression: filter_out_index_ranges(batch.clone()).format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git 
a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_3_remove_layers_BD_for_all_segments_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_3_remove_layers_BD_for_all_segments_manifest_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_3_remove_layers_BD_for_all_segments_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_3_remove_layers_BD_for_all_segments_manifest_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_3_remove_layers_BD_for_all_segments_response_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_3_remove_layers_BD_for_all_segments_response_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- 
a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_3_remove_layers_BD_for_all_segments_response_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_3_remove_layers_BD_for_all_segments_response_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_3_remove_layers_BD_for_all_segments_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_3_remove_layers_BD_for_all_segments_segments_schema.snap index 08c5f5f30d4c..06216fbdc832 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_3_remove_layers_BD_for_all_segments_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_3_remove_layers_BD_for_all_segments_segments_schema.snap @@ -1,10 +1,10 @@ --- source: crates/store/re_redap_tests/src/tests/unregister_segment.rs -expression: 
"batch.clone().filter_columns_by(|f|\nf.metadata().get(\"rerun:index_kind\").is_none()).unwrap().format_schema_snapshot()" +expression: filter_out_index_ranges(batch.clone()).format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_4_remove_all_layers_for_segments_23_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_4_remove_all_layers_for_segments_23_manifest_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_4_remove_all_layers_for_segments_23_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_4_remove_all_layers_for_segments_23_manifest_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 
+rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_4_remove_all_layers_for_segments_23_response_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_4_remove_all_layers_for_segments_23_response_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_4_remove_all_layers_for_segments_23_response_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_4_remove_all_layers_for_segments_23_response_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_4_remove_all_layers_for_segments_23_segments_schema.snap 
b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_4_remove_all_layers_for_segments_23_segments_schema.snap index 08c5f5f30d4c..06216fbdc832 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_4_remove_all_layers_for_segments_23_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__products_4_remove_all_layers_for_segments_23_segments_schema.snap @@ -1,10 +1,10 @@ --- source: crates/store/re_redap_tests/src/tests/unregister_segment.rs -expression: "batch.clone().filter_columns_by(|f|\nf.metadata().get(\"rerun:index_kind\").is_none()).unwrap().format_schema_snapshot()" +expression: filter_out_index_ranges(batch.clone()).format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_1_register_all_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_1_register_all_manifest_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_1_register_all_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_1_register_all_manifest_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs 
expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_1_register_all_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_1_register_all_segments_schema.snap index 08c5f5f30d4c..06216fbdc832 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_1_register_all_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_1_register_all_segments_schema.snap @@ -1,10 +1,10 @@ --- source: crates/store/re_redap_tests/src/tests/unregister_segment.rs -expression: "batch.clone().filter_columns_by(|f|\nf.metadata().get(\"rerun:index_kind\").is_none()).unwrap().format_schema_snapshot()" +expression: filter_out_index_ranges(batch.clone()).format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: 
non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_2_remove_segment_id2_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_2_remove_segment_id2_manifest_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_2_remove_segment_id2_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_2_remove_segment_id2_manifest_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_2_remove_segment_id2_response_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_2_remove_segment_id2_response_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- 
a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_2_remove_segment_id2_response_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_2_remove_segment_id2_response_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_2_remove_segment_id2_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_2_remove_segment_id2_segments_schema.snap index 08c5f5f30d4c..06216fbdc832 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_2_remove_segment_id2_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_2_remove_segment_id2_segments_schema.snap @@ -1,10 +1,10 @@ --- source: crates/store/re_redap_tests/src/tests/unregister_segment.rs -expression: "batch.clone().filter_columns_by(|f|\nf.metadata().get(\"rerun:index_kind\").is_none()).unwrap().format_schema_snapshot()" +expression: 
filter_out_index_ranges(batch.clone()).format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_3_remove_remaining_segments_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_3_remove_remaining_segments_manifest_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_3_remove_remaining_segments_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_3_remove_remaining_segments_manifest_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 
+rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_3_remove_remaining_segments_response_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_3_remove_remaining_segments_response_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_3_remove_remaining_segments_response_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_3_remove_remaining_segments_response_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_3_remove_remaining_segments_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_3_remove_remaining_segments_segments_schema.snap index 08c5f5f30d4c..06216fbdc832 100644 --- 
a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_3_remove_remaining_segments_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_3_remove_remaining_segments_segments_schema.snap @@ -1,10 +1,10 @@ --- source: crates/store/re_redap_tests/src/tests/unregister_segment.rs -expression: "batch.clone().filter_columns_by(|f|\nf.metadata().get(\"rerun:index_kind\").is_none()).unwrap().format_schema_snapshot()" +expression: filter_out_index_ranges(batch.clone()).format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_4_reregister_all_manifest_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_4_reregister_all_manifest_schema.snap index de2dddcae99f..13f55ed9e024 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_4_reregister_all_manifest_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_4_reregister_all_manifest_schema.snap @@ -2,13 +2,13 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: batch.format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_name: Utf8 -rerun_layer_type: Utf8 -rerun_num_chunks: u64 -rerun_registration_status: Utf8 -rerun_registration_time: 
Timestamp(ns) -rerun_schema_sha256: FixedSizeBinary[32] -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_url: Utf8 +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_name: non-null Utf8 +rerun_layer_type: non-null Utf8 +rerun_num_chunks: non-null UInt64 +rerun_registration_status: non-null Utf8 +rerun_registration_time: non-null Timestamp(ns) +rerun_schema_sha256: non-null FixedSizeBinary(32) +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_url: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_4_reregister_all_segments_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_4_reregister_all_segments_schema.snap index 08c5f5f30d4c..06216fbdc832 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_4_reregister_all_segments_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__simple_4_reregister_all_segments_schema.snap @@ -1,10 +1,10 @@ --- source: crates/store/re_redap_tests/src/tests/unregister_segment.rs -expression: "batch.clone().filter_columns_by(|f|\nf.metadata().get(\"rerun:index_kind\").is_none()).unwrap().format_schema_snapshot()" +expression: filter_out_index_ranges(batch.clone()).format_schema_snapshot() --- -rerun_last_updated_at: Timestamp(ns) -rerun_layer_names: List[Utf8] -rerun_num_chunks: u64 -rerun_segment_id: Utf8 -rerun_size_bytes: u64 -rerun_storage_urls: List[Utf8] +rerun_last_updated_at: non-null Timestamp(ns) +rerun_layer_names: non-null List(non-null Utf8, field: 'rerun_layer_names') +rerun_num_chunks: non-null UInt64 +rerun_segment_id: non-null Utf8 +rerun_size_bytes: non-null UInt64 +rerun_storage_urls: non-null List(non-null Utf8, field: 'rerun_storage_urls') diff --git 
a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_1_added_data.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_1_added_data.snap index 899480f65b76..251d14945d82 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_1_added_data.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_1_added_data.snap @@ -2,14 +2,14 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: filtered_chunk_info.format_snapshot(false) --- -┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┐ -│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static │ -╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╡ -│ 00000000000000010000000000000001 ┆ my_segment_id ┆ base ┆ /my/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000010000000000000002 ┆ my_segment_id ┆ base ┆ /my/entity ┆ true │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000010000000000000003 ┆ my_segment_id ┆ base ┆ /my/other/entity ┆ false │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 00000000000000010000000000000004 ┆ my_segment_id ┆ base ┆ /my/other/entity ┆ true │ -└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┘ 
+┌──────────────────────────────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┬────────────────────────┬───────────────────────────────────┐ +│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static ┆ rerun_layer_direct_url ┆ rerun_layer_direct_url_expires_at │ +╞══════════════════════════════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╪════════════════════════╪═══════════════════════════════════╡ +│ 00000000000000010000000000000001 ┆ my_segment_id ┆ base ┆ /my/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000010000000000000002 ┆ my_segment_id ┆ base ┆ /my/entity ┆ true ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000010000000000000003 ┆ my_segment_id ┆ base ┆ /my/other/entity ┆ false ┆ null ┆ null │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 00000000000000010000000000000004 ┆ my_segment_id ┆ base ┆ /my/other/entity ┆ true ┆ null ┆ null │ +└──────────────────────────────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┴────────────────────────┴───────────────────────────────────┘ diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_1_added_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_1_added_schema.snap index 92631be63786..de3836219061 100644 --- 
a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_1_added_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_1_added_schema.snap @@ -2,18 +2,21 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: required_chunk_info.format_schema_snapshot() --- -chunk_byte_len: u64 -chunk_entity_path: Utf8 [ +chunk_byte_len: non-null UInt64 +chunk_byte_size_uncompressed: UInt64 +chunk_entity_path: non-null Utf8 [ rerun:kind: "control" ] -chunk_id: FixedSizeBinary[16] [ +chunk_id: non-null FixedSizeBinary(16) [ rerun:kind: "control" ] -chunk_is_static: bool [ +chunk_is_static: non-null Boolean [ rerun:kind: "control" ] -chunk_key: Binary -chunk_segment_id: Utf8 [ +chunk_key: non-null Binary +chunk_segment_id: non-null Utf8 [ rerun:kind: "control" ] -rerun_segment_layer: Utf8 +rerun_layer_direct_url: Dictionary(Int32, Utf8) +rerun_layer_direct_url_expires_at: Dictionary(Int32, Int64) +rerun_segment_layer: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_2_removed_data.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_2_removed_data.snap index c77936a8c99b..a3e71aa9dd9c 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_2_removed_data.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_2_removed_data.snap @@ -2,7 +2,7 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: filtered_chunk_info.format_snapshot(false) --- -┌──────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┐ -│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ 
chunk_is_static │ -╞══════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╡ -└──────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┘ +┌──────────┬──────────────────┬─────────────────────┬───────────────────┬─────────────────┬────────────────────────┬───────────────────────────────────┐ +│ chunk_id ┆ chunk_segment_id ┆ rerun_segment_layer ┆ chunk_entity_path ┆ chunk_is_static ┆ rerun_layer_direct_url ┆ rerun_layer_direct_url_expires_at │ +╞══════════╪══════════════════╪═════════════════════╪═══════════════════╪═════════════════╪════════════════════════╪═══════════════════════════════════╡ +└──────────┴──────────────────┴─────────────────────┴───────────────────┴─────────────────┴────────────────────────┴───────────────────────────────────┘ diff --git a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_2_removed_schema.snap b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_2_removed_schema.snap index 92631be63786..de3836219061 100644 --- a/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_2_removed_schema.snap +++ b/crates/store/re_redap_tests/src/tests/snapshots/re_redap_tests__tests__unregister_segment__unregister_then_query_2_removed_schema.snap @@ -2,18 +2,21 @@ source: crates/store/re_redap_tests/src/tests/unregister_segment.rs expression: required_chunk_info.format_schema_snapshot() --- -chunk_byte_len: u64 -chunk_entity_path: Utf8 [ +chunk_byte_len: non-null UInt64 +chunk_byte_size_uncompressed: UInt64 +chunk_entity_path: non-null Utf8 [ rerun:kind: "control" ] -chunk_id: FixedSizeBinary[16] [ +chunk_id: non-null FixedSizeBinary(16) [ rerun:kind: "control" ] -chunk_is_static: bool [ +chunk_is_static: non-null Boolean [ rerun:kind: "control" ] -chunk_key: Binary -chunk_segment_id: Utf8 [ +chunk_key: 
non-null Binary +chunk_segment_id: non-null Utf8 [ rerun:kind: "control" ] -rerun_segment_layer: Utf8 +rerun_layer_direct_url: Dictionary(Int32, Utf8) +rerun_layer_direct_url_expires_at: Dictionary(Int32, Int64) +rerun_segment_layer: non-null Utf8 diff --git a/crates/store/re_redap_tests/src/tests/unregister_segment.rs b/crates/store/re_redap_tests/src/tests/unregister_segment.rs index 93a735414ddb..9636cc473bc1 100644 --- a/crates/store/re_redap_tests/src/tests/unregister_segment.rs +++ b/crates/store/re_redap_tests/src/tests/unregister_segment.rs @@ -1,6 +1,8 @@ #![expect(clippy::unwrap_used)] -use super::common::{DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _}; +use super::common::{ + DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _, entry_name, +}; use crate::tests::common::concat_record_batches; use crate::{FieldsTestExt as _, RecordBatchTestExt as _, SchemaTestExt as _}; use arrow::array::{RecordBatch, StringArray}; @@ -313,7 +315,7 @@ async fn scan_segment_table_and_snapshot( tonic::Request::new(ScanSegmentTableRequest { columns: vec![], // all of them }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -341,7 +343,7 @@ async fn scan_segment_table_and_snapshot( let alleged_schema: Schema = service .get_segment_table_schema( tonic::Request::new(GetSegmentTableSchemaRequest {}) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -403,7 +405,7 @@ async fn scan_dataset_manifest_and_snapshot( tonic::Request::new(ScanDatasetManifestRequest { columns: vec![], // all of them }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -431,7 +433,7 @@ async fn scan_dataset_manifest_and_snapshot( let alleged_schema: Schema = service .get_dataset_manifest_schema( tonic::Request::new(GetDatasetManifestSchemaRequest {}) - .with_entry_name(dataset_name) + 
.with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -511,7 +513,7 @@ async fn snapshot_response( let alleged_schema: Schema = service .get_dataset_manifest_schema( tonic::Request::new(GetDatasetManifestSchemaRequest {}) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -590,7 +592,7 @@ async fn query_dataset_snapshot( let chunk_info = service .query_dataset( tonic::Request::new(query_dataset_request.into()) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await @@ -634,6 +636,7 @@ async fn query_dataset_snapshot( .remove_columns(&[ QueryDatasetResponse::FIELD_CHUNK_KEY, QueryDatasetResponse::FIELD_CHUNK_BYTE_LENGTH, + QueryDatasetResponse::FIELD_CHUNK_BYTE_LENGTH_UNCOMPRESSED, ]) .auto_sort_rows() .unwrap(); @@ -669,7 +672,7 @@ async fn get_dataset_updated_at_nanos(service: &impl RerunCloudService, dataset_ service .read_dataset_entry( tonic::Request::new(ReadDatasetEntryRequest {}) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await diff --git a/crates/store/re_redap_tests/src/tests/update_entry.rs b/crates/store/re_redap_tests/src/tests/update_entry.rs index 2855e1287138..f09203737034 100644 --- a/crates/store/re_redap_tests/src/tests/update_entry.rs +++ b/crates/store/re_redap_tests/src/tests/update_entry.rs @@ -6,6 +6,8 @@ use re_protos::cloud::v1alpha1::ext::{ }; use re_protos::cloud::v1alpha1::rerun_cloud_service_server::RerunCloudService; +use super::common::entry_name; + pub async fn update_entry_tests(service: impl RerunCloudService) { // // Create a dataset @@ -15,7 +17,7 @@ pub async fn update_entry_tests(service: impl RerunCloudService) { let dataset_entry = create_dataset_entry(&service, dataset_name).await.unwrap(); - assert_eq!(dataset_entry.details.name, dataset_name); + assert_eq!(dataset_entry.details.name, entry_name(dataset_name)); let dataset_id = dataset_entry.details.id; @@ -33,7 +35,7 @@ 
pub async fn update_entry_tests(service: impl RerunCloudService) { .await .unwrap(); - assert_eq!(response.entry_details.name, dataset_name); + assert_eq!(response.entry_details.name, entry_name(dataset_name)); // // Dataset rename should succeed @@ -45,14 +47,14 @@ pub async fn update_entry_tests(service: impl RerunCloudService) { UpdateEntryRequest { id: dataset_id, entry_details_update: EntryDetailsUpdate { - name: Some(new_dataset_name.to_owned()), + name: Some(entry_name(new_dataset_name)), }, }, ) .await .unwrap(); - assert_eq!(response.entry_details.name, new_dataset_name); + assert_eq!(response.entry_details.name, entry_name(new_dataset_name)); // // Create another dataset @@ -73,7 +75,7 @@ pub async fn update_entry_tests(service: impl RerunCloudService) { UpdateEntryRequest { id: dataset2_id, entry_details_update: EntryDetailsUpdate { - name: Some(new_dataset_name.to_owned()), + name: Some(entry_name(new_dataset_name)), }, }, ) @@ -97,7 +99,7 @@ pub async fn update_entry_tests(service: impl RerunCloudService) { .await .unwrap(); - assert_eq!(table_entry.details.name, table_name); + assert_eq!(table_entry.details.name, entry_name(table_name)); let table_id = table_entry.details.id; // @@ -110,14 +112,14 @@ pub async fn update_entry_tests(service: impl RerunCloudService) { UpdateEntryRequest { id: table_id, entry_details_update: EntryDetailsUpdate { - name: Some(new_table_name.to_owned()), + name: Some(entry_name(new_table_name)), }, }, ) .await .unwrap(); - assert_eq!(response.entry_details.name, new_table_name); + assert_eq!(response.entry_details.name, entry_name(new_table_name)); // // Updating table name to an existing dataset name should fail. 
@@ -128,7 +130,7 @@ pub async fn update_entry_tests(service: impl RerunCloudService) { UpdateEntryRequest { id: table_id, entry_details_update: EntryDetailsUpdate { - name: Some(dataset2_name.to_owned()), + name: Some(entry_name(dataset2_name)), }, }, ) @@ -162,7 +164,7 @@ pub async fn update_entry_tests(service: impl RerunCloudService) { UpdateEntryRequest { id: table2_id, entry_details_update: EntryDetailsUpdate { - name: Some(new_table_name.to_owned()), + name: Some(entry_name(new_table_name)), }, }, ) @@ -200,7 +202,7 @@ pub async fn update_entry_bumps_timestamp(service: impl RerunCloudService) { UpdateEntryRequest { id: dataset_id, entry_details_update: EntryDetailsUpdate { - name: Some(new_name.to_owned()), + name: Some(entry_name(new_name)), }, }, ) @@ -226,7 +228,7 @@ pub async fn update_entry_bumps_timestamp(service: impl RerunCloudService) { UpdateEntryRequest { id: dataset_id, entry_details_update: EntryDetailsUpdate { - name: Some(new_name.to_owned()), + name: Some(entry_name(new_name)), }, }, ) @@ -250,7 +252,7 @@ async fn create_dataset_entry( service .create_dataset_entry(tonic::Request::new( CreateDatasetEntryRequest { - name: name.to_owned(), + name: entry_name(name), id: None, } .into(), @@ -273,7 +275,7 @@ async fn create_table_entry( service .create_table_entry(tonic::Request::new( CreateTableEntryRequest { - name: table_name.to_owned(), + name: entry_name(table_name), schema: schema.clone(), provider_details: Some(provider_details), } diff --git a/crates/store/re_redap_tests/src/tests/write_table.rs b/crates/store/re_redap_tests/src/tests/write_table.rs index 6c58181f7606..8d94f81a369b 100644 --- a/crates/store/re_redap_tests/src/tests/write_table.rs +++ b/crates/store/re_redap_tests/src/tests/write_table.rs @@ -10,7 +10,7 @@ use re_protos::cloud::v1alpha1::{ use re_protos::headers::RerunHeadersInjectorExt as _; use crate::RecordBatchTestExt as _; -use crate::tests::common::{RerunCloudServiceExt as _, concat_record_batches}; +use 
crate::tests::common::{RerunCloudServiceExt as _, concat_record_batches, entry_name}; use crate::utils::streaming::make_streaming_request; use crate::utils::tables::create_simple_lance_dataset; @@ -73,7 +73,7 @@ pub async fn write_table(service: impl RerunCloudService) { .try_into() .expect("Failed to convert to EntryDetails"); - assert_eq!(entry.name, table_name); + assert_eq!(entry.name, entry_name(table_name)); let original_batches = get_table_batches(&service, &entry).await; assert_ne!(original_batches.len(), 0); diff --git a/crates/store/re_redap_tests/src/utils/client.rs b/crates/store/re_redap_tests/src/utils/client.rs index bd386006d54e..b056d7537558 100644 --- a/crates/store/re_redap_tests/src/utils/client.rs +++ b/crates/store/re_redap_tests/src/utils/client.rs @@ -32,7 +32,7 @@ impl std::fmt::Debug for TestClient { /// Adapter to convert a stream into tonic Streaming format struct StreamAdapter { - results: VecDeque>, + results: VecDeque>, } impl tonic::codec::Decoder for StreamAdapter { @@ -51,10 +51,10 @@ impl tonic::codec::Decoder for StreamAdapter { async fn stream_to_streaming(stream: S) -> tonic::codec::Streaming where T: Send + 'static + std::fmt::Debug, - S: Stream> + Send + 'static, + S: Stream> + Send + 'static, { use futures::StreamExt as _; - let results: VecDeque> = stream.collect().await; + let results: VecDeque> = stream.collect().await; let body_len = 5 * results.len(); // compression_flag (1) + length (4) let adapter = StreamAdapter { results }; @@ -74,14 +74,14 @@ impl DataframeClientAPI for TestClient { async fn get_dataset_schema( &mut self, request: Request, - ) -> Result, Status> { + ) -> tonic::Result> { self.service.get_dataset_schema(request).await } async fn query_dataset( &mut self, request: Request, - ) -> Result>, Status> { + ) -> tonic::Result>> { let response = self.service.query_dataset(request).await?; let (metadata, stream, _extensions) = response.into_parts(); @@ -95,7 +95,7 @@ impl DataframeClientAPI for TestClient { 
async fn fetch_chunks( &mut self, request: Request, - ) -> Result>, Status> { + ) -> tonic::Result>> { let response = self.service.fetch_chunks(request).await?; let (metadata, stream, _extensions) = response.into_parts(); diff --git a/crates/store/re_redap_tests/src/utils/rerun.rs b/crates/store/re_redap_tests/src/utils/rerun.rs index fa851b81d465..c667a1181f8f 100644 --- a/crates/store/re_redap_tests/src/utils/rerun.rs +++ b/crates/store/re_redap_tests/src/utils/rerun.rs @@ -21,18 +21,18 @@ use crate::TempPath; pub type TuidPrefix = u64; pub fn next_chunk_id_generator(prefix: u64) -> impl FnMut() -> re_chunk::ChunkId { - let mut chunk_id = re_chunk::ChunkId::from_tuid(Tuid::from_nanos_and_inc(prefix, 0)); + let mut tuid = Tuid::from_nanos_and_inc(prefix, 0); move || { - chunk_id = chunk_id.next(); - chunk_id + tuid = tuid.next(); + re_chunk::ChunkId::from_tuid(tuid) } } pub fn next_row_id_generator(prefix: u64) -> impl FnMut() -> re_chunk::RowId { - let mut row_id = re_chunk::RowId::from_tuid(Tuid::from_nanos_and_inc(prefix, 0)); + let mut tuid = Tuid::from_nanos_and_inc(prefix, 0); move || { - row_id = row_id.next(); - row_id + tuid = tuid.next(); + re_chunk::RowId::from_tuid(tuid) } } @@ -468,6 +468,124 @@ pub fn create_nasty_recording( Ok(tmp_path) } +/// Create a recording with deliberately divergent per-component time ranges within chunks. +/// +/// This is designed to test correctness of latest-at and range queries when per-component +/// time ranges differ significantly from the chunk's global time range. 
+/// +/// Creates two chunks for entity `/sensor`: +/// +/// ```text +/// Chunk F: global [5, 20] +/// - row at T=5: points present, colors absent +/// - row at T=8: points present, colors absent +/// - row at T=15: points absent, colors present +/// - row at T=20: points absent, colors present +/// → points@[5, 8], colors@[15, 20] +/// +/// Chunk G: global [1, 3] +/// - row at T=1: colors present +/// - row at T=3: colors present +/// → colors@[1, 3] +/// ``` +pub fn create_divergent_component_ranges_recording( + tuid_prefix: TuidPrefix, + segment_id: &str, +) -> anyhow::Result { + use re_chunk::Chunk; + use re_log_types::example_components::{MyColor, MyPoint, MyPoints}; + use re_log_types::{TimeInt, build_frame_nr}; + + let tmp_path = { + let dir = tempfile::tempdir()?; + let path = dir.path().join(format!("{segment_id}.rrd")); + TempPath::new(dir, path) + }; + + let rec = RecordingStreamBuilder::new(format!("rerun_example_{segment_id}")) + .recording_id(segment_id) + .send_properties(false) + .save(tmp_path.clone())?; + + let mut next_chunk_id = next_chunk_id_generator(tuid_prefix); + let mut next_row_id = next_row_id_generator(tuid_prefix); + + let entity_path = EntityPath::from("sensor"); + + let t5 = TimeInt::new_temporal(5); + let t8 = TimeInt::new_temporal(8); + let t15 = TimeInt::new_temporal(15); + let t20 = TimeInt::new_temporal(20); + + let t1 = TimeInt::new_temporal(1); + let t3 = TimeInt::new_temporal(3); + + let points_a = MyPoint::from_iter(0..1); + let points_b = MyPoint::from_iter(1..2); + let colors_a = MyColor::from_iter(0..1); + let colors_b = MyColor::from_iter(1..2); + let colors_c = MyColor::from_iter(2..3); + let colors_d = MyColor::from_iter(3..4); + + // Chunk F: global [5, 20], points@[5, 8], colors@[15, 20] + let chunk_f = Chunk::builder_with_id(next_chunk_id(), entity_path.clone()) + .with_sparse_component_batches( + next_row_id(), + [build_frame_nr(t5)], + [ + (MyPoints::descriptor_points(), Some(&points_a as _)), + 
(MyPoints::descriptor_colors(), None), + ], + ) + .with_sparse_component_batches( + next_row_id(), + [build_frame_nr(t8)], + [ + (MyPoints::descriptor_points(), Some(&points_b as _)), + (MyPoints::descriptor_colors(), None), + ], + ) + .with_sparse_component_batches( + next_row_id(), + [build_frame_nr(t15)], + [ + (MyPoints::descriptor_points(), None), + (MyPoints::descriptor_colors(), Some(&colors_a as _)), + ], + ) + .with_sparse_component_batches( + next_row_id(), + [build_frame_nr(t20)], + [ + (MyPoints::descriptor_points(), None), + (MyPoints::descriptor_colors(), Some(&colors_b as _)), + ], + ) + .build()?; + + rec.send_chunk(chunk_f); + + // Chunk G: global [1, 3], colors@[1, 3] + let chunk_g = Chunk::builder_with_id(next_chunk_id(), entity_path.clone()) + .with_sparse_component_batches( + next_row_id(), + [build_frame_nr(t1)], + [(MyPoints::descriptor_colors(), Some(&colors_c as _))], + ) + .with_sparse_component_batches( + next_row_id(), + [build_frame_nr(t3)], + [(MyPoints::descriptor_colors(), Some(&colors_d as _))], + ) + .build()?; + + rec.send_chunk(chunk_g); + + rec.flush_blocking()?; + + Ok(tmp_path) +} + /// Create an rrd recording with embeddings with 256 floats each. Total number of embeddings (rows) /// and number of embeddings per row can be specified. 
/// diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes.fbs index a6d21cbc4e8c..58a207a1f182 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes.fbs @@ -20,6 +20,7 @@ include "./archetypes/geo_line_strings.fbs"; include "./archetypes/geo_points.fbs"; include "./archetypes/graph_edges.fbs"; include "./archetypes/graph_nodes.fbs"; +include "./archetypes/grid_map.fbs"; include "./archetypes/image.fbs"; include "./archetypes/instance_poses3d.fbs"; include "./archetypes/line_strips2d.fbs"; @@ -37,6 +38,7 @@ include "./archetypes/scalars.fbs"; include "./archetypes/segmentation_image.fbs"; include "./archetypes/series_lines.fbs"; include "./archetypes/series_points.fbs"; +include "./archetypes/status.fbs"; include "./archetypes/tensor.fbs"; include "./archetypes/text_document.fbs"; include "./archetypes/text_log.fbs"; diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/annotation_context.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/annotation_context.fbs index 90a6553b0238..4673ac5e0562 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/annotation_context.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/annotation_context.fbs @@ -21,5 +21,5 @@ table AnnotationContext ( "attr.rust.derive": "PartialEq" ) { /// List of class descriptions, mapping class indices to class names, colors etc. 
- context: rerun.components.AnnotationContext ("attr.rerun.component_required", order: 1000); + context: rerun.components.AnnotationContext ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); } diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/asset3d.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/asset3d.fbs index 6e3f6899693c..d812b93065fe 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/asset3d.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/asset3d.fbs @@ -20,7 +20,7 @@ table Asset3D ( // --- Required --- /// The asset's bytes. - blob: rerun.components.Blob ("attr.rerun.component_required", order: 1000); + blob: rerun.components.Blob ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Recommended --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/asset_video.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/asset_video.fbs index 08ecc68ae8a0..e6624aa31734 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/asset_video.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/asset_video.fbs @@ -20,7 +20,7 @@ table AssetVideo ( // --- Required --- /// The asset's bytes. - blob: rerun.components.Blob ("attr.rerun.component_required", order: 1000); + blob: rerun.components.Blob ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Recommended --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/bar_chart.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/bar_chart.fbs index 95702ca1d563..f5b168a3d098 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/bar_chart.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/bar_chart.fbs @@ -17,7 +17,7 @@ table BarChart ( // --- Required --- /// The values. Should always be a 1-dimensional tensor (i.e. a vector). 
- values: rerun.components.TensorData ("attr.rerun.component_required", order: 1000); + values: rerun.components.TensorData ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Optional --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/clear.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/clear.fbs index 851dbe1cb7ea..c271b382b24d 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/clear.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/clear.fbs @@ -24,5 +24,5 @@ table Clear ( "attr.rust.derive": "PartialEq", "attr.rust.override_crate": "re_types_core" ) { - is_recursive: rerun.components.ClearIsRecursive ("attr.rerun.component_required", order: 100); + is_recursive: rerun.components.ClearIsRecursive ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 100); } diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/coordinate_frame.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/coordinate_frame.fbs index d38dadfbafa1..03e5cfa78dd8 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/coordinate_frame.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/coordinate_frame.fbs @@ -18,5 +18,7 @@ table CoordinateFrame ( "attr.rust.derive": "PartialEq" ) { /// The coordinate frame to use for the current entity. - frame: rerun.components.TransformFrameId ("attr.rerun.component_required", order: 1000); + /// + /// Note that empty strings are not valid transform frame IDs. 
+ frame: rerun.components.TransformFrameId ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); } diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/depth_image.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/depth_image.fbs index 7deb1fdd3207..ad0d5e3945d2 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/depth_image.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/depth_image.fbs @@ -22,10 +22,10 @@ table DepthImage ( // --- Required --- /// The raw depth image data. - buffer: rerun.components.ImageBuffer ("attr.rerun.component_required", order: 1000); + buffer: rerun.components.ImageBuffer ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); /// The format of the image. - format: rerun.components.ImageFormat ("attr.rerun.component_required", order: 1100); + format: rerun.components.ImageFormat ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1100); // --- Optional --- @@ -72,4 +72,11 @@ table DepthImage ( /// Objects with higher values are drawn on top of those with lower values. /// Defaults to `-20.0`. draw_order: rerun.components.DrawOrder ("attr.rerun.component_optional", nullable, order: 3500); + + /// Optional filter used when a texel is magnified (displayed larger than a screen pixel) in 2D views. + /// + /// The filter is applied to the scalar values *before* they are mapped to color via the colormap. + /// + /// Has no effect in 3D views. 
+ magnification_filter: rerun.components.MagnificationFilter ("attr.rerun.component_optional", nullable, order: 3600); } diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/encoded_depth_image.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/encoded_depth_image.fbs index f642f4b7eba7..ff00f2bb7dd7 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/encoded_depth_image.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/encoded_depth_image.fbs @@ -19,7 +19,7 @@ table EncodedDepthImage( /// Supported are: /// * single channel PNG /// * RVL with ROS2 metadata (for details see ) - blob: rerun.components.Blob ("attr.rerun.component_required", order: 1000); + blob: rerun.components.Blob ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Recommended --- @@ -47,4 +47,11 @@ table EncodedDepthImage( /// Optional 2D draw order. draw_order: rerun.components.DrawOrder ("attr.rerun.component_optional", nullable, order: 3200); + + /// Optional filter used when a texel is magnified (displayed larger than a screen pixel) in 2D views. + /// + /// The filter is applied to the scalar values *before* they are mapped to color via the colormap. + /// + /// Has no effect in 3D views. + magnification_filter: rerun.components.MagnificationFilter ("attr.rerun.component_optional", nullable, order: 3300); } diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/encoded_image.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/encoded_image.fbs index 36d064a3f4d9..98727e99a584 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/encoded_image.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/encoded_image.fbs @@ -20,7 +20,7 @@ table EncodedImage ( // --- Required --- /// The encoded content of some image file, e.g. a PNG or JPEG. 
- blob: rerun.components.Blob ("attr.rerun.component_required", order: 1000); + blob: rerun.components.Blob ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Recommended --- @@ -45,4 +45,7 @@ table EncodedImage ( /// /// Objects with higher values are drawn on top of those with lower values. draw_order: rerun.components.DrawOrder ("attr.rerun.component_optional", nullable, order: 3100); + + /// Optional filter used when a texel is magnified (displayed larger than a screen pixel). + magnification_filter: rerun.components.MagnificationFilter ("attr.rerun.component_optional", nullable, order: 3200); } diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/geo_line_strings.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/geo_line_strings.fbs index 4ae2c23d5b6a..e9ba8da08bd1 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/geo_line_strings.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/geo_line_strings.fbs @@ -18,7 +18,7 @@ table GeoLineStrings ( // --- Required --- /// The line strings, expressed in [EPSG:4326](https://epsg.io/4326) coordinates (North/East-positive degrees). - line_strings: [rerun.components.GeoLineString] ("attr.rerun.component_required", order: 1000); + line_strings: [rerun.components.GeoLineString] ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Recommended --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/graph_edges.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/graph_edges.fbs index 2eada2fa2876..8d92bff1784c 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/graph_edges.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/graph_edges.fbs @@ -18,7 +18,7 @@ table GraphEdges ( // --- Required --- /// A list of node tuples. 
- edges: [rerun.components.GraphEdge] ("attr.rerun.component_required", order: 1000); + edges: [rerun.components.GraphEdge] ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Recommended --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/graph_nodes.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/graph_nodes.fbs index 2a12d7d5bc52..2243880259fb 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/graph_nodes.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/graph_nodes.fbs @@ -16,7 +16,7 @@ table GraphNodes ( // --- Required --- /// A list of node IDs. - node_ids: [rerun.components.GraphNode] ("attr.rerun.component_required", order: 1000); + node_ids: [rerun.components.GraphNode] ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Optional --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/grid_map.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/grid_map.fbs new file mode 100644 index 000000000000..96ac36859e64 --- /dev/null +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/grid_map.fbs @@ -0,0 +1,68 @@ +namespace rerun.archetypes; + + +/// A 2D grid map stored as raster data in an image buffer, with a cell size in scene units and pose. +/// +/// This archetype is intended for robotics applications like occupancy maps or navigation costmaps. +/// +/// \example archetypes/grid_map_simple title="Simple occupancy grid map" +table GridMap ( + "attr.cpp.no_field_ctors", + "attr.docs.category": "Spatial 3D", + "attr.docs.unreleased", + "attr.docs.view_types": "Spatial3DView, Spatial2DView", + "attr.rerun.state": "unstable", + "attr.rust.derive": "PartialEq" +) { + // --- Required --- + + /// The raw grid data. + data: rerun.components.ImageBuffer ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); + + /// The format of the grid's image data. 
+ format: rerun.components.ImageFormat ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1100); + + /// The scene unit size of a single grid cell (e.g. m / pixel). + cell_size: rerun.components.CellSize ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1150); + + // --- Optional --- + + /// Translation of the lower-left corner of the grid map in space. + /// + /// Together with [components.RotationAxisAngle] or [components.RotationQuat], this defines the pose of the + /// lower-left image corner relative to the map's parent coordinate frame. + /// + /// If not set, the lower-left image corner is placed at origin of the map's parent coordinate frame. + translation: rerun.components.Translation3D ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1200); + + /// Rotation of the lower-left corner of the grid map in space via axis + angle. + /// + /// Together with [components.Translation3D], this defines the pose of the + /// lower-left image corner relative to the map's parent coordinate frame. + /// + /// Note: either this or [components.RotationQuat] can be set to specify the grid map's rotation, but not both. + /// If both this and [components.RotationQuat] are set, this is ignored in favor of the quaternion. + rotation_axis_angle: rerun.components.RotationAxisAngle ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1300); + + /// Rotation of the lower-left corner of the grid map in space via quaternion. + /// + /// Together with [components.Translation3D], this defines the pose of the + /// lower-left image corner relative to the map's parent coordinate frame. + quaternion: rerun.components.RotationQuat ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1400); + + /// Opacity of the grid map texture after all image decoding and colormap application. + /// + /// Defaults to 1.0 (fully opaque). 
+ opacity: rerun.components.Opacity ("attr.rerun.component_optional", nullable, order: 3000); + + /// Optional draw order for layering multiple grid maps that overlap in space. + /// + /// Higher values are drawn on top of lower values. + draw_order: rerun.components.DrawOrder ("attr.rerun.component_optional", nullable, order: 3050); + + /// Colormap to use for rendering single-channel grid maps. + /// + /// If not set, the grid map is shown using the underlying [components.ImageFormat] + /// interpretation. + colormap: rerun.components.Colormap ("attr.rerun.component_optional", nullable, order: 3100); +} diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/image.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/image.fbs index a3bb7be37475..96703b714217 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/image.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/image.fbs @@ -36,10 +36,10 @@ table Image ( // --- Required --- /// The raw image data. - buffer: rerun.components.ImageBuffer ("attr.rerun.component_required", order: 1000); + buffer: rerun.components.ImageBuffer ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); /// The format of the image. - format: rerun.components.ImageFormat ("attr.rerun.component_required", order: 1100); + format: rerun.components.ImageFormat ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1100); // --- Optional --- @@ -53,4 +53,7 @@ table Image ( /// Objects with higher values are drawn on top of those with lower values. /// Defaults to `-10.0`. draw_order: rerun.components.DrawOrder ("attr.rerun.component_optional", nullable, order: 3100); + + /// Optional filter used when a texel is magnified (displayed larger than a screen pixel). 
+ magnification_filter: rerun.components.MagnificationFilter ("attr.rerun.component_optional", nullable, order: 3200); } diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/instance_poses3d.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/instance_poses3d.fbs index b90ef88f2362..0cbd3a410328 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/instance_poses3d.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/instance_poses3d.fbs @@ -34,17 +34,17 @@ table InstancePoses3D ( // TODO(#6743): Transforms can't be affected by blueprints which is why all components of this archetype are non-ui editable. /// Translation vectors. - translations: [rerun.components.Translation3D] ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 1100); + translations: [rerun.components.Translation3D] ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1100); /// Rotations via axis + angle. - rotation_axis_angles: [rerun.components.RotationAxisAngle] ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 1200); + rotation_axis_angles: [rerun.components.RotationAxisAngle] ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1200); /// Rotations via quaternion. - quaternions: [rerun.components.RotationQuat] ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 1300); + quaternions: [rerun.components.RotationQuat] ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1300); /// Scaling factors. - scales: [rerun.components.Scale3D] ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 1400); + scales: [rerun.components.Scale3D] ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1400); /// 3x3 transformation matrices. 
- mat3x3: [rerun.components.TransformMat3x3] ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 1500); + mat3x3: [rerun.components.TransformMat3x3] ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1500); } diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/line_strips2d.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/line_strips2d.fbs index 4c76758bca86..7089266eb7b3 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/line_strips2d.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/line_strips2d.fbs @@ -18,7 +18,7 @@ table LineStrips2D ( // --- Required --- /// All the actual 2D line strips that make up the batch. - strips: [rerun.components.LineStrip2D] ("attr.rerun.component_required", order: 1000); + strips: [rerun.components.LineStrip2D] ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Recommended --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/line_strips3d.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/line_strips3d.fbs index 4e1a03fa1443..9c532f55925d 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/line_strips3d.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/line_strips3d.fbs @@ -18,7 +18,7 @@ table LineStrips3D ( // --- Required --- /// All the actual 3D line strips that make up the batch. 
- strips: [rerun.components.LineStrip3D] ("attr.rerun.component_required", order: 1000); + strips: [rerun.components.LineStrip3D] ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Recommended --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_channel.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_channel.fbs index 25e8628315a1..21423016b1f5 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_channel.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_channel.fbs @@ -26,14 +26,14 @@ table McapChannel ( /// /// Channel IDs must be unique within a single MCAP file and are used to associate /// messages with their corresponding channel definition. - id: rerun.components.ChannelId ("attr.rerun.component_required", order: 1000); + id: rerun.components.ChannelId ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); /// The topic name that this channel publishes to. /// /// Topics are hierarchical paths from the original robotics system (e.g., "/sensors/camera/image") /// that categorize and organize different data streams. /// Topics are separate from Rerun's entity paths, but they often can be mapped to them. - topic: rerun.components.Text ("attr.rerun.component_required", order: 2000); + topic: rerun.components.Text ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 2000); /// The encoding format used for messages in this channel. 
/// @@ -42,7 +42,7 @@ table McapChannel ( /// * `cdr` - Common Data Representation (CDR) message format, used by ROS2 /// * `protobuf` - Protocol Buffers /// * `json` - JSON encoding - message_encoding: rerun.components.Text ("attr.rerun.component_required", order: 3000); + message_encoding: rerun.components.Text ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 3000); // --- Optional --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_message.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_message.fbs index 4156e3d5547c..f4d7d0b16f16 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_message.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_message.fbs @@ -30,5 +30,5 @@ table McapMessage ( /// by the associated channel's `message_encoding` field. The structure and interpretation /// of this binary data depends on the encoding format (e.g., ros1, cdr, protobuf) /// and the message schema defined for the channel. - data: rerun.components.Blob ("attr.rerun.component_required", order: 1000); + data: rerun.components.Blob ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); } diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_schema.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_schema.fbs index 8bdc217f91d5..49e1d13715d7 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_schema.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_schema.fbs @@ -27,13 +27,13 @@ table McapSchema ( /// /// Schema IDs must be unique within an MCAP file and are referenced by channels /// to specify their message structure. A single schema can be shared across multiple channels. 
- id: rerun.components.SchemaId ("attr.rerun.component_required", order: 1000); + id: rerun.components.SchemaId ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); /// Human-readable name identifying this schema. /// /// Schema names typically describe the message type or data structure /// (e.g., `"geometry_msgs/msg/Twist"`, `"sensor_msgs/msg/Image"`, `"MyCustomMessage"`). - name: rerun.components.Text ("attr.rerun.component_required", order: 2000); + name: rerun.components.Text ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 2000); /// The schema definition format used to describe the message structure. /// @@ -43,7 +43,7 @@ table McapSchema ( /// * `ros2msg` - [ROS2](https://mcap.dev/spec/registry#ros2msg) message definition format /// * `jsonschema` - [JSON Schema](https://mcap.dev/spec/registry#jsonschema) specification /// * `flatbuffer` - [FlatBuffers](https://mcap.dev/spec/registry#flatbuffer) schema definition - encoding: rerun.components.Text ("attr.rerun.component_required", order: 3000); + encoding: rerun.components.Text ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 3000); /// The schema definition content as binary data. /// @@ -51,5 +51,5 @@ table McapSchema ( /// `encoding` field. For text-based schemas (like ROS message definitions or JSON Schema), /// this is typically UTF-8 encoded text. For binary schema formats, this contains /// the serialized schema data. 
- data: rerun.components.Blob ("attr.rerun.component_required", order: 4000); + data: rerun.components.Blob ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 4000); } diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_statistics.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_statistics.fbs index 460b1b5b0b89..4103df3e4311 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_statistics.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/mcap_statistics.fbs @@ -26,43 +26,43 @@ table McapStatistics ( /// /// This count includes all timestamped data messages but excludes metadata records, /// schema definitions, and other non-message records. - message_count: rerun.components.Count ("attr.rerun.component_required", order: 1000); + message_count: rerun.components.Count ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); /// Number of unique schema definitions in the recording. /// /// Each schema defines the structure for one or more message types used by channels. - schema_count: rerun.components.Count ("attr.rerun.component_required", order: 2000); + schema_count: rerun.components.Count ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 2000); /// Number of channels defined in the recording. /// /// Each channel represents a unique topic and encoding combination for publishing messages. - channel_count: rerun.components.Count ("attr.rerun.component_required", order: 3000); + channel_count: rerun.components.Count ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 3000); /// Number of file attachments embedded in the recording. /// /// Attachments can include calibration files, configuration data, or other auxiliary files. 
- attachment_count: rerun.components.Count ("attr.rerun.component_required", order: 4000); + attachment_count: rerun.components.Count ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 4000); /// Number of metadata records providing additional context about the recording. /// /// Metadata records contain key-value pairs with information about the recording environment, /// system configuration, or other contextual data. - metadata_count: rerun.components.Count ("attr.rerun.component_required", order: 5000); + metadata_count: rerun.components.Count ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 5000); /// Number of data chunks used to organize messages in the file. /// /// Chunks group related messages together for efficient storage and indexed access. - chunk_count: rerun.components.Count ("attr.rerun.component_required", order: 6000); + chunk_count: rerun.components.Count ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 6000); /// Timestamp of the earliest message in the recording. /// /// This marks the beginning of the recorded data timeline. - message_start_time: rerun.components.Timestamp ("attr.rerun.component_required", order: 7000); + message_start_time: rerun.components.Timestamp ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 7000); /// Timestamp of the latest message in the recording. /// /// Together with `message_start_time`, this defines the total duration of the recording. 
- message_end_time: rerun.components.Timestamp ("attr.rerun.component_required", order: 8000); + message_end_time: rerun.components.Timestamp ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 8000); // --- Optional --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/mesh3d.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/mesh3d.fbs index 33505b0a4b95..13b3fe478fc3 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/mesh3d.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/mesh3d.fbs @@ -9,8 +9,9 @@ namespace rerun.archetypes; /// If there are multiple [archetypes.InstancePoses3D] instances logged to the same entity as a mesh, /// an instance of the mesh will be drawn for each transform. /// -/// The viewer draws meshes always two-sided. However, for transparency ordering -/// front faces are assumed to those with counter clockwise triangle winding order (this is the same as in the GLTF specification). +/// For transparency ordering, as well as back face culling (disabled by default), +/// front faces are assumed to be those with counter clockwise triangle winding order +/// (this is the same as in the GLTF specification). /// /// \example archetypes/mesh3d_indexed title="Simple indexed 3D mesh" image="https://static.rerun.io/mesh3d_indexed/57c70dc992e6dc0bd9c5222ca084f5b6240cea75/1200w.png" /// \example archetypes/mesh3d_instancing title="3D mesh with instancing" image="https://static.rerun.io/mesh3d_leaf_transforms3d/c2d0ee033129da53168f5705625a9b033f3a3d61/1200w.png" @@ -27,7 +28,7 @@ table Mesh3D ( /// The positions of each vertex. /// /// If no `triangle_indices` are specified, then each triplet of positions is interpreted as a triangle. 
- vertex_positions: [rerun.components.Position3D] ("attr.rerun.component_required", order: 1000); + vertex_positions: [rerun.components.Position3D] ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Recommended --- @@ -52,6 +53,11 @@ table Mesh3D ( /// Alpha channel governs the overall mesh transparency. albedo_factor: rerun.components.AlbedoFactor ("attr.rerun.component_optional", nullable, order: 3300); + /// Determines which faces of the mesh are rendered. + /// + /// The default is [components.MeshFaceRendering.DoubleSided], meaning both front and back faces are shown. + face_rendering: rerun.components.MeshFaceRendering ("attr.rerun.component_optional", nullable, order: 3350); + /// Optional albedo texture. /// /// Used with the [components.Texcoord2D] of the mesh. diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/pinhole.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/pinhole.fbs index c798300ab6a5..d45484ab67ae 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/pinhole.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/pinhole.fbs @@ -28,7 +28,7 @@ table Pinhole ( /// Camera projection, from image coordinates to view coordinates. /// /// Any update to this field will reset all other transform properties that aren't changed in the same log call or `send_columns` row. - image_from_camera: rerun.components.PinholeProjection ("attr.rerun.component_required", "attr.rerun.component_ui_editable": "false", order: 1000); + image_from_camera: rerun.components.PinholeProjection ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); /// Pixel resolution (usually integers) of child image space. Width and height. /// @@ -40,7 +40,7 @@ table Pinhole ( /// `image_from_camera` project onto the space spanned by `(0,0)` and `resolution - 1`. 
/// /// Any update to this field will reset all other transform properties that aren't changed in the same log call or `send_columns` row. - resolution: rerun.components.Resolution ("attr.rerun.component_recommended", "attr.rerun.component_ui_editable": "false", nullable, order: 2000); + resolution: rerun.components.Resolution ("attr.rerun.component_recommended", "attr.rerun.component_no_ui_edit", nullable, order: 2000); // --- Other --- @@ -74,7 +74,7 @@ table Pinhole ( // TODO(#2641): This should specify a default-value of `RDF` // TODO(andreas): this isn't part of the "atomic set" right now because we may also source this from `ViewCoordinates` and things get confusing quickly // Should it be reset when other transform properties are changed? - camera_xyz: rerun.components.ViewCoordinates ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 3000); + camera_xyz: rerun.components.ViewCoordinates ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 3000); // --- Topology --- @@ -90,7 +90,7 @@ table Pinhole ( /// To set the frame an entity is part of see [archetypes.CoordinateFrame]. /// /// Any update to this field will reset all other transform properties that aren't changed in the same log call or `send_columns` row. - child_frame: rerun.components.TransformFrameId ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 4000); + child_frame: rerun.components.TransformFrameId ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 4000); /// The parent frame this transform transforms into. /// @@ -100,7 +100,7 @@ table Pinhole ( /// To set the frame an entity is part of see [archetypes.CoordinateFrame]. /// /// Any update to this field will reset all other transform properties that aren't changed in the same log call or `send_columns` row. 
- parent_frame: rerun.components.TransformFrameId ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 4100); + parent_frame: rerun.components.TransformFrameId ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 4100); // --- Visualization in 3D --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/scalars.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/scalars.fbs index 661a187dc484..1142e6560c85 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/scalars.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/scalars.fbs @@ -25,7 +25,7 @@ table Scalars ( // --- Required --- /// The scalar values to log. - scalars: [rerun.components.Scalar] ("attr.rerun.component_required", order: 1000); + scalars: [rerun.components.Scalar] ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Recommended --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/segmentation_image.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/segmentation_image.fbs index f51804306049..c3d9b4be3054 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/segmentation_image.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/segmentation_image.fbs @@ -25,10 +25,10 @@ table SegmentationImage ( // --- Required --- /// The raw image data. - buffer: rerun.components.ImageBuffer ("attr.rerun.component_required", order: 1000); + buffer: rerun.components.ImageBuffer ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); /// The format of the image. 
- format: rerun.components.ImageFormat ("attr.rerun.component_required", order: 1100); + format: rerun.components.ImageFormat ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1100); // --- Optional --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/series_points.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/series_points.fbs index d7d553c992fd..91bda2f66c45 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/series_points.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/series_points.fbs @@ -21,7 +21,7 @@ table SeriesPoints ( /// What shape to use to represent the point /// /// May change over time. - markers: [rerun.components.MarkerShape] ("attr.rerun.component_required", "attr.rerun.component_ui_editable": "true", nullable, order: 2000); + markers: [rerun.components.MarkerShape] ("attr.rerun.component_required", nullable, order: 2000); // --- Optional --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/status.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/status.fbs new file mode 100644 index 000000000000..915f67013463 --- /dev/null +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/status.fbs @@ -0,0 +1,22 @@ +namespace rerun.archetypes; + +/// A status update, representing a change in the status of an entity. +/// +/// Useful for representing discrete state machines, mode transitions, or +/// status changes over time. Each logged [archetypes.Status] marks a new status +/// at the given time. A `null` status is ignored by the Status view. +/// +/// The Status view displays these as horizontal colored lanes over time. 
+/// +/// \example archetypes/status title="Status changes over time" image="https://static.rerun.io/status/8f224c6e4a9cbbb4b1e279c56a426ec4c6bfca50/1200w.png" +table Status ( + "attr.docs.category": "Plotting", + "attr.docs.unreleased", + "attr.docs.view_types": "StatusView", + "attr.rerun.state": "unstable", + "attr.rerun.visualizer": "StatusVisualizer", + "attr.rust.derive": "PartialEq" +) { + /// The new status value. A `null` status is ignored; it can be used to partially update a multi-instance status array. + status: rerun.components.Text ("attr.rerun.component_required", nullable, order: 100); +} diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/tensor.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/tensor.fbs index 434e46f83007..52979f56a398 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/tensor.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/tensor.fbs @@ -20,7 +20,7 @@ table Tensor ( "attr.rust.derive": "PartialEq" ) { /// The tensor data - data: rerun.components.TensorData ("attr.rerun.component_required", order: 1000); + data: rerun.components.TensorData ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Optional --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/transform3d.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/transform3d.fbs index b554c2d8d935..210f76d4ad36 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/transform3d.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/transform3d.fbs @@ -32,32 +32,32 @@ table Transform3D ( /// Translation vector. /// /// Any update to this field will reset all other transform properties that aren't changed in the same log call or `send_columns` row. 
- translation: rerun.components.Translation3D ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 1100); + translation: rerun.components.Translation3D ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1100); /// Rotation via axis + angle. /// /// Any update to this field will reset all other transform properties that aren't changed in the same log call or `send_columns` row. - rotation_axis_angle: rerun.components.RotationAxisAngle ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 1200); + rotation_axis_angle: rerun.components.RotationAxisAngle ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1200); /// Rotation via quaternion. /// /// Any update to this field will reset all other transform properties that aren't changed in the same log call or `send_columns` row. - quaternion: rerun.components.RotationQuat ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 1300); + quaternion: rerun.components.RotationQuat ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1300); /// Scaling factor. /// /// Any update to this field will reset all other transform properties that aren't changed in the same log call or `send_columns` row. - scale: rerun.components.Scale3D ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 1400); + scale: rerun.components.Scale3D ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1400); /// 3x3 transformation matrix. /// /// Any update to this field will reset all other transform properties that aren't changed in the same log call or `send_columns` row. 
- mat3x3: rerun.components.TransformMat3x3 ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 1500); + mat3x3: rerun.components.TransformMat3x3 ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1500); /// Specifies the relation this transform establishes between this entity and its parent. /// /// Any update to this field will reset all other transform properties that aren't changed in the same log call or `send_columns` row. - relation: rerun.components.TransformRelation ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 1600); + relation: rerun.components.TransformRelation ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 1600); // --- transform frame @@ -73,7 +73,7 @@ table Transform3D ( /// To set the frame an entity is part of see [archetypes.CoordinateFrame]. /// /// Any update to this field will reset all other transform properties that aren't changed in the same log call or `send_columns` row. - child_frame: rerun.components.TransformFrameId ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 2000); + child_frame: rerun.components.TransformFrameId ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 2000); /// The parent frame this transform transforms into. /// @@ -83,5 +83,5 @@ table Transform3D ( /// To set the frame an entity is part of see [archetypes.CoordinateFrame]. /// /// Any update to this field will reset all other transform properties that aren't changed in the same log call or `send_columns` row. 
- parent_frame: rerun.components.TransformFrameId ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 2100); + parent_frame: rerun.components.TransformFrameId ("attr.rerun.component_optional", "attr.rerun.component_no_ui_edit", nullable, order: 2100); } diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/transform_axes3d.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/transform_axes3d.fbs index eab76588f288..c3dd65ce7ec4 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/transform_axes3d.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/transform_axes3d.fbs @@ -14,7 +14,7 @@ table TransformAxes3D ( /// /// The length is interpreted in the local coordinate system of the transform. /// If the transform is scaled, the axes will be scaled accordingly. - axis_length: rerun.components.AxisLength ("attr.rerun.component_required", "attr.rerun.component_ui_editable": "true", order: 1000); + axis_length: rerun.components.AxisLength ("attr.rerun.component_required", order: 1000); /// Whether to show a text label with the corresponding frame. show_frame: rerun.components.ShowLabels ("attr.rerun.component_optional", nullable, order: 2000); diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/video_stream.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/video_stream.fbs index 7ee5b10b3cc8..538cdb6f5848 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/video_stream.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/video_stream.fbs @@ -22,7 +22,7 @@ table VideoStream ( /// The codec used to encode the video chunks. /// /// This property is expected to be constant over time and is ideally logged statically once per stream. 
- codec: rerun.components.VideoCodec ("attr.rerun.component_required", order: 1000); + codec: rerun.components.VideoCodec ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); // --- Recommended --- diff --git a/crates/store/re_sdk_types/definitions/rerun/archetypes/view_coordinates.fbs b/crates/store/re_sdk_types/definitions/rerun/archetypes/view_coordinates.fbs index a975856852ce..e08b9ca74941 100644 --- a/crates/store/re_sdk_types/definitions/rerun/archetypes/view_coordinates.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/archetypes/view_coordinates.fbs @@ -25,5 +25,5 @@ table ViewCoordinates ( "attr.rust.repr": "transparent" ) { /// The directions of the [x, y, z] axes. - xyz: rerun.components.ViewCoordinates ("attr.rerun.component_required", order: 1000); + xyz: rerun.components.ViewCoordinates ("attr.rerun.component_required", "attr.rerun.component_no_ui_edit", order: 1000); } diff --git a/crates/store/re_sdk_types/definitions/rerun/attributes.fbs b/crates/store/re_sdk_types/definitions/rerun/attributes.fbs index dc284a82ffb9..fd14581eb3bd 100644 --- a/crates/store/re_sdk_types/definitions/rerun/attributes.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/attributes.fbs @@ -18,15 +18,11 @@ attribute "attr.rerun.component_recommended"; /// Only applies to the fields of an archetype. attribute "attr.rerun.component_optional"; -/// Marks a component as editable through the UI. -/// By default all required components are non-editable and all other components are editable. -/// -/// Must be set to true or false. Valid usage examples:" -/// - `axis_length: rerun.components.AxisLength ("attr.rerun.component_required", "attr.rerun.component_ui_editable": "true", order: 1000);` -/// - `translation: rerun.components.Translation3D ("attr.rerun.component_optional", "attr.rerun.component_ui_editable": "false", nullable, order: 1100);` +/// Opts a component out of being editable through the UI. 
+/// By default all components are editable. This is a boolean attribute (presence = true). /// /// Note this is only a hint to the Viewer. Blueprint code may always set overrides for any component! -attribute "attr.rerun.component_ui_editable"; +attribute "attr.rerun.component_no_ui_edit"; /// Override the type of a field. /// diff --git a/crates/store/re_sdk_types/definitions/rerun/blueprint/archetypes/dataframe_query.fbs b/crates/store/re_sdk_types/definitions/rerun/blueprint/archetypes/dataframe_query.fbs index e9857025f9cc..30296b4b3185 100644 --- a/crates/store/re_sdk_types/definitions/rerun/blueprint/archetypes/dataframe_query.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/blueprint/archetypes/dataframe_query.fbs @@ -23,7 +23,7 @@ table DataframeQuery ( /// Should empty cells be filled with latest-at queries? apply_latest_at: rerun.blueprint.components.ApplyLatestAt ("attr.rerun.component_optional", nullable, order: 400); - /// Selected columns. If unset, all columns are selected. + /// Selected columns. If unset, only the active timeline and all component columns are selected. select: rerun.blueprint.components.SelectedColumns ("attr.rerun.component_optional", nullable, order: 500); /// The order of entity path column groups. If unset, the default order is used. diff --git a/crates/store/re_sdk_types/definitions/rerun/blueprint/archetypes/spatial_information.fbs b/crates/store/re_sdk_types/definitions/rerun/blueprint/archetypes/spatial_information.fbs index 5b80b0d6595c..a3272158d83f 100644 --- a/crates/store/re_sdk_types/definitions/rerun/blueprint/archetypes/spatial_information.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/blueprint/archetypes/spatial_information.fbs @@ -13,5 +13,7 @@ table SpatialInformation ( show_axes: rerun.blueprint.components.Enabled ("attr.rerun.component_optional", nullable, order: 100); /// Whether the bounding box should be shown. 
+ // TODO(andreas): Make this an enum so the user can choose between showing bounding boxes, + // regions of interest, or per-entity bounding boxes. show_bounding_box: rerun.blueprint.components.Enabled ("attr.rerun.component_optional", nullable, order: 200); } diff --git a/crates/store/re_sdk_types/definitions/rerun/blueprint/archetypes/text_log_columns.fbs b/crates/store/re_sdk_types/definitions/rerun/blueprint/archetypes/text_log_columns.fbs index c3ac3f13f5d5..ec0d656e137d 100644 --- a/crates/store/re_sdk_types/definitions/rerun/blueprint/archetypes/text_log_columns.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/blueprint/archetypes/text_log_columns.fbs @@ -10,7 +10,7 @@ table TextLogColumns ( /// What timeline columns to show. /// - /// Defaults to displaying all timelines. + /// Defaults to displaying only the active timeline. timeline_columns: [rerun.blueprint.components.TimelineColumn] ("attr.rerun.component_optional", nullable, order: 1000); /// All columns to be displayed. 
diff --git a/crates/store/re_sdk_types/definitions/rerun/blueprint/views.fbs b/crates/store/re_sdk_types/definitions/rerun/blueprint/views.fbs index a1e81ce6ca08..c2196abd7e23 100644 --- a/crates/store/re_sdk_types/definitions/rerun/blueprint/views.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/blueprint/views.fbs @@ -6,6 +6,7 @@ include "./views/graph.fbs"; include "./views/map.fbs"; include "./views/spatial2d.fbs"; include "./views/spatial3d.fbs"; +include "./views/status.fbs"; include "./views/tensor.fbs"; include "./views/text_document.fbs"; include "./views/text_log.fbs"; diff --git a/crates/store/re_sdk_types/definitions/rerun/blueprint/views/status.fbs b/crates/store/re_sdk_types/definitions/rerun/blueprint/views/status.fbs new file mode 100644 index 000000000000..1ee4d457fda0 --- /dev/null +++ b/crates/store/re_sdk_types/definitions/rerun/blueprint/views/status.fbs @@ -0,0 +1,12 @@ +namespace rerun.blueprint.views; + +/// A view for displaying status transitions over time, for use with [archetypes.Status]. +// TODO(RR-4240): Add a proper snippet and update screenshot. +/// +/// \example views/status title="Use a blueprint to show a StatusView." 
image="https://static.rerun.io/status_view/997ff1c16765374651ba662812a78e53803aba75/1200w.png" +table StatusView ( + "attr.docs.unreleased", + "attr.rerun.view_identifier": "Status", + "attr.rerun.state": "unstable" +) { +} diff --git a/crates/store/re_sdk_types/definitions/rerun/components.fbs b/crates/store/re_sdk_types/definitions/rerun/components.fbs index 2fde1bc92709..6141b72f8798 100644 --- a/crates/store/re_sdk_types/definitions/rerun/components.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/components.fbs @@ -5,6 +5,7 @@ include "./components/albedo_factor.fbs"; include "./components/annotation_context.fbs"; include "./components/axis_length.fbs"; include "./components/blob.fbs"; +include "./components/cell_size.fbs"; include "./components/channel_id.fbs"; include "./components/channel_message_counts.fbs"; include "./components/class_id.fbs"; @@ -40,6 +41,7 @@ include "./components/magnification_filter.fbs"; include "./components/marker_shape.fbs"; include "./components/marker_size.fbs"; include "./components/media_type.fbs"; +include "./components/mesh_face_rendering.fbs"; include "./components/name.fbs"; include "./components/opacity.fbs"; include "./components/pinhole_projection.fbs"; diff --git a/crates/store/re_sdk_types/definitions/rerun/components/cell_size.fbs b/crates/store/re_sdk_types/definitions/rerun/components/cell_size.fbs new file mode 100644 index 000000000000..24a32194ebf4 --- /dev/null +++ b/crates/store/re_sdk_types/definitions/rerun/components/cell_size.fbs @@ -0,0 +1,16 @@ +namespace rerun.components; + +// --- + +/// The metric size of one grid cell in local scene units. +/// +/// E.g. for 2D grid maps, this is the physical size represented by a single pixel or cell. 
+struct CellSize ( + "attr.docs.unreleased", + "attr.python.aliases": "float", + "attr.python.array_aliases": "float | npt.NDArray[np.float32]", + "attr.rust.derive": "Copy, PartialEq, PartialOrd, bytemuck::Pod, bytemuck::Zeroable", + "attr.rust.repr": "transparent" +) { + value: rerun.datatypes.Float32 (order: 100); +} diff --git a/crates/store/re_sdk_types/definitions/rerun/components/colormap.fbs b/crates/store/re_sdk_types/definitions/rerun/components/colormap.fbs index ea39d5cabd0e..cbea7a57a4b8 100644 --- a/crates/store/re_sdk_types/definitions/rerun/components/colormap.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/components/colormap.fbs @@ -67,4 +67,16 @@ enum Colormap: ubyte { /// /// It interpolates from white to blue to purple to red to orange and back to white. Twilight, + + /// The classic `RViz` "Map" grid-map colormap intended for occupancy-style SLAM grid maps. + /// + /// Known values are mapped to a grayscale ramp from white (free) to black (occupied); + /// unknown values are shown in a green-blue color. Special / illegal values have highlight colors. + RvizMap, + + /// The classic `RViz` "Costmap" grid-map colormap for robot navigation cost maps. + /// + /// Cost values are mapped to a blue-to-red spectrum, and special cost values + /// (e.g. lethal obstacles) have highlight colors. Zero values are fully transparent. + RvizCostmap, } diff --git a/crates/store/re_sdk_types/definitions/rerun/components/fill_mode.fbs b/crates/store/re_sdk_types/definitions/rerun/components/fill_mode.fbs index caf9ec087ac8..57516fc9597f 100644 --- a/crates/store/re_sdk_types/definitions/rerun/components/fill_mode.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/components/fill_mode.fbs @@ -19,7 +19,7 @@ enum FillMode: ubyte{ /// * An [archetypes.Ellipsoids3D] will draw three axis-aligned ellipses that are cross-sections /// of each ellipsoid, each of which displays two out of three of the sizes of the ellipsoid.
/// * For [archetypes.Boxes3D], it is the edges of the box, identical to [components.FillMode.DenseWireframe]. - MajorWireframe (default), + MajorWireframe, /// Many lines are drawn to represent the surface of the shape in a see-through fashion. /// @@ -32,4 +32,9 @@ enum FillMode: ubyte{ /// The surface of the shape is filled in with a solid color. No lines are drawn. Solid, + + /// The surface of the shape is filled in with a transparent color, with major wireframe lines on top. + /// + /// This gives a good default appearance that shows both the shape's surface and its structure. + TransparentFillMajorWireframe (default), } diff --git a/crates/store/re_sdk_types/definitions/rerun/components/interpolation_mode.fbs b/crates/store/re_sdk_types/definitions/rerun/components/interpolation_mode.fbs index 65421d22955e..6f6522cbb23b 100644 --- a/crates/store/re_sdk_types/definitions/rerun/components/interpolation_mode.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/components/interpolation_mode.fbs @@ -1,9 +1,7 @@ namespace rerun.components; /// Specifies how values between data points are interpolated in time series. -enum InterpolationMode: ubyte ( - "attr.docs.unreleased" -) { +enum InterpolationMode: ubyte { /// Invalid value. Won't show up in generated types. Invalid = 0, diff --git a/crates/store/re_sdk_types/definitions/rerun/components/magnification_filter.fbs b/crates/store/re_sdk_types/definitions/rerun/components/magnification_filter.fbs index 074c2178a773..5788fb2cfe41 100644 --- a/crates/store/re_sdk_types/definitions/rerun/components/magnification_filter.fbs +++ b/crates/store/re_sdk_types/definitions/rerun/components/magnification_filter.fbs @@ -1,18 +1,27 @@ namespace rerun.components; -/// Filter used when magnifying an image/texture such that a single pixel/texel is displayed as multiple pixels on screen. +/// Filter used when a single texel/pixel of an image is displayed larger than a single screen pixel. 
+/// +/// This happens when zooming into an image, when displaying a low-resolution image in a large area, +/// or when viewing an image up close in 3D space. enum MagnificationFilter: ubyte { /// Invalid value. Won't show up in generated types. Invalid = 0, /// Show the nearest pixel value. /// - /// This will give a blocky appearance when zooming in. + /// This will give a blocky appearance when the image is scaled up. /// Used as default when rendering 2D images. Nearest (default), - /// Linearly interpolate the nearest neighbors, creating a smoother look when zooming in. + /// Linearly interpolate the nearest neighbors, creating a smoother look when the image is scaled up. /// /// Used as default for mesh rendering. Linear, + + /// Bicubic interpolation using a Catmull-Rom spline, creating the smoothest look when the image is scaled up. + /// + /// This is computationally more expensive than linear filtering but produces sharper results with less blurring. + /// Unlike bilinear filtering, this avoids cross-shaped artifacts at texel boundaries. + Bicubic, } diff --git a/crates/store/re_sdk_types/definitions/rerun/components/mesh_face_rendering.fbs b/crates/store/re_sdk_types/definitions/rerun/components/mesh_face_rendering.fbs new file mode 100644 index 000000000000..c1cd6b113abb --- /dev/null +++ b/crates/store/re_sdk_types/definitions/rerun/components/mesh_face_rendering.fbs @@ -0,0 +1,23 @@ +namespace rerun.components; + +/// Determines which faces of a mesh are rendered. +/// +/// For this purpose, we assume that the winding order of vertices in a mesh is +/// consistent and that front faces are defined as those with vertices in counter clockwise order. +enum MeshFaceRendering: ubyte { + /// Invalid value. Won't show up in generated types. + Invalid = 0, + + /// Show both back and front faces. + DoubleSided(default), + + /// Only front faces are shown. + /// + /// Front faces are assumed to have a counter clockwise vertex winding order on screen. 
+ Front, + + /// Only back faces are shown. + /// + /// Back faces are assumed to have a clockwise vertex winding order on screen. + Back, +} diff --git a/crates/store/re_sdk_types/src/archetypes/.gitattributes b/crates/store/re_sdk_types/src/archetypes/.gitattributes index 1d89b59d8551..4e95c7ba88ba 100644 --- a/crates/store/re_sdk_types/src/archetypes/.gitattributes +++ b/crates/store/re_sdk_types/src/archetypes/.gitattributes @@ -20,6 +20,7 @@ geo_line_strings.rs linguist-generated=true geo_points.rs linguist-generated=true graph_edges.rs linguist-generated=true graph_nodes.rs linguist-generated=true +grid_map.rs linguist-generated=true image.rs linguist-generated=true instance_poses3d.rs linguist-generated=true line_strips2d.rs linguist-generated=true @@ -38,6 +39,7 @@ scalars.rs linguist-generated=true segmentation_image.rs linguist-generated=true series_lines.rs linguist-generated=true series_points.rs linguist-generated=true +status.rs linguist-generated=true tensor.rs linguist-generated=true text_document.rs linguist-generated=true text_log.rs linguist-generated=true diff --git a/crates/store/re_sdk_types/src/archetypes/coordinate_frame.rs b/crates/store/re_sdk_types/src/archetypes/coordinate_frame.rs index 740c7fdd3b53..eb1818e835e3 100644 --- a/crates/store/re_sdk_types/src/archetypes/coordinate_frame.rs +++ b/crates/store/re_sdk_types/src/archetypes/coordinate_frame.rs @@ -82,6 +82,8 @@ use ::re_types_core::{DeserializationError, DeserializationResult}; #[derive(Clone, Debug, PartialEq, Default)] pub struct CoordinateFrame { /// The coordinate frame to use for the current entity. + /// + /// Note that empty strings are not valid transform frame IDs. pub frame: Option, } @@ -237,6 +239,8 @@ impl CoordinateFrame { } /// The coordinate frame to use for the current entity. + /// + /// Note that empty strings are not valid transform frame IDs. 
#[inline] pub fn with_frame(mut self, frame: impl Into) -> Self { self.frame = try_serialize_field(Self::descriptor_frame(), [frame]); diff --git a/crates/store/re_sdk_types/src/archetypes/depth_image.rs b/crates/store/re_sdk_types/src/archetypes/depth_image.rs index c4bcac4d2830..2af2013ef4b4 100644 --- a/crates/store/re_sdk_types/src/archetypes/depth_image.rs +++ b/crates/store/re_sdk_types/src/archetypes/depth_image.rs @@ -118,6 +118,13 @@ pub struct DepthImage { /// Objects with higher values are drawn on top of those with lower values. /// Defaults to `-20.0`. pub draw_order: Option, + + /// Optional filter used when a texel is magnified (displayed larger than a screen pixel) in 2D views. + /// + /// The filter is applied to the scalar values *before* they are mapped to color via the colormap. + /// + /// Has no effect in 3D views. + pub magnification_filter: Option, } impl DepthImage { @@ -204,6 +211,18 @@ impl DepthImage { component_type: Some("rerun.components.DrawOrder".into()), } } + + /// Returns the [`ComponentDescriptor`] for [`Self::magnification_filter`]. + /// + /// The corresponding component is [`crate::components::MagnificationFilter`]. 
+ #[inline] + pub fn descriptor_magnification_filter() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.DepthImage".into()), + component: "DepthImage:magnification_filter".into(), + component_type: Some("rerun.components.MagnificationFilter".into()), + } + } } static REQUIRED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 2usize]> = @@ -217,7 +236,7 @@ static REQUIRED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 2usize]> = static RECOMMENDED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 0usize]> = std::sync::LazyLock::new(|| []); -static OPTIONAL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 5usize]> = +static OPTIONAL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 6usize]> = std::sync::LazyLock::new(|| { [ DepthImage::descriptor_meter(), @@ -225,10 +244,11 @@ static OPTIONAL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 5usize]> = DepthImage::descriptor_depth_range(), DepthImage::descriptor_point_fill_ratio(), DepthImage::descriptor_draw_order(), + DepthImage::descriptor_magnification_filter(), ] }); -static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 7usize]> = +static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 8usize]> = std::sync::LazyLock::new(|| { [ DepthImage::descriptor_buffer(), @@ -238,12 +258,13 @@ static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 7usize]> = DepthImage::descriptor_depth_range(), DepthImage::descriptor_point_fill_ratio(), DepthImage::descriptor_draw_order(), + DepthImage::descriptor_magnification_filter(), ] }); impl DepthImage { - /// The total number of components in the archetype: 2 required, 0 recommended, 5 optional - pub const NUM_COMPONENTS: usize = 7usize; + /// The total number of components in the archetype: 2 required, 0 recommended, 6 optional + pub const NUM_COMPONENTS: usize = 8usize; } impl ::re_types_core::Archetype for DepthImage { @@ -311,6 +332,14 @@ impl ::re_types_core::Archetype for 
DepthImage { .map(|array| { SerializedComponentBatch::new(array.clone(), Self::descriptor_draw_order()) }); + let magnification_filter = arrays_by_descr + .get(&Self::descriptor_magnification_filter()) + .map(|array| { + SerializedComponentBatch::new( + array.clone(), + Self::descriptor_magnification_filter(), + ) + }); Ok(Self { buffer, format, @@ -319,6 +348,7 @@ impl ::re_types_core::Archetype for DepthImage { depth_range, point_fill_ratio, draw_order, + magnification_filter, }) } } @@ -335,6 +365,7 @@ impl ::re_types_core::AsComponents for DepthImage { self.depth_range.clone(), self.point_fill_ratio.clone(), self.draw_order.clone(), + self.magnification_filter.clone(), ] .into_iter() .flatten() @@ -366,6 +397,7 @@ impl DepthImage { depth_range: None, point_fill_ratio: None, draw_order: None, + magnification_filter: None, } } @@ -408,6 +440,10 @@ impl DepthImage { crate::components::DrawOrder::arrow_empty(), Self::descriptor_draw_order(), )), + magnification_filter: Some(SerializedComponentBatch::new( + crate::components::MagnificationFilter::arrow_empty(), + Self::descriptor_magnification_filter(), + )), } } @@ -451,6 +487,9 @@ impl DepthImage { self.draw_order .map(|draw_order| draw_order.partitioned(_lengths.clone())) .transpose()?, + self.magnification_filter + .map(|magnification_filter| magnification_filter.partitioned(_lengths.clone())) + .transpose()?, ]; Ok(columns.into_iter().flatten()) } @@ -470,6 +509,7 @@ impl DepthImage { let len_depth_range = self.depth_range.as_ref().map(|b| b.array.len()); let len_point_fill_ratio = self.point_fill_ratio.as_ref().map(|b| b.array.len()); let len_draw_order = self.draw_order.as_ref().map(|b| b.array.len()); + let len_magnification_filter = self.magnification_filter.as_ref().map(|b| b.array.len()); let len = None .or(len_buffer) .or(len_format) @@ -478,6 +518,7 @@ impl DepthImage { .or(len_depth_range) .or(len_point_fill_ratio) .or(len_draw_order) + .or(len_magnification_filter) .unwrap_or(0); 
self.columns(std::iter::repeat_n(1, len)) } @@ -658,6 +699,41 @@ impl DepthImage { self.draw_order = try_serialize_field(Self::descriptor_draw_order(), draw_order); self } + + /// Optional filter used when a texel is magnified (displayed larger than a screen pixel) in 2D views. + /// + /// The filter is applied to the scalar values *before* they are mapped to color via the colormap. + /// + /// Has no effect in 3D views. + #[inline] + pub fn with_magnification_filter( + mut self, + magnification_filter: impl Into, + ) -> Self { + self.magnification_filter = try_serialize_field( + Self::descriptor_magnification_filter(), + [magnification_filter], + ); + self + } + + /// This method makes it possible to pack multiple [`crate::components::MagnificationFilter`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. [`Self::with_magnification_filter`] should + /// be used when logging a single row's worth of data. + #[inline] + pub fn with_many_magnification_filter( + mut self, + magnification_filter: impl IntoIterator< + Item = impl Into, + >, + ) -> Self { + self.magnification_filter = try_serialize_field( + Self::descriptor_magnification_filter(), + magnification_filter, + ); + self + } } impl ::re_byte_size::SizeBytes for DepthImage { @@ -670,5 +746,6 @@ impl ::re_byte_size::SizeBytes for DepthImage { + self.depth_range.heap_size_bytes() + self.point_fill_ratio.heap_size_bytes() + self.draw_order.heap_size_bytes() + + self.magnification_filter.heap_size_bytes() } } diff --git a/crates/store/re_sdk_types/src/archetypes/encoded_depth_image.rs b/crates/store/re_sdk_types/src/archetypes/encoded_depth_image.rs index b0419974728c..778406cc158c 100644 --- a/crates/store/re_sdk_types/src/archetypes/encoded_depth_image.rs +++ b/crates/store/re_sdk_types/src/archetypes/encoded_depth_image.rs @@ -95,6 +95,13 @@ pub struct EncodedDepthImage { /// Optional 2D draw order. 
pub draw_order: Option, + + /// Optional filter used when a texel is magnified (displayed larger than a screen pixel) in 2D views. + /// + /// The filter is applied to the scalar values *before* they are mapped to color via the colormap. + /// + /// Has no effect in 3D views. + pub magnification_filter: Option, } impl EncodedDepthImage { @@ -181,6 +188,18 @@ impl EncodedDepthImage { component_type: Some("rerun.components.DrawOrder".into()), } } + + /// Returns the [`ComponentDescriptor`] for [`Self::magnification_filter`]. + /// + /// The corresponding component is [`crate::components::MagnificationFilter`]. + #[inline] + pub fn descriptor_magnification_filter() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.EncodedDepthImage".into()), + component: "EncodedDepthImage:magnification_filter".into(), + component_type: Some("rerun.components.MagnificationFilter".into()), + } + } } static REQUIRED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 1usize]> = @@ -194,17 +213,18 @@ static RECOMMENDED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 2usize] ] }); -static OPTIONAL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 4usize]> = +static OPTIONAL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 5usize]> = std::sync::LazyLock::new(|| { [ EncodedDepthImage::descriptor_colormap(), EncodedDepthImage::descriptor_depth_range(), EncodedDepthImage::descriptor_point_fill_ratio(), EncodedDepthImage::descriptor_draw_order(), + EncodedDepthImage::descriptor_magnification_filter(), ] }); -static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 7usize]> = +static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 8usize]> = std::sync::LazyLock::new(|| { [ EncodedDepthImage::descriptor_blob(), @@ -214,12 +234,13 @@ static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 7usize]> = EncodedDepthImage::descriptor_depth_range(), EncodedDepthImage::descriptor_point_fill_ratio(), 
EncodedDepthImage::descriptor_draw_order(), + EncodedDepthImage::descriptor_magnification_filter(), ] }); impl EncodedDepthImage { - /// The total number of components in the archetype: 1 required, 2 recommended, 4 optional - pub const NUM_COMPONENTS: usize = 7usize; + /// The total number of components in the archetype: 1 required, 2 recommended, 5 optional + pub const NUM_COMPONENTS: usize = 8usize; } impl ::re_types_core::Archetype for EncodedDepthImage { @@ -289,6 +310,14 @@ impl ::re_types_core::Archetype for EncodedDepthImage { .map(|array| { SerializedComponentBatch::new(array.clone(), Self::descriptor_draw_order()) }); + let magnification_filter = arrays_by_descr + .get(&Self::descriptor_magnification_filter()) + .map(|array| { + SerializedComponentBatch::new( + array.clone(), + Self::descriptor_magnification_filter(), + ) + }); Ok(Self { blob, media_type, @@ -297,6 +326,7 @@ impl ::re_types_core::Archetype for EncodedDepthImage { depth_range, point_fill_ratio, draw_order, + magnification_filter, }) } } @@ -313,6 +343,7 @@ impl ::re_types_core::AsComponents for EncodedDepthImage { self.depth_range.clone(), self.point_fill_ratio.clone(), self.draw_order.clone(), + self.magnification_filter.clone(), ] .into_iter() .flatten() @@ -341,6 +372,7 @@ impl EncodedDepthImage { depth_range: None, point_fill_ratio: None, draw_order: None, + magnification_filter: None, } } @@ -383,6 +415,10 @@ impl EncodedDepthImage { crate::components::DrawOrder::arrow_empty(), Self::descriptor_draw_order(), )), + magnification_filter: Some(SerializedComponentBatch::new( + crate::components::MagnificationFilter::arrow_empty(), + Self::descriptor_magnification_filter(), + )), } } @@ -426,6 +462,9 @@ impl EncodedDepthImage { self.draw_order .map(|draw_order| draw_order.partitioned(_lengths.clone())) .transpose()?, + self.magnification_filter + .map(|magnification_filter| magnification_filter.partitioned(_lengths.clone())) + .transpose()?, ]; Ok(columns.into_iter().flatten()) } @@ -445,6 
+484,7 @@ impl EncodedDepthImage { let len_depth_range = self.depth_range.as_ref().map(|b| b.array.len()); let len_point_fill_ratio = self.point_fill_ratio.as_ref().map(|b| b.array.len()); let len_draw_order = self.draw_order.as_ref().map(|b| b.array.len()); + let len_magnification_filter = self.magnification_filter.as_ref().map(|b| b.array.len()); let len = None .or(len_blob) .or(len_media_type) @@ -453,6 +493,7 @@ impl EncodedDepthImage { .or(len_depth_range) .or(len_point_fill_ratio) .or(len_draw_order) + .or(len_magnification_filter) .unwrap_or(0); self.columns(std::iter::repeat_n(1, len)) } @@ -613,6 +654,41 @@ impl EncodedDepthImage { self.draw_order = try_serialize_field(Self::descriptor_draw_order(), draw_order); self } + + /// Optional filter used when a texel is magnified (displayed larger than a screen pixel) in 2D views. + /// + /// The filter is applied to the scalar values *before* they are mapped to color via the colormap. + /// + /// Has no effect in 3D views. + #[inline] + pub fn with_magnification_filter( + mut self, + magnification_filter: impl Into, + ) -> Self { + self.magnification_filter = try_serialize_field( + Self::descriptor_magnification_filter(), + [magnification_filter], + ); + self + } + + /// This method makes it possible to pack multiple [`crate::components::MagnificationFilter`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. [`Self::with_magnification_filter`] should + /// be used when logging a single row's worth of data. 
+ #[inline] + pub fn with_many_magnification_filter( + mut self, + magnification_filter: impl IntoIterator< + Item = impl Into, + >, + ) -> Self { + self.magnification_filter = try_serialize_field( + Self::descriptor_magnification_filter(), + magnification_filter, + ); + self + } } impl ::re_byte_size::SizeBytes for EncodedDepthImage { @@ -625,5 +701,6 @@ impl ::re_byte_size::SizeBytes for EncodedDepthImage { + self.depth_range.heap_size_bytes() + self.point_fill_ratio.heap_size_bytes() + self.draw_order.heap_size_bytes() + + self.magnification_filter.heap_size_bytes() } } diff --git a/crates/store/re_sdk_types/src/archetypes/encoded_image.rs b/crates/store/re_sdk_types/src/archetypes/encoded_image.rs index 5394c5a84b11..1eda5caa1a6b 100644 --- a/crates/store/re_sdk_types/src/archetypes/encoded_image.rs +++ b/crates/store/re_sdk_types/src/archetypes/encoded_image.rs @@ -76,6 +76,9 @@ pub struct EncodedImage { /// /// Objects with higher values are drawn on top of those with lower values. pub draw_order: Option, + + /// Optional filter used when a texel is magnified (displayed larger than a screen pixel). + pub magnification_filter: Option, } impl EncodedImage { @@ -126,6 +129,18 @@ impl EncodedImage { component_type: Some("rerun.components.DrawOrder".into()), } } + + /// Returns the [`ComponentDescriptor`] for [`Self::magnification_filter`]. + /// + /// The corresponding component is [`crate::components::MagnificationFilter`]. 
+ #[inline] + pub fn descriptor_magnification_filter() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.EncodedImage".into()), + component: "EncodedImage:magnification_filter".into(), + component_type: Some("rerun.components.MagnificationFilter".into()), + } + } } static REQUIRED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 1usize]> = @@ -134,27 +149,29 @@ static REQUIRED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 1usize]> = static RECOMMENDED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 1usize]> = std::sync::LazyLock::new(|| [EncodedImage::descriptor_media_type()]); -static OPTIONAL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 2usize]> = +static OPTIONAL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 3usize]> = std::sync::LazyLock::new(|| { [ EncodedImage::descriptor_opacity(), EncodedImage::descriptor_draw_order(), + EncodedImage::descriptor_magnification_filter(), ] }); -static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 4usize]> = +static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 5usize]> = std::sync::LazyLock::new(|| { [ EncodedImage::descriptor_blob(), EncodedImage::descriptor_media_type(), EncodedImage::descriptor_opacity(), EncodedImage::descriptor_draw_order(), + EncodedImage::descriptor_magnification_filter(), ] }); impl EncodedImage { - /// The total number of components in the archetype: 1 required, 1 recommended, 2 optional - pub const NUM_COMPONENTS: usize = 4usize; + /// The total number of components in the archetype: 1 required, 1 recommended, 3 optional + pub const NUM_COMPONENTS: usize = 5usize; } impl ::re_types_core::Archetype for EncodedImage { @@ -211,11 +228,20 @@ impl ::re_types_core::Archetype for EncodedImage { .map(|array| { SerializedComponentBatch::new(array.clone(), Self::descriptor_draw_order()) }); + let magnification_filter = arrays_by_descr + .get(&Self::descriptor_magnification_filter()) + .map(|array| { + 
SerializedComponentBatch::new( + array.clone(), + Self::descriptor_magnification_filter(), + ) + }); Ok(Self { blob, media_type, opacity, draw_order, + magnification_filter, }) } } @@ -229,6 +255,7 @@ impl ::re_types_core::AsComponents for EncodedImage { self.media_type.clone(), self.opacity.clone(), self.draw_order.clone(), + self.magnification_filter.clone(), ] .into_iter() .flatten() @@ -254,6 +281,7 @@ impl EncodedImage { media_type: None, opacity: None, draw_order: None, + magnification_filter: None, } } @@ -284,6 +312,10 @@ impl EncodedImage { crate::components::DrawOrder::arrow_empty(), Self::descriptor_draw_order(), )), + magnification_filter: Some(SerializedComponentBatch::new( + crate::components::MagnificationFilter::arrow_empty(), + Self::descriptor_magnification_filter(), + )), } } @@ -318,6 +350,9 @@ impl EncodedImage { self.draw_order .map(|draw_order| draw_order.partitioned(_lengths.clone())) .transpose()?, + self.magnification_filter + .map(|magnification_filter| magnification_filter.partitioned(_lengths.clone())) + .transpose()?, ]; Ok(columns.into_iter().flatten()) } @@ -334,11 +369,13 @@ impl EncodedImage { let len_media_type = self.media_type.as_ref().map(|b| b.array.len()); let len_opacity = self.opacity.as_ref().map(|b| b.array.len()); let len_draw_order = self.draw_order.as_ref().map(|b| b.array.len()); + let len_magnification_filter = self.magnification_filter.as_ref().map(|b| b.array.len()); let len = None .or(len_blob) .or(len_media_type) .or(len_opacity) .or(len_draw_order) + .or(len_magnification_filter) .unwrap_or(0); self.columns(std::iter::repeat_n(1, len)) } @@ -433,6 +470,37 @@ impl EncodedImage { self.draw_order = try_serialize_field(Self::descriptor_draw_order(), draw_order); self } + + /// Optional filter used when a texel is magnified (displayed larger than a screen pixel). 
+ #[inline] + pub fn with_magnification_filter( + mut self, + magnification_filter: impl Into, + ) -> Self { + self.magnification_filter = try_serialize_field( + Self::descriptor_magnification_filter(), + [magnification_filter], + ); + self + } + + /// This method makes it possible to pack multiple [`crate::components::MagnificationFilter`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. [`Self::with_magnification_filter`] should + /// be used when logging a single row's worth of data. + #[inline] + pub fn with_many_magnification_filter( + mut self, + magnification_filter: impl IntoIterator< + Item = impl Into, + >, + ) -> Self { + self.magnification_filter = try_serialize_field( + Self::descriptor_magnification_filter(), + magnification_filter, + ); + self + } } impl ::re_byte_size::SizeBytes for EncodedImage { @@ -442,5 +510,6 @@ impl ::re_byte_size::SizeBytes for EncodedImage { + self.media_type.heap_size_bytes() + self.opacity.heap_size_bytes() + self.draw_order.heap_size_bytes() + + self.magnification_filter.heap_size_bytes() } } diff --git a/crates/store/re_sdk_types/src/archetypes/grid_map.rs b/crates/store/re_sdk_types/src/archetypes/grid_map.rs new file mode 100644 index 000000000000..dc1cc8cc1f07 --- /dev/null +++ b/crates/store/re_sdk_types/src/archetypes/grid_map.rs @@ -0,0 +1,774 @@ +// DO NOT EDIT! This file was auto-generated by crates/build/re_types_builder/src/codegen/rust/api.rs +// Based on "crates/store/re_sdk_types/definitions/rerun/archetypes/grid_map.fbs". 
+ +#![allow(unused_braces)] +#![allow(unused_imports)] +#![allow(unused_parens)] +#![allow(clippy::allow_attributes)] +#![allow(clippy::clone_on_copy)] +#![allow(clippy::cloned_instead_of_copied)] +#![allow(clippy::map_flatten)] +#![allow(clippy::needless_question_mark)] +#![allow(clippy::new_without_default)] +#![allow(clippy::redundant_closure)] +#![allow(clippy::too_many_arguments)] +#![allow(clippy::too_many_lines)] +#![allow(clippy::wildcard_imports)] + +use ::re_types_core::SerializationResult; +use ::re_types_core::try_serialize_field; +use ::re_types_core::{ComponentBatch as _, SerializedComponentBatch}; +use ::re_types_core::{ComponentDescriptor, ComponentType}; +use ::re_types_core::{DeserializationError, DeserializationResult}; + +/// **Archetype**: A 2D grid map stored as raster data in an image buffer, with a cell size in scene units and pose. +/// +/// This archetype is intended for robotics applications like occupancy maps or navigation costmaps. +/// +/// ⚠️ **This type is _unstable_ and may change significantly in a way that the data won't be backwards compatible.** +/// +/// ## Example +/// +/// ### Simple occupancy grid map +/// ```ignore +/// fn main() -> Result<(), Box> { +/// let rec = rerun::RecordingStreamBuilder::new("rerun_example_grid_map").spawn()?; +/// +/// let width: usize = 64; +/// let height: usize = 64; +/// let cell_size: f32 = 0.1; +/// +/// // Create a synthetic image with ROS `nav_msgs/OccupancyGrid` cell value conventions: +/// // -1 (255) unknown, 0 free, 100 occupied. 
+/// let mut grid = vec![255u8; width * height]; +/// for y in 8..56 { +/// for x in 8..56 { +/// grid[y * width + x] = 0; +/// } +/// } +/// for y in 20..44 { +/// for x in 20..44 { +/// grid[y * width + x] = 100; +/// } +/// } +/// +/// rec.log( +/// "world/map", +/// &rerun::GridMap::new( +/// grid, +/// rerun::components::ImageFormat::from_color_model( +/// [width as u32, height as u32], +/// rerun::ColorModel::L, +/// rerun::ChannelDatatype::U8, +/// ), +/// cell_size, +/// ) +/// .with_translation([ +/// -(width as f32) * cell_size / 2.0, +/// -(height as f32) * cell_size / 2.0, +/// 0.0, +/// ]) +/// .with_colormap(rerun::components::Colormap::RvizMap), +/// )?; +/// +/// Ok(()) +/// } +/// ``` +#[derive(Clone, Debug, PartialEq, Default)] +pub struct GridMap { + /// The raw grid data. + pub data: Option, + + /// The format of the grid's image data. + pub format: Option, + + /// The scene unit size of a single grid cell (e.g. m / pixel). + pub cell_size: Option, + + /// Translation of the lower-left corner of the grid map in space. + /// + /// Together with [`components::RotationAxisAngle`][crate::components::RotationAxisAngle] or [`components::RotationQuat`][crate::components::RotationQuat], this defines the pose of the + /// lower-left image corner relative to the map's parent coordinate frame. + /// + /// If not set, the lower-left image corner is placed at origin of the map's parent coordinate frame. + pub translation: Option, + + /// Rotation of the lower-left corner of the grid map in space via axis + angle. + /// + /// Together with [`components::Translation3D`][crate::components::Translation3D], this defines the pose of the + /// lower-left image corner relative to the map's parent coordinate frame. + /// + /// Note: either this or [`components::RotationQuat`][crate::components::RotationQuat] can be set to specify the grid map's rotation, but not both. 
+ /// If both this and [`components::RotationQuat`][crate::components::RotationQuat] are set, this is ignored in favor of the quaternion. + pub rotation_axis_angle: Option, + + /// Rotation of the lower-left corner of the grid map in space via quaternion. + /// + /// Together with [`components::Translation3D`][crate::components::Translation3D], this defines the pose of the + /// lower-left image corner relative to the map's parent coordinate frame. + pub quaternion: Option, + + /// Opacity of the grid map texture after all image decoding and colormap application. + /// + /// Defaults to 1.0 (fully opaque). + pub opacity: Option, + + /// Optional draw order for layering multiple grid maps that overlap in space. + /// + /// Higher values are drawn on top of lower values. + pub draw_order: Option, + + /// Colormap to use for rendering single-channel grid maps. + /// + /// If not set, the grid map is shown using the underlying [`components::ImageFormat`][crate::components::ImageFormat] + /// interpretation. + pub colormap: Option, +} + +impl GridMap { + /// Returns the [`ComponentDescriptor`] for [`Self::data`]. + /// + /// The corresponding component is [`crate::components::ImageBuffer`]. + #[inline] + pub fn descriptor_data() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.GridMap".into()), + component: "GridMap:data".into(), + component_type: Some("rerun.components.ImageBuffer".into()), + } + } + + /// Returns the [`ComponentDescriptor`] for [`Self::format`]. + /// + /// The corresponding component is [`crate::components::ImageFormat`]. + #[inline] + pub fn descriptor_format() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.GridMap".into()), + component: "GridMap:format".into(), + component_type: Some("rerun.components.ImageFormat".into()), + } + } + + /// Returns the [`ComponentDescriptor`] for [`Self::cell_size`]. + /// + /// The corresponding component is [`crate::components::CellSize`]. 
+ #[inline] + pub fn descriptor_cell_size() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.GridMap".into()), + component: "GridMap:cell_size".into(), + component_type: Some("rerun.components.CellSize".into()), + } + } + + /// Returns the [`ComponentDescriptor`] for [`Self::translation`]. + /// + /// The corresponding component is [`crate::components::Translation3D`]. + #[inline] + pub fn descriptor_translation() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.GridMap".into()), + component: "GridMap:translation".into(), + component_type: Some("rerun.components.Translation3D".into()), + } + } + + /// Returns the [`ComponentDescriptor`] for [`Self::rotation_axis_angle`]. + /// + /// The corresponding component is [`crate::components::RotationAxisAngle`]. + #[inline] + pub fn descriptor_rotation_axis_angle() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.GridMap".into()), + component: "GridMap:rotation_axis_angle".into(), + component_type: Some("rerun.components.RotationAxisAngle".into()), + } + } + + /// Returns the [`ComponentDescriptor`] for [`Self::quaternion`]. + /// + /// The corresponding component is [`crate::components::RotationQuat`]. + #[inline] + pub fn descriptor_quaternion() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.GridMap".into()), + component: "GridMap:quaternion".into(), + component_type: Some("rerun.components.RotationQuat".into()), + } + } + + /// Returns the [`ComponentDescriptor`] for [`Self::opacity`]. + /// + /// The corresponding component is [`crate::components::Opacity`]. + #[inline] + pub fn descriptor_opacity() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.GridMap".into()), + component: "GridMap:opacity".into(), + component_type: Some("rerun.components.Opacity".into()), + } + } + + /// Returns the [`ComponentDescriptor`] for [`Self::draw_order`]. 
+ /// + /// The corresponding component is [`crate::components::DrawOrder`]. + #[inline] + pub fn descriptor_draw_order() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.GridMap".into()), + component: "GridMap:draw_order".into(), + component_type: Some("rerun.components.DrawOrder".into()), + } + } + + /// Returns the [`ComponentDescriptor`] for [`Self::colormap`]. + /// + /// The corresponding component is [`crate::components::Colormap`]. + #[inline] + pub fn descriptor_colormap() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.GridMap".into()), + component: "GridMap:colormap".into(), + component_type: Some("rerun.components.Colormap".into()), + } + } +} + +static REQUIRED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 3usize]> = + std::sync::LazyLock::new(|| { + [ + GridMap::descriptor_data(), + GridMap::descriptor_format(), + GridMap::descriptor_cell_size(), + ] + }); + +static RECOMMENDED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 0usize]> = + std::sync::LazyLock::new(|| []); + +static OPTIONAL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 6usize]> = + std::sync::LazyLock::new(|| { + [ + GridMap::descriptor_translation(), + GridMap::descriptor_rotation_axis_angle(), + GridMap::descriptor_quaternion(), + GridMap::descriptor_opacity(), + GridMap::descriptor_draw_order(), + GridMap::descriptor_colormap(), + ] + }); + +static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 9usize]> = + std::sync::LazyLock::new(|| { + [ + GridMap::descriptor_data(), + GridMap::descriptor_format(), + GridMap::descriptor_cell_size(), + GridMap::descriptor_translation(), + GridMap::descriptor_rotation_axis_angle(), + GridMap::descriptor_quaternion(), + GridMap::descriptor_opacity(), + GridMap::descriptor_draw_order(), + GridMap::descriptor_colormap(), + ] + }); + +impl GridMap { + /// The total number of components in the archetype: 3 required, 0 recommended, 6 optional + pub 
const NUM_COMPONENTS: usize = 9usize; +} + +impl ::re_types_core::Archetype for GridMap { + #[inline] + fn name() -> ::re_types_core::ArchetypeName { + "rerun.archetypes.GridMap".into() + } + + #[inline] + fn display_name() -> &'static str { + "Grid map" + } + + #[inline] + fn required_components() -> ::std::borrow::Cow<'static, [ComponentDescriptor]> { + REQUIRED_COMPONENTS.as_slice().into() + } + + #[inline] + fn recommended_components() -> ::std::borrow::Cow<'static, [ComponentDescriptor]> { + RECOMMENDED_COMPONENTS.as_slice().into() + } + + #[inline] + fn optional_components() -> ::std::borrow::Cow<'static, [ComponentDescriptor]> { + OPTIONAL_COMPONENTS.as_slice().into() + } + + #[inline] + fn all_components() -> ::std::borrow::Cow<'static, [ComponentDescriptor]> { + ALL_COMPONENTS.as_slice().into() + } + + #[inline] + fn from_arrow_components( + arrow_data: impl IntoIterator, + ) -> DeserializationResult { + re_tracing::profile_function!(); + use ::re_types_core::{Loggable as _, ResultExt as _}; + let arrays_by_descr: ::nohash_hasher::IntMap<_, _> = arrow_data.into_iter().collect(); + let data = arrays_by_descr + .get(&Self::descriptor_data()) + .map(|array| SerializedComponentBatch::new(array.clone(), Self::descriptor_data())); + let format = arrays_by_descr + .get(&Self::descriptor_format()) + .map(|array| SerializedComponentBatch::new(array.clone(), Self::descriptor_format())); + let cell_size = arrays_by_descr + .get(&Self::descriptor_cell_size()) + .map(|array| { + SerializedComponentBatch::new(array.clone(), Self::descriptor_cell_size()) + }); + let translation = arrays_by_descr + .get(&Self::descriptor_translation()) + .map(|array| { + SerializedComponentBatch::new(array.clone(), Self::descriptor_translation()) + }); + let rotation_axis_angle = arrays_by_descr + .get(&Self::descriptor_rotation_axis_angle()) + .map(|array| { + SerializedComponentBatch::new(array.clone(), Self::descriptor_rotation_axis_angle()) + }); + let quaternion = arrays_by_descr + 
.get(&Self::descriptor_quaternion()) + .map(|array| { + SerializedComponentBatch::new(array.clone(), Self::descriptor_quaternion()) + }); + let opacity = arrays_by_descr + .get(&Self::descriptor_opacity()) + .map(|array| SerializedComponentBatch::new(array.clone(), Self::descriptor_opacity())); + let draw_order = arrays_by_descr + .get(&Self::descriptor_draw_order()) + .map(|array| { + SerializedComponentBatch::new(array.clone(), Self::descriptor_draw_order()) + }); + let colormap = arrays_by_descr + .get(&Self::descriptor_colormap()) + .map(|array| SerializedComponentBatch::new(array.clone(), Self::descriptor_colormap())); + Ok(Self { + data, + format, + cell_size, + translation, + rotation_axis_angle, + quaternion, + opacity, + draw_order, + colormap, + }) + } +} + +impl ::re_types_core::AsComponents for GridMap { + #[inline] + fn as_serialized_batches(&self) -> Vec { + use ::re_types_core::Archetype as _; + [ + self.data.clone(), + self.format.clone(), + self.cell_size.clone(), + self.translation.clone(), + self.rotation_axis_angle.clone(), + self.quaternion.clone(), + self.opacity.clone(), + self.draw_order.clone(), + self.colormap.clone(), + ] + .into_iter() + .flatten() + .collect() + } +} + +impl ::re_types_core::ArchetypeReflectionMarker for GridMap {} + +impl GridMap { + /// Create a new `GridMap`. + #[inline] + pub fn new( + data: impl Into, + format: impl Into, + cell_size: impl Into, + ) -> Self { + Self { + data: try_serialize_field(Self::descriptor_data(), [data]), + format: try_serialize_field(Self::descriptor_format(), [format]), + cell_size: try_serialize_field(Self::descriptor_cell_size(), [cell_size]), + translation: None, + rotation_axis_angle: None, + quaternion: None, + opacity: None, + draw_order: None, + colormap: None, + } + } + + /// Update only some specific fields of a `GridMap`. + #[inline] + pub fn update_fields() -> Self { + Self::default() + } + + /// Clear all the fields of a `GridMap`. 
+ #[inline] + pub fn clear_fields() -> Self { + use ::re_types_core::Loggable as _; + Self { + data: Some(SerializedComponentBatch::new( + crate::components::ImageBuffer::arrow_empty(), + Self::descriptor_data(), + )), + format: Some(SerializedComponentBatch::new( + crate::components::ImageFormat::arrow_empty(), + Self::descriptor_format(), + )), + cell_size: Some(SerializedComponentBatch::new( + crate::components::CellSize::arrow_empty(), + Self::descriptor_cell_size(), + )), + translation: Some(SerializedComponentBatch::new( + crate::components::Translation3D::arrow_empty(), + Self::descriptor_translation(), + )), + rotation_axis_angle: Some(SerializedComponentBatch::new( + crate::components::RotationAxisAngle::arrow_empty(), + Self::descriptor_rotation_axis_angle(), + )), + quaternion: Some(SerializedComponentBatch::new( + crate::components::RotationQuat::arrow_empty(), + Self::descriptor_quaternion(), + )), + opacity: Some(SerializedComponentBatch::new( + crate::components::Opacity::arrow_empty(), + Self::descriptor_opacity(), + )), + draw_order: Some(SerializedComponentBatch::new( + crate::components::DrawOrder::arrow_empty(), + Self::descriptor_draw_order(), + )), + colormap: Some(SerializedComponentBatch::new( + crate::components::Colormap::arrow_empty(), + Self::descriptor_colormap(), + )), + } + } + + /// Partitions the component data into multiple sub-batches. + /// + /// Specifically, this transforms the existing [`SerializedComponentBatch`]es data into [`SerializedComponentColumn`]s + /// instead, via [`SerializedComponentBatch::partitioned`]. + /// + /// This makes it possible to use `RecordingStream::send_columns` to send columnar data directly into Rerun. + /// + /// The specified `lengths` must sum to the total length of the component batch. 
+ /// + /// [`SerializedComponentColumn`]: [::re_types_core::SerializedComponentColumn] + #[inline] + pub fn columns( + self, + _lengths: I, + ) -> SerializationResult> + where + I: IntoIterator + Clone, + { + let columns = [ + self.data + .map(|data| data.partitioned(_lengths.clone())) + .transpose()?, + self.format + .map(|format| format.partitioned(_lengths.clone())) + .transpose()?, + self.cell_size + .map(|cell_size| cell_size.partitioned(_lengths.clone())) + .transpose()?, + self.translation + .map(|translation| translation.partitioned(_lengths.clone())) + .transpose()?, + self.rotation_axis_angle + .map(|rotation_axis_angle| rotation_axis_angle.partitioned(_lengths.clone())) + .transpose()?, + self.quaternion + .map(|quaternion| quaternion.partitioned(_lengths.clone())) + .transpose()?, + self.opacity + .map(|opacity| opacity.partitioned(_lengths.clone())) + .transpose()?, + self.draw_order + .map(|draw_order| draw_order.partitioned(_lengths.clone())) + .transpose()?, + self.colormap + .map(|colormap| colormap.partitioned(_lengths.clone())) + .transpose()?, + ]; + Ok(columns.into_iter().flatten()) + } + + /// Helper to partition the component data into unit-length sub-batches. + /// + /// This is semantically similar to calling [`Self::columns`] with `std::iter::take(1).repeat(n)`, + /// where `n` is automatically guessed. 
+ #[inline] + pub fn columns_of_unit_batches( + self, + ) -> SerializationResult> { + let len_data = self.data.as_ref().map(|b| b.array.len()); + let len_format = self.format.as_ref().map(|b| b.array.len()); + let len_cell_size = self.cell_size.as_ref().map(|b| b.array.len()); + let len_translation = self.translation.as_ref().map(|b| b.array.len()); + let len_rotation_axis_angle = self.rotation_axis_angle.as_ref().map(|b| b.array.len()); + let len_quaternion = self.quaternion.as_ref().map(|b| b.array.len()); + let len_opacity = self.opacity.as_ref().map(|b| b.array.len()); + let len_draw_order = self.draw_order.as_ref().map(|b| b.array.len()); + let len_colormap = self.colormap.as_ref().map(|b| b.array.len()); + let len = None + .or(len_data) + .or(len_format) + .or(len_cell_size) + .or(len_translation) + .or(len_rotation_axis_angle) + .or(len_quaternion) + .or(len_opacity) + .or(len_draw_order) + .or(len_colormap) + .unwrap_or(0); + self.columns(std::iter::repeat_n(1, len)) + } + + /// The raw grid data. + #[inline] + pub fn with_data(mut self, data: impl Into) -> Self { + self.data = try_serialize_field(Self::descriptor_data(), [data]); + self + } + + /// This method makes it possible to pack multiple [`crate::components::ImageBuffer`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. [`Self::with_data`] should + /// be used when logging a single row's worth of data. + #[inline] + pub fn with_many_data( + mut self, + data: impl IntoIterator>, + ) -> Self { + self.data = try_serialize_field(Self::descriptor_data(), data); + self + } + + /// The format of the grid's image data. + #[inline] + pub fn with_format(mut self, format: impl Into) -> Self { + self.format = try_serialize_field(Self::descriptor_format(), [format]); + self + } + + /// This method makes it possible to pack multiple [`crate::components::ImageFormat`] in a single component batch. 
+ /// + /// This only makes sense when used in conjunction with [`Self::columns`]. [`Self::with_format`] should + /// be used when logging a single row's worth of data. + #[inline] + pub fn with_many_format( + mut self, + format: impl IntoIterator>, + ) -> Self { + self.format = try_serialize_field(Self::descriptor_format(), format); + self + } + + /// The scene unit size of a single grid cell (e.g. m / pixel). + #[inline] + pub fn with_cell_size(mut self, cell_size: impl Into) -> Self { + self.cell_size = try_serialize_field(Self::descriptor_cell_size(), [cell_size]); + self + } + + /// This method makes it possible to pack multiple [`crate::components::CellSize`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. [`Self::with_cell_size`] should + /// be used when logging a single row's worth of data. + #[inline] + pub fn with_many_cell_size( + mut self, + cell_size: impl IntoIterator>, + ) -> Self { + self.cell_size = try_serialize_field(Self::descriptor_cell_size(), cell_size); + self + } + + /// Translation of the lower-left corner of the grid map in space. + /// + /// Together with [`components::RotationAxisAngle`][crate::components::RotationAxisAngle] or [`components::RotationQuat`][crate::components::RotationQuat], this defines the pose of the + /// lower-left image corner relative to the map's parent coordinate frame. + /// + /// If not set, the lower-left image corner is placed at origin of the map's parent coordinate frame. + #[inline] + pub fn with_translation( + mut self, + translation: impl Into, + ) -> Self { + self.translation = try_serialize_field(Self::descriptor_translation(), [translation]); + self + } + + /// This method makes it possible to pack multiple [`crate::components::Translation3D`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. 
[`Self::with_translation`] should + /// be used when logging a single row's worth of data. + #[inline] + pub fn with_many_translation( + mut self, + translation: impl IntoIterator>, + ) -> Self { + self.translation = try_serialize_field(Self::descriptor_translation(), translation); + self + } + + /// Rotation of the lower-left corner of the grid map in space via axis + angle. + /// + /// Together with [`components::Translation3D`][crate::components::Translation3D], this defines the pose of the + /// lower-left image corner relative to the map's parent coordinate frame. + /// + /// Note: either this or [`components::RotationQuat`][crate::components::RotationQuat] can be set to specify the grid map's rotation, but not both. + /// If both this and [`components::RotationQuat`][crate::components::RotationQuat] are set, this is ignored in favor of the quaternion. + #[inline] + pub fn with_rotation_axis_angle( + mut self, + rotation_axis_angle: impl Into, + ) -> Self { + self.rotation_axis_angle = try_serialize_field( + Self::descriptor_rotation_axis_angle(), + [rotation_axis_angle], + ); + self + } + + /// This method makes it possible to pack multiple [`crate::components::RotationAxisAngle`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. [`Self::with_rotation_axis_angle`] should + /// be used when logging a single row's worth of data. + #[inline] + pub fn with_many_rotation_axis_angle( + mut self, + rotation_axis_angle: impl IntoIterator>, + ) -> Self { + self.rotation_axis_angle = + try_serialize_field(Self::descriptor_rotation_axis_angle(), rotation_axis_angle); + self + } + + /// Rotation of the lower-left corner of the grid map in space via quaternion. + /// + /// Together with [`components::Translation3D`][crate::components::Translation3D], this defines the pose of the + /// lower-left image corner relative to the map's parent coordinate frame. 
+ #[inline] + pub fn with_quaternion( + mut self, + quaternion: impl Into, + ) -> Self { + self.quaternion = try_serialize_field(Self::descriptor_quaternion(), [quaternion]); + self + } + + /// This method makes it possible to pack multiple [`crate::components::RotationQuat`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. [`Self::with_quaternion`] should + /// be used when logging a single row's worth of data. + #[inline] + pub fn with_many_quaternion( + mut self, + quaternion: impl IntoIterator>, + ) -> Self { + self.quaternion = try_serialize_field(Self::descriptor_quaternion(), quaternion); + self + } + + /// Opacity of the grid map texture after all image decoding and colormap application. + /// + /// Defaults to 1.0 (fully opaque). + #[inline] + pub fn with_opacity(mut self, opacity: impl Into) -> Self { + self.opacity = try_serialize_field(Self::descriptor_opacity(), [opacity]); + self + } + + /// This method makes it possible to pack multiple [`crate::components::Opacity`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. [`Self::with_opacity`] should + /// be used when logging a single row's worth of data. + #[inline] + pub fn with_many_opacity( + mut self, + opacity: impl IntoIterator>, + ) -> Self { + self.opacity = try_serialize_field(Self::descriptor_opacity(), opacity); + self + } + + /// Optional draw order for layering multiple grid maps that overlap in space. + /// + /// Higher values are drawn on top of lower values. + #[inline] + pub fn with_draw_order(mut self, draw_order: impl Into) -> Self { + self.draw_order = try_serialize_field(Self::descriptor_draw_order(), [draw_order]); + self + } + + /// This method makes it possible to pack multiple [`crate::components::DrawOrder`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. 
[`Self::with_draw_order`] should + /// be used when logging a single row's worth of data. + #[inline] + pub fn with_many_draw_order( + mut self, + draw_order: impl IntoIterator>, + ) -> Self { + self.draw_order = try_serialize_field(Self::descriptor_draw_order(), draw_order); + self + } + + /// Colormap to use for rendering single-channel grid maps. + /// + /// If not set, the grid map is shown using the underlying [`components::ImageFormat`][crate::components::ImageFormat] + /// interpretation. + #[inline] + pub fn with_colormap(mut self, colormap: impl Into) -> Self { + self.colormap = try_serialize_field(Self::descriptor_colormap(), [colormap]); + self + } + + /// This method makes it possible to pack multiple [`crate::components::Colormap`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. [`Self::with_colormap`] should + /// be used when logging a single row's worth of data. + #[inline] + pub fn with_many_colormap( + mut self, + colormap: impl IntoIterator>, + ) -> Self { + self.colormap = try_serialize_field(Self::descriptor_colormap(), colormap); + self + } +} + +impl ::re_byte_size::SizeBytes for GridMap { + #[inline] + fn heap_size_bytes(&self) -> u64 { + self.data.heap_size_bytes() + + self.format.heap_size_bytes() + + self.cell_size.heap_size_bytes() + + self.translation.heap_size_bytes() + + self.rotation_axis_angle.heap_size_bytes() + + self.quaternion.heap_size_bytes() + + self.opacity.heap_size_bytes() + + self.draw_order.heap_size_bytes() + + self.colormap.heap_size_bytes() + } +} diff --git a/crates/store/re_sdk_types/src/archetypes/image.rs b/crates/store/re_sdk_types/src/archetypes/image.rs index 96e830a050ba..e3b048ddef5b 100644 --- a/crates/store/re_sdk_types/src/archetypes/image.rs +++ b/crates/store/re_sdk_types/src/archetypes/image.rs @@ -151,6 +151,9 @@ pub struct Image { /// Objects with higher values are drawn on top of those with lower values. /// Defaults to `-10.0`. 
pub draw_order: Option, + + /// Optional filter used when a texel is magnified (displayed larger than a screen pixel). + pub magnification_filter: Option, } impl Image { @@ -201,6 +204,18 @@ impl Image { component_type: Some("rerun.components.DrawOrder".into()), } } + + /// Returns the [`ComponentDescriptor`] for [`Self::magnification_filter`]. + /// + /// The corresponding component is [`crate::components::MagnificationFilter`]. + #[inline] + pub fn descriptor_magnification_filter() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.Image".into()), + component: "Image:magnification_filter".into(), + component_type: Some("rerun.components.MagnificationFilter".into()), + } + } } static REQUIRED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 2usize]> = @@ -209,22 +224,29 @@ static REQUIRED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 2usize]> = static RECOMMENDED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 0usize]> = std::sync::LazyLock::new(|| []); -static OPTIONAL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 2usize]> = - std::sync::LazyLock::new(|| [Image::descriptor_opacity(), Image::descriptor_draw_order()]); +static OPTIONAL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 3usize]> = + std::sync::LazyLock::new(|| { + [ + Image::descriptor_opacity(), + Image::descriptor_draw_order(), + Image::descriptor_magnification_filter(), + ] + }); -static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 4usize]> = +static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 5usize]> = std::sync::LazyLock::new(|| { [ Image::descriptor_buffer(), Image::descriptor_format(), Image::descriptor_opacity(), Image::descriptor_draw_order(), + Image::descriptor_magnification_filter(), ] }); impl Image { - /// The total number of components in the archetype: 2 required, 0 recommended, 2 optional - pub const NUM_COMPONENTS: usize = 4usize; + /// The total number of components in the archetype: 2 
required, 0 recommended, 3 optional + pub const NUM_COMPONENTS: usize = 5usize; } impl ::re_types_core::Archetype for Image { @@ -279,11 +301,20 @@ impl ::re_types_core::Archetype for Image { .map(|array| { SerializedComponentBatch::new(array.clone(), Self::descriptor_draw_order()) }); + let magnification_filter = arrays_by_descr + .get(&Self::descriptor_magnification_filter()) + .map(|array| { + SerializedComponentBatch::new( + array.clone(), + Self::descriptor_magnification_filter(), + ) + }); Ok(Self { buffer, format, opacity, draw_order, + magnification_filter, }) } } @@ -297,6 +328,7 @@ impl ::re_types_core::AsComponents for Image { self.format.clone(), self.opacity.clone(), self.draw_order.clone(), + self.magnification_filter.clone(), ] .into_iter() .flatten() @@ -325,6 +357,7 @@ impl Image { format: try_serialize_field(Self::descriptor_format(), [format]), opacity: None, draw_order: None, + magnification_filter: None, } } @@ -355,6 +388,10 @@ impl Image { crate::components::DrawOrder::arrow_empty(), Self::descriptor_draw_order(), )), + magnification_filter: Some(SerializedComponentBatch::new( + crate::components::MagnificationFilter::arrow_empty(), + Self::descriptor_magnification_filter(), + )), } } @@ -389,6 +426,9 @@ impl Image { self.draw_order .map(|draw_order| draw_order.partitioned(_lengths.clone())) .transpose()?, + self.magnification_filter + .map(|magnification_filter| magnification_filter.partitioned(_lengths.clone())) + .transpose()?, ]; Ok(columns.into_iter().flatten()) } @@ -405,11 +445,13 @@ impl Image { let len_format = self.format.as_ref().map(|b| b.array.len()); let len_opacity = self.opacity.as_ref().map(|b| b.array.len()); let len_draw_order = self.draw_order.as_ref().map(|b| b.array.len()); + let len_magnification_filter = self.magnification_filter.as_ref().map(|b| b.array.len()); let len = None .or(len_buffer) .or(len_format) .or(len_opacity) .or(len_draw_order) + .or(len_magnification_filter) .unwrap_or(0); 
self.columns(std::iter::repeat_n(1, len)) } @@ -498,6 +540,37 @@ impl Image { self.draw_order = try_serialize_field(Self::descriptor_draw_order(), draw_order); self } + + /// Optional filter used when a texel is magnified (displayed larger than a screen pixel). + #[inline] + pub fn with_magnification_filter( + mut self, + magnification_filter: impl Into, + ) -> Self { + self.magnification_filter = try_serialize_field( + Self::descriptor_magnification_filter(), + [magnification_filter], + ); + self + } + + /// This method makes it possible to pack multiple [`crate::components::MagnificationFilter`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. [`Self::with_magnification_filter`] should + /// be used when logging a single row's worth of data. + #[inline] + pub fn with_many_magnification_filter( + mut self, + magnification_filter: impl IntoIterator< + Item = impl Into, + >, + ) -> Self { + self.magnification_filter = try_serialize_field( + Self::descriptor_magnification_filter(), + magnification_filter, + ); + self + } } impl ::re_byte_size::SizeBytes for Image { @@ -507,5 +580,6 @@ impl ::re_byte_size::SizeBytes for Image { + self.format.heap_size_bytes() + self.opacity.heap_size_bytes() + self.draw_order.heap_size_bytes() + + self.magnification_filter.heap_size_bytes() } } diff --git a/crates/store/re_sdk_types/src/archetypes/mesh3d.rs b/crates/store/re_sdk_types/src/archetypes/mesh3d.rs index bde0e1406146..bc7b44ec682e 100644 --- a/crates/store/re_sdk_types/src/archetypes/mesh3d.rs +++ b/crates/store/re_sdk_types/src/archetypes/mesh3d.rs @@ -28,8 +28,9 @@ use ::re_types_core::{DeserializationError, DeserializationResult}; /// If there are multiple [`archetypes::InstancePoses3D`][crate::archetypes::InstancePoses3D] instances logged to the same entity as a mesh, /// an instance of the mesh will be drawn for each transform. /// -/// The viewer draws meshes always two-sided. 
However, for transparency ordering -/// front faces are assumed to those with counter clockwise triangle winding order (this is the same as in the GLTF specification). +/// For transparency ordering, as well as back face culling (disabled by default), +/// front faces are assumed to be those with counter clockwise triangle winding order +/// (this is the same as in the GLTF specification). /// /// ## Examples /// @@ -138,6 +139,11 @@ pub struct Mesh3D { /// Alpha channel governs the overall mesh transparency. pub albedo_factor: Option, + /// Determines which faces of the mesh are rendered. + /// + /// The default is [`components::MeshFaceRendering::DoubleSided`][crate::components::MeshFaceRendering::DoubleSided], meaning both front and back faces are shown. + pub face_rendering: Option, + /// Optional albedo texture. /// /// Used with the [`components::Texcoord2D`][crate::components::Texcoord2D] of the mesh. @@ -230,6 +236,18 @@ impl Mesh3D { } } + /// Returns the [`ComponentDescriptor`] for [`Self::face_rendering`]. + /// + /// The corresponding component is [`crate::components::MeshFaceRendering`]. + #[inline] + pub fn descriptor_face_rendering() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.Mesh3D".into()), + component: "Mesh3D:face_rendering".into(), + component_type: Some("rerun.components.MeshFaceRendering".into()), + } + } + /// Returns the [`ComponentDescriptor`] for [`Self::albedo_texture_buffer`]. /// /// The corresponding component is [`crate::components::ImageBuffer`]. 
@@ -278,19 +296,20 @@ static RECOMMENDED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 2usize] ] }); -static OPTIONAL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 6usize]> = +static OPTIONAL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 7usize]> = std::sync::LazyLock::new(|| { [ Mesh3D::descriptor_vertex_colors(), Mesh3D::descriptor_vertex_texcoords(), Mesh3D::descriptor_albedo_factor(), + Mesh3D::descriptor_face_rendering(), Mesh3D::descriptor_albedo_texture_buffer(), Mesh3D::descriptor_albedo_texture_format(), Mesh3D::descriptor_class_ids(), ] }); -static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 9usize]> = +static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 10usize]> = std::sync::LazyLock::new(|| { [ Mesh3D::descriptor_vertex_positions(), @@ -299,6 +318,7 @@ static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 9usize]> = Mesh3D::descriptor_vertex_colors(), Mesh3D::descriptor_vertex_texcoords(), Mesh3D::descriptor_albedo_factor(), + Mesh3D::descriptor_face_rendering(), Mesh3D::descriptor_albedo_texture_buffer(), Mesh3D::descriptor_albedo_texture_format(), Mesh3D::descriptor_class_ids(), @@ -306,8 +326,8 @@ static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 9usize]> = }); impl Mesh3D { - /// The total number of components in the archetype: 1 required, 2 recommended, 6 optional - pub const NUM_COMPONENTS: usize = 9usize; + /// The total number of components in the archetype: 1 required, 2 recommended, 7 optional + pub const NUM_COMPONENTS: usize = 10usize; } impl ::re_types_core::Archetype for Mesh3D { @@ -378,6 +398,11 @@ impl ::re_types_core::Archetype for Mesh3D { .map(|array| { SerializedComponentBatch::new(array.clone(), Self::descriptor_albedo_factor()) }); + let face_rendering = arrays_by_descr + .get(&Self::descriptor_face_rendering()) + .map(|array| { + SerializedComponentBatch::new(array.clone(), Self::descriptor_face_rendering()) + }); let albedo_texture_buffer = 
arrays_by_descr .get(&Self::descriptor_albedo_texture_buffer()) .map(|array| { @@ -406,6 +431,7 @@ impl ::re_types_core::Archetype for Mesh3D { vertex_colors, vertex_texcoords, albedo_factor, + face_rendering, albedo_texture_buffer, albedo_texture_format, class_ids, @@ -424,6 +450,7 @@ impl ::re_types_core::AsComponents for Mesh3D { self.vertex_colors.clone(), self.vertex_texcoords.clone(), self.albedo_factor.clone(), + self.face_rendering.clone(), self.albedo_texture_buffer.clone(), self.albedo_texture_format.clone(), self.class_ids.clone(), @@ -459,6 +486,7 @@ impl Mesh3D { vertex_colors: None, vertex_texcoords: None, albedo_factor: None, + face_rendering: None, albedo_texture_buffer: None, albedo_texture_format: None, class_ids: None, @@ -500,6 +528,10 @@ impl Mesh3D { crate::components::AlbedoFactor::arrow_empty(), Self::descriptor_albedo_factor(), )), + face_rendering: Some(SerializedComponentBatch::new( + crate::components::MeshFaceRendering::arrow_empty(), + Self::descriptor_face_rendering(), + )), albedo_texture_buffer: Some(SerializedComponentBatch::new( crate::components::ImageBuffer::arrow_empty(), Self::descriptor_albedo_texture_buffer(), @@ -552,6 +584,9 @@ impl Mesh3D { self.albedo_factor .map(|albedo_factor| albedo_factor.partitioned(_lengths.clone())) .transpose()?, + self.face_rendering + .map(|face_rendering| face_rendering.partitioned(_lengths.clone())) + .transpose()?, self.albedo_texture_buffer .map(|albedo_texture_buffer| albedo_texture_buffer.partitioned(_lengths.clone())) .transpose()?, @@ -579,6 +614,7 @@ impl Mesh3D { let len_vertex_colors = self.vertex_colors.as_ref().map(|b| b.array.len()); let len_vertex_texcoords = self.vertex_texcoords.as_ref().map(|b| b.array.len()); let len_albedo_factor = self.albedo_factor.as_ref().map(|b| b.array.len()); + let len_face_rendering = self.face_rendering.as_ref().map(|b| b.array.len()); let len_albedo_texture_buffer = self.albedo_texture_buffer.as_ref().map(|b| b.array.len()); let 
len_albedo_texture_format = self.albedo_texture_format.as_ref().map(|b| b.array.len()); let len_class_ids = self.class_ids.as_ref().map(|b| b.array.len()); @@ -589,6 +625,7 @@ impl Mesh3D { .or(len_vertex_colors) .or(len_vertex_texcoords) .or(len_albedo_factor) + .or(len_face_rendering) .or(len_albedo_texture_buffer) .or(len_albedo_texture_format) .or(len_class_ids) @@ -679,6 +716,33 @@ impl Mesh3D { self } + /// Determines which faces of the mesh are rendered. + /// + /// The default is [`components::MeshFaceRendering::DoubleSided`][crate::components::MeshFaceRendering::DoubleSided], meaning both front and back faces are shown. + #[inline] + pub fn with_face_rendering( + mut self, + face_rendering: impl Into, + ) -> Self { + self.face_rendering = + try_serialize_field(Self::descriptor_face_rendering(), [face_rendering]); + self + } + + /// This method makes it possible to pack multiple [`crate::components::MeshFaceRendering`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. [`Self::with_face_rendering`] should + /// be used when logging a single row's worth of data. + #[inline] + pub fn with_many_face_rendering( + mut self, + face_rendering: impl IntoIterator>, + ) -> Self { + self.face_rendering = + try_serialize_field(Self::descriptor_face_rendering(), face_rendering); + self + } + /// Optional albedo texture. /// /// Used with the [`components::Texcoord2D`][crate::components::Texcoord2D] of the mesh. 
@@ -766,6 +830,7 @@ impl ::re_byte_size::SizeBytes for Mesh3D { + self.vertex_colors.heap_size_bytes() + self.vertex_texcoords.heap_size_bytes() + self.albedo_factor.heap_size_bytes() + + self.face_rendering.heap_size_bytes() + self.albedo_texture_buffer.heap_size_bytes() + self.albedo_texture_format.heap_size_bytes() + self.class_ids.heap_size_bytes() diff --git a/crates/store/re_sdk_types/src/archetypes/mod.rs b/crates/store/re_sdk_types/src/archetypes/mod.rs index 4de6062810fd..ae9e557c7ab8 100644 --- a/crates/store/re_sdk_types/src/archetypes/mod.rs +++ b/crates/store/re_sdk_types/src/archetypes/mod.rs @@ -34,6 +34,7 @@ mod geo_points_ext; mod graph_edges; mod graph_edges_ext; mod graph_nodes; +mod grid_map; mod image; mod image_ext; mod instance_poses3d; @@ -57,6 +58,7 @@ mod segmentation_image; mod segmentation_image_ext; mod series_lines; mod series_points; +mod status; mod tensor; mod tensor_ext; mod text_document; @@ -89,6 +91,7 @@ pub use self::geo_line_strings::GeoLineStrings; pub use self::geo_points::GeoPoints; pub use self::graph_edges::GraphEdges; pub use self::graph_nodes::GraphNodes; +pub use self::grid_map::GridMap; pub use self::image::Image; pub use self::instance_poses3d::InstancePoses3D; pub use self::line_strips2d::LineStrips2D; @@ -106,6 +109,7 @@ pub use self::scalars::Scalars; pub use self::segmentation_image::SegmentationImage; pub use self::series_lines::SeriesLines; pub use self::series_points::SeriesPoints; +pub use self::status::Status; pub use self::tensor::Tensor; pub use self::text_document::TextDocument; pub use self::text_log::TextLog; diff --git a/crates/store/re_sdk_types/src/archetypes/status.rs b/crates/store/re_sdk_types/src/archetypes/status.rs new file mode 100644 index 000000000000..ec37bbbfc534 --- /dev/null +++ b/crates/store/re_sdk_types/src/archetypes/status.rs @@ -0,0 +1,250 @@ +// DO NOT EDIT! 
This file was auto-generated by crates/build/re_types_builder/src/codegen/rust/api.rs +// Based on "crates/store/re_sdk_types/definitions/rerun/archetypes/status.fbs". + +#![allow(unused_braces)] +#![allow(unused_imports)] +#![allow(unused_parens)] +#![allow(clippy::allow_attributes)] +#![allow(clippy::clone_on_copy)] +#![allow(clippy::cloned_instead_of_copied)] +#![allow(clippy::map_flatten)] +#![allow(clippy::needless_question_mark)] +#![allow(clippy::new_without_default)] +#![allow(clippy::redundant_closure)] +#![allow(clippy::too_many_arguments)] +#![allow(clippy::too_many_lines)] +#![allow(clippy::wildcard_imports)] + +use ::re_types_core::SerializationResult; +use ::re_types_core::try_serialize_field; +use ::re_types_core::{ComponentBatch as _, SerializedComponentBatch}; +use ::re_types_core::{ComponentDescriptor, ComponentType}; +use ::re_types_core::{DeserializationError, DeserializationResult}; + +/// **Archetype**: A status update, representing a change in the status of an entity. +/// +/// Useful for representing discrete state machines, mode transitions, or +/// status changes over time. Each logged [`archetypes::Status`][crate::archetypes::Status] marks a new status +/// at the given time. A `null` status is ignored by the Status view. +/// +/// The Status view displays these as horizontal colored lanes over time. 
+/// +/// ⚠️ **This type is _unstable_ and may change significantly in a way that the data won't be backwards compatible.** +/// +/// ## Example +/// +/// ### Status changes over time +/// ```ignore +/// fn main() -> Result<(), Box> { +/// let rec = rerun::RecordingStreamBuilder::new("rerun_example_status").spawn()?; +/// +/// rec.set_time_sequence("step", 0); +/// rec.log("door", &rerun::Status::new().with_status("open"))?; +/// +/// rec.set_time_sequence("step", 1); +/// rec.log("door", &rerun::Status::new().with_status("closed"))?; +/// +/// rec.set_time_sequence("step", 2); +/// rec.log("door", &rerun::Status::new().with_status("open"))?; +/// +/// Ok(()) +/// } +/// ``` +///

+/// +/// +/// +/// +/// +/// +/// +///
+#[derive(Clone, Debug, PartialEq, Default)] +pub struct Status { + /// The new status value. A `null` status is ignored, it can be used to partially update a multi-instance status array. + pub status: Option, +} + +impl Status { + /// Returns the [`ComponentDescriptor`] for [`Self::status`]. + /// + /// The corresponding component is [`crate::components::Text`]. + #[inline] + pub fn descriptor_status() -> ComponentDescriptor { + ComponentDescriptor { + archetype: Some("rerun.archetypes.Status".into()), + component: "Status:status".into(), + component_type: Some("rerun.components.Text".into()), + } + } +} + +static REQUIRED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 1usize]> = + std::sync::LazyLock::new(|| [Status::descriptor_status()]); + +static RECOMMENDED_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 0usize]> = + std::sync::LazyLock::new(|| []); + +static OPTIONAL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 0usize]> = + std::sync::LazyLock::new(|| []); + +static ALL_COMPONENTS: std::sync::LazyLock<[ComponentDescriptor; 1usize]> = + std::sync::LazyLock::new(|| [Status::descriptor_status()]); + +impl Status { + /// The total number of components in the archetype: 1 required, 0 recommended, 0 optional + pub const NUM_COMPONENTS: usize = 1usize; +} + +impl ::re_types_core::Archetype for Status { + #[inline] + fn name() -> ::re_types_core::ArchetypeName { + "rerun.archetypes.Status".into() + } + + #[inline] + fn display_name() -> &'static str { + "Status" + } + + #[inline] + fn required_components() -> ::std::borrow::Cow<'static, [ComponentDescriptor]> { + REQUIRED_COMPONENTS.as_slice().into() + } + + #[inline] + fn recommended_components() -> ::std::borrow::Cow<'static, [ComponentDescriptor]> { + RECOMMENDED_COMPONENTS.as_slice().into() + } + + #[inline] + fn optional_components() -> ::std::borrow::Cow<'static, [ComponentDescriptor]> { + OPTIONAL_COMPONENTS.as_slice().into() + } + + #[inline] + fn all_components() -> 
::std::borrow::Cow<'static, [ComponentDescriptor]> { + ALL_COMPONENTS.as_slice().into() + } + + #[inline] + fn from_arrow_components( + arrow_data: impl IntoIterator, + ) -> DeserializationResult { + re_tracing::profile_function!(); + use ::re_types_core::{Loggable as _, ResultExt as _}; + let arrays_by_descr: ::nohash_hasher::IntMap<_, _> = arrow_data.into_iter().collect(); + let status = arrays_by_descr + .get(&Self::descriptor_status()) + .map(|array| SerializedComponentBatch::new(array.clone(), Self::descriptor_status())); + Ok(Self { status }) + } +} + +impl ::re_types_core::AsComponents for Status { + #[inline] + fn as_serialized_batches(&self) -> Vec { + use ::re_types_core::Archetype as _; + std::iter::once(self.status.clone()).flatten().collect() + } +} + +impl ::re_types_core::ArchetypeReflectionMarker for Status {} + +impl crate::VisualizableArchetype for Status { + #[inline] + fn visualizer(&self) -> crate::Visualizer { + crate::Visualizer::new("StatusVisualizer").with_overrides(self) + } +} + +impl Status { + /// Create a new `Status`. + #[inline] + pub fn new() -> Self { + Self { status: None } + } + + /// Update only some specific fields of a `Status`. + #[inline] + pub fn update_fields() -> Self { + Self::default() + } + + /// Clear all the fields of a `Status`. + #[inline] + pub fn clear_fields() -> Self { + use ::re_types_core::Loggable as _; + Self { + status: Some(SerializedComponentBatch::new( + crate::components::Text::arrow_empty(), + Self::descriptor_status(), + )), + } + } + + /// Partitions the component data into multiple sub-batches. + /// + /// Specifically, this transforms the existing [`SerializedComponentBatch`]es data into [`SerializedComponentColumn`]s + /// instead, via [`SerializedComponentBatch::partitioned`]. + /// + /// This makes it possible to use `RecordingStream::send_columns` to send columnar data directly into Rerun. + /// + /// The specified `lengths` must sum to the total length of the component batch. 
+ /// + /// [`SerializedComponentColumn`]: [::re_types_core::SerializedComponentColumn] + #[inline] + pub fn columns( + self, + _lengths: I, + ) -> SerializationResult> + where + I: IntoIterator + Clone, + { + let columns = [self + .status + .map(|status| status.partitioned(_lengths.clone())) + .transpose()?]; + Ok(columns.into_iter().flatten()) + } + + /// Helper to partition the component data into unit-length sub-batches. + /// + /// This is semantically similar to calling [`Self::columns`] with `std::iter::take(1).repeat(n)`, + /// where `n` is automatically guessed. + #[inline] + pub fn columns_of_unit_batches( + self, + ) -> SerializationResult> { + let len_status = self.status.as_ref().map(|b| b.array.len()); + let len = None.or(len_status).unwrap_or(0); + self.columns(std::iter::repeat_n(1, len)) + } + + /// The new status value. A `null` status is ignored, it can be used to partially update a multi-instance status array. + #[inline] + pub fn with_status(mut self, status: impl Into) -> Self { + self.status = try_serialize_field(Self::descriptor_status(), [status]); + self + } + + /// This method makes it possible to pack multiple [`crate::components::Text`] in a single component batch. + /// + /// This only makes sense when used in conjunction with [`Self::columns`]. [`Self::with_status`] should + /// be used when logging a single row's worth of data. 
+ #[inline] + pub fn with_many_status( + mut self, + status: impl IntoIterator>, + ) -> Self { + self.status = try_serialize_field(Self::descriptor_status(), status); + self + } +} + +impl ::re_byte_size::SizeBytes for Status { + #[inline] + fn heap_size_bytes(&self) -> u64 { + self.status.heap_size_bytes() + } +} diff --git a/crates/store/re_sdk_types/src/blueprint/archetypes/dataframe_query.rs b/crates/store/re_sdk_types/src/blueprint/archetypes/dataframe_query.rs index 243c024c5f14..34eb75cdcafe 100644 --- a/crates/store/re_sdk_types/src/blueprint/archetypes/dataframe_query.rs +++ b/crates/store/re_sdk_types/src/blueprint/archetypes/dataframe_query.rs @@ -42,7 +42,7 @@ pub struct DataframeQuery { /// Should empty cells be filled with latest-at queries? pub apply_latest_at: Option, - /// Selected columns. If unset, all columns are selected. + /// Selected columns. If unset, only the active timeline and all component columns are selected. pub select: Option, /// The order of entity path column groups. If unset, the default order is used. @@ -389,7 +389,7 @@ impl DataframeQuery { self } - /// Selected columns. If unset, all columns are selected. + /// Selected columns. If unset, only the active timeline and all component columns are selected. #[inline] pub fn with_select( mut self, diff --git a/crates/store/re_sdk_types/src/blueprint/archetypes/text_log_columns.rs b/crates/store/re_sdk_types/src/blueprint/archetypes/text_log_columns.rs index 5e48c90e302a..26e96b58205b 100644 --- a/crates/store/re_sdk_types/src/blueprint/archetypes/text_log_columns.rs +++ b/crates/store/re_sdk_types/src/blueprint/archetypes/text_log_columns.rs @@ -28,7 +28,7 @@ use ::re_types_core::{DeserializationError, DeserializationResult}; pub struct TextLogColumns { /// What timeline columns to show. /// - /// Defaults to displaying all timelines. + /// Defaults to displaying only the active timeline. pub timeline_columns: Option, /// All columns to be displayed. 
@@ -192,7 +192,7 @@ impl TextLogColumns { /// What timeline columns to show. /// - /// Defaults to displaying all timelines. + /// Defaults to displaying only the active timeline. #[inline] pub fn with_timeline_columns( mut self, diff --git a/crates/store/re_sdk_types/src/blueprint/components/background_kind.rs b/crates/store/re_sdk_types/src/blueprint/components/background_kind.rs index cfc8f19e5edf..263468719fb3 100644 --- a/crates/store/re_sdk_types/src/blueprint/components/background_kind.rs +++ b/crates/store/re_sdk_types/src/blueprint/components/background_kind.rs @@ -110,15 +110,16 @@ impl ::re_types_core::Loggable for BackgroundKind { .with_context("rerun.blueprint.components.BackgroundKind#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::GradientDark)), - Some(2) => Ok(Some(Self::GradientBright)), - Some(3) => Ok(Some(Self::SolidColor)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.blueprint.components.BackgroundKind")?) 
@@ -136,6 +137,8 @@ impl std::fmt::Display for BackgroundKind { } impl ::re_types_core::reflection::Enum for BackgroundKind { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::GradientDark, Self::GradientBright, Self::SolidColor] @@ -153,6 +156,13 @@ impl ::re_types_core::reflection::Enum for BackgroundKind { Self::SolidColor => "Simple uniform color.", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for BackgroundKind { diff --git a/crates/store/re_sdk_types/src/blueprint/components/container_kind.rs b/crates/store/re_sdk_types/src/blueprint/components/container_kind.rs index 0ad748ffd79c..52b88cc25b59 100644 --- a/crates/store/re_sdk_types/src/blueprint/components/container_kind.rs +++ b/crates/store/re_sdk_types/src/blueprint/components/container_kind.rs @@ -109,16 +109,16 @@ impl ::re_types_core::Loggable for ContainerKind { .with_context("rerun.blueprint.components.ContainerKind#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::Tabs)), - Some(2) => Ok(Some(Self::Horizontal)), - Some(3) => Ok(Some(Self::Vertical)), - Some(4) => Ok(Some(Self::Grid)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.blueprint.components.ContainerKind")?) 
@@ -137,6 +137,8 @@ impl std::fmt::Display for ContainerKind { } impl ::re_types_core::reflection::Enum for ContainerKind { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::Tabs, Self::Horizontal, Self::Vertical, Self::Grid] @@ -151,6 +153,13 @@ impl ::re_types_core::reflection::Enum for ContainerKind { Self::Grid => "Organize children in a grid layout", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for ContainerKind { diff --git a/crates/store/re_sdk_types/src/blueprint/components/corner2d.rs b/crates/store/re_sdk_types/src/blueprint/components/corner2d.rs index 641d59dd2732..27f9b39426f4 100644 --- a/crates/store/re_sdk_types/src/blueprint/components/corner2d.rs +++ b/crates/store/re_sdk_types/src/blueprint/components/corner2d.rs @@ -109,16 +109,16 @@ impl ::re_types_core::Loggable for Corner2D { .with_context("rerun.blueprint.components.Corner2D#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::LeftTop)), - Some(2) => Ok(Some(Self::RightTop)), - Some(3) => Ok(Some(Self::LeftBottom)), - Some(4) => Ok(Some(Self::RightBottom)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.blueprint.components.Corner2D")?) 
@@ -137,6 +137,8 @@ impl std::fmt::Display for Corner2D { } impl ::re_types_core::reflection::Enum for Corner2D { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[ @@ -156,6 +158,13 @@ impl ::re_types_core::reflection::Enum for Corner2D { Self::RightBottom => "Right bottom corner.", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for Corner2D { diff --git a/crates/store/re_sdk_types/src/blueprint/components/corner2d_ext.rs b/crates/store/re_sdk_types/src/blueprint/components/corner2d_ext.rs index fe2f14fb0975..e74742ff7d6e 100644 --- a/crates/store/re_sdk_types/src/blueprint/components/corner2d_ext.rs +++ b/crates/store/re_sdk_types/src/blueprint/components/corner2d_ext.rs @@ -1,3 +1,14 @@ +impl From for emath::Align2 { + fn from(corner: super::Corner2D) -> Self { + match corner { + super::Corner2D::LeftTop => Self::LEFT_TOP, + super::Corner2D::RightTop => Self::RIGHT_TOP, + super::Corner2D::LeftBottom => Self::LEFT_BOTTOM, + super::Corner2D::RightBottom => Self::RIGHT_BOTTOM, + } + } +} + #[cfg(feature = "egui_plot")] impl From for egui_plot::Corner { fn from(corner: super::Corner2D) -> Self { diff --git a/crates/store/re_sdk_types/src/blueprint/components/eye3d_kind.rs b/crates/store/re_sdk_types/src/blueprint/components/eye3d_kind.rs index d92dd55bb688..86a53bc2ba1d 100644 --- a/crates/store/re_sdk_types/src/blueprint/components/eye3d_kind.rs +++ b/crates/store/re_sdk_types/src/blueprint/components/eye3d_kind.rs @@ -113,14 +113,16 @@ impl ::re_types_core::Loggable for Eye3DKind { .with_context("rerun.blueprint.components.Eye3DKind#enum")? 
.into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::FirstPerson)), - Some(2) => Ok(Some(Self::Orbital)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.blueprint.components.Eye3DKind")?) @@ -137,6 +139,8 @@ impl std::fmt::Display for Eye3DKind { } impl ::re_types_core::reflection::Enum for Eye3DKind { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::FirstPerson, Self::Orbital] @@ -153,6 +157,13 @@ impl ::re_types_core::reflection::Enum for Eye3DKind { } } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for Eye3DKind { diff --git a/crates/store/re_sdk_types/src/blueprint/components/link_axis.rs b/crates/store/re_sdk_types/src/blueprint/components/link_axis.rs index 680ffa8e025e..eb0b80435aa1 100644 --- a/crates/store/re_sdk_types/src/blueprint/components/link_axis.rs +++ b/crates/store/re_sdk_types/src/blueprint/components/link_axis.rs @@ -103,14 +103,16 @@ impl ::re_types_core::Loggable for LinkAxis { .with_context("rerun.blueprint.components.LinkAxis#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::Independent)), - Some(2) => Ok(Some(Self::LinkToGlobal)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.blueprint.components.LinkAxis")?) 
@@ -127,6 +129,8 @@ impl std::fmt::Display for LinkAxis { } impl ::re_types_core::reflection::Enum for LinkAxis { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::Independent, Self::LinkToGlobal] @@ -139,6 +143,13 @@ impl ::re_types_core::reflection::Enum for LinkAxis { Self::LinkToGlobal => "Link to all other plots that also have this options set.", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for LinkAxis { diff --git a/crates/store/re_sdk_types/src/blueprint/components/loop_mode.rs b/crates/store/re_sdk_types/src/blueprint/components/loop_mode.rs index 80b297cf34bd..9a482c8b8dd0 100644 --- a/crates/store/re_sdk_types/src/blueprint/components/loop_mode.rs +++ b/crates/store/re_sdk_types/src/blueprint/components/loop_mode.rs @@ -108,15 +108,16 @@ impl ::re_types_core::Loggable for LoopMode { .with_context("rerun.blueprint.components.LoopMode#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::Off)), - Some(2) => Ok(Some(Self::Selection)), - Some(3) => Ok(Some(Self::All)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.blueprint.components.LoopMode")?) 
@@ -134,6 +135,8 @@ impl std::fmt::Display for LoopMode { } impl ::re_types_core::reflection::Enum for LoopMode { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::Off, Self::Selection, Self::All] @@ -147,6 +150,13 @@ impl ::re_types_core::reflection::Enum for LoopMode { Self::All => "We are looping the entire recording.\n\nThe loop selection is ignored.", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for LoopMode { diff --git a/crates/store/re_sdk_types/src/blueprint/components/map_provider.rs b/crates/store/re_sdk_types/src/blueprint/components/map_provider.rs index ed4ec46e52b3..a7595f36e8a8 100644 --- a/crates/store/re_sdk_types/src/blueprint/components/map_provider.rs +++ b/crates/store/re_sdk_types/src/blueprint/components/map_provider.rs @@ -112,17 +112,16 @@ impl ::re_types_core::Loggable for MapProvider { .with_context("rerun.blueprint.components.MapProvider#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::OpenStreetMap)), - Some(2) => Ok(Some(Self::MapboxStreets)), - Some(3) => Ok(Some(Self::MapboxDark)), - Some(4) => Ok(Some(Self::MapboxSatellite)), - Some(5) => Ok(Some(Self::MapboxLight)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.blueprint.components.MapProvider")?) 
@@ -142,6 +141,8 @@ impl std::fmt::Display for MapProvider { } impl ::re_types_core::reflection::Enum for MapProvider { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[ @@ -163,6 +164,13 @@ impl ::re_types_core::reflection::Enum for MapProvider { Self::MapboxLight => "Mapbox Light is a light-themed map designed by Mapbox.", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for MapProvider { diff --git a/crates/store/re_sdk_types/src/blueprint/components/panel_state.rs b/crates/store/re_sdk_types/src/blueprint/components/panel_state.rs index 2f91873ced9a..dd46e666e868 100644 --- a/crates/store/re_sdk_types/src/blueprint/components/panel_state.rs +++ b/crates/store/re_sdk_types/src/blueprint/components/panel_state.rs @@ -106,15 +106,16 @@ impl ::re_types_core::Loggable for PanelState { .with_context("rerun.blueprint.components.PanelState#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::Hidden)), - Some(2) => Ok(Some(Self::Collapsed)), - Some(3) => Ok(Some(Self::Expanded)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.blueprint.components.PanelState")?) 
@@ -132,6 +133,8 @@ impl std::fmt::Display for PanelState { } impl ::re_types_core::reflection::Enum for PanelState { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::Hidden, Self::Collapsed, Self::Expanded] @@ -145,6 +148,13 @@ impl ::re_types_core::reflection::Enum for PanelState { Self::Expanded => "Fully expanded.", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for PanelState { diff --git a/crates/store/re_sdk_types/src/blueprint/components/play_state.rs b/crates/store/re_sdk_types/src/blueprint/components/play_state.rs index 7f16dc666d4c..c72506813d7f 100644 --- a/crates/store/re_sdk_types/src/blueprint/components/play_state.rs +++ b/crates/store/re_sdk_types/src/blueprint/components/play_state.rs @@ -106,15 +106,16 @@ impl ::re_types_core::Loggable for PlayState { .with_context("rerun.blueprint.components.PlayState#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::Paused)), - Some(2) => Ok(Some(Self::Playing)), - Some(3) => Ok(Some(Self::Following)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.blueprint.components.PlayState")?) 
@@ -132,6 +133,8 @@ impl std::fmt::Display for PlayState { } impl ::re_types_core::reflection::Enum for PlayState { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::Paused, Self::Playing, Self::Following] @@ -145,6 +148,13 @@ impl ::re_types_core::reflection::Enum for PlayState { Self::Following => "Follow the latest available data.", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for PlayState { diff --git a/crates/store/re_sdk_types/src/blueprint/components/view_fit.rs b/crates/store/re_sdk_types/src/blueprint/components/view_fit.rs index bd4eaa2aea0c..c8407d0d427d 100644 --- a/crates/store/re_sdk_types/src/blueprint/components/view_fit.rs +++ b/crates/store/re_sdk_types/src/blueprint/components/view_fit.rs @@ -106,15 +106,16 @@ impl ::re_types_core::Loggable for ViewFit { .with_context("rerun.blueprint.components.ViewFit#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::Original)), - Some(2) => Ok(Some(Self::Fill)), - Some(3) => Ok(Some(Self::FillKeepAspectRatio)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.blueprint.components.ViewFit")?) 
@@ -132,6 +133,8 @@ impl std::fmt::Display for ViewFit { } impl ::re_types_core::reflection::Enum for ViewFit { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::Original, Self::Fill, Self::FillKeepAspectRatio] @@ -149,6 +152,13 @@ impl ::re_types_core::reflection::Enum for ViewFit { } } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for ViewFit { diff --git a/crates/store/re_sdk_types/src/blueprint/components/visualizer_instruction_id_ext.rs b/crates/store/re_sdk_types/src/blueprint/components/visualizer_instruction_id_ext.rs index 9f4b4c8afe35..606e955a217d 100644 --- a/crates/store/re_sdk_types/src/blueprint/components/visualizer_instruction_id_ext.rs +++ b/crates/store/re_sdk_types/src/blueprint/components/visualizer_instruction_id_ext.rs @@ -1,3 +1,5 @@ +use re_log_types::EntityPath; + use super::VisualizerInstructionId; use crate::datatypes::Uuid; @@ -19,8 +21,11 @@ impl VisualizerInstructionId { /// This is used internally for generating stable IDs for heuristically /// created visualizers. 
#[inline] - pub fn new_deterministic(hash: u64, index: usize) -> Self { - Self(Uuid::from(uuid::Uuid::from_u64_pair(hash, index as u64))) + pub fn new_deterministic(entity_path: &EntityPath, index: usize) -> Self { + Self(Uuid::from(uuid::Uuid::from_u64_pair( + entity_path.calculate_deterministic_hash(), + index as u64, + ))) } } diff --git a/crates/store/re_sdk_types/src/blueprint/datatypes/component_column_selector_ext.rs b/crates/store/re_sdk_types/src/blueprint/datatypes/component_column_selector_ext.rs index 007ce50ed152..ff556e1f05c5 100644 --- a/crates/store/re_sdk_types/src/blueprint/datatypes/component_column_selector_ext.rs +++ b/crates/store/re_sdk_types/src/blueprint/datatypes/component_column_selector_ext.rs @@ -14,7 +14,7 @@ impl super::ComponentColumnSelector { EntityPath::from(self.entity_path.as_str()) } - /// The parsed omponent column selector. + /// The parsed component column selector. pub fn column_selector(&self) -> re_sorbet::ComponentColumnSelector { let entity_path = EntityPath::from(self.entity_path.as_str()); let component = self.component.to_string(); diff --git a/crates/store/re_sdk_types/src/blueprint/datatypes/component_source_kind.rs b/crates/store/re_sdk_types/src/blueprint/datatypes/component_source_kind.rs index 6cf13adc4e42..3aeb4057fba4 100644 --- a/crates/store/re_sdk_types/src/blueprint/datatypes/component_source_kind.rs +++ b/crates/store/re_sdk_types/src/blueprint/datatypes/component_source_kind.rs @@ -112,15 +112,16 @@ impl ::re_types_core::Loggable for ComponentSourceKind { .with_context("rerun.blueprint.datatypes.ComponentSourceKind#enum")? 
.into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::SourceComponent)), - Some(2) => Ok(Some(Self::Override)), - Some(3) => Ok(Some(Self::Default)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.blueprint.datatypes.ComponentSourceKind")?) @@ -138,6 +139,8 @@ impl std::fmt::Display for ComponentSourceKind { } impl ::re_types_core::reflection::Enum for ComponentSourceKind { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::SourceComponent, Self::Override, Self::Default] @@ -157,6 +160,13 @@ impl ::re_types_core::reflection::Enum for ComponentSourceKind { } } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for ComponentSourceKind { diff --git a/crates/store/re_sdk_types/src/blueprint/datatypes/text_log_column_kind.rs b/crates/store/re_sdk_types/src/blueprint/datatypes/text_log_column_kind.rs index 965be126e110..166b943f86c1 100644 --- a/crates/store/re_sdk_types/src/blueprint/datatypes/text_log_column_kind.rs +++ b/crates/store/re_sdk_types/src/blueprint/datatypes/text_log_column_kind.rs @@ -99,15 +99,16 @@ impl ::re_types_core::Loggable for TextLogColumnKind { .with_context("rerun.blueprint.datatypes.TextLogColumnKind#enum")? 
.into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::EntityPath)), - Some(2) => Ok(Some(Self::LogLevel)), - Some(3) => Ok(Some(Self::Body)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.blueprint.datatypes.TextLogColumnKind")?) @@ -125,6 +126,8 @@ impl std::fmt::Display for TextLogColumnKind { } impl ::re_types_core::reflection::Enum for TextLogColumnKind { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::EntityPath, Self::LogLevel, Self::Body] @@ -138,6 +141,13 @@ impl ::re_types_core::reflection::Enum for TextLogColumnKind { Self::Body => "The text message the log has.", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for TextLogColumnKind { diff --git a/crates/store/re_sdk_types/src/blueprint/views/.gitattributes b/crates/store/re_sdk_types/src/blueprint/views/.gitattributes index c49cd98b51ee..dcc2d8675efb 100644 --- a/crates/store/re_sdk_types/src/blueprint/views/.gitattributes +++ b/crates/store/re_sdk_types/src/blueprint/views/.gitattributes @@ -8,6 +8,7 @@ map_view.rs linguist-generated=true mod.rs linguist-generated=true spatial2d_view.rs linguist-generated=true spatial3d_view.rs linguist-generated=true +status_view.rs linguist-generated=true tensor_view.rs linguist-generated=true text_document_view.rs linguist-generated=true text_log_view.rs linguist-generated=true diff --git a/crates/store/re_sdk_types/src/blueprint/views/mod.rs b/crates/store/re_sdk_types/src/blueprint/views/mod.rs index 11802f945c62..01fa75bf503f 100644 --- a/crates/store/re_sdk_types/src/blueprint/views/mod.rs +++ 
b/crates/store/re_sdk_types/src/blueprint/views/mod.rs @@ -6,6 +6,7 @@ mod graph_view; mod map_view; mod spatial2d_view; mod spatial3d_view; +mod status_view; mod tensor_view; mod text_document_view; mod text_log_view; @@ -17,6 +18,7 @@ pub use self::graph_view::GraphView; pub use self::map_view::MapView; pub use self::spatial2d_view::Spatial2DView; pub use self::spatial3d_view::Spatial3DView; +pub use self::status_view::StatusView; pub use self::tensor_view::TensorView; pub use self::text_document_view::TextDocumentView; pub use self::text_log_view::TextLogView; diff --git a/crates/store/re_sdk_types/src/blueprint/views/status_view.rs b/crates/store/re_sdk_types/src/blueprint/views/status_view.rs new file mode 100644 index 000000000000..1d233174a0c2 --- /dev/null +++ b/crates/store/re_sdk_types/src/blueprint/views/status_view.rs @@ -0,0 +1,47 @@ +// DO NOT EDIT! This file was auto-generated by crates/build/re_types_builder/src/codegen/rust/api.rs +// Based on "crates/store/re_sdk_types/definitions/rerun/blueprint/views/status.fbs". + +#![allow(unused_braces)] +#![allow(unused_imports)] +#![allow(unused_parens)] +#![allow(clippy::allow_attributes)] +#![allow(clippy::clone_on_copy)] +#![allow(clippy::cloned_instead_of_copied)] +#![allow(clippy::map_flatten)] +#![allow(clippy::needless_question_mark)] +#![allow(clippy::new_without_default)] +#![allow(clippy::redundant_closure)] +#![allow(clippy::too_many_arguments)] +#![allow(clippy::too_many_lines)] +#![allow(clippy::wildcard_imports)] + +use ::re_types_core::SerializationResult; +use ::re_types_core::try_serialize_field; +use ::re_types_core::{ComponentBatch as _, SerializedComponentBatch}; +use ::re_types_core::{ComponentDescriptor, ComponentType}; +use ::re_types_core::{DeserializationError, DeserializationResult}; + +/// **View**: A view for displaying status transitions over time, for use with [`archetypes::Status`][crate::archetypes::Status]. 
+/// +/// ⚠️ **This type is _unstable_ and may change significantly in a way that the data won't be backwards compatible.** +#[derive(Clone, Debug)] +pub struct StatusView {} + +impl ::re_types_core::View for StatusView { + #[inline] + fn identifier() -> ::re_types_core::ViewClassIdentifier { + "Status".into() + } +} + +impl ::re_byte_size::SizeBytes for StatusView { + #[inline] + fn heap_size_bytes(&self) -> u64 { + 0 + } + + #[inline] + fn is_pod() -> bool { + true + } +} diff --git a/crates/store/re_sdk_types/src/colormap_category.rs b/crates/store/re_sdk_types/src/colormap_category.rs index ade348338356..239a92117558 100644 --- a/crates/store/re_sdk_types/src/colormap_category.rs +++ b/crates/store/re_sdk_types/src/colormap_category.rs @@ -13,12 +13,41 @@ pub enum ColormapCategory { /// Colormaps that wrap around. Cyclic, + + /// Colormaps specialized for occupancy grids and costmaps. + GridMap, +} + +/// Allows to select groups of colormap categories. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub enum ColormapSelection { + #[default] + /// Show the standard colormap categories. + Standard, + + /// Show the standard colormap categories plus GridMap-specific colormaps. + IncludeGridMap, +} + +impl ColormapSelection { + /// Whether this selection includes the given category. + pub const fn includes(self, category: ColormapCategory) -> bool { + match self { + Self::Standard => !matches!(category, ColormapCategory::GridMap), + Self::IncludeGridMap => true, + } + } } impl ColormapCategory { /// Returns all possible colormap categories. pub fn variants() -> &'static [Self] { - &[Self::Sequential, Self::Diverging, Self::Cyclic] + &[ + Self::Sequential, + Self::Diverging, + Self::Cyclic, + Self::GridMap, + ] } /// Returns the [`ColormapCategory`] classification for the given colormap. 
@@ -32,6 +61,7 @@ impl ColormapCategory { | Colormap::Turbo => Self::Sequential, Colormap::CyanToYellow | Colormap::Spectral => Self::Diverging, Colormap::Twilight => Self::Cyclic, + Colormap::RvizMap | Colormap::RvizCostmap => Self::GridMap, } } } diff --git a/crates/store/re_sdk_types/src/components/.gitattributes b/crates/store/re_sdk_types/src/components/.gitattributes index d78ebc1a802d..159dbd596574 100644 --- a/crates/store/re_sdk_types/src/components/.gitattributes +++ b/crates/store/re_sdk_types/src/components/.gitattributes @@ -6,6 +6,7 @@ albedo_factor.rs linguist-generated=true annotation_context.rs linguist-generated=true axis_length.rs linguist-generated=true blob.rs linguist-generated=true +cell_size.rs linguist-generated=true channel_id.rs linguist-generated=true channel_message_counts.rs linguist-generated=true class_id.rs linguist-generated=true @@ -40,6 +41,7 @@ magnification_filter.rs linguist-generated=true marker_shape.rs linguist-generated=true marker_size.rs linguist-generated=true media_type.rs linguist-generated=true +mesh_face_rendering.rs linguist-generated=true mod.rs linguist-generated=true name.rs linguist-generated=true opacity.rs linguist-generated=true diff --git a/crates/store/re_sdk_types/src/components/aggregation_policy.rs b/crates/store/re_sdk_types/src/components/aggregation_policy.rs index 78788e0c64b9..819ce1a2749f 100644 --- a/crates/store/re_sdk_types/src/components/aggregation_policy.rs +++ b/crates/store/re_sdk_types/src/components/aggregation_policy.rs @@ -121,18 +121,16 @@ impl ::re_types_core::Loggable for AggregationPolicy { .with_context("rerun.components.AggregationPolicy#enum")? 
.into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::Off)), - Some(2) => Ok(Some(Self::Average)), - Some(3) => Ok(Some(Self::Max)), - Some(4) => Ok(Some(Self::Min)), - Some(5) => Ok(Some(Self::MinMax)), - Some(6) => Ok(Some(Self::MinMaxAverage)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.components.AggregationPolicy")?) @@ -153,6 +151,8 @@ impl std::fmt::Display for AggregationPolicy { } impl ::re_types_core::reflection::Enum for AggregationPolicy { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[ @@ -180,6 +180,13 @@ impl ::re_types_core::reflection::Enum for AggregationPolicy { } } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for AggregationPolicy { diff --git a/crates/store/re_sdk_types/src/components/cell_size.rs b/crates/store/re_sdk_types/src/components/cell_size.rs new file mode 100644 index 000000000000..8d03013446c1 --- /dev/null +++ b/crates/store/re_sdk_types/src/components/cell_size.rs @@ -0,0 +1,86 @@ +// DO NOT EDIT! This file was auto-generated by crates/build/re_types_builder/src/codegen/rust/api.rs +// Based on "crates/store/re_sdk_types/definitions/rerun/components/cell_size.fbs". 
+ +#![allow(unused_braces)] +#![allow(unused_imports)] +#![allow(unused_parens)] +#![allow(clippy::allow_attributes)] +#![allow(clippy::clone_on_copy)] +#![allow(clippy::cloned_instead_of_copied)] +#![allow(clippy::map_flatten)] +#![allow(clippy::needless_question_mark)] +#![allow(clippy::new_without_default)] +#![allow(clippy::redundant_closure)] +#![allow(clippy::too_many_arguments)] +#![allow(clippy::too_many_lines)] +#![allow(clippy::wildcard_imports)] + +use ::re_types_core::SerializationResult; +use ::re_types_core::try_serialize_field; +use ::re_types_core::{ComponentBatch as _, SerializedComponentBatch}; +use ::re_types_core::{ComponentDescriptor, ComponentType}; +use ::re_types_core::{DeserializationError, DeserializationResult}; + +/// **Component**: The metric size of one grid cell in local scene units. +/// +/// E.g. for 2D grid maps, this is the physical size represented by a single pixel or cell. +#[derive(Clone, Debug, Copy, PartialEq, PartialOrd, bytemuck::Pod, bytemuck::Zeroable)] +#[repr(transparent)] +pub struct CellSize(pub crate::datatypes::Float32); + +impl ::re_types_core::WrapperComponent for CellSize { + type Datatype = crate::datatypes::Float32; + + #[inline] + fn name() -> ComponentType { + "rerun.components.CellSize".into() + } + + #[inline] + fn into_inner(self) -> Self::Datatype { + self.0 + } +} + +::re_types_core::macros::impl_into_cow!(CellSize); + +impl> From for CellSize { + fn from(v: T) -> Self { + Self(v.into()) + } +} + +impl std::borrow::Borrow for CellSize { + #[inline] + fn borrow(&self) -> &crate::datatypes::Float32 { + &self.0 + } +} + +impl std::ops::Deref for CellSize { + type Target = crate::datatypes::Float32; + + #[inline] + fn deref(&self) -> &crate::datatypes::Float32 { + &self.0 + } +} + +impl std::ops::DerefMut for CellSize { + #[inline] + fn deref_mut(&mut self) -> &mut crate::datatypes::Float32 { + &mut self.0 + } +} + +impl ::re_byte_size::SizeBytes for CellSize { + #[inline] + fn heap_size_bytes(&self) -> u64 
{ + self.0.heap_size_bytes() + } + + #[inline] + fn is_pod() -> bool { + ::is_pod() + } +} diff --git a/crates/store/re_sdk_types/src/components/colormap.rs b/crates/store/re_sdk_types/src/components/colormap.rs index 1eeaeddc63e9..a548496fabf1 100644 --- a/crates/store/re_sdk_types/src/components/colormap.rs +++ b/crates/store/re_sdk_types/src/components/colormap.rs @@ -89,6 +89,18 @@ pub enum Colormap { /// /// It interpolates from white to blue to purple to red to orange and back to white. Twilight = 9, + + /// The classic `RViz` "Map" grid-map colormap intended for occupancy-style SLAM grid maps. + /// + /// Known values are mapped to a grayscale ramp from white (free) to black (occupied), + /// unknown values are in a green-blue color. Special / illegal values have highlight colors. + RvizMap = 10, + + /// The classic `RViz` "Costmap" grid-map colormap for robot navigation cost maps. + /// + /// Cost values are mapped to blue to red spectrum, and special cost values + /// (e.g. lethal obstacles) have highlight colors. Zero values are fully transparent. + RvizCostmap = 11, } impl ::re_types_core::Component for Colormap { @@ -160,21 +172,16 @@ impl ::re_types_core::Loggable for Colormap { .with_context("rerun.components.Colormap#enum")? 
.into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::Grayscale)), - Some(2) => Ok(Some(Self::Inferno)), - Some(3) => Ok(Some(Self::Magma)), - Some(4) => Ok(Some(Self::Plasma)), - Some(5) => Ok(Some(Self::Turbo)), - Some(6) => Ok(Some(Self::Viridis)), - Some(7) => Ok(Some(Self::CyanToYellow)), - Some(8) => Ok(Some(Self::Spectral)), - Some(9) => Ok(Some(Self::Twilight)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.components.Colormap")?) @@ -193,11 +200,15 @@ impl std::fmt::Display for Colormap { Self::CyanToYellow => write!(f, "CyanToYellow"), Self::Spectral => write!(f, "Spectral"), Self::Twilight => write!(f, "Twilight"), + Self::RvizMap => write!(f, "RvizMap"), + Self::RvizCostmap => write!(f, "RvizCostmap"), } } } impl ::re_types_core::reflection::Enum for Colormap { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[ @@ -210,6 +221,8 @@ impl ::re_types_core::reflection::Enum for Colormap { Self::CyanToYellow, Self::Spectral, Self::Twilight, + Self::RvizMap, + Self::RvizCostmap, ] } @@ -243,8 +256,21 @@ impl ::re_types_core::reflection::Enum for Colormap { Self::Twilight => { "The Twilight colormap from Matplotlib.\n\nThis is a perceptually uniform cyclic colormap from Matplotlib, it is useful for\nvisualizing periodic or cyclic data.\n\nIt interpolates from white to blue to purple to red to orange and back to white." } + Self::RvizMap => { + "The classic `RViz` \"Map\" grid-map colormap intended for occupancy-style SLAM grid maps.\n\nKnown values are mapped to a grayscale ramp from white (free) to black (occupied),\nunknown values are in a green-blue color. Special / illegal values have highlight colors." 
+ } + Self::RvizCostmap => { + "The classic `RViz` \"Costmap\" grid-map colormap for robot navigation cost maps.\n\nCost values are mapped to blue to red spectrum, and special cost values\n(e.g. lethal obstacles) have highlight colors. Zero values are fully transparent." + } } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for Colormap { diff --git a/crates/store/re_sdk_types/src/components/colormap_ext.rs b/crates/store/re_sdk_types/src/components/colormap_ext.rs index 5c60b15cafb3..0a5e52fe6cf3 100644 --- a/crates/store/re_sdk_types/src/components/colormap_ext.rs +++ b/crates/store/re_sdk_types/src/components/colormap_ext.rs @@ -2,38 +2,6 @@ use super::Colormap; use crate::ColormapCategory; impl Colormap { - /// Instantiate a new [`Colormap`] from a u8 value. - /// - /// Returns `None` if the value doesn't match any of the enum's arms. - pub fn from_u8(value: u8) -> Option { - // NOTE: This code will be optimized out, it's only here to make sure this method fails to - // compile if the enum is modified. - match Self::default() { - Self::Grayscale - | Self::Inferno - | Self::Magma - | Self::Plasma - | Self::Turbo - | Self::Viridis - | Self::CyanToYellow - | Self::Spectral - | Self::Twilight => {} - } - - match value { - v if v == Self::Grayscale as u8 => Some(Self::Grayscale), - v if v == Self::Inferno as u8 => Some(Self::Inferno), - v if v == Self::Magma as u8 => Some(Self::Magma), - v if v == Self::Plasma as u8 => Some(Self::Plasma), - v if v == Self::Turbo as u8 => Some(Self::Turbo), - v if v == Self::Viridis as u8 => Some(Self::Viridis), - v if v == Self::CyanToYellow as u8 => Some(Self::CyanToYellow), - v if v == Self::Spectral as u8 => Some(Self::Spectral), - v if v == Self::Twilight as u8 => Some(Self::Twilight), - _ => None, - } - } - /// Returns the [`ColormapCategory`] classification for this colormap. 
pub fn category(&self) -> ColormapCategory { ColormapCategory::from_colormap(*self) diff --git a/crates/store/re_sdk_types/src/components/fill_mode.rs b/crates/store/re_sdk_types/src/components/fill_mode.rs index eff4605906a2..6154de28a725 100644 --- a/crates/store/re_sdk_types/src/components/fill_mode.rs +++ b/crates/store/re_sdk_types/src/components/fill_mode.rs @@ -33,7 +33,6 @@ pub enum FillMode { /// * An [`archetypes::Ellipsoids3D`][crate::archetypes::Ellipsoids3D] will draw three axis-aligned ellipses that are cross-sections /// of each ellipsoid, each of which displays two out of three of the sizes of the ellipsoid. /// * For [`archetypes::Boxes3D`][crate::archetypes::Boxes3D], it is the edges of the box, identical to [`components::FillMode::DenseWireframe`][crate::components::FillMode::DenseWireframe]. - #[default] MajorWireframe = 1, /// Many lines are drawn to represent the surface of the shape in a see-through fashion. @@ -47,6 +46,12 @@ pub enum FillMode { /// The surface of the shape is filled in with a solid color. No lines are drawn. Solid = 3, + + /// The surface of the shape is filled in with a transparent color, with major wireframe lines on top. + /// + /// This gives a good default appearance that shows both the shape's surface and its structure. + #[default] + TransparentFillMajorWireframe = 4, } impl ::re_types_core::Component for FillMode { @@ -118,15 +123,16 @@ impl ::re_types_core::Loggable for FillMode { .with_context("rerun.components.FillMode#enum")? 
.into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::MajorWireframe)), - Some(2) => Ok(Some(Self::DenseWireframe)), - Some(3) => Ok(Some(Self::Solid)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.components.FillMode")?) @@ -139,14 +145,24 @@ impl std::fmt::Display for FillMode { Self::MajorWireframe => write!(f, "MajorWireframe"), Self::DenseWireframe => write!(f, "DenseWireframe"), Self::Solid => write!(f, "Solid"), + Self::TransparentFillMajorWireframe => { + write!(f, "TransparentFillMajorWireframe") + } } } } impl ::re_types_core::reflection::Enum for FillMode { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { - &[Self::MajorWireframe, Self::DenseWireframe, Self::Solid] + &[ + Self::MajorWireframe, + Self::DenseWireframe, + Self::Solid, + Self::TransparentFillMajorWireframe, + ] } #[inline] @@ -161,8 +177,18 @@ impl ::re_types_core::reflection::Enum for FillMode { Self::Solid => { "The surface of the shape is filled in with a solid color. No lines are drawn." } + Self::TransparentFillMajorWireframe => { + "The surface of the shape is filled in with a transparent color, with major wireframe lines on top.\n\nThis gives a good default appearance that shows both the shape's surface and its structure." 
+ } } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for FillMode { diff --git a/crates/store/re_sdk_types/src/components/fill_mode_ext.rs b/crates/store/re_sdk_types/src/components/fill_mode_ext.rs index 9096be0cb444..264ffbb0737e 100644 --- a/crates/store/re_sdk_types/src/components/fill_mode_ext.rs +++ b/crates/store/re_sdk_types/src/components/fill_mode_ext.rs @@ -1,21 +1,29 @@ use super::FillMode; impl FillMode { - /// Instantiate a new [`FillMode`] from a u8 value. - /// - /// Returns `None` if the value doesn't match any of the enum's arms. - pub fn from_u8(value: u8) -> Option { - // NOTE: This code will be optimized out, it's only here to make sure this method fails to - // compile if the enum is modified. - match Self::default() { - Self::MajorWireframe | Self::DenseWireframe | Self::Solid => {} + /// Does this fill mode include wireframe lines? + pub fn has_wireframe(self) -> bool { + match self { + Self::MajorWireframe | Self::DenseWireframe | Self::TransparentFillMajorWireframe => { + true + } + Self::Solid => false, } + } + + /// Does this fill mode include a solid fill? + pub fn has_solid(self) -> bool { + match self { + Self::Solid | Self::TransparentFillMajorWireframe => true, + Self::MajorWireframe | Self::DenseWireframe => false, + } + } - match value { - v if v == Self::MajorWireframe as u8 => Some(Self::MajorWireframe), - v if v == Self::DenseWireframe as u8 => Some(Self::DenseWireframe), - v if v == Self::Solid as u8 => Some(Self::Solid), - _ => None, + /// Should we only draw the major axes, or the full mesh? 
+ pub fn axes_only(self) -> bool { + match self { + Self::MajorWireframe | Self::TransparentFillMajorWireframe => true, + Self::DenseWireframe | Self::Solid => false, } } } diff --git a/crates/store/re_sdk_types/src/components/graph_type.rs b/crates/store/re_sdk_types/src/components/graph_type.rs index 3f744ac4a99d..e25f152792df 100644 --- a/crates/store/re_sdk_types/src/components/graph_type.rs +++ b/crates/store/re_sdk_types/src/components/graph_type.rs @@ -103,14 +103,16 @@ impl ::re_types_core::Loggable for GraphType { .with_context("rerun.components.GraphType#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::Undirected)), - Some(2) => Ok(Some(Self::Directed)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.components.GraphType")?) 
@@ -127,6 +129,8 @@ impl std::fmt::Display for GraphType { } impl ::re_types_core::reflection::Enum for GraphType { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::Undirected, Self::Directed] @@ -139,6 +143,13 @@ impl ::re_types_core::reflection::Enum for GraphType { Self::Directed => "The graph has directed edges.", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for GraphType { diff --git a/crates/store/re_sdk_types/src/components/interpolation_mode.rs b/crates/store/re_sdk_types/src/components/interpolation_mode.rs index 0b3078db89b8..c6066f23247e 100644 --- a/crates/store/re_sdk_types/src/components/interpolation_mode.rs +++ b/crates/store/re_sdk_types/src/components/interpolation_mode.rs @@ -115,16 +115,16 @@ impl ::re_types_core::Loggable for InterpolationMode { .with_context("rerun.components.InterpolationMode#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::Linear)), - Some(2) => Ok(Some(Self::StepAfter)), - Some(3) => Ok(Some(Self::StepBefore)), - Some(4) => Ok(Some(Self::StepMid)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.components.InterpolationMode")?) 
@@ -143,6 +143,8 @@ impl std::fmt::Display for InterpolationMode { } impl ::re_types_core::reflection::Enum for InterpolationMode { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[ @@ -168,6 +170,13 @@ impl ::re_types_core::reflection::Enum for InterpolationMode { } } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for InterpolationMode { diff --git a/crates/store/re_sdk_types/src/components/magnification_filter.rs b/crates/store/re_sdk_types/src/components/magnification_filter.rs index 7ac95760804d..b92765bc0271 100644 --- a/crates/store/re_sdk_types/src/components/magnification_filter.rs +++ b/crates/store/re_sdk_types/src/components/magnification_filter.rs @@ -22,21 +22,30 @@ use ::re_types_core::{ComponentBatch as _, SerializedComponentBatch}; use ::re_types_core::{ComponentDescriptor, ComponentType}; use ::re_types_core::{DeserializationError, DeserializationResult}; -/// **Component**: Filter used when magnifying an image/texture such that a single pixel/texel is displayed as multiple pixels on screen. +/// **Component**: Filter used when a single texel/pixel of an image is displayed larger than a single screen pixel. +/// +/// This happens when zooming into an image, when displaying a low-resolution image in a large area, +/// or when viewing an image up close in 3D space. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, Default)] #[repr(u8)] pub enum MagnificationFilter { /// Show the nearest pixel value. /// - /// This will give a blocky appearance when zooming in. + /// This will give a blocky appearance when the image is scaled up. /// Used as default when rendering 2D images. #[default] Nearest = 1, - /// Linearly interpolate the nearest neighbors, creating a smoother look when zooming in. + /// Linearly interpolate the nearest neighbors, creating a smoother look when the image is scaled up. 
/// /// Used as default for mesh rendering. Linear = 2, + + /// Bicubic interpolation using a Catmull-Rom spline, creating the smoothest look when the image is scaled up. + /// + /// This is computationally more expensive than linear filtering but produces sharper results with less blurring. + /// Unlike bilinear filtering, this avoids cross-shaped artifacts at texel boundaries. + Bicubic = 3, } impl ::re_types_core::Component for MagnificationFilter { @@ -108,14 +117,16 @@ impl ::re_types_core::Loggable for MagnificationFilter { .with_context("rerun.components.MagnificationFilter#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::Nearest)), - Some(2) => Ok(Some(Self::Linear)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.components.MagnificationFilter")?) @@ -127,27 +138,40 @@ impl std::fmt::Display for MagnificationFilter { match self { Self::Nearest => write!(f, "Nearest"), Self::Linear => write!(f, "Linear"), + Self::Bicubic => write!(f, "Bicubic"), } } } impl ::re_types_core::reflection::Enum for MagnificationFilter { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { - &[Self::Nearest, Self::Linear] + &[Self::Nearest, Self::Linear, Self::Bicubic] } #[inline] fn docstring_md(self) -> &'static str { match self { Self::Nearest => { - "Show the nearest pixel value.\n\nThis will give a blocky appearance when zooming in.\nUsed as default when rendering 2D images." + "Show the nearest pixel value.\n\nThis will give a blocky appearance when the image is scaled up.\nUsed as default when rendering 2D images." 
} Self::Linear => { - "Linearly interpolate the nearest neighbors, creating a smoother look when zooming in.\n\nUsed as default for mesh rendering." + "Linearly interpolate the nearest neighbors, creating a smoother look when the image is scaled up.\n\nUsed as default for mesh rendering." + } + Self::Bicubic => { + "Bicubic interpolation using a Catmull-Rom spline, creating the smoothest look when the image is scaled up.\n\nThis is computationally more expensive than linear filtering but produces sharper results with less blurring.\nUnlike bilinear filtering, this avoids cross-shaped artifacts at texel boundaries." } } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for MagnificationFilter { diff --git a/crates/store/re_sdk_types/src/components/magnification_filter_ext.rs b/crates/store/re_sdk_types/src/components/magnification_filter_ext.rs new file mode 100644 index 000000000000..b2b93d1a0ea0 --- /dev/null +++ b/crates/store/re_sdk_types/src/components/magnification_filter_ext.rs @@ -0,0 +1,21 @@ +use super::MagnificationFilter; + +impl MagnificationFilter { + /// Instantiate a new [`MagnificationFilter`] from a u8 value. + /// + /// Returns `None` if the value doesn't match any of the enum's arms. + pub fn from_u8(value: u8) -> Option { + // NOTE: This code will be optimized out, it's only here to make sure this method fails to + // compile if the enum is modified. 
+ match Self::default() { + Self::Nearest | Self::Linear | Self::Bicubic => {} + } + + match value { + v if v == Self::Nearest as u8 => Some(Self::Nearest), + v if v == Self::Linear as u8 => Some(Self::Linear), + v if v == Self::Bicubic as u8 => Some(Self::Bicubic), + _ => None, + } + } +} diff --git a/crates/store/re_sdk_types/src/components/marker_shape.rs b/crates/store/re_sdk_types/src/components/marker_shape.rs index e2fe973e4c68..51d2a7fdf8aa 100644 --- a/crates/store/re_sdk_types/src/components/marker_shape.rs +++ b/crates/store/re_sdk_types/src/components/marker_shape.rs @@ -127,22 +127,16 @@ impl ::re_types_core::Loggable for MarkerShape { .with_context("rerun.components.MarkerShape#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::Circle)), - Some(2) => Ok(Some(Self::Diamond)), - Some(3) => Ok(Some(Self::Square)), - Some(4) => Ok(Some(Self::Cross)), - Some(5) => Ok(Some(Self::Plus)), - Some(6) => Ok(Some(Self::Up)), - Some(7) => Ok(Some(Self::Down)), - Some(8) => Ok(Some(Self::Left)), - Some(9) => Ok(Some(Self::Right)), - Some(10) => Ok(Some(Self::Asterisk)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.components.MarkerShape")?) 
@@ -167,6 +161,8 @@ impl std::fmt::Display for MarkerShape { } impl ::re_types_core::reflection::Enum for MarkerShape { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[ @@ -198,6 +194,13 @@ impl ::re_types_core::reflection::Enum for MarkerShape { Self::Asterisk => "`*`", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for MarkerShape { diff --git a/crates/store/re_sdk_types/src/components/mesh_face_rendering.rs b/crates/store/re_sdk_types/src/components/mesh_face_rendering.rs new file mode 100644 index 000000000000..bd83bddc42a8 --- /dev/null +++ b/crates/store/re_sdk_types/src/components/mesh_face_rendering.rs @@ -0,0 +1,181 @@ +// DO NOT EDIT! This file was auto-generated by crates/build/re_types_builder/src/codegen/rust/api.rs +// Based on "crates/store/re_sdk_types/definitions/rerun/components/mesh_face_rendering.fbs". + +#![allow(unused_braces)] +#![allow(unused_imports)] +#![allow(unused_parens)] +#![allow(clippy::allow_attributes)] +#![allow(clippy::clone_on_copy)] +#![allow(clippy::cloned_instead_of_copied)] +#![allow(clippy::map_flatten)] +#![allow(clippy::needless_question_mark)] +#![allow(clippy::new_without_default)] +#![allow(clippy::redundant_closure)] +#![allow(clippy::too_many_arguments)] +#![allow(clippy::too_many_lines)] +#![allow(clippy::wildcard_imports)] +#![allow(non_camel_case_types)] + +use ::re_types_core::SerializationResult; +use ::re_types_core::try_serialize_field; +use ::re_types_core::{ComponentBatch as _, SerializedComponentBatch}; +use ::re_types_core::{ComponentDescriptor, ComponentType}; +use ::re_types_core::{DeserializationError, DeserializationResult}; + +/// **Component**: Determines which faces of a mesh are rendered. 
+/// +/// For this purpose, we assume that the winding order of vertices in a mesh is +/// consistent and that front faces are defined as those with vertices in counter clockwise order. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, Default)] +#[repr(u8)] +pub enum MeshFaceRendering { + /// Show both back and front faces. + #[default] + DoubleSided = 1, + + /// Only front faces are shown. + /// + /// Front faces are assumed to have a counter clockwise vertex winding order on screen. + Front = 2, + + /// Only back faces are shown. + /// + /// Back faces are assumed to have a clockwise vertex winding order on screen. + Back = 3, +} + +impl ::re_types_core::Component for MeshFaceRendering { + #[inline] + fn name() -> ComponentType { + "rerun.components.MeshFaceRendering".into() + } +} + +::re_types_core::macros::impl_into_cow!(MeshFaceRendering); + +impl ::re_types_core::Loggable for MeshFaceRendering { + #[inline] + fn arrow_datatype() -> arrow::datatypes::DataType { + use arrow::datatypes::*; + DataType::UInt8 + } + + fn to_arrow_opt<'a>( + data: impl IntoIterator>>>, + ) -> SerializationResult + where + Self: Clone + 'a, + { + #![allow(clippy::manual_is_variant_and)] + use ::re_types_core::{Loggable as _, ResultExt as _, arrow_helpers::as_array_ref}; + use arrow::{array::*, buffer::*, datatypes::*}; + Ok({ + let (somes, data0): (Vec<_>, Vec<_>) = data + .into_iter() + .map(|datum| { + let datum: Option<::std::borrow::Cow<'a, Self>> = datum.map(Into::into); + let datum = datum.map(|datum| *datum as u8); + (datum.is_some(), datum) + }) + .unzip(); + let data0_validity: Option = { + let any_nones = somes.iter().any(|some| !*some); + any_nones.then(|| somes.into()) + }; + as_array_ref(PrimitiveArray::::new( + ScalarBuffer::from( + data0 + .into_iter() + .map(|v| v.unwrap_or_default()) + .collect::>(), + ), + data0_validity, + )) + }) + } + + fn from_arrow_opt( + arrow_data: &dyn arrow::array::Array, + ) -> DeserializationResult>> + where + Self: Sized, + { + use 
::re_types_core::{Loggable as _, ResultExt as _, arrow_zip_validity::ZipValidity}; + use arrow::{array::*, buffer::*, datatypes::*}; + Ok(arrow_data + .as_any() + .downcast_ref::() + .ok_or_else(|| { + let expected = Self::arrow_datatype(); + let actual = arrow_data.data_type().clone(); + DeserializationError::datatype_mismatch(expected, actual) + }) + .with_context("rerun.components.MeshFaceRendering#enum")? + .into_iter() + .map(|typ| match typ { + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), + None => Ok(None), + }) + .collect::>>>() + .with_context("rerun.components.MeshFaceRendering")?) + } +} + +impl std::fmt::Display for MeshFaceRendering { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::DoubleSided => write!(f, "DoubleSided"), + Self::Front => write!(f, "Front"), + Self::Back => write!(f, "Back"), + } + } +} + +impl ::re_types_core::reflection::Enum for MeshFaceRendering { + type Repr = u8; + + #[inline] + fn variants() -> &'static [Self] { + &[Self::DoubleSided, Self::Front, Self::Back] + } + + #[inline] + fn docstring_md(self) -> &'static str { + match self { + Self::DoubleSided => "Show both back and front faces.", + Self::Front => { + "Only front faces are shown.\n\nFront faces are assumed to have a counter clockwise vertex winding order on screen." + } + Self::Back => { + "Only back faces are shown.\n\nBack faces are assumed to have a clockwise vertex winding order on screen." 
+ } + } + } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } +} + +impl ::re_byte_size::SizeBytes for MeshFaceRendering { + #[inline] + fn heap_size_bytes(&self) -> u64 { + 0 + } + + #[inline] + fn is_pod() -> bool { + true + } +} diff --git a/crates/store/re_sdk_types/src/components/mod.rs b/crates/store/re_sdk_types/src/components/mod.rs index 84f077daa483..19d3504b43e7 100644 --- a/crates/store/re_sdk_types/src/components/mod.rs +++ b/crates/store/re_sdk_types/src/components/mod.rs @@ -7,6 +7,7 @@ mod annotation_context; mod axis_length; mod axis_length_ext; mod blob; +mod cell_size; mod channel_id; mod channel_message_counts; mod class_id; @@ -59,12 +60,14 @@ mod line_strip3d; mod line_strip3d_ext; mod linear_speed; mod magnification_filter; +mod magnification_filter_ext; mod marker_shape; mod marker_shape_ext; mod marker_size; mod marker_size_ext; mod media_type; mod media_type_ext; +mod mesh_face_rendering; mod name; mod name_ext; mod opacity; @@ -139,6 +142,7 @@ pub use self::albedo_factor::AlbedoFactor; pub use self::annotation_context::AnnotationContext; pub use self::axis_length::AxisLength; pub use self::blob::Blob; +pub use self::cell_size::CellSize; pub use self::channel_id::ChannelId; pub use self::channel_message_counts::ChannelMessageCounts; pub use self::class_id::ClassId; @@ -173,6 +177,7 @@ pub use self::magnification_filter::MagnificationFilter; pub use self::marker_shape::MarkerShape; pub use self::marker_size::MarkerSize; pub use self::media_type::MediaType; +pub use self::mesh_face_rendering::MeshFaceRendering; pub use self::name::Name; pub use self::opacity::Opacity; pub use self::pinhole_projection::PinholeProjection; diff --git a/crates/store/re_sdk_types/src/components/transform_relation.rs b/crates/store/re_sdk_types/src/components/transform_relation.rs index 20c974b21749..9e6b8aa31254 100644 --- 
a/crates/store/re_sdk_types/src/components/transform_relation.rs +++ b/crates/store/re_sdk_types/src/components/transform_relation.rs @@ -111,14 +111,16 @@ impl ::re_types_core::Loggable for TransformRelation { .with_context("rerun.components.TransformRelation#enum")? .into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::ParentFromChild)), - Some(2) => Ok(Some(Self::ChildFromParent)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.components.TransformRelation")?) @@ -135,6 +137,8 @@ impl std::fmt::Display for TransformRelation { } impl ::re_types_core::reflection::Enum for TransformRelation { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::ParentFromChild, Self::ChildFromParent] @@ -151,6 +155,13 @@ impl ::re_types_core::reflection::Enum for TransformRelation { } } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for TransformRelation { diff --git a/crates/store/re_sdk_types/src/components/video_codec.rs b/crates/store/re_sdk_types/src/components/video_codec.rs index 445fe9cb4e9c..a76600c0954f 100644 --- a/crates/store/re_sdk_types/src/components/video_codec.rs +++ b/crates/store/re_sdk_types/src/components/video_codec.rs @@ -140,15 +140,16 @@ impl ::re_types_core::Loggable for VideoCodec { .with_context("rerun.components.VideoCodec#enum")? 
.into_iter() .map(|typ| match typ { - Some(1635135537) => Ok(Some(Self::AV1)), - Some(1635148593) => Ok(Some(Self::H264)), - Some(1751479857) => Ok(Some(Self::H265)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.components.VideoCodec")?) @@ -166,6 +167,8 @@ impl std::fmt::Display for VideoCodec { } impl ::re_types_core::reflection::Enum for VideoCodec { + type Repr = u32; + #[inline] fn variants() -> &'static [Self] { &[Self::AV1, Self::H264, Self::H265] @@ -185,6 +188,16 @@ impl ::re_types_core::reflection::Enum for VideoCodec { } } } + + #[inline] + fn try_from_integer(value: u32) -> Option { + match value { + 0x61763031 => Some(Self::AV1), + 0x61766331 => Some(Self::H264), + 0x68657631 => Some(Self::H265), + _ => None, + } + } } impl ::re_byte_size::SizeBytes for VideoCodec { diff --git a/crates/store/re_sdk_types/src/components/video_codec_ext.rs b/crates/store/re_sdk_types/src/components/video_codec_ext.rs index 93b0150d27e2..238c11642a02 100644 --- a/crates/store/re_sdk_types/src/components/video_codec_ext.rs +++ b/crates/store/re_sdk_types/src/components/video_codec_ext.rs @@ -1,5 +1,7 @@ +use crate::components::VideoCodec; + #[cfg(feature = "video")] -impl TryFrom for crate::components::VideoCodec { +impl TryFrom for VideoCodec { type Error = String; fn try_from(value: re_video::VideoCodec) -> Result { @@ -7,16 +9,16 @@ impl TryFrom for crate::components::VideoCodec { re_video::VideoCodec::H264 => Ok(Self::H264), re_video::VideoCodec::H265 => Ok(Self::H265), re_video::VideoCodec::AV1 => Ok(Self::AV1), - // TODO(#10186): Add support for VP9. 
re_video::VideoCodec::VP8 | re_video::VideoCodec::VP9 => Err(format!( "Video codec {value:?} is not supported for VideoStream yet", )), + re_video::VideoCodec::ImageSequence(_) => Err("Not a real video".to_owned()), } } } #[cfg(feature = "video")] -impl From for re_video::VideoCodec { +impl From for re_video::VideoCodec { fn from(val: crate::components::VideoCodec) -> Self { match val { crate::components::VideoCodec::H264 => Self::H264, @@ -28,3 +30,29 @@ impl From for re_video::VideoCodec { } } } + +impl VideoCodec { + /// Convert the base representation to this enum. + pub fn try_from_u32(value: u32) -> Option { + match value { + 0x61763031 => Some(Self::AV1), + 0x61766331 => Some(Self::H264), + 0x68657631 => Some(Self::H265), + _ => None, + } + } +} + +#[test] +fn test_video_codec_u32_conversion() { + use re_types_core::reflection::Enum as _; + + let all = VideoCodec::variants(); + + for codec in all { + let repr = *codec as u32; + let codec_from_repr = VideoCodec::try_from_u32(repr).unwrap(); + + assert_eq!(codec_from_repr, *codec); + } +} diff --git a/crates/store/re_sdk_types/src/datatypes/channel_datatype.rs b/crates/store/re_sdk_types/src/datatypes/channel_datatype.rs index cbb949f13dfa..8596c3e199db 100644 --- a/crates/store/re_sdk_types/src/datatypes/channel_datatype.rs +++ b/crates/store/re_sdk_types/src/datatypes/channel_datatype.rs @@ -125,23 +125,16 @@ impl ::re_types_core::Loggable for ChannelDatatype { .with_context("rerun.datatypes.ChannelDatatype#enum")? 
.into_iter() .map(|typ| match typ { - Some(6) => Ok(Some(Self::U8)), - Some(7) => Ok(Some(Self::I8)), - Some(8) => Ok(Some(Self::U16)), - Some(9) => Ok(Some(Self::I16)), - Some(10) => Ok(Some(Self::U32)), - Some(11) => Ok(Some(Self::I32)), - Some(12) => Ok(Some(Self::U64)), - Some(13) => Ok(Some(Self::I64)), - Some(33) => Ok(Some(Self::F16)), - Some(34) => Ok(Some(Self::F32)), - Some(35) => Ok(Some(Self::F64)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.datatypes.ChannelDatatype")?) @@ -167,6 +160,8 @@ impl std::fmt::Display for ChannelDatatype { } impl ::re_types_core::reflection::Enum for ChannelDatatype { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[ @@ -200,6 +195,24 @@ impl ::re_types_core::reflection::Enum for ChannelDatatype { Self::F64 => "64-bit IEEE-754 floating point, also known as `double`.", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + match value { + 6 => Some(Self::U8), + 7 => Some(Self::I8), + 8 => Some(Self::U16), + 9 => Some(Self::I16), + 10 => Some(Self::U32), + 11 => Some(Self::I32), + 12 => Some(Self::U64), + 13 => Some(Self::I64), + 33 => Some(Self::F16), + 34 => Some(Self::F32), + 35 => Some(Self::F64), + _ => None, + } + } } impl ::re_byte_size::SizeBytes for ChannelDatatype { diff --git a/crates/store/re_sdk_types/src/datatypes/color_model.rs b/crates/store/re_sdk_types/src/datatypes/color_model.rs index 7035147dd104..73b08f017c81 100644 --- a/crates/store/re_sdk_types/src/datatypes/color_model.rs +++ b/crates/store/re_sdk_types/src/datatypes/color_model.rs @@ -111,17 +111,16 @@ impl ::re_types_core::Loggable for ColorModel { .with_context("rerun.datatypes.ColorModel#enum")? 
.into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::L)), - Some(2) => Ok(Some(Self::RGB)), - Some(3) => Ok(Some(Self::RGBA)), - Some(4) => Ok(Some(Self::BGR)), - Some(5) => Ok(Some(Self::BGRA)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.datatypes.ColorModel")?) @@ -141,6 +140,8 @@ impl std::fmt::Display for ColorModel { } impl ::re_types_core::reflection::Enum for ColorModel { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::L, Self::RGB, Self::RGBA, Self::BGR, Self::BGRA] @@ -156,6 +157,13 @@ impl ::re_types_core::reflection::Enum for ColorModel { Self::BGRA => "Blue, Green, Red, Alpha", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for ColorModel { diff --git a/crates/store/re_sdk_types/src/datatypes/pixel_format.rs b/crates/store/re_sdk_types/src/datatypes/pixel_format.rs index 5253402833e0..a3e9784f7da3 100644 --- a/crates/store/re_sdk_types/src/datatypes/pixel_format.rs +++ b/crates/store/re_sdk_types/src/datatypes/pixel_format.rs @@ -190,22 +190,16 @@ impl ::re_types_core::Loggable for PixelFormat { .with_context("rerun.datatypes.PixelFormat#enum")? 
.into_iter() .map(|typ| match typ { - Some(20) => Ok(Some(Self::Y_U_V12_LimitedRange)), - Some(26) => Ok(Some(Self::NV12)), - Some(27) => Ok(Some(Self::YUY2)), - Some(30) => Ok(Some(Self::Y8_FullRange)), - Some(39) => Ok(Some(Self::Y_U_V24_LimitedRange)), - Some(40) => Ok(Some(Self::Y_U_V24_FullRange)), - Some(41) => Ok(Some(Self::Y8_LimitedRange)), - Some(44) => Ok(Some(Self::Y_U_V12_FullRange)), - Some(49) => Ok(Some(Self::Y_U_V16_LimitedRange)), - Some(50) => Ok(Some(Self::Y_U_V16_FullRange)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.datatypes.PixelFormat")?) @@ -230,6 +224,8 @@ impl std::fmt::Display for PixelFormat { } impl ::re_types_core::reflection::Enum for PixelFormat { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[ @@ -281,6 +277,23 @@ impl ::re_types_core::reflection::Enum for PixelFormat { } } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + match value { + 20 => Some(Self::Y_U_V12_LimitedRange), + 26 => Some(Self::NV12), + 27 => Some(Self::YUY2), + 30 => Some(Self::Y8_FullRange), + 39 => Some(Self::Y_U_V24_LimitedRange), + 40 => Some(Self::Y_U_V24_FullRange), + 41 => Some(Self::Y8_LimitedRange), + 44 => Some(Self::Y_U_V12_FullRange), + 49 => Some(Self::Y_U_V16_LimitedRange), + 50 => Some(Self::Y_U_V16_FullRange), + _ => None, + } + } } impl ::re_byte_size::SizeBytes for PixelFormat { diff --git a/crates/store/re_sdk_types/src/datatypes/tensor_buffer.rs b/crates/store/re_sdk_types/src/datatypes/tensor_buffer.rs index d0d37395e6bb..1e0440638e40 100644 --- a/crates/store/re_sdk_types/src/datatypes/tensor_buffer.rs +++ b/crates/store/re_sdk_types/src/datatypes/tensor_buffer.rs @@ -67,7 +67,7 @@ impl 
::re_types_core::Loggable for TensorBuffer { fn arrow_datatype() -> arrow::datatypes::DataType { use arrow::datatypes::*; DataType::Union( - UnionFields::new( + UnionFields::try_new( vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], vec![ Field::new("_null_markers", DataType::Null, true), @@ -171,7 +171,8 @@ impl ::re_types_core::Loggable for TensorBuffer { false, ), ], - ), + ) + .expect("UnionFields::try_new should be infallible"), UnionMode::Dense, ) } @@ -750,7 +751,7 @@ impl ::re_types_core::Loggable for TensorBuffer { re_log::debug_assert_eq!(field_type_ids.len(), fields.len()); re_log::debug_assert_eq!(fields.len(), children.len()); as_array_ref(UnionArray::try_new( - UnionFields::new(field_type_ids, fields), + UnionFields::try_new(field_type_ids, fields)?, ScalarBuffer::from(type_ids), Some(offsets), children, diff --git a/crates/store/re_sdk_types/src/image.rs b/crates/store/re_sdk_types/src/image.rs index 1eeac3b9f2ef..ea4d060ae679 100644 --- a/crates/store/re_sdk_types/src/image.rs +++ b/crates/store/re_sdk_types/src/image.rs @@ -251,7 +251,7 @@ pub fn find_non_empty_dim_indices(shape: &[u64]) -> SmallVec<[usize; 4]> { let mut non_unit_indices = shape .iter() .enumerate() - .filter_map(|(ind, &dim)| if dim != 1 { Some(ind) } else { None }); + .filter_map(|(ind, &dim)| if dim == 1 { None } else { Some(ind) }); // 0 is always a valid index. let mut min = non_unit_indices.next().unwrap_or(0); diff --git a/crates/store/re_sdk_types/src/lib.rs b/crates/store/re_sdk_types/src/lib.rs index 15228a5a0271..46cf4ed6a2d5 100644 --- a/crates/store/re_sdk_types/src/lib.rs +++ b/crates/store/re_sdk_types/src/lib.rs @@ -308,7 +308,7 @@ pub use visualizer::{VisualizableArchetype, Visualizer}; // Has to live here otherwise we can't export it from `re_sdk_types`. 
mod colormap_category; -pub use colormap_category::ColormapCategory; +pub use colormap_category::{ColormapCategory, ColormapSelection}; #[cfg(feature = "testing")] pub mod testing; diff --git a/crates/store/re_sdk_types/src/reflection/mod.rs b/crates/store/re_sdk_types/src/reflection/mod.rs index 19a2d63da356..a0e7003ab013 100644 --- a/crates/store/re_sdk_types/src/reflection/mod.rs +++ b/crates/store/re_sdk_types/src/reflection/mod.rs @@ -678,6 +678,17 @@ fn generate_component_reflection() -> Result::name(), + ComponentReflection { + docstring_md: "The metric size of one grid cell in local scene units.\n\nE.g. for 2D grid maps, this is the physical size represented by a single pixel or cell.", + deprecation_summary: None, + custom_placeholder: None, + datatype: CellSize::arrow_datatype(), + is_enum: false, + verify_arrow_array: CellSize::verify_arrow_array, + }, + ), ( ::name(), ComponentReflection { @@ -1022,7 +1033,7 @@ fn generate_component_reflection() -> Result::name(), ComponentReflection { - docstring_md: "Filter used when magnifying an image/texture such that a single pixel/texel is displayed as multiple pixels on screen.", + docstring_md: "Filter used when a single texel/pixel of an image is displayed larger than a single screen pixel.\n\nThis happens when zooming into an image, when displaying a low-resolution image in a large area,\nor when viewing an image up close in 3D space.", deprecation_summary: None, custom_placeholder: Some(MagnificationFilter::default().to_arrow()?), datatype: MagnificationFilter::arrow_datatype(), @@ -1063,6 +1074,17 @@ fn generate_component_reflection() -> Result::name(), + ComponentReflection { + docstring_md: "Determines which faces of a mesh are rendered.\n\nFor this purpose, we assume that the winding order of vertices in a mesh is\nconsistent and that front faces are defined as those with vertices in counter clockwise order.", + deprecation_summary: None, + custom_placeholder: 
Some(MeshFaceRendering::default().to_arrow()?), + datatype: MeshFaceRendering::arrow_datatype(), + is_enum: true, + verify_arrow_array: MeshFaceRendering::verify_arrow_array, + }, + ), ( ::name(), ComponentReflection { @@ -1510,7 +1532,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Vectors", component_type: "rerun.components.Vector2D".into(), docstring_md: "All the vectors for each arrow in the batch.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "origins", @@ -1577,7 +1599,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Vectors", component_type: "rerun.components.Vector3D".into(), docstring_md: "All the vectors for each arrow in the batch.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "origins", @@ -1733,7 +1755,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Half sizes", component_type: "rerun.components.HalfSize2D".into(), docstring_md: "All half-extents that make up the batch of boxes.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "centers", @@ -1800,7 +1822,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Half sizes", component_type: "rerun.components.HalfSize3D".into(), docstring_md: "All half-extents that make up the batch of boxes.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "centers", @@ -1881,14 +1903,14 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Lengths", component_type: "rerun.components.Length".into(), docstring_md: "Lengths of the capsules, defined as the 
distance between the centers of the endcaps.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "radii", display_name: "Radii", component_type: "rerun.components.Radius".into(), docstring_md: "Radii of the capsules.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "translations", @@ -1983,7 +2005,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { name: "frame", display_name: "Frame", component_type: "rerun.components.TransformFrameId".into(), - docstring_md: "The coordinate frame to use for the current entity.", + docstring_md: "The coordinate frame to use for the current entity.\n\nNote that empty strings are not valid transform frame IDs.", flags: ArchetypeFieldFlags::REQUIRED, }], }, @@ -2001,14 +2023,14 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Lengths", component_type: "rerun.components.Length".into(), docstring_md: "The total axial length of the cylinder, measured as the straight-line distance between the centers of its two endcaps.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "radii", display_name: "Radii", component_type: "rerun.components.Radius".into(), docstring_md: "Radii of the cylinders.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "centers", @@ -2133,6 +2155,13 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { docstring_md: "An optional floating point value that specifies the 2D drawing order, used only if the depth image is shown as a 2D image.\n\nObjects with higher values are drawn on top of those with lower values.\nDefaults to `-20.0`.", flags: 
ArchetypeFieldFlags::UI_EDITABLE, }, + ArchetypeFieldReflection { + name: "magnification_filter", + display_name: "Magnification filter", + component_type: "rerun.components.MagnificationFilter".into(), + docstring_md: "Optional filter used when a texel is magnified (displayed larger than a screen pixel) in 2D views.\n\nThe filter is applied to the scalar values *before* they are mapped to color via the colormap.\n\nHas no effect in 3D views.", + flags: ArchetypeFieldFlags::UI_EDITABLE, + }, ], }, ), @@ -2149,7 +2178,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Half sizes", component_type: "rerun.components.HalfSize3D".into(), docstring_md: "For each ellipsoid, half of its size on its three axes.\n\nIf all components are equal, then it is a sphere with that radius.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "centers", @@ -2274,6 +2303,13 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { docstring_md: "Optional 2D draw order.", flags: ArchetypeFieldFlags::UI_EDITABLE, }, + ArchetypeFieldReflection { + name: "magnification_filter", + display_name: "Magnification filter", + component_type: "rerun.components.MagnificationFilter".into(), + docstring_md: "Optional filter used when a texel is magnified (displayed larger than a screen pixel) in 2D views.\n\nThe filter is applied to the scalar values *before* they are mapped to color via the colormap.\n\nHas no effect in 3D views.", + flags: ArchetypeFieldFlags::UI_EDITABLE, + }, ], }, ), @@ -2313,6 +2349,13 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { docstring_md: "An optional floating point value that specifies the 2D drawing order.\n\nObjects with higher values are drawn on top of those with lower values.", flags: ArchetypeFieldFlags::UI_EDITABLE, }, + ArchetypeFieldReflection { + name: "magnification_filter", + display_name: 
"Magnification filter", + component_type: "rerun.components.MagnificationFilter".into(), + docstring_md: "Optional filter used when a texel is magnified (displayed larger than a screen pixel).", + flags: ArchetypeFieldFlags::UI_EDITABLE, + }, ], }, ), @@ -2361,7 +2404,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Positions", component_type: "rerun.components.LatLon".into(), docstring_md: "The [EPSG:4326](https://epsg.io/4326) coordinates for the points (North/East-positive degrees).", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "radii", @@ -2465,6 +2508,80 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { ], }, ), + ( + ArchetypeName::new("rerun.archetypes.GridMap"), + ArchetypeReflection { + display_name: "Grid map", + deprecation_summary: None, + scope: None, + view_types: &["Spatial3DView", "Spatial2DView"], + fields: vec![ + ArchetypeFieldReflection { + name: "data", + display_name: "Data", + component_type: "rerun.components.ImageBuffer".into(), + docstring_md: "The raw grid data.", + flags: ArchetypeFieldFlags::REQUIRED, + }, + ArchetypeFieldReflection { + name: "format", + display_name: "Format", + component_type: "rerun.components.ImageFormat".into(), + docstring_md: "The format of the grid's image data.", + flags: ArchetypeFieldFlags::REQUIRED, + }, + ArchetypeFieldReflection { + name: "cell_size", + display_name: "Cell size", + component_type: "rerun.components.CellSize".into(), + docstring_md: "The scene unit size of a single grid cell (e.g. 
m / pixel).", + flags: ArchetypeFieldFlags::REQUIRED, + }, + ArchetypeFieldReflection { + name: "translation", + display_name: "Translation", + component_type: "rerun.components.Translation3D".into(), + docstring_md: "Translation of the lower-left corner of the grid map in space.\n\nTogether with [`components.RotationAxisAngle`](https://rerun.io/docs/reference/types/components/rotation_axis_angle) or [`components.RotationQuat`](https://rerun.io/docs/reference/types/components/rotation_quat), this defines the pose of the\nlower-left image corner relative to the map's parent coordinate frame.\n\nIf not set, the lower-left image corner is placed at origin of the map's parent coordinate frame.", + flags: ArchetypeFieldFlags::empty(), + }, + ArchetypeFieldReflection { + name: "rotation_axis_angle", + display_name: "Rotation axis angle", + component_type: "rerun.components.RotationAxisAngle".into(), + docstring_md: "Rotation of the lower-left corner of the grid map in space via axis + angle.\n\nTogether with [`components.Translation3D`](https://rerun.io/docs/reference/types/components/translation3d), this defines the pose of the\nlower-left image corner relative to the map's parent coordinate frame.\n\nNote: either this or [`components.RotationQuat`](https://rerun.io/docs/reference/types/components/rotation_quat) can be set to specify the grid map's rotation, but not both.\nIf both this and [`components.RotationQuat`](https://rerun.io/docs/reference/types/components/rotation_quat) are set, this is ignored in favor of the quaternion.", + flags: ArchetypeFieldFlags::empty(), + }, + ArchetypeFieldReflection { + name: "quaternion", + display_name: "Quaternion", + component_type: "rerun.components.RotationQuat".into(), + docstring_md: "Rotation of the lower-left corner of the grid map in space via quaternion.\n\nTogether with [`components.Translation3D`](https://rerun.io/docs/reference/types/components/translation3d), this defines the pose of the\nlower-left image corner 
relative to the map's parent coordinate frame.", + flags: ArchetypeFieldFlags::empty(), + }, + ArchetypeFieldReflection { + name: "opacity", + display_name: "Opacity", + component_type: "rerun.components.Opacity".into(), + docstring_md: "Opacity of the grid map texture after all image decoding and colormap application.\n\nDefaults to 1.0 (fully opaque).", + flags: ArchetypeFieldFlags::UI_EDITABLE, + }, + ArchetypeFieldReflection { + name: "draw_order", + display_name: "Draw order", + component_type: "rerun.components.DrawOrder".into(), + docstring_md: "Optional draw order for layering multiple grid maps that overlap in space.\n\nHigher values are drawn on top of lower values.", + flags: ArchetypeFieldFlags::UI_EDITABLE, + }, + ArchetypeFieldReflection { + name: "colormap", + display_name: "Colormap", + component_type: "rerun.components.Colormap".into(), + docstring_md: "Colormap to use for rendering single-channel grid maps.\n\nIf not set, the grid map is shown using the underlying [`components.ImageFormat`](https://rerun.io/docs/reference/types/components/image_format)\ninterpretation.", + flags: ArchetypeFieldFlags::UI_EDITABLE, + }, + ], + }, + ), ( ArchetypeName::new("rerun.archetypes.Image"), ArchetypeReflection { @@ -2501,6 +2618,13 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { docstring_md: "An optional floating point value that specifies the 2D drawing order.\n\nObjects with higher values are drawn on top of those with lower values.\nDefaults to `-10.0`.", flags: ArchetypeFieldFlags::UI_EDITABLE, }, + ArchetypeFieldReflection { + name: "magnification_filter", + display_name: "Magnification filter", + component_type: "rerun.components.MagnificationFilter".into(), + docstring_md: "Optional filter used when a texel is magnified (displayed larger than a screen pixel).", + flags: ArchetypeFieldFlags::UI_EDITABLE, + }, ], }, ), @@ -2881,6 +3005,13 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { docstring_md: "A color 
multiplier applied to the whole mesh.\n\nAlpha channel governs the overall mesh transparency.", flags: ArchetypeFieldFlags::UI_EDITABLE, }, + ArchetypeFieldReflection { + name: "face_rendering", + display_name: "Face rendering", + component_type: "rerun.components.MeshFaceRendering".into(), + docstring_md: "Determines which faces of the mesh are rendered.\n\nThe default is [`components.MeshFaceRendering#DoubleSided`](https://rerun.io/docs/reference/types/components/mesh_face_rendering), meaning both front and back faces are shown.", + flags: ArchetypeFieldFlags::UI_EDITABLE, + }, ArchetypeFieldReflection { name: "albedo_texture_buffer", display_name: "Albedo texture buffer", @@ -2985,7 +3116,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Positions", component_type: "rerun.components.Position2D".into(), docstring_md: "All the 2D positions at which the point cloud shows points.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "radii", @@ -3052,7 +3183,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Positions", component_type: "rerun.components.Position3D".into(), docstring_md: "All the 3D positions at which the point cloud shows points.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "radii", @@ -3278,6 +3409,22 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { ], }, ), + ( + ArchetypeName::new("rerun.archetypes.Status"), + ArchetypeReflection { + display_name: "Status", + deprecation_summary: None, + scope: None, + view_types: &["StatusView"], + fields: vec![ArchetypeFieldReflection { + name: "status", + display_name: "Status", + component_type: "rerun.components.Text".into(), + docstring_md: "The new status value. 
A `null` status is ignored, it can be used to partially update a multi-instance status array.", + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, + }], + }, + ), ( ArchetypeName::new("rerun.archetypes.Tensor"), ArchetypeReflection { @@ -3316,7 +3463,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Text", component_type: "rerun.components.Text".into(), docstring_md: "Contents of the text document.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "media_type", @@ -3341,7 +3488,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Text", component_type: "rerun.components.Text".into(), docstring_md: "The body of the message.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "level", @@ -3465,7 +3612,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Timestamp", component_type: "rerun.components.VideoTimestamp".into(), docstring_md: "References the closest video frame to this timestamp.\n\nNote that this uses the closest video frame instead of the latest at this timestamp\nin order to be more forgiving of rounding errors for inprecise timestamp types.\n\nTimestamps are relative to the start of the video, i.e. 
a timestamp of 0 always corresponds to the first frame.\nThis is oftentimes equivalent to presentation timestamps (known as PTS), but in the presence of B-frames\n(bidirectionally predicted frames) there may be an offset on the first presentation timestamp in the video.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "video_reference", @@ -3558,7 +3705,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Instruction ids", component_type: "rerun.blueprint.components.VisualizerInstructionId".into(), docstring_md: "Id's of the visualizers that should be active.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }], }, ), @@ -3575,7 +3722,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Kind", component_type: "rerun.blueprint.components.BackgroundKind".into(), docstring_md: "The type of the background.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "color", @@ -3600,7 +3747,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Container kind", component_type: "rerun.blueprint.components.ContainerKind".into(), docstring_md: "The class of the view.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "display_name", @@ -3694,7 +3841,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { name: "select", display_name: "Select", component_type: "rerun.blueprint.components.SelectedColumns".into(), - docstring_md: "Selected columns. If unset, all columns are selected.", + docstring_md: "Selected columns. 
If unset, only the active timeline and all component columns are selected.", flags: ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { @@ -4278,7 +4425,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { name: "timeline_columns", display_name: "Timeline columns", component_type: "rerun.blueprint.components.TimelineColumn".into(), - docstring_md: "What timeline columns to show.\n\nDefaults to displaying all timelines.", + docstring_md: "What timeline columns to show.\n\nDefaults to displaying only the active timeline.", flags: ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { @@ -4428,7 +4575,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Class identifier", component_type: "rerun.blueprint.components.ViewClass".into(), docstring_md: "The class of the view.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "display_name", @@ -4529,7 +4676,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Ranges", component_type: "rerun.blueprint.components.VisibleTimeRange".into(), docstring_md: "The time ranges to show for each timeline unless specified otherwise on a per-entity basis.\n\nIf a timeline is specified more than once, the first entry will be used.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }], }, ), @@ -4545,7 +4692,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { display_name: "Range", component_type: "rerun.blueprint.components.VisualBounds2D".into(), docstring_md: "Controls the visible range of a 2D view.\n\nUse this to control pan & zoom of the view.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }], }, ), @@ -4562,7 +4709,7 @@ fn generate_archetype_reflection() -> ArchetypeReflectionMap { 
display_name: "Visualizer type", component_type: "rerun.blueprint.components.VisualizerType".into(), docstring_md: "The type of the visualizer.", - flags: ArchetypeFieldFlags::REQUIRED, + flags: ArchetypeFieldFlags::REQUIRED | ArchetypeFieldFlags::UI_EDITABLE, }, ArchetypeFieldReflection { name: "component_map", diff --git a/crates/store/re_sdk_types/src/testing/components/affix_fuzzer15.rs b/crates/store/re_sdk_types/src/testing/components/affix_fuzzer15.rs index a5e68eb7a9f1..2b869e8c56b9 100644 --- a/crates/store/re_sdk_types/src/testing/components/affix_fuzzer15.rs +++ b/crates/store/re_sdk_types/src/testing/components/affix_fuzzer15.rs @@ -38,7 +38,7 @@ impl ::re_types_core::Loggable for AffixFuzzer15 { fn arrow_datatype() -> arrow::datatypes::DataType { use arrow::datatypes::*; DataType::Union( - UnionFields::new( + UnionFields::try_new( vec![0, 1, 2, 3, 4], vec![ Field::new("_null_markers", DataType::Null, true), @@ -62,7 +62,8 @@ impl ::re_types_core::Loggable for AffixFuzzer15 { ), Field::new("empty_variant", DataType::Null, true), ], - ), + ) + .expect("UnionFields::try_new should be infallible"), UnionMode::Dense, ) } diff --git a/crates/store/re_sdk_types/src/testing/datatypes/affix_fuzzer3.rs b/crates/store/re_sdk_types/src/testing/datatypes/affix_fuzzer3.rs index 7e9f838c283e..dc2d2a1317af 100644 --- a/crates/store/re_sdk_types/src/testing/datatypes/affix_fuzzer3.rs +++ b/crates/store/re_sdk_types/src/testing/datatypes/affix_fuzzer3.rs @@ -36,7 +36,7 @@ impl ::re_types_core::Loggable for AffixFuzzer3 { fn arrow_datatype() -> arrow::datatypes::DataType { use arrow::datatypes::*; DataType::Union( - UnionFields::new( + UnionFields::try_new( vec![0, 1, 2, 3, 4], vec![ Field::new("_null_markers", DataType::Null, true), @@ -60,7 +60,8 @@ impl ::re_types_core::Loggable for AffixFuzzer3 { ), Field::new("empty_variant", DataType::Null, true), ], - ), + ) + .expect("UnionFields::try_new should be infallible"), UnionMode::Dense, ) } @@ -240,7 +241,7 @@ impl 
::re_types_core::Loggable for AffixFuzzer3 { re_log::debug_assert_eq!(field_type_ids.len(), fields.len()); re_log::debug_assert_eq!(fields.len(), children.len()); as_array_ref(UnionArray::try_new( - UnionFields::new(field_type_ids, fields), + UnionFields::try_new(field_type_ids, fields)?, ScalarBuffer::from(type_ids), Some(offsets), children, diff --git a/crates/store/re_sdk_types/src/testing/datatypes/affix_fuzzer4.rs b/crates/store/re_sdk_types/src/testing/datatypes/affix_fuzzer4.rs index 5538d4279680..81768d69de02 100644 --- a/crates/store/re_sdk_types/src/testing/datatypes/affix_fuzzer4.rs +++ b/crates/store/re_sdk_types/src/testing/datatypes/affix_fuzzer4.rs @@ -34,7 +34,7 @@ impl ::re_types_core::Loggable for AffixFuzzer4 { fn arrow_datatype() -> arrow::datatypes::DataType { use arrow::datatypes::*; DataType::Union( - UnionFields::new( + UnionFields::try_new( vec![0, 1, 2], vec![ Field::new("_null_markers", DataType::Null, true), @@ -53,7 +53,8 @@ impl ::re_types_core::Loggable for AffixFuzzer4 { false, ), ], - ), + ) + .expect("UnionFields::try_new should be infallible"), UnionMode::Dense, ) } @@ -181,7 +182,7 @@ impl ::re_types_core::Loggable for AffixFuzzer4 { re_log::debug_assert_eq!(field_type_ids.len(), fields.len()); re_log::debug_assert_eq!(fields.len(), children.len()); as_array_ref(UnionArray::try_new( - UnionFields::new(field_type_ids, fields), + UnionFields::try_new(field_type_ids, fields)?, ScalarBuffer::from(type_ids), Some(offsets), children, diff --git a/crates/store/re_sdk_types/src/testing/datatypes/enum_test.rs b/crates/store/re_sdk_types/src/testing/datatypes/enum_test.rs index f02725d01698..97ea5be886b5 100644 --- a/crates/store/re_sdk_types/src/testing/datatypes/enum_test.rs +++ b/crates/store/re_sdk_types/src/testing/datatypes/enum_test.rs @@ -108,18 +108,16 @@ impl ::re_types_core::Loggable for EnumTest { .with_context("rerun.testing.datatypes.EnumTest#enum")? 
.into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::Up)), - Some(2) => Ok(Some(Self::Down)), - Some(3) => Ok(Some(Self::Right)), - Some(4) => Ok(Some(Self::Left)), - Some(5) => Ok(Some(Self::Forward)), - Some(6) => Ok(Some(Self::Back)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.testing.datatypes.EnumTest")?) @@ -140,6 +138,8 @@ impl std::fmt::Display for EnumTest { } impl ::re_types_core::reflection::Enum for EnumTest { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[ @@ -163,6 +163,13 @@ impl ::re_types_core::reflection::Enum for EnumTest { Self::Back => "Baby's got it.", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + Self::variants() + .get((value as usize).wrapping_sub(1)) + .copied() + } } impl ::re_byte_size::SizeBytes for EnumTest { diff --git a/crates/store/re_sdk_types/src/testing/datatypes/valued_enum.rs b/crates/store/re_sdk_types/src/testing/datatypes/valued_enum.rs index e7cec0b0fcdd..154be20eca15 100644 --- a/crates/store/re_sdk_types/src/testing/datatypes/valued_enum.rs +++ b/crates/store/re_sdk_types/src/testing/datatypes/valued_enum.rs @@ -101,16 +101,16 @@ impl ::re_types_core::Loggable for ValuedEnum { .with_context("rerun.testing.datatypes.ValuedEnum#enum")? 
.into_iter() .map(|typ| match typ { - Some(1) => Ok(Some(Self::One)), - Some(2) => Ok(Some(Self::Two)), - Some(3) => Ok(Some(Self::Three)), - Some(42) => Ok(Some(Self::TheAnswer)), + Some(val) => ::try_from_integer(val) + .map(Some) + .ok_or_else(|| { + DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + val as _, + ) + }), None => Ok(None), - Some(invalid) => Err(DeserializationError::missing_union_arm( - Self::arrow_datatype(), - "", - invalid as _, - )), }) .collect::>>>() .with_context("rerun.testing.datatypes.ValuedEnum")?) @@ -129,6 +129,8 @@ impl std::fmt::Display for ValuedEnum { } impl ::re_types_core::reflection::Enum for ValuedEnum { + type Repr = u8; + #[inline] fn variants() -> &'static [Self] { &[Self::One, Self::Two, Self::Three, Self::TheAnswer] @@ -143,6 +145,17 @@ impl ::re_types_core::reflection::Enum for ValuedEnum { Self::TheAnswer => "The answer to life, the universe, and everything.", } } + + #[inline] + fn try_from_integer(value: u8) -> Option { + match value { + 1 => Some(Self::One), + 2 => Some(Self::Two), + 3 => Some(Self::Three), + 42 => Some(Self::TheAnswer), + _ => None, + } + } } impl ::re_byte_size::SizeBytes for ValuedEnum { diff --git a/crates/store/re_sdk_types/tests/types/depth_image.rs b/crates/store/re_sdk_types/tests/types/depth_image.rs index 21fbe0370799..28e89814d74b 100644 --- a/crates/store/re_sdk_types/tests/types/depth_image.rs +++ b/crates/store/re_sdk_types/tests/types/depth_image.rs @@ -22,6 +22,7 @@ fn depth_image_roundtrip() { colormap: None, point_fill_ratio: None, depth_range: None, + magnification_filter: None, }]; let all_arch_serialized = [ diff --git a/crates/store/re_sdk_types/tests/types/dynamic_archetype.rs b/crates/store/re_sdk_types/tests/types/dynamic_archetype.rs index e2135fb5fdb9..8b85c4f9cdd8 100644 --- a/crates/store/re_sdk_types/tests/types/dynamic_archetype.rs +++ b/crates/store/re_sdk_types/tests/types/dynamic_archetype.rs @@ -1,7 +1,10 @@ use std::collections::BTreeSet; 
-use re_log_types::datatypes::Utf8; -use re_log_types::{DynamicArchetype, components}; +use re_sdk_types::datatypes::Utf8; +use re_sdk_types::reflection::ComponentDescriptorExt as _; +use re_sdk_types::{ + AsComponents as _, Component as _, ComponentDescriptor, DynamicArchetype, components, +}; #[test] fn with_archetype() { diff --git a/crates/store/re_sdk_types/tests/types/main.rs b/crates/store/re_sdk_types/tests/types/main.rs index 7b4b7461628c..57931926ee57 100644 --- a/crates/store/re_sdk_types/tests/types/main.rs +++ b/crates/store/re_sdk_types/tests/types/main.rs @@ -9,6 +9,8 @@ mod box2d; mod box3d; mod clear; mod depth_image; +mod dynamic_archetype; +mod image; mod line_strips2d; mod line_strips3d; mod mesh3d; diff --git a/crates/store/re_sdk_types/tests/types/mesh3d.rs b/crates/store/re_sdk_types/tests/types/mesh3d.rs index f93416ddedb2..d4b9488e2a84 100644 --- a/crates/store/re_sdk_types/tests/types/mesh3d.rs +++ b/crates/store/re_sdk_types/tests/types/mesh3d.rs @@ -47,6 +47,7 @@ fn roundtrip() { ClassId::from(127), // ] .serialized(Mesh3D::descriptor_class_ids()), + face_rendering: None, }; let arch = Mesh3D::new([[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]) diff --git a/crates/store/re_server/Cargo.toml b/crates/store/re_server/Cargo.toml index 8e0b27bb622a..6de283920f4a 100644 --- a/crates/store/re_server/Cargo.toml +++ b/crates/store/re_server/Cargo.toml @@ -54,7 +54,6 @@ axum.workspace = true bincode.workspace = true bytes.workspace = true cfg-if.workspace = true -chrono = { workspace = true, features = ["serde"] } clap = { workspace = true, features = ["derive", "env"] } datafusion.workspace = true futures.workspace = true @@ -63,6 +62,7 @@ http-body.workspace = true itertools.workspace = true jiff.workspace = true nohash-hasher.workspace = true +opentelemetry.workspace = true parking_lot.workspace = true serde.workspace = true tempfile.workspace = true @@ -73,7 +73,6 @@ tokio-util.workspace = true tonic-web.workspace = true tonic.workspace = true 
tower.workspace = true -tower-http.workspace = true tower-service.workspace = true tracing.workspace = true url.workspace = true diff --git a/crates/store/re_server/src/bandwidth_layer.rs b/crates/store/re_server/src/bandwidth_layer.rs index e6d793e77437..47c403943a25 100644 --- a/crates/store/re_server/src/bandwidth_layer.rs +++ b/crates/store/re_server/src/bandwidth_layer.rs @@ -57,7 +57,9 @@ where } fn call(&mut self, req: http::Request) -> Self::Future { - let mut inner = self.inner.clone(); + // See: https://docs.rs/tower/latest/tower/trait.Service.html#be-careful-when-cloning-inner-services + let clone = self.inner.clone(); + let mut inner = std::mem::replace(&mut self.inner, clone); let bytes_per_second = self.bytes_per_second; Box::pin(async move { let resp = inner.call(req).await?; diff --git a/crates/store/re_server/src/chunk_index/index.rs b/crates/store/re_server/src/chunk_index/index.rs index 855e9829babd..82560c7fd333 100644 --- a/crates/store/re_server/src/chunk_index/index.rs +++ b/crates/store/re_server/src/chunk_index/index.rs @@ -11,7 +11,7 @@ use arrow::error::ArrowError; use lance::deps::arrow_array::UInt8Array; use lance_index::DatasetIndexExt as _; use re_chunk_store::Chunk; -use re_log_types::{EntityPath, TimelineName}; +use re_log_types::{ComponentPath, EntityPath, TimelineName}; use re_protos::cloud::v1alpha1::ext::{IndexConfig, IndexProperties}; use re_protos::common::v1alpha1::ext::SegmentId; use re_types_core::ComponentIdentifier; @@ -333,9 +333,9 @@ pub async fn create_index( &config.time_index, ) .ok_or_else(|| { - StoreError::EntryNameNotFound(format!( - "{}#{}", - config.column.entity_path, config.column.descriptor.component + StoreError::ComponentPathNotFound(ComponentPath::new( + config.column.entity_path.clone(), + config.column.descriptor.component, )) })?; @@ -492,8 +492,13 @@ fn find_datatypes( ) -> Option { for segment in dataset.segments().values() { for layer in segment.layers().values() { - let chunk_store = 
layer.store_handle().read(); - for chunk in chunk_store.iter_physical_chunks() { + let chunks: Vec> = match layer.resolved_store() { + crate::store::ResolvedStore::Eager(h) => { + h.read().iter_physical_chunks().cloned().collect() + } + crate::store::ResolvedStore::Lazy(lazy) => lazy.collect_physical_chunks().ok()?, + }; + for chunk in chunks { if chunk.entity_path() == entity_path && let Some(component) = chunk.components().0.get(component) && let Some(timeline) = chunk.timelines().get(timeline_name) diff --git a/crates/store/re_server/src/chunk_index/mod.rs b/crates/store/re_server/src/chunk_index/mod.rs index 30b06d64313b..9ebec470f347 100644 --- a/crates/store/re_server/src/chunk_index/mod.rs +++ b/crates/store/re_server/src/chunk_index/mod.rs @@ -7,8 +7,7 @@ use std::sync::{Arc, OnceLock}; use ahash::{HashMap, HashMapExt as _}; use futures::StreamExt as _; -use re_chunk_store::ChunkStoreHandle; -use re_log_types::{EntityPath, EntryId}; +use re_log_types::{ComponentPath, EntityPath, EntryId}; use re_protos::cloud::v1alpha1::ext::{ CreateIndexRequest, IndexColumn, IndexConfig, SearchDatasetRequest, }; @@ -181,9 +180,9 @@ impl DatasetChunkIndexes { ) .await else { - return Err(StoreError::IndexNotFound(format!( - "{}#{}", - &request.column.entity_path, &request.column.descriptor.component + return Err(StoreError::ComponentPathNotFound(ComponentPath::new( + request.column.entity_path, + request.column.descriptor.component, )))?; }; @@ -205,23 +204,32 @@ impl DatasetChunkIndexes { pub async fn on_layer_added( &self, segment_id: SegmentId, - store: ChunkStoreHandle, + resolved: &crate::store::ResolvedStore, layer_name: &str, _overwritten: bool, ) -> Result<(), StoreError> { - let mut worklist = vec![]; + // Fast path: no indexes exist, nothing to do (no chunk loading needed). + if self.indexes.read().await.is_empty() { + return Ok(()); + } + + // Collect physical chunks from the store, loading on demand for lazy stores. 
+ let chunks: Vec> = match resolved { + crate::store::ResolvedStore::Eager(h) => { + h.read().iter_physical_chunks().cloned().collect() + } + crate::store::ResolvedStore::Lazy(lazy) => lazy + .collect_physical_chunks() + .map_err(|err| StoreError::IndexingError(format!("{err:#}")))?, + }; + let mut worklist = vec![]; { - // Blocking lock: quickly get what we need let indexes = self.indexes.read().await; - let store = store.read(); - - for chunk in store.iter_physical_chunks() { + for chunk in &chunks { if let Some(entity_indexes) = indexes.get(chunk.entity_path()) { - // Find components by iterating on indexes (lower cardinality) for (name, index) in entity_indexes { if chunk.components().0.contains_key(name) { - // Needs indexing worklist.push(( index.clone(), segment_id.clone(), @@ -235,12 +243,8 @@ impl DatasetChunkIndexes { } for (index, segment_id, layer_name, chunk) in worklist { - let checkout_latest = true; index - .store_chunks( - vec![(segment_id.clone(), layer_name, chunk.clone())], - checkout_latest, - ) + .store_chunks(vec![(segment_id.clone(), layer_name, chunk.clone())], true) .await?; } @@ -317,12 +321,21 @@ impl DatasetChunkIndexes { let mut backfill = Vec::new(); for (segment_id, segment) in dataset.segments() { for (layer_name, layer) in segment.layers() { - let store = layer.store_handle().read(); - for chunk in store.iter_physical_chunks() { + let chunks: Vec> = match layer + .resolved_store() + { + crate::store::ResolvedStore::Eager(h) => { + h.read().iter_physical_chunks().cloned().collect() + } + crate::store::ResolvedStore::Lazy(lazy) => lazy + .collect_physical_chunks() + .map_err(|err| StoreError::IndexingError(format!("{err:#}")))?, + }; + for chunk in chunks { if chunk.entity_path() == entity_path && chunk.components().0.contains_key(component) { - backfill.push((segment_id.clone(), layer_name.clone(), chunk.clone())); + backfill.push((segment_id.clone(), layer_name.clone(), chunk)); } } } @@ -363,7 +376,7 @@ mod tests { let mut dataset 
= Dataset::new( EntryId::new(), - "test-data".to_owned(), + re_protos::EntryName::new("test-data").unwrap(), StoreKind::Recording, Default::default(), ); @@ -425,7 +438,7 @@ mod tests { ChunkStoreConfig::default(), ); store.insert_chunk(&Arc::new(chunk))?; - let handle = ChunkStoreHandle::new(store); + let handle = re_chunk_store::ChunkStoreHandle::new(store); let store_slot_id = crate::store::StoreSlotId::new(); dataset @@ -433,7 +446,7 @@ mod tests { segment_id, layer_name, store_slot_id, - handle, + crate::store::ResolvedStore::Eager(handle), IfDuplicateBehavior::Error, ) .await?; diff --git a/crates/store/re_server/src/chunk_index/search.rs b/crates/store/re_server/src/chunk_index/search.rs index 3b0144a592f4..13b2a0572c6a 100644 --- a/crates/store/re_server/src/chunk_index/search.rs +++ b/crates/store/re_server/src/chunk_index/search.rs @@ -110,7 +110,7 @@ async fn apply_parameters( if let Some(filter) = filter.filter(|f| !f.is_empty()) { let filter = lance::io::exec::Planner::new(scanner.schema().await?).parse_filter(&filter)?; - match scanner.get_filter()? { + match scanner.get_expr_filter()? { Some(existing_filter) => { scanner.filter_expr(existing_filter.and(filter)); } @@ -146,7 +146,7 @@ async fn apply_parameters( } if explain_filter { - match scanner.get_filter() { + match scanner.get_expr_filter() { Ok(Some(filter)) => { info!(%filter); } diff --git a/crates/store/re_server/src/entrypoint.rs b/crates/store/re_server/src/entrypoint.rs index 1456a783e111..ca012cd23f16 100644 --- a/crates/store/re_server/src/entrypoint.rs +++ b/crates/store/re_server/src/entrypoint.rs @@ -3,6 +3,7 @@ use std::path::PathBuf; use std::str::FromStr; use anyhow::Context as _; +use re_protos::EntryName; #[cfg(unix)] use tokio::signal::unix::{SignalKind, signal}; #[cfg(windows)] @@ -51,6 +52,15 @@ pub struct Args { /// Artificial bandwidth limit for responses (e.g. '10MB' for 10 megabytes per second). 
#[clap(long, value_parser = parse_bandwidth_limit)] pub bandwidth_limit: Option, + + /// Additional origin patterns allowed to make cross-origin requests to the server + /// (can be specified multiple times). + /// + /// By default, only `localhost`, `127.0.0.1`, and `rerun.io` are allowed. + /// Patterns are matched against the full `Origin` header value, + /// using glob-style matching where `*` matches any sequence of characters. + #[clap(long = "cors-allow-origin")] + pub cors_allow_origin: Vec, } fn parse_bandwidth_limit(s: &str) -> Result { @@ -69,6 +79,7 @@ impl Default for Args { tables: vec![], latency_ms: 0, bandwidth_limit: None, + cors_allow_origin: Vec::new(), } } } @@ -82,7 +93,7 @@ pub struct NamedPath { /// A named collection of paths. #[derive(Debug, Clone)] pub struct NamedPathCollection { - pub name: String, + pub name: EntryName, pub paths: Vec, } @@ -105,10 +116,10 @@ impl FromStr for NamedPath { } impl Args { - /// Waits for the server to start, and return a handle to it together with its address. + /// Waits for the server to start, and return a handle to it. /// - /// The returned address is one you can connect to, e.g. 127.0.0.1 instead of 0.0.0.0. - pub async fn create_server_handle(self) -> anyhow::Result<(ServerHandle, SocketAddr)> { + /// Use [`ServerHandle::connect_addr`] for the address to connect to. 
+ pub async fn create_server_handle(self) -> anyhow::Result { let Self { host: ip, port, @@ -117,11 +128,10 @@ impl Args { tables, latency_ms, bandwidth_limit, + cors_allow_origin, } = self; - let rerun_cloud_server = { - use re_protos::cloud::v1alpha1::rerun_cloud_service_server::RerunCloudServiceServer; - + let handler = { let mut builder = crate::RerunCloudHandlerBuilder::new(); for NamedPathCollection { name, paths } in datasets { @@ -162,11 +172,16 @@ impl Args { } } - RerunCloudServiceServer::new(builder.build()) - .max_decoding_message_size(re_grpc_server::MAX_DECODING_MESSAGE_SIZE) - .max_encoding_message_size(re_grpc_server::MAX_ENCODING_MESSAGE_SIZE) + builder.build() }; + let rerun_cloud_server = + re_protos::cloud::v1alpha1::rerun_cloud_service_server::RerunCloudServiceServer::new( + handler, + ) + .max_decoding_message_size(re_grpc_server::MAX_DECODING_MESSAGE_SIZE) + .max_encoding_message_size(re_grpc_server::MAX_ENCODING_MESSAGE_SIZE); + let ip = ip.parse().with_context(|| format!("IP: {ip:?}"))?; let ip_port = SocketAddr::new(ip, port); @@ -178,19 +193,18 @@ impl Args { axum::routing::get(async move || re_build_info::build_info!().to_string()), ) .with_artificial_latency(std::time::Duration::from_millis(latency_ms as _)) - .with_bandwidth_limit(bandwidth_limit); + .with_bandwidth_limit(bandwidth_limit) + .with_cors_allowed_origins(cors_allow_origin); let server = server_builder.build(); - let mut server_handle = server.start(); - - let addr = server_handle.wait_for_ready().await?; + let server_handle = server.start().await?; - Ok((server_handle, addr)) + Ok(server_handle) } pub async fn run_async(self) -> anyhow::Result<()> { - let (mut server_handle, _) = self.create_server_handle().await?; + let mut server_handle = self.create_server_handle().await?; #[cfg(unix)] let mut term_signal = signal(SignalKind::terminate())?; @@ -211,7 +225,7 @@ impl Args { info!("received SIGINT, gracefully shutting down"); } - _ = server_handle.wait_for_shutdown() => { 
+ () = server_handle.wait_for_shutdown() => { warn!("gRPC endpoint shut down on its own, terminating redap-server"); } } diff --git a/crates/store/re_server/src/error_layer.rs b/crates/store/re_server/src/error_layer.rs new file mode 100644 index 000000000000..cfc6e58c3792 --- /dev/null +++ b/crates/store/re_server/src/error_layer.rs @@ -0,0 +1,128 @@ +use std::collections::HashSet; +use std::sync::Arc; + +use parking_lot::Mutex; + +/// Shared state for injecting errors into specific gRPC endpoints. +/// +/// Holds a set of gRPC method names (e.g. `"FetchChunks"`) that should +/// fail with a `NotFound` error. Used for testing error propagation. +#[derive(Clone)] +pub struct InjectedErrors(Arc>>); + +impl Default for InjectedErrors { + fn default() -> Self { + Self::new() + } +} + +impl InjectedErrors { + pub fn new() -> Self { + Self(Arc::new(Mutex::new(HashSet::new()))) + } + + /// Mark a gRPC endpoint to fail. The `method` is matched against the + /// gRPC method name, e.g. `"FetchChunks"`. + pub fn inject(&self, method: &str) { + self.0.lock().insert(method.to_owned()); + } + + /// Stop failing a previously injected endpoint. + pub fn clear(&self, method: &str) { + self.0.lock().remove(method); + } + + /// Stop failing all endpoints. + pub fn clear_all(&self) { + self.0.lock().clear(); + } + + /// Check if the given URI path should fail. + /// + /// Extracts the method name (last `/`-separated segment) from the path + /// and checks it against the set. + fn check_path(&self, path: &str) -> Option { + let method = path.rsplit('/').next().unwrap_or(path); + let set = self.0.lock(); + if set.contains(method) { + Some(method.to_owned()) + } else { + None + } + } +} + +// --- Tower layer --- + +/// A tower [`tower::Layer`] that rejects requests to gRPC endpoints registered in [`InjectedErrors`]. 
+/// +/// When a request's URI path ends with a registered method name, the layer +/// short-circuits with a `tonic::Status::not_found` error without calling +/// the inner service. This works for any gRPC endpoint. +#[derive(Clone)] +pub struct ErrorInjectionLayer { + errors: InjectedErrors, +} + +impl ErrorInjectionLayer { + pub fn new(errors: InjectedErrors) -> Self { + Self { errors } + } +} + +impl tower::Layer for ErrorInjectionLayer { + type Service = ErrorInjectionService; + + fn layer(&self, service: S) -> Self::Service { + ErrorInjectionService { + inner: service, + errors: self.errors.clone(), + } + } +} + +#[derive(Clone)] +pub struct ErrorInjectionService { + inner: S, + errors: InjectedErrors, +} + +impl tower::Service> for ErrorInjectionService +where + S: tower::Service, Response = http::Response> + + Clone + + Send + + 'static, + S::Future: Send, + S::Error: Send, + ReqBody: Send + 'static, + ResBody: Default + Send + 'static, +{ + type Response = S::Response; + type Error = S::Error; + type Future = + std::pin::Pin> + Send>>; + + fn poll_ready( + &mut self, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + self.inner.poll_ready(cx) + } + + fn call(&mut self, req: http::Request) -> Self::Future { + if let Some(method) = self.errors.check_path(req.uri().path()) { + let status = tonic::Status::not_found(format!( + "injected error for testing: {method} deliberately failed" + )); + // `tonic::Status::into_http` produces a valid gRPC error response + // with `grpc-status` and `grpc-message` headers and a default (empty) body. 
+ return Box::pin(async move { Ok(status.into_http()) }); + } + + // See: https://docs.rs/tower/latest/tower/trait.Service.html#be-careful-when-cloning-inner-services + let clone = self.inner.clone(); + let mut inner = std::mem::replace(&mut self.inner, clone); + Box::pin(async move { inner.call(req).await }) + } +} diff --git a/crates/store/re_server/src/latency_layer.rs b/crates/store/re_server/src/latency_layer.rs index 5245b5fda13e..b3ab9d6adea1 100644 --- a/crates/store/re_server/src/latency_layer.rs +++ b/crates/store/re_server/src/latency_layer.rs @@ -52,7 +52,10 @@ where } fn call(&mut self, req: Request) -> Self::Future { - let Self { mut inner, rtt } = self.clone(); + // See: https://docs.rs/tower/latest/tower/trait.Service.html#be-careful-when-cloning-inner-services + let clone = self.inner.clone(); + let mut inner = std::mem::replace(&mut self.inner, clone); + let rtt = self.rtt; Box::pin(async move { let resp = inner.call(req).await; if rtt != Duration::ZERO { diff --git a/crates/store/re_server/src/lib.rs b/crates/store/re_server/src/lib.rs index ca4452f157d0..22b65252c761 100644 --- a/crates/store/re_server/src/lib.rs +++ b/crates/store/re_server/src/lib.rs @@ -5,12 +5,14 @@ mod chunk_index; mod bandwidth_layer; mod entrypoint; +mod error_layer; mod latency_layer; mod rerun_cloud; mod server; mod store; pub use self::entrypoint::{Args, NamedPath, NamedPathCollection}; +pub use self::error_layer::InjectedErrors; pub use self::rerun_cloud::{ RerunCloudHandler, RerunCloudHandlerBuilder, RerunCloudHandlerSettings, }; diff --git a/crates/store/re_server/src/rerun_cloud.rs b/crates/store/re_server/src/rerun_cloud.rs index a753de74df07..314e8a632bc1 100644 --- a/crates/store/re_server/src/rerun_cloud.rs +++ b/crates/store/re_server/src/rerun_cloud.rs @@ -7,41 +7,45 @@ use arrow::record_batch::RecordBatch; use cfg_if::cfg_if; use datafusion::logical_expr::dml::InsertOp; use datafusion::prelude::SessionContext; -use nohash_hasher::IntSet; +use 
nohash_hasher::{IntMap, IntSet}; use tokio_stream::StreamExt as _; use tonic::{Code, Request, Response, Status}; use re_arrow_util::RecordBatchExt as _; use re_chunk_store::{ - Chunk, ChunkStore, ChunkStoreHandle, ChunkTrackingMode, LatestAtQuery, RangeQuery, + Chunk, ChunkId, ChunkStore, ChunkStoreHandle, ChunkTrackingMode, LatestAtQuery, RangeQuery, }; use re_log_encoding::ToTransport as _; -use re_log_types::{EntityPath, EntryId, StoreId, StoreKind}; -use re_protos::cloud::v1alpha1::ext::LanceTable; -use re_protos::cloud::v1alpha1::ext::{ - self, CreateDatasetEntryRequest, CreateDatasetEntryResponse, CreateTableEntryRequest, - CreateTableEntryResponse, DataSource, EntryDetailsUpdate, ProviderDetails, QueryDatasetRequest, - ReadDatasetEntryResponse, ReadTableEntryResponse, TableInsertMode, UpdateDatasetEntryRequest, - UpdateDatasetEntryResponse, UpdateEntryRequest, UpdateEntryResponse, -}; +use re_log_types::{AbsoluteTimeRange, EntityPath, EntryId, StoreId, StoreKind, Timeline}; use re_protos::cloud::v1alpha1::rerun_cloud_service_server::RerunCloudService; use re_protos::cloud::v1alpha1::{ - DeleteEntryResponse, EntryDetails, EntryKind, FetchChunksRequest, - GetDatasetManifestSchemaRequest, GetDatasetManifestSchemaResponse, GetDatasetSchemaResponse, - GetRrdManifestResponse, GetSegmentTableSchemaResponse, QueryDatasetResponse, - QueryTasksOnCompletionRequest, QueryTasksOnCompletionResponse, QueryTasksRequest, - QueryTasksResponse, RegisterTableRequest, RegisterTableResponse, RegisterWithDatasetResponse, - ScanDatasetManifestRequest, ScanDatasetManifestResponse, ScanSegmentTableResponse, - ScanTableResponse, + CancelTasksRequest, CancelTasksResponse, DeleteEntryResponse, EntryDetails, EntryKind, + FetchChunksRequest, GetDatasetManifestSchemaRequest, GetDatasetManifestSchemaResponse, + GetDatasetSchemaResponse, GetRrdManifestResponse, GetSegmentTableSchemaResponse, + QueryDatasetResponse, QueryTasksOnCompletionRequest, QueryTasksOnCompletionResponse, + 
QueryTasksRequest, QueryTasksResponse, RegisterTableRequest, RegisterTableResponse, + RegisterWithDatasetResponse, ScanDatasetManifestRequest, ScanDatasetManifestResponse, + ScanSegmentTableResponse, ScanTableResponse, }; use re_protos::common::v1alpha1::TaskId; use re_protos::common::v1alpha1::ext::{IfDuplicateBehavior, SegmentId}; use re_protos::headers::RerunHeadersExtractorExt as _; use re_protos::missing_field; +use re_protos::{ + EntryName, + cloud::v1alpha1::ext::{ + self, CreateDatasetEntryRequest, CreateDatasetEntryResponse, CreateTableEntryRequest, + CreateTableEntryResponse, DataSource, EntryDetailsUpdate, LanceTable, ProviderDetails, + QueryDatasetRequest, ReadDatasetEntryResponse, ReadTableEntryResponse, TableInsertMode, + UpdateDatasetEntryRequest, UpdateDatasetEntryResponse, UpdateEntryRequest, + UpdateEntryResponse, + }, +}; use re_tuid::Tuid; use crate::OnError; use crate::entrypoint::NamedPath; +use crate::store::ResolvedStore; use crate::store::{ ChunkKey, Dataset, Error, InMemoryStore, StoreSlotId, TASK_ID_SUCCESS, Table, TaskResult, }; @@ -89,7 +93,7 @@ impl RerunCloudHandlerBuilder { pub async fn with_rrds_as_dataset( mut self, - dataset_name: String, + dataset_name: EntryName, rrd_paths: Vec, on_duplicate: IfDuplicateBehavior, on_error: crate::OnError, @@ -136,6 +140,14 @@ impl RerunCloudHandlerBuilder { Ok(self) } + pub fn with_eager_chunk_store_config( + mut self, + config: re_chunk_store::ChunkStoreConfig, + ) -> Self { + self.store.set_eager_chunk_store_config(config); + self + } + pub fn build(self) -> RerunCloudHandler { RerunCloudHandler::new(self.settings, self.store) } @@ -145,14 +157,16 @@ impl RerunCloudHandlerBuilder { pub struct RerunCloudHandler { settings: RerunCloudHandlerSettings, - + eager_chunk_store_config: re_chunk_store::ChunkStoreConfig, store: tokio::sync::RwLock, } impl RerunCloudHandler { pub fn new(settings: RerunCloudHandlerSettings, store: InMemoryStore) -> Self { + let eager_chunk_store_config = 
store.eager_chunk_store_config(); Self { settings, + eager_chunk_store_config, store: tokio::sync::RwLock::new(store), } } @@ -165,7 +179,7 @@ impl RerunCloudHandler { &self, dataset_id: EntryId, segment_ids: &[SegmentId], - ) -> tonic::Result> { + ) -> tonic::Result> { let store = self.store.read().await; let dataset = store.dataset(dataset_id)?; @@ -177,7 +191,7 @@ impl RerunCloudHandler { segment_id.clone(), layer_name.to_owned(), layer.store_slot_id(), - layer.store_handle().clone(), + layer.resolved_store().clone(), ) }) }) @@ -313,9 +327,9 @@ impl RerunCloudHandler { async fn find_datasets( &self, entry_id: Option, - name: Option, + name: Option, store_kind: Option, - ) -> Result, Status> { + ) -> tonic::Result> { let store = self.store.read().await; let dataset = match (entry_id, name) { @@ -354,8 +368,8 @@ impl RerunCloudHandler { async fn find_tables( &self, entry_id: Option, - name: Option, - ) -> Result, Status> { + name: Option, + ) -> tonic::Result> { let store = self.store.read().await; let table = match (entry_id, name) { @@ -422,6 +436,22 @@ impl RerunCloudService for RerunCloudHandler { re_protos::cloud::v1alpha1::VersionResponse { build_info: Some(build_info.into()), version: re_build_info::exposed_version!().to_owned(), + cloud_provider: None, + cloud_region: None, + }, + )) + } + + async fn who_am_i( + &self, + _request: tonic::Request, + ) -> tonic::Result> { + // The local server has no authentication, so grant full access. 
+ Ok(tonic::Response::new( + re_protos::cloud::v1alpha1::WhoAmIResponse { + user_id: None, + can_read: true, + can_write: true, }, )) } @@ -438,7 +468,12 @@ impl RerunCloudService for RerunCloudHandler { .and_then(|filter| filter.id) .map(TryInto::try_into) .transpose()?; - let name = filter.as_ref().and_then(|filter| filter.name.clone()); + let name = filter + .as_ref() + .and_then(|filter| filter.name.clone()) + .map(EntryName::new) + .transpose() + .map_err(|err| Status::invalid_argument(err.to_string()))?; let kind = filter .and_then(|filter| filter.entry_kind) .map(EntryKind::try_from) @@ -627,6 +662,7 @@ impl RerunCloudService for RerunCloudHandler { ) -> tonic::Result> { let mut store = self.store.write().await; + let dataset_id = get_entry_id_from_headers(&store, &request)?; let ext::RegisterWithDatasetRequest { @@ -657,7 +693,7 @@ impl RerunCloudService for RerunCloudHandler { }, Memory { store_slot_id: StoreSlotId, - store_handle: ChunkStoreHandle, + resolved: ResolvedStore, segment_id: SegmentId, layer_name: String, }, @@ -699,12 +735,12 @@ impl RerunCloudService for RerunCloudHandler { // Handle memory:// URLs (re-registration of existing stores) if storage_url.scheme() == "memory" { let store_slot_id = parse_memory_url(&storage_url)?; - let store_handle = store.resolve_store(&store_slot_id).ok_or_else(|| { + let resolved = store.resolve_store(&store_slot_id).ok_or_else(|| { tonic::Status::not_found(format!( "store not found for memory URL: {storage_url}" )) })?; - let store_id = store_handle.read().id().clone(); + let store_id = resolved.store_id(); if store_id.kind() != store_kind { continue; } @@ -713,7 +749,7 @@ impl RerunCloudService for RerunCloudHandler { seen.entry(key).or_default().push(storage_url.clone()); validated_sources.push(ValidatedSource::Memory { store_slot_id, - store_handle, + resolved, segment_id, layer_name: layer, }); @@ -789,7 +825,7 @@ impl RerunCloudService for RerunCloudHandler { // Phase 2: Load file sources and unify with 
memory sources into a common form. struct ReadySource { store_slot_id: StoreSlotId, - store_handle: ChunkStoreHandle, + resolved: ResolvedStore, segment_id: SegmentId, layer_name: String, storage_url: String, @@ -801,14 +837,14 @@ impl RerunCloudService for RerunCloudHandler { match source { ValidatedSource::Memory { store_slot_id, - store_handle, + resolved, segment_id, layer_name, } => { ready_sources.push(ReadySource { storage_url: format!("memory:///store/{store_slot_id}"), store_slot_id, - store_handle, + resolved, segment_id, layer_name, }); @@ -820,22 +856,12 @@ impl RerunCloudService for RerunCloudHandler { storage_url, } => { re_log::info!("Loading RRD: {}", rrd_path.display()); - let contents = ChunkStore::handle_from_rrd_filepath( - &InMemoryStore::chunk_store_config(), - &rrd_path, - ) - .map_err(|err| { - tonic::Status::internal(format!("Failed to load RRD: {err:#}")) - })?; - - for (store_id, chunk_store) in contents { - if store_id.kind() != store_kind { - continue; - } + for (store_id, resolved) in ResolvedStore::load_rrd_file(&rrd_path, store_kind)? 
+ { ready_sources.push(ReadySource { store_slot_id: StoreSlotId::new(), - store_handle: chunk_store, + resolved, segment_id: SegmentId::new(store_id.recording_id().to_string()), layer_name: layer_name.clone(), storage_url: storage_url.to_string(), @@ -854,7 +880,7 @@ impl RerunCloudService for RerunCloudHandler { let mut failed_task_results: Vec<(TaskId, TaskResult)> = vec![]; for source in &ready_sources { - store.register_store_with_id(source.store_slot_id, &source.store_handle); + store.register_store_with_id(source.store_slot_id, &source.resolved); } { @@ -866,7 +892,7 @@ impl RerunCloudService for RerunCloudHandler { source.segment_id.clone(), source.layer_name.clone(), source.store_slot_id, - source.store_handle, + source.resolved, on_duplicate, ) .await; @@ -923,7 +949,7 @@ impl RerunCloudService for RerunCloudHandler { async fn unregister_from_dataset( &self, request: tonic::Request, - ) -> tonic::Result, Status> { + ) -> tonic::Result> { let mut store = self.store.write().await; let entry_id = get_entry_id_from_headers(&store, &request)?; @@ -1010,7 +1036,7 @@ impl RerunCloudService for RerunCloudHandler { entry_id.to_string(), segment_id.id.clone(), ), - InMemoryStore::chunk_store_config(), + self.eager_chunk_store_config.clone(), ) }) .insert_chunk(&chunk) @@ -1025,21 +1051,21 @@ impl RerunCloudService for RerunCloudHandler { let handles: Vec<_> = chunk_stores .into_iter() .map(|(segment_id, chunk_store)| { - let handle = ChunkStoreHandle::new(chunk_store); - let store_slot_id = store.register_store(&handle); - (segment_id, store_slot_id, handle) + let resolved = ResolvedStore::Eager(ChunkStoreHandle::new(chunk_store)); + let store_slot_id = store.register_store(&resolved); + (segment_id, store_slot_id, resolved) }) .collect(); let dataset = store.dataset_mut(entry_id)?; - for (entity_path, store_slot_id, handle) in handles { + for (entity_path, store_slot_id, resolved) in handles { dataset .add_layer( entity_path, DataSource::DEFAULT_LAYER.to_owned(), 
store_slot_id, - handle, + resolved, IfDuplicateBehavior::Error, ) .await?; @@ -1167,7 +1193,7 @@ impl RerunCloudService for RerunCloudHandler { async fn get_dataset_manifest_schema( &self, request: Request, - ) -> Result, Status> { + ) -> tonic::Result> { let store = self.store.read().await; let entry_id = get_entry_id_from_headers(&store, &request)?; @@ -1194,7 +1220,7 @@ impl RerunCloudService for RerunCloudHandler { async fn scan_dataset_manifest( &self, request: Request, - ) -> Result, Status> { + ) -> tonic::Result> { let store = self.store.read().await; let entry_id = get_entry_id_from_headers(&store, &request)?; @@ -1386,6 +1412,7 @@ impl RerunCloudService for RerunCloudHandler { exclude_temporal_data, scan_parameters, query, + generate_direct_urls: _, } = request.into_inner().try_into()?; if scan_parameters.is_some() { @@ -1414,9 +1441,72 @@ impl RerunCloudService for RerunCloudHandler { )); } + // Compute the union of timelines across every (segment, layer) touched by this query, so + // every response we emit below carries the same `{timeline}:start` columns and the client + // can concatenate them. Individual responses fill in `None` for timelines their chunks + // don't contain. + let all_timelines: BTreeMap = chunk_stores + .iter() + .flat_map(|(_, _, _, resolved)| { + resolved + .schema() + .timelines() + .into_values() + .map(|tl| (tl.name().as_str().to_owned(), tl.datatype())) + .collect::>() + }) + .collect(); + let stream = futures::stream::iter(chunk_stores.into_iter().map( - move |(segment_id, layer_name, store_slot_id, store_handle)| { - let num_chunks = store_handle.read().num_physical_chunks(); + move |(segment_id, layer_name, store_slot_id, resolved)| { + // Build metadata for all relevant chunks (physical + virtual). 
+ + let metadata_vec: Vec = if let Some(query) = &query { + let (chunks, missing_virtual) = + get_chunks_for_query_results(&resolved, &entity_paths, query); + + let mut metas: Vec<_> = chunks + .iter() + .map(|c| ChunkMetadata::from_chunk(c)) + .collect(); + if let ResolvedStore::Lazy(lazy) = &resolved { + for chunk_id in &missing_virtual { + if let Some(idx) = lazy.chunk_row_index(chunk_id) { + metas.push(ChunkMetadata::from_manifest( + lazy.manifest(), + *chunk_id, + idx, + lazy.timeline_ranges().get(chunk_id), + )); + } + } + } + metas + } else { + match &resolved { + ResolvedStore::Eager(h) => h + .read() + .iter_physical_chunks() + .map(|c| ChunkMetadata::from_chunk(c)) + .collect(), + ResolvedStore::Lazy(lazy) => lazy + .manifest() + .col_chunk_ids() + .iter() + .enumerate() + .map(|(idx, &chunk_id)| { + ChunkMetadata::from_manifest( + lazy.manifest(), + chunk_id, + idx, + lazy.timeline_ranges().get(&chunk_id), + ) + }) + .collect(), + } + }; + + let num_chunks = metadata_vec.len(); let mut chunk_ids = Vec::with_capacity(num_chunks); let mut chunk_segment_ids = Vec::with_capacity(num_chunks); @@ -1424,89 +1514,90 @@ impl RerunCloudService for RerunCloudHandler { let mut chunk_entity_path = Vec::with_capacity(num_chunks); let mut chunk_is_static = Vec::with_capacity(num_chunks); let mut chunk_byte_sizes = Vec::with_capacity(num_chunks); + let mut chunk_byte_sizes_uncompressed = Vec::with_capacity(num_chunks); + let mut chunk_direct_urls = Vec::with_capacity(num_chunks); + let mut chunk_direct_url_expiry = Vec::with_capacity(num_chunks); + + // Seed with the full set of timelines the query can see so the response schema + // matches every other response in this stream, even for segments/layers whose + // chunks don't use all those timelines. 
+ let mut timelines: BTreeMap< + String, + (arrow::datatypes::DataType, Vec>), + > = all_timelines + .iter() + .map(|(name, dtype)| { + ( + name.clone(), + (dtype.clone(), Vec::with_capacity(num_chunks)), + ) + }) + .collect(); - let mut timelines = BTreeMap::new(); - - let chunks = if let Some(query) = &query { - get_chunks_for_query(&store_handle, &entity_paths, query) - } else { - store_handle - .read() - .iter_physical_chunks() - .map(Clone::clone) - .collect() - }; - - for chunk in chunks { - if !entity_paths.is_empty() && !entity_paths.contains(chunk.entity_path()) { + for meta in &metadata_vec { + if !entity_paths.is_empty() + && !entity_paths.contains(&EntityPath::from(meta.entity_path.as_str())) + { continue; } - if !requested_chunk_ids.is_empty() && !requested_chunk_ids.contains(&chunk.id()) + if !requested_chunk_ids.is_empty() + && !requested_chunk_ids.contains(&meta.chunk_id) { continue; } // Filter by static/temporal data - if exclude_static_data && chunk.is_static() { + if exclude_static_data && meta.is_static { continue; } - if exclude_temporal_data && !chunk.is_static() { + if exclude_temporal_data && !meta.is_static { continue; } - let mut missing_timelines: BTreeSet<_> = timelines.keys().copied().collect(); - for (timeline_name, timeline_col) in chunk.timelines() { - let range = timeline_col.time_range(); - let time_min = range.min(); - let time_max = range.max(); - - let timeline_name = timeline_name.as_str(); + let mut missing_timelines: BTreeSet = + timelines.keys().cloned().collect(); + for (timeline, range) in &meta.timelines { + let timeline_name = timeline.name().as_str(); missing_timelines.remove(timeline_name); - let timeline_data_type = timeline_col.times_array().data_type().to_owned(); - - let timeline_data = timelines.entry(timeline_name).or_insert_with(|| { - ( - timeline_data_type, - vec![None; chunk_segment_ids.len()], - vec![None; chunk_segment_ids.len()], - ) - }); - timeline_data.1.push(Some(time_min.as_i64())); - 
timeline_data.2.push(Some(time_max.as_i64())); + let timeline_data = timelines + .get_mut(timeline_name) + .expect("timeline was pre-seeded from chunk stores"); + + timeline_data.1.push(Some(range.min().as_i64())); } for timeline_name in missing_timelines { let timeline_data = timelines - .get_mut(timeline_name) - .expect("timeline_names already checked"); // Already checked + .get_mut(&timeline_name) + .expect("timeline_names already checked"); timeline_data.1.push(None); - timeline_data.2.push(None); } chunk_segment_ids.push(segment_id.id.clone()); - chunk_ids.push(chunk.id()); - chunk_entity_path.push(chunk.entity_path().to_string()); - chunk_is_static.push(chunk.is_static()); - - // Calculate chunk byte size for batching optimization - let chunk_size_bytes = - re_byte_size::SizeBytes::total_size_bytes(chunk.as_ref()); - chunk_byte_sizes.push(chunk_size_bytes); + chunk_ids.push(meta.chunk_id); + chunk_entity_path.push(meta.entity_path.clone()); + chunk_is_static.push(meta.is_static); + chunk_byte_sizes.push(meta.byte_size); + // OSS server stores decoded data, so compressed == uncompressed. 
+ chunk_byte_sizes_uncompressed.push(Some(meta.byte_size)); chunk_keys.push( ChunkKey { - chunk_id: chunk.id(), + chunk_id: meta.chunk_id, store_slot_id, } .encode()?, ); + + chunk_direct_urls.push(None); + chunk_direct_url_expiry.push(None); } let chunk_layer_names = vec![layer_name.clone(); chunk_ids.len()]; let chunk_key_refs = chunk_keys.iter().map(|v| v.as_slice()).collect(); - let batch = QueryDatasetResponse::create_dataframe( + let batch = QueryDatasetResponse::create_dataframe_with_timelines( chunk_ids, chunk_segment_ids, chunk_layer_names, @@ -1514,6 +1605,10 @@ impl RerunCloudService for RerunCloudHandler { chunk_entity_path, chunk_is_static, chunk_byte_sizes, + chunk_byte_sizes_uncompressed, + chunk_direct_urls, + chunk_direct_url_expiry, + &timelines, ) .map_err(|err| { tonic::Status::internal(format!("Failed to create dataframe: {err:#}")) @@ -1667,7 +1762,7 @@ impl RerunCloudService for RerunCloudHandler { async fn get_table_schema( &self, request: tonic::Request, - ) -> Result, Status> { + ) -> tonic::Result> { let store = self.store.read().await; let Some(entry_id) = request.into_inner().table_id else { return Err(Status::not_found("Table ID not specified in request")); @@ -1694,7 +1789,7 @@ impl RerunCloudService for RerunCloudHandler { async fn scan_table( &self, request: tonic::Request, - ) -> Result, Status> { + ) -> tonic::Result> { let store = self.store.read().await; let Some(entry_id) = request.into_inner().table_id else { return Err(Status::not_found("Table ID not specified in request")); @@ -1804,6 +1899,14 @@ impl RerunCloudService for RerunCloudHandler { )) } + async fn cancel_tasks( + &self, + _request: tonic::Request, + ) -> tonic::Result> { + // Cancelling tasks is a noop in the OSS server + Ok(tonic::Response::new(CancelTasksResponse {})) + } + async fn do_maintenance( &self, _request: tonic::Request, @@ -1826,11 +1929,11 @@ impl RerunCloudService for RerunCloudHandler { async fn create_table_entry( &self, request: Request, - ) -> 
Result, Status> { + ) -> tonic::Result> { let mut store = self.store.write().await; let request: CreateTableEntryRequest = request.into_inner().try_into()?; - let table_name = &request.name; + let table_name = request.name; let schema = Arc::new(request.schema); @@ -1877,7 +1980,7 @@ impl RerunCloudService for RerunCloudHandler { /// /// Returns a deduplicated set because a single RRD can contain duplicate /// `SetStoreInfo` messages for the same store. -fn load_store_ids(rrd_path: &std::path::Path) -> Result, tonic::Status> { +fn load_store_ids(rrd_path: &std::path::Path) -> tonic::Result> { let reader = std::io::BufReader::new( std::fs::File::open(rrd_path) .map_err(|err| tonic::Status::internal(format!("Failed to open RRD file: {err:#}")))?, @@ -1898,7 +2001,7 @@ fn load_store_ids(rrd_path: &std::path::Path) -> Result, tonic } /// Parses a `memory:///store/{store_slot_id}` URL and returns the [`StoreSlotId`]. -fn parse_memory_url(url: &url::Url) -> Result { +fn parse_memory_url(url: &url::Url) -> tonic::Result { let path = url.path(); let slot_id_str = path.strip_prefix("/store/").ok_or_else(|| { tonic::Status::invalid_argument(format!( @@ -1943,91 +2046,113 @@ fn latest_at_or_static(latest_at: &ext::QueryLatestAt) -> LatestAtQuery { } } -/// Utility function to determine the chunks to return based on query parameters -fn get_chunks_for_query( - store_handle: &ChunkStoreHandle, +/// Metadata for a single chunk, extractable from either a physical `Chunk` or a manifest. 
+struct ChunkMetadata { + chunk_id: ChunkId, + entity_path: String, + is_static: bool, + byte_size: u64, + timelines: IntMap, +} + +impl ChunkMetadata { + fn from_chunk(chunk: &Chunk) -> Self { + let timelines = chunk + .timelines() + .values() + .map(|col| (*col.timeline(), col.time_range())) + .collect(); + Self { + chunk_id: chunk.id(), + entity_path: chunk.entity_path().to_string(), + is_static: chunk.is_static(), + byte_size: re_byte_size::SizeBytes::total_size_bytes(chunk), + timelines, + } + } + + fn from_manifest( + manifest: &re_log_encoding::RrdManifest, + chunk_id: ChunkId, + row_idx: usize, + chunk_timelines: Option<&IntMap>, + ) -> Self { + Self { + chunk_id, + entity_path: manifest + .col_chunk_entity_path_raw() + .value(row_idx) + .to_owned(), + is_static: manifest.col_chunk_is_static_raw().value(row_idx), + byte_size: manifest.col_chunk_byte_size_uncompressed()[row_idx], + timelines: chunk_timelines.cloned().unwrap_or_default(), + } + } +} + +/// Returns physical chunks and missing virtual chunk IDs for a query. +fn get_chunks_for_query_results( + resolved: &ResolvedStore, entity_paths: &IntSet, query: &ext::Query, -) -> Vec> { +) -> (Vec>, Vec) { + // Contract: a Query with neither `latest_at` nor `range` means "all chunks", regardless of + // entity filter. This is exercised by the shared `re_redap_tests::query_dataset` "default" test + // case. 
+ if query.latest_at.is_none() && query.range.is_none() { + return match resolved { + ResolvedStore::Eager(h) => (h.read().iter_physical_chunks().cloned().collect(), vec![]), + ResolvedStore::Lazy(lazy) => (vec![], lazy.manifest().col_chunk_ids().to_vec()), + }; + } + let paths = if entity_paths.is_empty() { - store_handle.read().all_entities() + resolved.all_entities() } else { entity_paths.clone() }; - match (&query.latest_at, &query.range) { - (Some(latest_at), Some(range)) => { - let latest_at = latest_at_or_static(latest_at); - let range = RangeQuery::new(range.index.clone().into(), range.index_range); - - // We have both a latest at and a range, so we need to combine - // chunks and ensure no duplicates - paths - .iter() - .flat_map(|entity_path| { - let read_lock = store_handle.read(); - let mut latest_at = read_lock - .latest_at_relevant_chunks_for_all_components( - ChunkTrackingMode::Report, - &latest_at, - entity_path, - true, - ) - .chunks; - let mut range = read_lock - .range_relevant_chunks_for_all_components( - ChunkTrackingMode::Report, - &range.clone(), - entity_path, - true, - ) - .chunks; - range.retain(|chunk| !latest_at.contains(chunk)); - latest_at.extend(range); - - latest_at - }) - .collect::>() - } - (Some(latest_at), None) => { - let latest_at = latest_at_or_static(latest_at); - - paths - .iter() - .flat_map(|entity_path| { - store_handle - .read() - .latest_at_relevant_chunks_for_all_components( - ChunkTrackingMode::Report, - &latest_at.clone(), - entity_path, - true, - ) - .chunks - }) - .collect::>() + let mut all_chunks: Vec> = vec![]; + let mut all_missing: BTreeSet = BTreeSet::new(); + let mut seen_physical: BTreeSet = BTreeSet::new(); + + for entity_path in &paths { + if let Some(latest_at) = &query.latest_at { + let latest_at_q = latest_at_or_static(latest_at); + let results = resolved.latest_at_relevant_chunks_for_all_components( + ChunkTrackingMode::Report, + &latest_at_q, + entity_path, + true, + ); + for chunk in results.chunks 
{ + if seen_physical.insert(chunk.id()) { + all_chunks.push(chunk); + } + } + all_missing.extend(results.missing_virtual); } - (None, Some(range)) => { - let range = RangeQuery::new(range.index.clone().into(), range.index_range); - paths - .iter() - .flat_map(|entity_path| { - store_handle - .read() - .range_relevant_chunks_for_all_components( - ChunkTrackingMode::Report, - &range.clone(), - entity_path, - true, - ) - .chunks - }) - .collect::>() + if let Some(range) = &query.range { + let range_q = RangeQuery::new(range.index.clone().into(), range.index_range); + let results = resolved.range_relevant_chunks_for_all_components( + ChunkTrackingMode::Report, + &range_q, + entity_path, + true, + ); + for chunk in results.chunks { + if seen_physical.insert(chunk.id()) { + all_chunks.push(chunk); + } + } + all_missing.extend(results.missing_virtual); } - (None, None) => store_handle - .read() - .iter_physical_chunks() - .map(Clone::clone) - .collect(), } + + // Remove any virtual IDs that turned out to be physical in another entity's result. + for id in &seen_physical { + all_missing.remove(id); + } + + (all_chunks, all_missing.into_iter().collect()) } diff --git a/crates/store/re_server/src/server.rs b/crates/store/re_server/src/server.rs index af9cdaaf276a..1e23dfd608be 100644 --- a/crates/store/re_server/src/server.rs +++ b/crates/store/re_server/src/server.rs @@ -10,6 +10,8 @@ use tokio_stream::StreamExt as _; use tonic::service::{Routes, RoutesBuilder}; use tracing::{error, info}; +use crate::error_layer::InjectedErrors; + // --- #[derive(thiserror::Error, Debug)] @@ -26,54 +28,34 @@ pub struct Server { routes: Routes, artificial_latency: std::time::Duration, bandwidth_limit: Option, + cors_allowed_origins: Vec, } /// `ServerHandle` is a tiny helper abstraction that enables us to /// deal with the gRPC server lifecycle more easily. 
pub struct ServerHandle { shutdown: Option>, - ready: mpsc::Receiver, failed: mpsc::Receiver, - task: tokio::task::JoinHandle<()>, + _task: tokio::task::JoinHandle<()>, + + /// The address clients should connect to. + connect_addr: SocketAddr, + + /// Test hook: endpoints registered here will return an error. + injected_errors: InjectedErrors, } impl ServerHandle { - /// Wait until the server is ready to accept connections (or failure occurs) - pub async fn wait_for_ready(&mut self) -> Result { - tokio::select! { - ready = self.ready.recv() => { - match ready { - Some(local_addr) => { - info!("Ready for connections."); - Ok(local_addr) - }, - None => Err(ServerError::ServerFailedToStart { - reason: "ready channel closed unexpectedly".into(), - }), - } - } - failed = self.failed.recv() => { - match failed { - Some(reason) => Err(ServerError::ServerFailedToStart { reason }), - None => Err(ServerError::ServerFailedToStart { - reason: "failed channel closed unexpectedly".into(), - }), - } - } - result = &mut self.task => { - match result { - Ok(()) => Err(ServerError::ServerFailedToStart { - reason: "server task exited without signaling ready or failed".into(), - }), - Err(join_err) if join_err.is_panic() => Err(ServerError::ServerFailedToStart { - reason: format!("server task panicked: {join_err}"), - }), - Err(join_err) => Err(ServerError::ServerFailedToStart { - reason: format!("server task was cancelled: {join_err}"), - }), - } - } - } + /// The address clients should use to connect. + /// + /// This is a connectable address, e.g. `127.0.0.1:9876` instead of `0.0.0.0:9876`. + pub fn connect_addr(&self) -> SocketAddr { + self.connect_addr + } + + /// For testing: get a reference to the injected errors, which can be used to make specific gRPC endpoints fail. + pub fn injected_errors(&self) -> &InjectedErrors { + &self.injected_errors } /// Wait until the server is shutdown. 
@@ -91,20 +73,23 @@ impl ServerHandle { } impl Server { - /// Starts the server and return `ServerHandle` so that caller can manage - /// the server lifecycle. - pub fn start(self) -> ServerHandle { + /// Starts the server, waits for it to be ready, and returns a [`ServerHandle`]. + pub async fn start(self) -> Result { let Self { addr, routes, artificial_latency, bandwidth_limit, + cors_allowed_origins, } = self; - let (ready_tx, ready_rx) = mpsc::channel(1); + let (ready_tx, mut ready_rx) = mpsc::channel(1); let (failed_tx, failed_rx) = mpsc::channel(1); let (shutdown_tx, shutdown_rx) = oneshot::channel::<()>(); + let injected_errors = InjectedErrors::new(); + + let injected_errors_for_handle = injected_errors.clone(); let task = tokio::spawn(async move { let listener = if let Ok(listener) = TcpListener::bind(addr).await { #[expect(clippy::unwrap_used)] @@ -155,9 +140,21 @@ impl Server { let is_client = false; re_protos::headers::new_rerun_headers_layer(name, version, is_client) }) - .layer(tower_http::cors::CorsLayer::permissive()) // Allow CORS for all origins (to support web clients) + .layer(re_grpc_server::cors_layer(&cors_allowed_origins)) .layer(crate::latency_layer::LatencyLayer::new(artificial_latency)) .layer(crate::bandwidth_layer::BandwidthLayer::new(bandwidth_limit)) + .layer(re_protos::trace_id_layer::TraceIdLayer::new( + std::sync::Arc::new(|| { + // We inject a dummy trace-id here so that our e2e integration tests + // can verify that the trace-id shows up in error messages. + // We sometimes run these tests on release builds, so we always inject these trace-ids. 
+ const DUMMY_TRACE_ID: u128 = 0xabba000000000000000000000000abba_u128; + Some(opentelemetry::TraceId::from(DUMMY_TRACE_ID)) + }), + )) + .layer(crate::error_layer::ErrorInjectionLayer::new( + injected_errors.clone(), + )) // NOTE: GrpcWebLayer is applied directly to gRPC routes in ServerBuilder::build() // to avoid rejecting regular HTTP requests .into_inner(); @@ -185,12 +182,37 @@ impl Server { let _ = failed_tx.send("gRPC server stopped".to_owned()).await; }); - ServerHandle { + // Wait for the server to signal readiness. + let mut failed_rx_for_select = failed_rx; + let connect_addr = tokio::select! { + ready = ready_rx.recv() => { + match ready { + Some(addr) => { + info!("Ready for connections."); + Ok(addr) + }, + None => Err(ServerError::ServerFailedToStart { + reason: "ready channel closed unexpectedly".into(), + }), + } + } + failed = failed_rx_for_select.recv() => { + match failed { + Some(reason) => Err(ServerError::ServerFailedToStart { reason }), + None => Err(ServerError::ServerFailedToStart { + reason: "failed channel closed unexpectedly".into(), + }), + } + } + }?; + + Ok(ServerHandle { shutdown: Some(shutdown_tx), - ready: ready_rx, - failed: failed_rx, - task, - } + failed: failed_rx_for_select, + _task: task, + connect_addr, + injected_errors: injected_errors_for_handle, + }) } } @@ -204,6 +226,7 @@ pub struct ServerBuilder { axum_routes: axum::Router, artificial_latency: std::time::Duration, bandwidth_limit: Option, + cors_allowed_origins: Vec, } impl ServerBuilder { @@ -248,6 +271,14 @@ impl ServerBuilder { self } + /// Set additional origin patterns allowed to make cross-origin requests. + /// + /// By default, only `localhost`, `127.0.0.1`, and `rerun.io` are allowed. 
+ pub fn with_cors_allowed_origins(mut self, origins: Vec) -> Self { + self.cors_allowed_origins = origins; + self + } + pub fn build(self) -> Server { let Self { addr, @@ -255,6 +286,7 @@ impl ServerBuilder { axum_routes, artificial_latency, bandwidth_limit, + cors_allowed_origins, } = self; let grpc_routes = routes_builder.routes(); @@ -278,6 +310,7 @@ impl ServerBuilder { routes: routes.into(), artificial_latency, bandwidth_limit, + cors_allowed_origins, } } } diff --git a/crates/store/re_server/src/store/dataset.rs b/crates/store/re_server/src/store/dataset.rs index d894521fdd49..7dcf61399d86 100644 --- a/crates/store/re_server/src/store/dataset.rs +++ b/crates/store/re_server/src/store/dataset.rs @@ -7,9 +7,9 @@ use arrow::datatypes::{Field, Fields, Schema}; use itertools::{Either, Itertools as _}; use parking_lot::Mutex; use re_arrow_util::RecordBatchExt as _; -use re_chunk_store::{ChunkStore, ChunkStoreHandle}; use re_log_encoding::RawRrdManifest; use re_log_types::{EntryId, StoreId, StoreKind, TimeType}; +use re_protos::EntryName; use re_protos::cloud::v1alpha1::ext::{DataSource, DatasetDetails, DatasetEntry, EntryDetails}; use re_protos::cloud::v1alpha1::{ EntryKind, ScanDatasetManifestResponse, ScanSegmentTableResponse, @@ -17,12 +17,12 @@ use re_protos::cloud::v1alpha1::{ use re_protos::common::v1alpha1::ext::{DatasetHandle, IfDuplicateBehavior, SegmentId}; use crate::store::{ - Error, InMemoryStore, Layer, Segment, StoreSlotId, Tracked, store_pool::StorePool, + Error, Layer, ResolvedStore, Segment, StoreSlotId, Tracked, store_pool::StorePool, }; /// The mutable inner state of a [`Dataset`], wrapped in [`Tracked`] for automatic timestamp updates. 
pub struct DatasetInner { - name: String, + name: EntryName, details: DatasetDetails, segments: HashMap, #[cfg(feature = "lance")] @@ -41,7 +41,12 @@ pub struct Dataset { } impl Dataset { - pub fn new(id: EntryId, name: String, store_kind: StoreKind, details: DatasetDetails) -> Self { + pub fn new( + id: EntryId, + name: EntryName, + store_kind: StoreKind, + details: DatasetDetails, + ) -> Self { Self { id, store_kind, @@ -63,11 +68,11 @@ impl Dataset { } #[inline] - pub fn name(&self) -> &str { + pub fn name(&self) -> &EntryName { &self.inner.name } - pub fn set_name(&mut self, name: String) { + pub fn set_name(&mut self, name: EntryName) { if name != self.inner.name { self.inner.modify().name = name; } @@ -236,9 +241,7 @@ impl Dataset { layer_names_row.push(layer_name.to_owned()); storage_urls_row.push(format!("memory:///store/{}", layer.store_slot_id())); - let layer_properties = layer - .compute_properties() - .map_err(Error::failed_to_extract_properties)?; + let layer_properties = layer.compute_properties()?; // Accumulate properties. 
// @@ -441,11 +444,7 @@ impl Dataset { registration_statuses .push(re_protos::cloud::v1alpha1::ext::LayerRegistrationStatus::Done.to_string()); - properties.push( - layer - .compute_properties() - .map_err(Error::failed_to_extract_properties)?, - ); + properties.push(layer.compute_properties()?); } let base_record_batch = ScanDatasetManifestResponse::create_dataframe( @@ -473,89 +472,19 @@ impl Dataset { pub fn rrd_manifest(&self, segment_id: &SegmentId) -> Result { let partition = self.segment(segment_id)?; - - let mut rrd_manifest_builder = re_log_encoding::RrdManifestBuilder::default(); - - let mut chunk_keys = Vec::new(); - - for (_layer_name, layer) in partition.iter_layers() { - let store = layer.store_handle(); - - let mut offset = 0; - for chunk in store.read().iter_physical_chunks() { - let chunk_batch = chunk - .to_chunk_batch() - .map_err(|err| Error::RrdLoadingError(err.into()))?; - - // Not a totally accurate value, but we're certainly not going to encode every chunk - // into IPC bytes just to figure out their uncompressed size either. - // - // This is fine for 2 reasons: - // 1. The reported size is mostly for human and automated heuristics (e.g. "have I - // enough memory left to download this chunk?"), and so doesn't need to be exact. - // 2. Reporting the size in terms of heap values is even better for such heuristics. - use re_byte_size::SizeBytes as _; - let byte_size_uncompressed = chunk.heap_size_bytes(); - - // There is no such thing as "compressed data on disk" in the case of the OSS server, - // since there's no disk to begin with. That's fine, we just re-use the - // uncompressed values: the chunk-key (generated below) is what will be used to - // accurately fetch the data in any case. 
- // - // TODO(cmc): we could also keep track of the compressed values originally fetched - // from disk and/or network all the way into the OSS server's datastructures and - // resurface them here but that doesn't seem to have any practical use, so not - // worth the added complexity? - let uncompressed_byte_span = re_span::Span { - start: offset, - len: byte_size_uncompressed, - }; - - offset += byte_size_uncompressed; - - rrd_manifest_builder - .append(&chunk_batch, uncompressed_byte_span, byte_size_uncompressed) - .map_err(|err| Error::RrdLoadingError(err.into()))?; - - chunk_keys.push( - crate::store::ChunkKey { - chunk_id: chunk.id(), - store_slot_id: layer.store_slot_id(), - } - .encode()?, - ); - } - } - let application_id = "n/a"; // irrelevant, dropped immediately - let store_id = StoreId::new(self.store_kind(), application_id, segment_id.to_string()); - let mut rrd_manifest = rrd_manifest_builder - .build(store_id) - .map_err(|err| Error::RrdLoadingError(err.into()))?; - - { - let (schema, mut columns, num_rows) = rrd_manifest.data.clone().into_parts(); - - let schema = { - let mut schema = Arc::unwrap_or_clone(schema); - let mut fields = schema.fields.to_vec(); - fields.push(Arc::new(RawRrdManifest::field_chunk_key())); - schema.fields = fields.into(); - schema - }; - { - let chunk_keys = arrow::array::BinaryArray::from_iter_values(chunk_keys.iter()); - columns.push(Arc::new(chunk_keys)); - } - - rrd_manifest.data = RecordBatch::try_new_with_options( - Arc::new(schema), - columns, - &RecordBatchOptions::new().with_row_count(Some(num_rows)), - )?; - } - - Ok(rrd_manifest) + let segment_store_id = + StoreId::new(self.store_kind(), application_id, segment_id.to_string()); + + // Each layer produces its own manifest (Lazy clones its cached footer, Eager rebuilds + // from chunks), then we merge them under the segment-scoped store id. 
+ let per_layer: Vec = partition + .iter_layers() + .map(|(_, layer)| layer.rrd_manifest()) + .collect::>()?; + + RawRrdManifest::merge(segment_store_id, per_layer) + .map_err(|err| Error::RrdLoadingError(err.into())) } // we can't expect there are no async calls without the lance feature @@ -566,17 +495,31 @@ impl Dataset { segment_id: SegmentId, layer_name: String, store_slot_id: StoreSlotId, - store_handle: ChunkStoreHandle, + resolved: ResolvedStore, on_duplicate: IfDuplicateBehavior, ) -> Result<(), Error> { re_log::debug!(?segment_id, ?layer_name, "add_layer"); - // Validate schema compatibility before inserting + // Validate schema compatibility before inserting. let current_schema = self.schema()?; let new_layer_schema = { - let fields = store_handle.read().schema().arrow_fields(); + let fields = resolved.schema().chunk_column_descriptors().arrow_fields(); Schema::new_with_metadata(fields, HashMap::default()) }; + for new_field in new_layer_schema.fields() { + if let Ok(current_field) = current_schema.field_with_name(new_field.name()) + && current_field != new_field.as_ref() + { + re_arrow_util::reject_unsupported_widenings(new_field.data_type()).map_err( + |err| { + Error::SchemaConflict(format!( + "schema incompatibility on segment '{segment_id}', \ + layer '{layer_name}': {err}" + )) + }, + )?; + } + } Schema::try_merge([current_schema, new_layer_schema]).map_err(|err| { Error::SchemaConflict(format!( "schema incompatibility on segment '{segment_id}', layer '{layer_name}': {err}" @@ -591,13 +534,13 @@ impl Dataset { .or_default() .insert_layer( layer_name.clone(), - Layer::new(store_slot_id, store_handle.clone()), + Layer::new(store_slot_id, resolved.clone()), on_duplicate, )?; #[cfg(feature = "lance")] self.indexes() - .on_layer_added(segment_id, store_handle, &layer_name, overwritten) + .on_layer_added(segment_id, &resolved, &layer_name, overwritten) .await?; #[cfg(not(feature = "lance"))] @@ -667,27 +610,19 @@ impl Dataset { store_kind: StoreKind, ) 
-> Result, Error> { re_log::info!("Loading RRD: {}", path.display()); - let contents = - ChunkStore::handle_from_rrd_filepath(&InMemoryStore::chunk_store_config(), path) - .map_err(Error::RrdLoadingError)?; let layer_name = layer_name.unwrap_or(DataSource::DEFAULT_LAYER); - let mut new_segment_ids = BTreeSet::default(); - for (store_id, chunk_store) in contents { - if store_id.kind() != store_kind { - continue; - } - + for (store_id, resolved) in ResolvedStore::load_rrd_file(path, store_kind)? { let segment_id = SegmentId::new(store_id.recording_id().to_string()); - let slot_id = pool.register(&chunk_store); + let slot_id = pool.register(&resolved); self.add_layer( segment_id.clone(), layer_name.to_owned(), slot_id, - chunk_store, + resolved, on_duplicate, ) .await?; diff --git a/crates/store/re_server/src/store/error.rs b/crates/store/re_server/src/store/error.rs index 135c0655b43c..32131d8c7046 100644 --- a/crates/store/re_server/src/store/error.rs +++ b/crates/store/re_server/src/store/error.rs @@ -1,4 +1,5 @@ -use re_log_types::EntryId; +use re_log_types::{ComponentPath, EntryId}; +use re_protos::EntryName; use re_protos::common::v1alpha1::ext::SegmentId; #[derive(thiserror::Error, Debug)] @@ -11,16 +12,16 @@ pub enum Error { StoreLoadError(#[from] re_entity_db::StoreLoadError), #[error("Invalid entry name: {0}")] - InvalidEntryName(String), + InvalidEntryName(#[from] re_protos::InvalidEntryNameError), #[error("Entry name '{0}' already exists")] - DuplicateEntryNameError(String), + DuplicateEntryNameError(EntryName), #[error("Entry id '{0}' already exists")] DuplicateEntryIdError(EntryId), #[error("Entry name '{0}' not found")] - EntryNameNotFound(String), + EntryNameNotFound(EntryName), #[error("Entry id '{0}' not found")] EntryIdNotFound(EntryId), @@ -41,8 +42,8 @@ pub enum Error { #[error("Layer '{0}' already exists")] LayerAlreadyExists(String), - #[error("Index '{0}' not found")] - IndexNotFound(String), + #[error("Component path '{0}' not found")] + 
ComponentPathNotFound(ComponentPath), #[error("Index '{0}' already exists")] IndexAlreadyExists(String), @@ -99,7 +100,7 @@ impl From for tonic::Status { | Error::EntryNameNotFound(_) | Error::SegmentIdNotFound { .. } | Error::LayerNameNotFound { .. } - | Error::IndexNotFound(_) + | Error::ComponentPathNotFound(_) | Error::InvalidChunkKey(_) => Self::not_found(format!("{err:#}")), Error::DataFusionError(err) => Self::internal(format!("DataFusion error: {err:#}")), diff --git a/crates/store/re_server/src/store/in_memory_store.rs b/crates/store/re_server/src/store/in_memory_store.rs index a1be761f6443..da6dd58dc8c0 100644 --- a/crates/store/re_server/src/store/in_memory_store.rs +++ b/crates/store/re_server/src/store/in_memory_store.rs @@ -10,6 +10,7 @@ use datafusion::common::DataFusionError; use itertools::Itertools as _; use re_chunk_store::{Chunk, ChunkStoreConfig}; use re_log_types::{EntryId, StoreId, StoreKind}; +use re_protos::EntryName; use re_protos::cloud::v1alpha1::EntryKind; use re_protos::cloud::v1alpha1::ext::{DatasetDetails, EntryDetails, ProviderDetails, TableEntry}; use re_protos::common::v1alpha1::ext::IfDuplicateBehavior; @@ -23,14 +24,19 @@ use crate::store::table::TableType; use crate::store::task_registry::TaskRegistry; use crate::store::{ChunkKey, Dataset, Error, StoreSlotId, Table}; -const ENTRIES_TABLE_NAME: &str = "__entries"; - pub struct InMemoryStore { datasets: HashMap, tables: HashMap, - id_by_name: HashMap, + id_by_name: HashMap, task_registry: TaskRegistry, store_pool: StorePool, + + /// Config applied to eager (in-memory) chunk stores created by this server. + /// + /// Lazy stores load their config from the RRD file they back and ignore this + /// value. Exposed via the builder as a testing hook so integration tests can + /// tune eager chunk-store knobs without relying on global env vars. 
+ eager_chunk_store_config: ChunkStoreConfig, } impl Default for InMemoryStore { @@ -41,6 +47,7 @@ impl Default for InMemoryStore { id_by_name: HashMap::default(), task_registry: TaskRegistry::default(), store_pool: StorePool::default(), + eager_chunk_store_config: Self::default_eager_chunk_store_config(), }; ret.update_entries_table() .expect("update_entries_table should never fail on initialization."); @@ -49,29 +56,39 @@ impl Default for InMemoryStore { } impl InMemoryStore { - pub fn chunk_store_config() -> re_chunk_store::ChunkStoreConfig { + pub fn eager_chunk_store_config(&self) -> ChunkStoreConfig { + self.eager_chunk_store_config.clone() + } + + pub fn set_eager_chunk_store_config(&mut self, config: ChunkStoreConfig) { + self.eager_chunk_store_config = config; + } + + /// Default eager `ChunkStoreConfig` for callsites that can't take a `&self` + /// (e.g. the static `ResolvedStore::load_rrd_file` eager-load fallback). + pub fn default_eager_chunk_store_config() -> ChunkStoreConfig { ChunkStoreConfig::CHANGELOG_DISABLED .apply_env() .unwrap_or(ChunkStoreConfig::CHANGELOG_DISABLED) } /// Look up a store by its [`StoreSlotId`], upgrading the weak reference. - pub fn resolve_store(&self, slot_id: &StoreSlotId) -> Option { + pub fn resolve_store(&self, slot_id: &StoreSlotId) -> Option { self.store_pool.get(slot_id) } /// Register a store in the pool, returning its new [`StoreSlotId`]. - pub fn register_store(&mut self, handle: &re_chunk_store::ChunkStoreHandle) -> StoreSlotId { - self.store_pool.register(handle) + pub fn register_store(&mut self, resolved: &crate::store::ResolvedStore) -> StoreSlotId { + self.store_pool.register(resolved) } /// Register a store under an existing [`StoreSlotId`] (e.g. for `memory://` re-registration). 
pub fn register_store_with_id( &mut self, id: StoreSlotId, - handle: &re_chunk_store::ChunkStoreHandle, + resolved: &crate::store::ResolvedStore, ) { - self.store_pool.register_with_id(id, handle); + self.store_pool.register_with_id(id, resolved); } /// Drop expired weak entries from the store pool. @@ -82,14 +99,25 @@ impl InMemoryStore { /// Returns the chunks corresponding to the provided chunk keys. /// /// Important: there is no guarantee on the order of the returned chunks. + /// + /// For Lazy stores, any not-yet-resident chunks are loaded in a single batched call per + /// distinct store, amortizing the file-mutex, IPC parse, and store-write-lock overhead that + /// per-key loading would pay N times. pub fn chunks_from_chunk_keys( &self, chunk_keys: &[ChunkKey], ) -> Result)>, Error> { - let mut result = Vec::with_capacity(chunk_keys.len()); + use crate::store::ResolvedStore; + + // Step 1: resolve every key's store once, and collect the set of missing chunk IDs per + // Lazy store. Eager stores that lack a chunk are fatal (no way to load them). 
+ let mut resolved_per_key: Vec<(ResolvedStore, &ChunkKey)> = + Vec::with_capacity(chunk_keys.len()); + let mut missing_per_store: HashMap> = + HashMap::default(); for chunk_key in chunk_keys { - let store_handle = self + let resolved = self .resolve_store(&chunk_key.store_slot_id) .ok_or_else(|| { Error::InvalidChunkKey(format!( @@ -97,12 +125,52 @@ impl InMemoryStore { chunk_key.store_slot_id )) })?; - let store = store_handle.read(); - let store_id = store.id().clone(); - let chunk = store.physical_chunk(&chunk_key.chunk_id).ok_or_else(|| { - Error::InvalidChunkKey(format!("chunk id {} not found", chunk_key.chunk_id)) - })?; - result.push((store_id, Arc::clone(chunk))); + + if resolved.physical_chunk(&chunk_key.chunk_id).is_none() { + match &resolved { + ResolvedStore::Lazy(_) => { + missing_per_store + .entry(chunk_key.store_slot_id) + .or_default() + .push(chunk_key.chunk_id); + } + ResolvedStore::Eager(_) => { + return Err(Error::InvalidChunkKey(format!( + "chunk id {} not found", + chunk_key.chunk_id + ))); + } + } + } + + resolved_per_key.push((resolved, chunk_key)); + } + + // Step 2: one batched `load_chunks` call per Lazy store, so I/O is more efficient (chunk + // spans are merged on read). `load_chunks` filters already-resident chunks internally, so + // this also handles the concurrent-load race: if another request loaded a chunk between + // step 1 and step 2, we simply load fewer chunks here. + for (slot_id, missing_ids) in &missing_per_store { + let Some(ResolvedStore::Lazy(lazy)) = self.resolve_store(slot_id) else { + // A store that was Lazy in step 1 should still be Lazy here; defensive guard. + continue; + }; + lazy.load_chunks(missing_ids) + .map_err(|err| Error::InvalidChunkKey(format!("lazy load failed: {err:#}")))?; + } + + // Step 3: every chunk should now be physical. Pull them from memory. 
+ let mut result = Vec::with_capacity(chunk_keys.len()); + for (resolved, chunk_key) in resolved_per_key { + let chunk = resolved + .physical_chunk(&chunk_key.chunk_id) + .ok_or_else(|| { + Error::InvalidChunkKey(format!( + "chunk id {} not found in manifest", + chunk_key.chunk_id + )) + })?; + result.push((resolved.store_id(), chunk)); } Ok(result) @@ -151,15 +219,17 @@ impl InMemoryStore { } let entry_name = match &named_path.name { - Some(name) => name.into(), + Some(name) => name.clone(), None => directory .file_name() .expect("the directory should have a name and the path was canonicalized") - .to_string_lossy(), + .to_string_lossy() + .into_owned(), }; + let entry_name = EntryName::new(entry_name).map_err(Error::InvalidEntryName)?; let dataset_id = self - .create_dataset(entry_name.into(), None) + .create_dataset(entry_name, None) .expect("Name cannot yet exist"); for entry in std::fs::read_dir(&directory)? { @@ -225,12 +295,14 @@ impl InMemoryStore { } let entry_name = match &named_path.name { - Some(name) => name.into(), + Some(name) => name.clone(), None => directory .file_name() .expect("the directory should have a name and the path was canonicalized") - .to_string_lossy(), + .to_string_lossy() + .into_owned(), }; + let entry_name = EntryName::new(entry_name).map_err(Error::InvalidEntryName)?; // Verify it is a valid lance table let path = directory.to_str().ok_or_else(|| { @@ -255,20 +327,20 @@ impl InMemoryStore { let entry_id = EntryId::new(); let provider_details = LanceTable { table_url }; - match self.table_by_name(entry_name.as_ref()) { + match self.table_by_name(&entry_name) { None => { - self.add_table_entry(entry_name.as_ref(), entry_id, table, provider_details)?; + self.add_table_entry(entry_name, entry_id, table, provider_details)?; } Some(_) => match on_duplicate { IfDuplicateBehavior::Overwrite => { re_log::info!("Overwriting {entry_name}"); - self.add_table_entry(entry_name.as_ref(), entry_id, table, provider_details)?; + 
self.add_table_entry(entry_name, entry_id, table, provider_details)?; } IfDuplicateBehavior::Skip => { re_log::info!("Ignoring {entry_name}: it already exists"); } IfDuplicateBehavior::Error => { - return Err(Error::DuplicateEntryNameError(entry_name.to_string())); + return Err(Error::DuplicateEntryNameError(entry_name)); } }, } @@ -276,10 +348,7 @@ impl InMemoryStore { Ok(entry_id) } - pub fn rename_entry(&mut self, entry_id: EntryId, entry_name: String) -> Result<(), Error> { - re_protos::cloud::v1alpha1::ext::validate_entry_name(&entry_name) - .map_err(Error::InvalidEntryName)?; - + pub fn rename_entry(&mut self, entry_id: EntryId, entry_name: EntryName) -> Result<(), Error> { if let Some(existing_entry_id) = self.id_by_name.get(&entry_name) { return if existing_entry_id == &entry_id { // nothing to do, the rename is a no-op @@ -315,17 +384,17 @@ impl InMemoryStore { #[cfg(feature = "lance")] // only used by the `lance` feature fn add_table_entry( &mut self, - entry_name: &str, + entry_name: EntryName, entry_id: EntryId, table: TableType, provider_details: re_protos::cloud::v1alpha1::ext::LanceTable, ) -> Result<(), Error> { - self.id_by_name.insert(entry_name.to_owned(), entry_id); + self.id_by_name.insert(entry_name.clone(), entry_id); self.tables.insert( entry_id, Table::new( entry_id, - entry_name.to_owned(), + entry_name, table, None, ProviderDetails::LanceTable(provider_details), @@ -341,15 +410,12 @@ impl InMemoryStore { /// risk for `dataset_id`. 
pub fn create_dataset( &mut self, - dataset_name: String, + dataset_name: EntryName, dataset_id: Option, ) -> Result { - re_protos::cloud::v1alpha1::ext::validate_entry_name(&dataset_name) - .map_err(Error::InvalidEntryName)?; - let dataset_id = dataset_id.unwrap_or_else(EntryId::new); let blueprint_dataset_id = EntryId::new(); - let blueprint_dataset_name = format!("__bp_{dataset_id}"); + let blueprint_dataset_name = EntryName::blueprint_for(dataset_id); self.create_dataset_impl( blueprint_dataset_name, @@ -374,12 +440,12 @@ impl InMemoryStore { /// Create a dataset of the given kind with the given details. fn create_dataset_impl( &mut self, - name: String, + name: EntryName, entry_id: EntryId, store_kind: StoreKind, details: Option, ) -> Result { - re_log::debug!(name, "create_dataset"); + re_log::debug!(%name, "create_dataset"); if self.id_by_name.contains_key(&name) { return Err(Error::DuplicateEntryNameError(name)); } @@ -441,7 +507,7 @@ impl InMemoryStore { let entries_table_id = *self .id_by_name - .entry(ENTRIES_TABLE_NAME.to_owned()) + .entry(EntryName::entries_table()) .or_insert_with(EntryId::new); let prior_entries_table = self.tables.remove(&entries_table_id); @@ -450,7 +516,7 @@ impl InMemoryStore { entries_table_id, Table::new( entries_table_id, - ENTRIES_TABLE_NAME.to_owned(), + EntryName::entries_table(), TableType::DataFusionTable(entries_table), prior_entries_table.map(|t| t.created_at()), ProviderDetails::SystemTable(SystemTable { @@ -474,12 +540,12 @@ impl InMemoryStore { .ok_or(Error::EntryIdNotFound(entry_id)) } - pub fn dataset_by_name(&self, name: &str) -> Result<&Dataset, Error> { + pub fn dataset_by_name(&self, name: &EntryName) -> Result<&Dataset, Error> { let entry_id = self .id_by_name .get(name) .copied() - .ok_or(Error::EntryNameNotFound(name.to_owned()))?; + .ok_or_else(|| Error::EntryNameNotFound(name.clone()))?; self.dataset(entry_id) } @@ -495,7 +561,7 @@ impl InMemoryStore { self.tables.get_mut(&entry_id) } - pub fn 
table_by_name(&self, name: &str) -> Option<&Table> { + pub fn table_by_name(&self, name: &EntryName) -> Option<&Table> { let entry_id = self.id_by_name.get(name).copied()?; self.table(entry_id) } @@ -504,7 +570,7 @@ impl InMemoryStore { self.tables.values() } - pub fn id_by_name(&self, name: &str) -> Option<&EntryId> { + pub fn id_by_name(&self, name: &EntryName) -> Option<&EntryId> { self.id_by_name.get(name) } @@ -518,24 +584,21 @@ impl InMemoryStore { pub async fn create_table_entry( &mut self, - name: &str, + name: EntryName, url: &url::Url, schema: SchemaRef, ) -> Result { - re_protos::cloud::v1alpha1::ext::validate_entry_name(name) - .map_err(Error::InvalidEntryName)?; - - re_log::debug!(name, "create_table"); - if self.id_by_name.contains_key(name) { - return Err(Error::DuplicateEntryNameError(name.to_owned())); + re_log::debug!(%name, "create_table"); + if self.id_by_name.contains_key(&name) { + return Err(Error::DuplicateEntryNameError(name)); } let entry_id = EntryId::new(); - let table = Table::create_table_entry(entry_id, name, url, schema).await?; + let table = Table::create_table_entry(entry_id, name.clone(), url, schema).await?; let table_entry = table.as_table_entry(); - self.id_by_name.insert(name.to_owned(), entry_id); + self.id_by_name.insert(name, entry_id); self.tables.insert(entry_id, table); self.update_entries_table()?; @@ -547,7 +610,7 @@ fn generate_entries_table(entries: &[EntryDetails]) -> Result, - Vec, + Vec, Vec, Vec, Vec, @@ -567,7 +630,9 @@ fn generate_entries_table(entries: &[EntryDetails]) -> Result>(), + )) as ArrayRef; let kind_arr = Arc::new(Int32Array::from(entry_kind)) as ArrayRef; let created_at_arr = Arc::new(TimestampNanosecondArray::from(created_at)) as ArrayRef; let updated_at_arr = Arc::new(TimestampNanosecondArray::from(updated_at)) as ArrayRef; @@ -631,7 +696,7 @@ impl InMemoryStore { // store upon initialization. 
let entry_table_rb = generate_entries_table(&[EntryDetails { id: EntryId::from(Tuid::from_bytes([0; 16])), - name: ENTRIES_TABLE_NAME.to_owned(), + name: EntryName::entries_table(), kind: EntryKind::Table, created_at: Default::default(), updated_at: Default::default(), diff --git a/crates/store/re_server/src/store/layer.rs b/crates/store/re_server/src/store/layer.rs index 9d61db7a3258..191477f0ec5f 100644 --- a/crates/store/re_server/src/store/layer.rs +++ b/crates/store/re_server/src/store/layer.rs @@ -1,25 +1,28 @@ -use arrow::array::RecordBatch; +use std::collections::{BTreeMap, HashMap}; +use std::sync::Arc; + +use arrow::array::{BinaryArray, RecordBatch, RecordBatchOptions}; use arrow::datatypes::Schema; use arrow::error::ArrowError; use re_byte_size::SizeBytes as _; -use re_chunk_store::ChunkStoreHandle; +use re_log_encoding::RawRrdManifest; use re_log_types::{AbsoluteTimeRange, Timeline}; -use std::collections::{BTreeMap, HashMap}; use super::StoreSlotId; +use super::resolved_store::ResolvedStore; #[derive(Clone)] pub struct Layer { store_slot_id: StoreSlotId, - store_handle: ChunkStoreHandle, + resolved: ResolvedStore, registration_time: jiff::Timestamp, } impl Layer { - pub fn new(store_slot_id: StoreSlotId, store_handle: ChunkStoreHandle) -> Self { + pub fn new(store_slot_id: StoreSlotId, resolved: ResolvedStore) -> Self { Self { store_slot_id, - store_handle, + resolved, registration_time: jiff::Timestamp::now(), } } @@ -28,8 +31,8 @@ impl Layer { self.store_slot_id } - pub fn store_handle(&self) -> &ChunkStoreHandle { - &self.store_handle + pub fn resolved_store(&self) -> &ResolvedStore { + &self.resolved } pub fn registration_time(&self) -> jiff::Timestamp { @@ -48,19 +51,46 @@ impl Layer { } pub fn num_chunks(&self) -> u64 { - self.store_handle.read().num_physical_chunks() as u64 + match &self.resolved { + ResolvedStore::Eager(h) => h.read().num_physical_chunks() as u64, + ResolvedStore::Lazy(l) => l.num_chunks() as u64, + } } + /// Approximate size 
of this layer. + /// + /// The unit differs by backing store and the two values are **not directly comparable**: + /// + /// - **Eager** layers report the in-memory heap size of the materialized chunks. + /// - **Lazy** layers report the on-disk IPC byte length from the RRD footer, including + /// each chunk's message header. Chunks are not materialized. + /// + /// Treat this as a rough load indicator, not a precise accounting. pub fn size_bytes(&self) -> u64 { - self.store_handle - .read() - .iter_physical_chunks() - .map(|chunk| chunk.heap_size_bytes()) - .sum() + match &self.resolved { + ResolvedStore::Eager(h) => h + .read() + .iter_physical_chunks() + .map(|chunk| chunk.heap_size_bytes()) + .sum(), + + ResolvedStore::Lazy(l) => { + let header = re_log_encoding::MessageHeader::ENCODED_SIZE_BYTES as u64; + l.manifest() + .col_chunk_byte_size() + .iter() + .map(|size| size + header) + .sum() + } + } } pub fn schema(&self) -> Schema { - let fields = self.store_handle.read().schema().arrow_fields(); + let fields = self + .resolved + .schema() + .chunk_column_descriptors() + .arrow_fields(); Schema::new_with_metadata(fields, HashMap::default()) } @@ -68,24 +98,342 @@ impl Layer { re_log_encoding::RawRrdManifest::compute_sorbet_schema_sha256(&self.schema()) } - pub fn compute_properties( + pub fn compute_properties(&self) -> Result { + self.resolved.extract_properties() + } + + /// Produce a [`RawRrdManifest`] for this layer, with a `chunk_key` column already populated. + /// + /// - **Lazy** layers clone the cached RRD footer manifest — no chunk materialization. + /// - **Eager** layers rebuild the manifest by iterating every physical chunk. + /// + /// The `store_id` on the returned manifest is the layer's own store id; callers merging + /// multiple layer manifests into a segment-scoped manifest should override it afterwards + /// (see [`re_log_encoding::RawRrdManifest::merge`]). 
+ pub fn rrd_manifest(&self) -> Result { + match &self.resolved { + ResolvedStore::Lazy(lazy) => self.rrd_manifest_from_lazy_cache(lazy), + ResolvedStore::Eager(handle) => self.rrd_manifest_from_chunks(handle), + } + } + + fn rrd_manifest_from_lazy_cache( &self, - ) -> Result { - self.store_handle.read().extract_properties() + lazy: &Arc, + ) -> Result { + let mut manifest = (**lazy.raw_manifest()).clone(); + + let chunk_keys = manifest + .col_chunk_id() + .map_err(|err| super::Error::RrdLoadingError(err.into()))? + .map(|chunk_id| { + super::ChunkKey { + chunk_id, + store_slot_id: self.store_slot_id, + } + .encode() + }) + .collect::, _>>()?; + + append_chunk_key_column(&mut manifest, &chunk_keys)?; + Ok(manifest) + } + + fn rrd_manifest_from_chunks( + &self, + handle: &re_chunk_store::ChunkStoreHandle, + ) -> Result { + let store = handle.read(); + let chunks: Vec> = + store.iter_physical_chunks().cloned().collect(); + let store_id = store.id().clone(); + drop(store); + + let mut builder = re_log_encoding::RrdManifestBuilder::default(); + let mut chunk_keys = Vec::with_capacity(chunks.len()); + let mut offset = 0; + + for chunk in &chunks { + let chunk_batch = chunk + .to_chunk_batch() + .map_err(|err| super::Error::RrdLoadingError(anyhow::anyhow!(err)))?; + + // There's no compression on the OSS server (no disk), so "compressed size" equals + // uncompressed size. The chunk_key is what's used to actually fetch data. 
+ let byte_size_uncompressed = chunk.heap_size_bytes(); + let uncompressed_byte_span = re_span::Span { + start: offset, + len: byte_size_uncompressed, + }; + offset += byte_size_uncompressed; + + builder + .append(&chunk_batch, uncompressed_byte_span, byte_size_uncompressed) + .map_err(|err| super::Error::RrdLoadingError(err.into()))?; + + chunk_keys.push( + super::ChunkKey { + chunk_id: chunk.id(), + store_slot_id: self.store_slot_id, + } + .encode()?, + ); + } + + let mut manifest = builder + .build(store_id) + .map_err(|err| super::Error::RrdLoadingError(err.into()))?; + + append_chunk_key_column(&mut manifest, &chunk_keys)?; + Ok(manifest) } pub fn index_ranges(&self) -> BTreeMap { - let mut ranges = BTreeMap::new(); - for chunk in self.store_handle.read().iter_physical_chunks() { - for time_col in chunk.timelines().values() { - let timeline = time_col.timeline().to_owned(); - let range = time_col.time_range(); - - let entry = ranges.entry(timeline).or_insert(range); - *entry = entry.union(range); + match &self.resolved { + ResolvedStore::Eager(h) => { + let mut ranges = BTreeMap::new(); + for chunk in h.read().iter_physical_chunks() { + for time_col in chunk.timelines().values() { + let timeline = time_col.timeline().to_owned(); + let range = time_col.time_range(); + let entry = ranges.entry(timeline).or_insert(range); + *entry = entry.union(range); + } + } + ranges + } + ResolvedStore::Lazy(l) => { + let mut ranges = BTreeMap::new(); + for per_entity in l.manifest().temporal_map().values() { + for (timeline, per_component) in per_entity { + for per_chunk in per_component.values() { + for entry in per_chunk.values() { + let range = entry.time_range; + let e = ranges.entry(*timeline).or_insert(range); + *e = e.union(range); + } + } + } + } + ranges + } + } + } +} + +/// Append the server-synthesized `chunk_key` column to a [`RawRrdManifest`]. +/// +/// The keys must be aligned with `manifest.data`'s existing rows. 
+fn append_chunk_key_column( + manifest: &mut RawRrdManifest, + chunk_keys: &[Vec], +) -> Result<(), super::Error> { + let (schema, mut columns, num_rows) = manifest.data.clone().into_parts(); + + let schema = { + let mut schema = Arc::unwrap_or_clone(schema); + let mut fields = schema.fields.to_vec(); + fields.push(Arc::new(RawRrdManifest::field_chunk_key())); + schema.fields = fields.into(); + schema + }; + + let keys_array = BinaryArray::from_iter_values(chunk_keys.iter()); + columns.push(Arc::new(keys_array)); + + manifest.data = RecordBatch::try_new_with_options( + Arc::new(schema), + columns, + &RecordBatchOptions::new().with_row_count(Some(num_rows)), + )?; + + Ok(()) +} + +#[cfg(test)] +mod tests { + use std::collections::BTreeSet; + use std::path::Path; + + use arrow::array::Array as _; + use re_arrow_util::ArrowArrayDowncastRef as _; + use re_chunk_store::external::re_chunk; + use re_chunk_store::{Chunk, ChunkStore, ChunkStoreConfig, ChunkStoreHandle, LazyRrdStore}; + use re_log_encoding::EncodingOptions; + use re_log_types::{ + EntityPath, LogMsg, SetStoreInfo, StoreId, StoreInfo, StoreKind, StoreSource, TimePoint, + Timeline, + example_components::{MyPoint, MyPoints}, + }; + use re_types_core::ChunkId; + + use super::*; + use crate::store::{ChunkKey, ResolvedStore}; + + fn build_chunks() -> (StoreId, Vec>) { + let store_id = StoreId::random(StoreKind::Recording, "test"); + let timeline = Timeline::new_sequence("frame"); + let mut chunks = Vec::new(); + for entity_idx in 0..2 { + for frame_idx in 0..3i64 { + let entity_path = EntityPath::from(format!("/entity_{entity_idx}")); + let points = MyPoint::from_iter( + #[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + { + frame_idx as u32..frame_idx as u32 + 1 + }, + ); + let chunk = Chunk::builder(entity_path) + .with_sparse_component_batches( + re_chunk::RowId::new(), + TimePoint::default().with(timeline, frame_idx), + [(MyPoints::descriptor_points(), Some(&points as _))], + ) + .build() + 
.unwrap(); + chunks.push(Arc::new(chunk)); } } + (store_id, chunks) + } + + fn write_rrd(path: &Path, store_id: &StoreId, chunks: &[Arc]) { + let set_store_info = LogMsg::SetStoreInfo(SetStoreInfo { + row_id: *re_chunk::RowId::ZERO, + info: StoreInfo::new(store_id.clone(), StoreSource::Unknown), + }); + let mut file = std::fs::File::create(path).unwrap(); + let mut encoder = re_log_encoding::Encoder::new_eager( + re_log_encoding::CrateVersion::LOCAL, + EncodingOptions::PROTOBUF_COMPRESSED, + &mut file, + ) + .unwrap(); + encoder.append(&set_store_info).unwrap(); + for chunk in chunks { + let arrow_msg = chunk.to_arrow_msg().unwrap(); + let msg = LogMsg::ArrowMsg(store_id.clone(), arrow_msg); + encoder.append(&msg).unwrap(); + } + encoder.finish().unwrap(); + } + + /// Single-layer equivalence: a Lazy-backed layer and an Eager-backed layer holding the same + /// chunks must produce manifests that are equivalent on the axes clients care about (chunk + /// IDs, entity paths, staticness, row counts, schema shape, decodable `chunk_key`s). + /// + /// Byte-size/offset columns are intentionally NOT compared: per the `RawRrdManifest` + /// docstring, Lazy reports on-disk IPC sizes while Eager reports heap sizes. + #[test] + fn rrd_manifest_lazy_and_eager_produce_equivalent_output() { + let (store_id, chunks) = build_chunks(); + + // Eager backend: in-memory `ChunkStore`. `ALL_DISABLED` matches `LazyRrdStore`'s internal + // config, so both sides hold the same chunk set (otherwise compaction on insert would + // merge them and the manifests would no longer be row-wise comparable). + let mut eager_store = ChunkStore::new(store_id.clone(), ChunkStoreConfig::ALL_DISABLED); + for chunk in &chunks { + eager_store.insert_chunk(chunk).unwrap(); + } + let eager_layer = Layer::new( + StoreSlotId::new(), + ResolvedStore::Eager(ChunkStoreHandle::new(eager_store)), + ); + + // Lazy backend: same chunks, written to an RRD file with footer, then loaded lazily. 
+ let dir = tempfile::tempdir().unwrap(); + let rrd_path = dir.path().join("test.rrd"); + write_rrd(&rrd_path, &store_id, &chunks); + + let mut footer_file = std::fs::File::open(&rrd_path).unwrap(); + let footer = re_log_encoding::read_rrd_footer(&mut footer_file) + .unwrap() + .unwrap(); + let raw_manifest = Arc::new(footer.manifests[&store_id].clone()); + let store_file = std::fs::File::open(&rrd_path).unwrap(); + let lazy = + Arc::new(LazyRrdStore::try_new(store_file, rrd_path.clone(), raw_manifest).unwrap()); + let lazy_layer = Layer::new(StoreSlotId::new(), ResolvedStore::Lazy(lazy)); + + let lazy_manifest = lazy_layer.rrd_manifest().unwrap(); + let eager_manifest = eager_layer.rrd_manifest().unwrap(); + + // Row counts match. + assert_eq!( + lazy_manifest.data.num_rows(), + eager_manifest.data.num_rows(), + "row counts differ" + ); + + // Chunk IDs match as sets (per-row order is not part of the contract). + let lazy_ids: BTreeSet = lazy_manifest.col_chunk_id().unwrap().collect(); + let eager_ids: BTreeSet = eager_manifest.col_chunk_id().unwrap().collect(); + assert_eq!(lazy_ids, eager_ids, "chunk IDs differ"); + + // Compare per-chunk metadata. Both manifests may list chunks in different orders, so + // sort by chunk_id first. 
+ let sort_by_chunk_id = |manifest: &RawRrdManifest| -> Vec { + let mut indexed: Vec<(usize, ChunkId)> = + manifest.col_chunk_id().unwrap().enumerate().collect(); + indexed.sort_by_key(|(_, id)| *id); + indexed.into_iter().map(|(i, _)| i).collect() + }; + let lazy_order = sort_by_chunk_id(&lazy_manifest); + let eager_order = sort_by_chunk_id(&eager_manifest); + + let lazy_entity_paths = lazy_manifest.col_chunk_entity_path_raw().unwrap(); + let eager_entity_paths = eager_manifest.col_chunk_entity_path_raw().unwrap(); + let lazy_is_static = lazy_manifest.col_chunk_is_static_raw().unwrap(); + let eager_is_static = eager_manifest.col_chunk_is_static_raw().unwrap(); + let lazy_num_rows = lazy_manifest.col_chunk_num_rows_raw().unwrap(); + let eager_num_rows = eager_manifest.col_chunk_num_rows_raw().unwrap(); + + for (li, ei) in lazy_order.iter().zip(eager_order.iter()) { + assert_eq!( + lazy_entity_paths.value(*li), + eager_entity_paths.value(*ei), + "entity_path differs" + ); + assert_eq!( + lazy_is_static.value(*li), + eager_is_static.value(*ei), + "is_static differs" + ); + assert_eq!( + lazy_num_rows.value(*li), + eager_num_rows.value(*ei), + "num_rows differs" + ); + } + + // Sorbet schema SHA matches: both paths describe the same logical recording schema. + assert_eq!( + lazy_manifest.sorbet_schema_sha256, eager_manifest.sorbet_schema_sha256, + "sorbet schema SHA differs between lazy and eager" + ); + + // Manifest RecordBatch schemas match too: same columns (chunk_fetcher base columns, + // dynamically-emitted per-timeline/component index columns, plus the appended + // `chunk_key`). If either code path forgets a column or reorders fields, we'd diverge. + assert_eq!( + lazy_manifest.data.schema(), + eager_manifest.data.schema(), + "manifest RecordBatch schema differs between lazy and eager" + ); - ranges + // `chunk_key` column is present on both and decodes to in-manifest chunk IDs. 
+ let decode_keys = |manifest: &RawRrdManifest| -> BTreeSet { + let keys: &BinaryArray = manifest + .data + .column_by_name(RawRrdManifest::FIELD_CHUNK_KEY) + .expect("chunk_key column missing") + .downcast_array_ref::() + .unwrap(); + (0..keys.len()) + .map(|i| ChunkKey::decode(keys.value(i)).unwrap().chunk_id) + .collect() + }; + assert_eq!(decode_keys(&lazy_manifest), lazy_ids); + assert_eq!(decode_keys(&eager_manifest), eager_ids); } } diff --git a/crates/store/re_server/src/store/mod.rs b/crates/store/re_server/src/store/mod.rs index 85ff3ea204e1..ff8df1c4862e 100644 --- a/crates/store/re_server/src/store/mod.rs +++ b/crates/store/re_server/src/store/mod.rs @@ -3,6 +3,7 @@ mod dataset; mod error; mod in_memory_store; mod layer; +mod resolved_store; mod segment; mod store_pool; mod table; @@ -14,6 +15,7 @@ pub use self::dataset::Dataset; pub use self::error::Error; pub use self::in_memory_store::InMemoryStore; pub use self::layer::Layer; +pub use self::resolved_store::ResolvedStore; pub use self::segment::Segment; pub use self::store_pool::StoreSlotId; pub use self::table::Table; diff --git a/crates/store/re_server/src/store/resolved_store.rs b/crates/store/re_server/src/store/resolved_store.rs new file mode 100644 index 000000000000..cf1269fc62ce --- /dev/null +++ b/crates/store/re_server/src/store/resolved_store.rs @@ -0,0 +1,180 @@ +use std::path::Path; +use std::sync::Arc; + +use arrow::array::RecordBatch; +use nohash_hasher::IntSet; +use re_chunk_store::{ + Chunk, ChunkId, ChunkStore, ChunkStoreHandle, ChunkStoreHandleWeak, ChunkTrackingMode, + LazyRrdStore, QueryResults, StoreSchema, +}; +use re_log_encoding::RrdManifest; +use re_log_types::{EntityPath, StoreId, StoreKind}; + +/// A store backend: either an in-memory eager store or a file-backed lazy store. +/// +/// Both variants are `Arc`-based, so `Clone` is cheap. +#[derive(Clone)] +pub enum ResolvedStore { + /// Fully in-memory store (e.g. from `write_chunks` or legacy RRD without footer). 
+ Eager(ChunkStoreHandle), + + /// File-backed store with on-demand chunk loading. + Lazy(Arc), +} + +impl ResolvedStore { + pub fn store_id(&self) -> StoreId { + match self { + Self::Eager(h) => h.read().id().clone(), + Self::Lazy(l) => l.store_id().clone(), + } + } + + pub fn schema(&self) -> StoreSchema { + match self { + Self::Eager(h) => h.read().schema().clone(), + Self::Lazy(l) => l.schema(), + } + } + + pub fn all_entities(&self) -> IntSet { + match self { + Self::Eager(h) => h.read().all_entities(), + Self::Lazy(l) => l.all_entities(), + } + } + + pub fn physical_chunk(&self, id: &ChunkId) -> Option> { + match self { + Self::Eager(h) => h.read().physical_chunk(id).cloned(), + Self::Lazy(l) => l.physical_chunk(id), + } + } + + pub fn latest_at_relevant_chunks_for_all_components( + &self, + report_mode: ChunkTrackingMode, + query: &re_chunk_store::LatestAtQuery, + entity_path: &EntityPath, + include_static: bool, + ) -> QueryResults { + match self { + Self::Eager(h) => h.read().latest_at_relevant_chunks_for_all_components( + report_mode, + query, + entity_path, + include_static, + ), + Self::Lazy(l) => l.latest_at_relevant_chunks_for_all_components( + report_mode, + query, + entity_path, + include_static, + ), + } + } + + pub fn range_relevant_chunks_for_all_components( + &self, + report_mode: ChunkTrackingMode, + query: &re_chunk_store::RangeQuery, + entity_path: &EntityPath, + include_static: bool, + ) -> QueryResults { + match self { + Self::Eager(h) => h.read().range_relevant_chunks_for_all_components( + report_mode, + query, + entity_path, + include_static, + ), + Self::Lazy(l) => l.range_relevant_chunks_for_all_components( + report_mode, + query, + entity_path, + include_static, + ), + } + } + + pub fn manifest(&self) -> Option<&Arc> { + match self { + Self::Eager(_) => None, + Self::Lazy(l) => Some(l.manifest()), + } + } + + pub fn extract_properties(&self) -> Result { + match self { + Self::Eager(h) => h.read().extract_properties(), + Self::Lazy(l) 
=> l.extract_properties(), + } + .map_err(super::Error::failed_to_extract_properties) + } + + pub(crate) fn downgrade(&self) -> ResolvedStoreWeak { + match self { + Self::Eager(h) => ResolvedStoreWeak::Eager(h.downgrade()), + Self::Lazy(l) => ResolvedStoreWeak::Lazy(Arc::downgrade(l)), + } + } + + /// Load an RRD file as one or more [`ResolvedStore`]s, one per store found in the file. + /// + /// Prefers the lazy path (chunks loaded on demand) when the RRD has a footer; falls back to + /// eager loading (whole file read into memory) when the footer is missing or unreadable. + /// Stores whose kind does not match `store_kind` are filtered out. + pub fn load_rrd_file( + path: &Path, + store_kind: StoreKind, + ) -> Result, super::Error> { + let mut file = std::fs::File::open(path)?; + + if let Ok(Some(footer)) = re_log_encoding::read_rrd_footer(&mut file) { + // The footer-reading handle is no longer needed — each `LazyRrdStore` holds its own. + drop(file); + + let mut out = Vec::with_capacity(footer.manifests.len()); + for (store_id, raw_manifest) in footer.manifests { + if store_id.kind() != store_kind { + continue; + } + let store_file = std::fs::File::open(path)?; + let lazy = Arc::new( + LazyRrdStore::try_new(store_file, path.to_owned(), Arc::new(raw_manifest)) + .map_err(|err| super::Error::RrdLoadingError(err.into()))?, + ); + out.push((store_id, Self::Lazy(lazy))); + } + Ok(out) + } else { + // Legacy fallback: eager load (no footer, or footer read error). + let contents = ChunkStore::handle_from_rrd_filepath( + &super::InMemoryStore::default_eager_chunk_store_config(), + path, + ) + .map_err(super::Error::RrdLoadingError)?; + + Ok(contents + .into_iter() + .filter(|(store_id, _)| store_id.kind() == store_kind) + .map(|(store_id, handle)| (store_id, Self::Eager(handle))) + .collect()) + } + } +} + +/// Weak counterpart of [`ResolvedStore`], held by [`StorePool`](super::store_pool::StorePool). 
+pub(crate) enum ResolvedStoreWeak { + Eager(ChunkStoreHandleWeak), + Lazy(std::sync::Weak), +} + +impl ResolvedStoreWeak { + pub fn upgrade(&self) -> Option { + match self { + Self::Eager(w) => w.upgrade().map(ResolvedStore::Eager), + Self::Lazy(w) => w.upgrade().map(ResolvedStore::Lazy), + } + } +} diff --git a/crates/store/re_server/src/store/store_pool.rs b/crates/store/re_server/src/store/store_pool.rs index cc067767be67..8f2dff7d043f 100644 --- a/crates/store/re_server/src/store/store_pool.rs +++ b/crates/store/re_server/src/store/store_pool.rs @@ -1,8 +1,10 @@ use std::collections::HashMap; -use re_chunk_store::{ChunkStoreHandle, ChunkStoreHandleWeak}; use re_tuid::Tuid; +use super::ResolvedStore; +use super::resolved_store::ResolvedStoreWeak; + /// Opaque identifier for a store slot in the [`StorePool`]. /// /// Used in `memory:///store/{store_slot_id}` URLs to make stores globally resolvable. @@ -36,31 +38,31 @@ impl std::str::FromStr for StoreSlotId { } } -/// A lookup index of [`ChunkStoreHandle`]s keyed by [`StoreSlotId`]. +/// A lookup index of [`ResolvedStore`]s keyed by [`StoreSlotId`]. /// /// The pool holds **weak** references. The strong (owning) references live in /// [`Layer`](super::Layer)s. When all layers drop a store, the weak entry /// expires naturally. Call [`StorePool::cleanup`] to sweep expired entries. #[derive(Default)] pub struct StorePool { - stores: HashMap, + stores: HashMap, } impl StorePool { /// Register a store, returning its new [`StoreSlotId`]. - pub fn register(&mut self, handle: &ChunkStoreHandle) -> StoreSlotId { + pub fn register(&mut self, resolved: &ResolvedStore) -> StoreSlotId { let id = StoreSlotId::new(); - self.stores.insert(id, handle.downgrade()); + self.stores.insert(id, resolved.downgrade()); id } /// Register under an existing ID (e.g. for `memory://` re-registration). 
- pub fn register_with_id(&mut self, id: StoreSlotId, handle: &ChunkStoreHandle) { - self.stores.insert(id, handle.downgrade()); + pub fn register_with_id(&mut self, id: StoreSlotId, resolved: &ResolvedStore) { + self.stores.insert(id, resolved.downgrade()); } /// Resolve by upgrading the `Weak`. Returns `None` if expired or unknown. - pub fn get(&self, id: &StoreSlotId) -> Option { + pub fn get(&self, id: &StoreSlotId) -> Option { let weak = self.stores.get(id)?; weak.upgrade() } @@ -78,10 +80,10 @@ mod tests { use super::*; - fn test_store_handle() -> ChunkStoreHandle { + fn test_resolved_store() -> ResolvedStore { let store_id = StoreId::new(StoreKind::Recording, "test", "test"); let config = re_chunk_store::ChunkStoreConfig::CHANGELOG_DISABLED; - ChunkStoreHandle::new(ChunkStore::new(store_id, config)) + ResolvedStore::Eager(ChunkStoreHandle::new(ChunkStore::new(store_id, config))) } #[test] @@ -95,31 +97,28 @@ mod tests { #[test] fn register_and_get() { let mut pool = StorePool::default(); - let handle = test_store_handle(); - let id = pool.register(&handle); + let resolved = test_resolved_store(); + let id = pool.register(&resolved); let retrieved = pool.get(&id).expect("should find store"); - assert!(std::ptr::eq( - std::ptr::from_ref(&*handle.read()), - std::ptr::from_ref(&*retrieved.read()) - )); + assert_eq!(resolved.store_id(), retrieved.store_id()); } #[test] fn get_returns_none_after_drop() { let mut pool = StorePool::default(); - let handle = test_store_handle(); - let id = pool.register(&handle); - drop(handle); + let resolved = test_resolved_store(); + let id = pool.register(&resolved); + drop(resolved); assert!(pool.get(&id).is_none(), "should be expired"); } #[test] fn cleanup_removes_expired() { let mut pool = StorePool::default(); - let handle = test_store_handle(); - let _ = pool.register(&handle); - drop(handle); + let resolved = test_resolved_store(); + let _ = pool.register(&resolved); + drop(resolved); pool.cleanup(); 
assert!(pool.stores.is_empty(), "should have been cleaned up"); } @@ -127,8 +126,8 @@ mod tests { #[test] fn cleanup_keeps_alive() { let mut pool = StorePool::default(); - let handle = test_store_handle(); - let id = pool.register(&handle); + let resolved = test_resolved_store(); + let id = pool.register(&resolved); pool.cleanup(); assert!(pool.get(&id).is_some(), "should NOT have been cleaned up"); } @@ -136,15 +135,12 @@ mod tests { #[test] fn register_with_id() { let mut pool = StorePool::default(); - let handle = test_store_handle(); + let resolved = test_resolved_store(); let id = StoreSlotId::new(); - pool.register_with_id(id, &handle); + pool.register_with_id(id, &resolved); let retrieved = pool.get(&id).expect("should find store"); - assert!(std::ptr::eq( - std::ptr::from_ref(&*handle.read()), - std::ptr::from_ref(&*retrieved.read()) - )); + assert_eq!(resolved.store_id(), retrieved.store_id()); } #[test] diff --git a/crates/store/re_server/src/store/table.rs b/crates/store/re_server/src/store/table.rs index b64aa184a46e..7592ab3c7911 100644 --- a/crates/store/re_server/src/store/table.rs +++ b/crates/store/re_server/src/store/table.rs @@ -10,6 +10,7 @@ use datafusion::execution::SessionStateBuilder; use datafusion::logical_expr::dml::InsertOp; use futures::StreamExt as _; use re_log_types::EntryId; +use re_protos::EntryName; use re_protos::cloud::v1alpha1::EntryKind; use re_protos::cloud::v1alpha1::ext::{EntryDetails, ProviderDetails, TableEntry}; @@ -23,7 +24,7 @@ pub enum TableType { #[derive(Clone)] pub struct Table { id: EntryId, - name: String, + name: EntryName, table: TableType, created_at: jiff::Timestamp, @@ -35,7 +36,7 @@ pub struct Table { impl Table { pub fn new( id: EntryId, - name: String, + name: EntryName, table: TableType, created_at: Option, provider_details: ProviderDetails, @@ -54,11 +55,11 @@ impl Table { self.id } - pub fn name(&self) -> &str { + pub fn name(&self) -> &EntryName { &self.name } - pub fn set_name(&mut self, name: 
String) { + pub fn set_name(&mut self, name: EntryName) { self.name = name; self.updated_at = jiff::Timestamp::now(); } @@ -232,7 +233,7 @@ impl Table { #[cfg(feature = "lance")] pub async fn create_table_entry( id: EntryId, - name: &str, + name: EntryName, url: &url::Url, schema: SchemaRef, ) -> Result { @@ -255,7 +256,7 @@ impl Table { Ok(Self::new( id, - name.to_owned(), + name, TableType::LanceDataset(ds), created_at, ProviderDetails::LanceTable(provider_details), @@ -266,7 +267,7 @@ impl Table { #[expect(clippy::unused_async)] pub async fn create_table_entry( _id: EntryId, - _name: &str, + _name: EntryName, _url: &url::Url, _schema: SchemaRef, ) -> Result { diff --git a/crates/store/re_server/tests/memory_url.rs b/crates/store/re_server/tests/memory_url.rs index 5a32cf011fcd..38026a7fa64a 100644 --- a/crates/store/re_server/tests/memory_url.rs +++ b/crates/store/re_server/tests/memory_url.rs @@ -16,7 +16,8 @@ use re_protos::cloud::v1alpha1::rerun_cloud_service_server::RerunCloudService as use re_protos::cloud::v1alpha1::{DeleteEntryRequest, ScanDatasetManifestResponse}; use re_protos::headers::RerunHeadersInjectorExt as _; use re_redap_tests::{ - DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _, register_and_wait, + DataSourcesDefinition, LayerDefinition, RerunCloudServiceExt as _, entry_name, + register_and_wait, }; use re_server::{RerunCloudHandler, RerunCloudHandlerBuilder}; @@ -75,7 +76,7 @@ async fn register_memory_url_cross_dataset() { data_sources: vec![memory_data_source.clone()], on_duplicate: Default::default(), }) - .with_entry_name("dataset_b") + .with_entry_name(entry_name("dataset_b")) .unwrap(); let task_results = register_and_wait(&service, request).await; @@ -121,7 +122,7 @@ async fn register_memory_url_cross_dataset() { data_sources: vec![memory_data_source], on_duplicate: Default::default(), }) - .with_entry_name("dataset_c") + .with_entry_name(entry_name("dataset_c")) .unwrap(); let result = 
service.register_with_dataset(request).await; @@ -159,7 +160,7 @@ async fn register_memory_url_not_found() { data_sources: vec![memory_data_source], on_duplicate: Default::default(), }) - .with_entry_name("dataset_nf") + .with_entry_name(entry_name("dataset_nf")) .unwrap(); let result = service.register_with_dataset(request).await; @@ -183,7 +184,7 @@ async fn scan_manifest( let responses: Vec<_> = service .scan_dataset_manifest( tonic::Request::new(ScanDatasetManifestRequest { columns: vec![] }) - .with_entry_name(dataset_name) + .with_entry_name(entry_name(dataset_name)) .unwrap(), ) .await diff --git a/crates/store/re_server/tests/redap_tests.rs b/crates/store/re_server/tests/redap_tests.rs index a7b2929db3cc..2ec58ac84e35 100644 --- a/crates/store/re_server/tests/redap_tests.rs +++ b/crates/store/re_server/tests/redap_tests.rs @@ -11,7 +11,7 @@ re_redap_tests::generate_redap_tests!(build); #[tokio::test(flavor = "multi_thread")] async fn version() { - let (handle, addr) = re_server::Args { + let handle = re_server::Args { host: "127.0.0.1".into(), port: 0, ..Default::default() @@ -20,6 +20,7 @@ async fn version() { .await .expect("failed to start server"); + let addr = handle.connect_addr(); let response = ehttp::fetch_async(ehttp::Request::get(format!("http://{addr}/version"))) .await .expect("failed to get `/version`"); diff --git a/crates/store/re_sorbet/Cargo.toml b/crates/store/re_sorbet/Cargo.toml index 7bcf5213b8b0..f34ca0def570 100644 --- a/crates/store/re_sorbet/Cargo.toml +++ b/crates/store/re_sorbet/Cargo.toml @@ -21,6 +21,7 @@ all-features = true [dependencies] re_arrow_util.workspace = true +re_byte_size.workspace = true re_log_types.workspace = true re_log.workspace = true re_tracing.workspace = true diff --git a/crates/store/re_sorbet/src/component_column_descriptor.rs b/crates/store/re_sorbet/src/component_column_descriptor.rs index 73e13fa3d99d..e1d7fb969e8b 100644 --- a/crates/store/re_sorbet/src/component_column_descriptor.rs +++ 
b/crates/store/re_sorbet/src/component_column_descriptor.rs @@ -66,6 +66,27 @@ pub struct ComponentColumnDescriptor { pub is_semantically_empty: bool, } +impl re_byte_size::SizeBytes for ComponentColumnDescriptor { + #[inline] + fn heap_size_bytes(&self) -> u64 { + let Self { + entity_path, + archetype, + component, + component_type, + store_datatype, + is_static: _, + is_tombstone: _, + is_semantically_empty: _, + } = self; + entity_path.heap_size_bytes() + + archetype.heap_size_bytes() + + component.heap_size_bytes() + + component_type.heap_size_bytes() + + store_datatype.heap_size_bytes() + } +} + impl PartialOrd for ComponentColumnDescriptor { #[inline] fn partial_cmp(&self, other: &Self) -> Option { @@ -263,10 +284,9 @@ impl ComponentColumnDescriptor { dt => { re_log::warn_once!( - "Component '{}' on entity '{}' has unexpected non-list-array type: {}", + "Component '{}' on entity '{}' has unexpected non-list-array type: {dt}", self.component, self.entity_path, - re_arrow_util::format_data_type(&dt), ); dt } diff --git a/crates/store/re_sorbet/src/index_column_descriptor.rs b/crates/store/re_sorbet/src/index_column_descriptor.rs index 399e06f7d0ff..3973c37156b8 100644 --- a/crates/store/re_sorbet/src/index_column_descriptor.rs +++ b/crates/store/re_sorbet/src/index_column_descriptor.rs @@ -24,6 +24,17 @@ pub struct IndexColumnDescriptor { pub is_sorted: bool, } +impl re_byte_size::SizeBytes for IndexColumnDescriptor { + fn heap_size_bytes(&self) -> u64 { + let Self { + timeline, + datatype, + is_sorted: _, + } = self; + timeline.heap_size_bytes() + datatype.heap_size_bytes() + } +} + impl PartialOrd for IndexColumnDescriptor { #[inline] fn partial_cmp(&self, other: &Self) -> Option { diff --git a/crates/store/re_sorbet/src/sorbet_batch.rs b/crates/store/re_sorbet/src/sorbet_batch.rs index 260d84d85d98..f68497aeb124 100644 --- a/crates/store/re_sorbet/src/sorbet_batch.rs +++ b/crates/store/re_sorbet/src/sorbet_batch.rs @@ -54,6 +54,34 @@ impl SorbetBatch { 
batch: self.batch.slice(0, 0), } } + + /// Replace the data of one column by index, keeping the same schema. + /// + /// Returns `None` if the column index is out of bounds or if the new + /// `RecordBatch` cannot be constructed. + #[must_use] + pub fn with_replaced_column(&self, col_idx: usize, new_array: ArrowArrayRef) -> Option { + if col_idx >= self.batch.num_columns() { + return None; + } + re_log::debug_assert_eq!( + self.batch.column(col_idx).data_type(), + new_array.data_type(), + "with_replaced_column: data type mismatch for column {col_idx}" + ); + let mut columns: Vec = self.batch.columns().to_vec(); + columns[col_idx] = new_array; + let batch = ArrowRecordBatch::try_new_with_options( + self.batch.schema(), + columns, + &RecordBatchOptions::default(), + ) + .ok()?; + Some(Self { + schema: self.schema.clone(), + batch, + }) + } } impl SorbetBatch { diff --git a/crates/store/re_sorbet/src/sorbet_schema.rs b/crates/store/re_sorbet/src/sorbet_schema.rs index 3548b7c24cfe..727a6957fb0c 100644 --- a/crates/store/re_sorbet/src/sorbet_schema.rs +++ b/crates/store/re_sorbet/src/sorbet_schema.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeSet; + use arrow::datatypes::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; use re_log_types::EntityPath; use re_types_core::ChunkId; @@ -85,6 +87,15 @@ impl SorbetSchema { .chain(timestamps.to_metadata()) .collect() } + + /// All the entities referenced by any column. 
+ pub fn all_entities(&self) -> BTreeSet<&EntityPath> { + self.columns + .iter() + .filter_map(|c| c.entity_path()) + .chain(self.entity_path.iter()) + .collect() + } } impl From for SorbetColumnDescriptors { diff --git a/crates/store/re_tf/Cargo.toml b/crates/store/re_tf/Cargo.toml index 98cccee7b47c..7699899ecf1a 100644 --- a/crates/store/re_tf/Cargo.toml +++ b/crates/store/re_tf/Cargo.toml @@ -24,11 +24,12 @@ re_arrow_util.workspace = true re_byte_size = { workspace = true, features = ["glam"] } re_chunk_store.workspace = true re_entity_db.workspace = true # It would be nice not to depend on this, but we need this in order to do queries right now. -re_log.workspace = true +re_log_encoding.workspace = true re_log_types.workspace = true +re_log.workspace = true re_mutex.workspace = true -re_tracing.workspace = true re_sdk_types = { workspace = true, features = ["glam"] } +re_tracing.workspace = true ahash.workspace = true arrow.workspace = true diff --git a/crates/store/re_tf/benches/transform_resolution_cache_bench.rs b/crates/store/re_tf/benches/transform_resolution_cache_bench.rs index 282ef3eccf13..a2b1dc02df75 100644 --- a/crates/store/re_tf/benches/transform_resolution_cache_bench.rs +++ b/crates/store/re_tf/benches/transform_resolution_cache_bench.rs @@ -51,7 +51,7 @@ fn setup_store() -> (EntityDb, Vec) { } let chunk = builder.build().unwrap(); - events.extend(entity_db.add_chunk(&Arc::new(chunk)).unwrap().into_iter()); + events.extend(entity_db.add_chunk(&Arc::new(chunk)).unwrap()); } } (entity_db, events) diff --git a/crates/store/re_tf/src/frame_id_registry.rs b/crates/store/re_tf/src/frame_id_registry.rs index 978e193fd7cc..2923632156e1 100644 --- a/crates/store/re_tf/src/frame_id_registry.rs +++ b/crates/store/re_tf/src/frame_id_registry.rs @@ -82,9 +82,15 @@ impl FrameIdRegistry { archetypes::CoordinateFrame::descriptor_frame().component, ]; + // Warn on empty frame IDs, but collect the affected components first. 
+ // Avoids warning multiple times for entities with multiple frame names, e.g. pinhole. for component in frame_components { for frame_id_strings in chunk.iter_slices::(component) { for frame_id_string in frame_id_strings { + if frame_id_string.is_empty() { + continue; + } + let (frame_id_hash, entity_path) = TransformFrameIdHash::from_str_with_optional_derived_path( frame_id_string.as_str(), @@ -115,7 +121,7 @@ impl FrameIdRegistry { } /// Registers entity path derived frame id and all its parents. - fn register_frame_id_from_entity_path(&mut self, entity_path: &EntityPath) { + pub fn register_frame_id_from_entity_path(&mut self, entity_path: &EntityPath) { // Ensure all implicit frames from this entity all the way up to the root are known. // Note that in-between entities may never be mentioned in any chunk, but we want to make sure they're known to the system. let mut entity_path = entity_path; // Have to redeclare to make borrow-checker happy. diff --git a/crates/store/re_tf/src/transform_aspect.rs b/crates/store/re_tf/src/transform_aspect.rs index 676c6aabcbcd..b3cf56984fc5 100644 --- a/crates/store/re_tf/src/transform_aspect.rs +++ b/crates/store/re_tf/src/transform_aspect.rs @@ -11,8 +11,8 @@ bitflags::bitflags! { /// The entity has instance poses, i.e. any non-style component of [`archetypes::InstancePoses3D`]. const Pose = 1 << 1; - /// The entity has a pinhole projection or view coordinates, i.e. either [`components::PinholeProjection`] or [`components::ViewCoordinates`]. - const PinholeOrViewCoordinates = 1 << 2; + /// The entity has a pinhole projection i.e. any component of [`components::PinholeProjection`]. + const Pinhole = 1 << 2; /// The entity has a clear component. const Clear = 1 << 3; @@ -20,16 +20,13 @@ bitflags::bitflags! { } impl TransformAspect { - /// Converts a component type to a transform aspect. 
- pub fn from_archetype(archetype: ArchetypeName) -> Self { + fn from_archetype(archetype: ArchetypeName) -> Self { if archetypes::Transform3D::name() == archetype { Self::Frame } else if archetypes::InstancePoses3D::name() == archetype { Self::Pose - } else if archetypes::Pinhole::name() == archetype - || archetypes::ViewCoordinates::name() == archetype - { - Self::PinholeOrViewCoordinates + } else if archetypes::Pinhole::name() == archetype { + Self::Pinhole } else if archetypes::Clear::name() == archetype { Self::Clear } else { @@ -38,6 +35,9 @@ impl TransformAspect { } /// Collects the transform aspects a chunk covers. + /// + /// This serves as a chunk-prefilter when processing store events. + /// We later on do a full check on relevant rows, see `iter_relevant_rows_in_chunk_with_child_frames` for details. pub fn transform_aspects_of(chunk: &Chunk) -> Self { let mut aspects = Self::empty(); for archetype in chunk.component_descriptors().filter_map(|c| c.archetype) { diff --git a/crates/store/re_tf/src/transform_forest.rs b/crates/store/re_tf/src/transform_forest.rs index 0bc5c3366a5b..021b8a578a1b 100644 --- a/crates/store/re_tf/src/transform_forest.rs +++ b/crates/store/re_tf/src/transform_forest.rs @@ -3,7 +3,6 @@ use re_byte_size::SizeBytes; use re_chunk_store::{LatestAtQuery, MissingChunkReporter}; use re_entity_db::EntityDb; use re_log::debug_assert; -use re_sdk_types::components::TransformFrameId; use crate::frame_id_registry::FrameIdRegistry; use crate::transform_resolution_cache::ParentFromChildTransform; @@ -428,9 +427,6 @@ impl re_byte_size::MemUsageTreeCapture for TransformForest { } } -static UNKNOWN_TRANSFORM_ID: std::sync::LazyLock = - std::sync::LazyLock::new(|| TransformFrameId::new("")); - /// Starting from a `current_frame`, walks towards the parent and accumulates transforms into `transform_stack`. /// Stops until not more connection is found or an already processed `frame_id` is hit. 
#[expect(clippy::too_many_arguments)] @@ -726,12 +722,8 @@ fn pinhole3d_from_image_plane( resolved_pinhole_projection: &ResolvedPinholeProjection, pinhole_image_plane_distance: f64, ) -> glam::DAffine3 { - let ResolvedPinholeProjection { - parent: _, // TODO(andreas): Make use of this. - image_from_camera, - resolution: _, - view_coordinates, - } = resolved_pinhole_projection; + let image_from_camera = resolved_pinhole_projection.image_from_camera; + let view_coordinates = resolved_pinhole_projection.view_coordinates; // Everything under a pinhole camera is a 2D projection, thus doesn't actually have a proper 3D representation. // Our visualization interprets this as looking at a 2D image plane from a single point (the pinhole). @@ -802,18 +794,32 @@ fn transforms_at( if let Some(pinhole_projection) = pinhole_projection.as_ref() && pinhole_projection.parent != transform.parent { - re_log::warn_once!( - "The transform frame {:?} is connected to {:?} via a pinhole but also connected to {:?} via a transform. Any frame is only ever allowed to have a single parent at any given time.", - id_registry - .lookup_frame_id(child_frame) - .unwrap_or(&UNKNOWN_TRANSFORM_ID), - id_registry - .lookup_frame_id(pinhole_projection.parent) - .unwrap_or(&UNKNOWN_TRANSFORM_ID), - id_registry - .lookup_frame_id(transform.parent) - .unwrap_or(&UNKNOWN_TRANSFORM_ID), - ); + let transform_frame = id_registry.lookup_frame_id(child_frame); + let pinhole_parent_frame = id_registry.lookup_frame_id(pinhole_projection.parent); + let transform_parent_frame = id_registry.lookup_frame_id(transform.parent); + + // If any of the frames ids can't be resolved to a string, we're in bigger trouble and can't show a useful error + // as this implies that the registry is in an invalid state. 
+ if let Some(transform_frame) = transform_frame + && let Some(pinhole_parent_frame) = pinhole_parent_frame + && let Some(transform_parent_frame) = transform_parent_frame + { + re_log::warn_once!( + "The transform frame {transform_frame:?} is connected to {pinhole_parent_frame:?} via a pinhole but also connected to {transform_parent_frame:?} via a transform. Any frame is only ever allowed to have a single parent at any given time.", + ); + } else { + for frame in [ + transform_frame, + pinhole_parent_frame, + transform_parent_frame, + ] { + if frame.is_none() { + re_log::debug_panic!( + "Couldn't resolve frame id for {frame:?} in the registry, even though it was present in the transforms for timeline.", + ); + } + } + } } Some(transform.parent) @@ -851,6 +857,8 @@ mod tests { use re_sdk_types::components::TransformFrameId; use re_sdk_types::{RowId, archetypes, components}; + use crate::transform_resolution_cache::ResolvedPinholeProjectionCached; + use super::*; fn test_pinhole() -> archetypes::Pinhole { @@ -859,12 +867,15 @@ mod tests { fn test_resolved_pinhole(parent: TransformFrameIdHash) -> ResolvedPinholeProjection { ResolvedPinholeProjection { - parent, - image_from_camera: components::PinholeProjection::from_focal_length_and_principal_point( - [1.0, 2.0], - [50.0, 100.0], - ), - resolution: Some([100.0, 200.0].into()), + cached: ResolvedPinholeProjectionCached { + parent, + image_from_camera: + components::PinholeProjection::from_focal_length_and_principal_point( + [1.0, 2.0], + [50.0, 100.0], + ), + resolution: Some([100.0, 200.0].into()), + }, view_coordinates: archetypes::Pinhole::DEFAULT_CAMERA_XYZ, } } @@ -1415,9 +1426,9 @@ mod tests { .build()?, ))?; let transform_forest = TransformForest::new(&test_scene, &transform_cache, &query); - assert!(!transform_forest.any_missing_chunks()); + assert!(transform_forest.any_missing_chunks()); - // Forest sees the new relationship despite not having it reported since the cold cache will pick it up. 
+ // Forest can't see the new relationship since it hasn't been reported to the cache. assert_eq!( transform_forest .transform_from_to( @@ -1428,12 +1439,9 @@ mod tests { .collect::>(), vec![( TransformFrameIdHash::from_str("new_top"), - Ok(TreeTransform { - root: TransformFrameIdHash::from_str("new_top"), - target_from_source: glam::DAffine3::from_translation(glam::dvec3( - -5.0, 0.0, 0.0 - )), - }) + Err(TransformFromToError::UnknownSourceFrame( + TransformFrameIdHash::from_str("new_top") + )) )] ); assert_eq!( @@ -1449,7 +1457,7 @@ mod tests { Err(TransformFromToError::NoPathBetweenFrames { target: TransformFrameIdHash::from_str("child2"), src: TransformFrameIdHash::from_str("top"), - target_root: TransformFrameIdHash::from_str("new_top"), + target_root: TransformFrameIdHash::from_str("child2"), source_root: TransformFrameIdHash::from_str("root"), }) )] diff --git a/crates/store/re_tf/src/transform_queries.rs b/crates/store/re_tf/src/transform_queries.rs index 7a47ab6c7193..e7ac6e036553 100644 --- a/crates/store/re_tf/src/transform_queries.rs +++ b/crates/store/re_tf/src/transform_queries.rs @@ -1,20 +1,21 @@ //! Utilities for querying out transform types. 
+use std::sync::OnceLock; + use glam::DAffine3; -use itertools::{Either, Itertools as _}; -use re_arrow_util::ArrowArrayDowncastRef as _; -use re_chunk_store::{ - Chunk, ChunkTrackingMode, LatestAtQuery, MissingChunkReporter, UnitChunkShared, -}; +use itertools::Either; +use re_chunk_store::{ChunkShared, LatestAtQuery, MissingChunkReporter}; use re_entity_db::EntityDb; -use re_log_types::{EntityPath, TimeInt}; +use re_entity_db::external::re_query::StorageEngineReadGuard; +use re_log_types::EntityPath; use re_sdk_types::archetypes::{self, InstancePoses3D}; -use re_sdk_types::external::arrow; use re_sdk_types::external::arrow::array::Array as _; -use re_sdk_types::{ComponentIdentifier, TransformFrameIdHash, components}; +use re_sdk_types::{ChunkId, ComponentIdentifier, RowId, TransformFrameIdHash, components}; -use crate::transform_resolution_cache::ParentFromChildTransform; -use crate::{ResolvedPinholeProjection, convert}; +use crate::convert; +use crate::transform_resolution_cache::{ + ParentFromChildTransform, ResolvedPinholeProjectionCached, +}; #[derive(Debug, thiserror::Error)] pub enum TransformError { @@ -27,191 +28,90 @@ pub enum TransformError { #[error("missing transform on entity `{entity_path}`")] MissingTransform { entity_path: EntityPath }, + #[error( + "Ignoring transform due to empty parent frame name for component `{component}` on entity `{entity_path}`." + )] + EmptyParentFrame { + entity_path: EntityPath, + component: ComponentIdentifier, + }, + #[error( "Ignoring transform at root entity /. Transforms require either a parent entity that can be used as implicit frame, or the parent_frame field to be set." )] ImplicitRootParentFrame, } -/// Returns true if any of the given components is non-null on the given row. 
-fn has_row_any_component( - chunk: &Chunk, - row_index: usize, - components: &[ComponentIdentifier], -) -> bool { - components.iter().any(|component| { - chunk - .components() - .get_array(*component) - .is_some_and(|array| !array.is_null(row_index)) - }) -} - -/// Filters a atomic-latest-at the given [`Self::requested_frame_id`] at the [`Self::condition_frame_id_component`]. -/// We have to find the last row-id for the given `condition_frame_id_component` and time. -/// Today, `condition_frame_id_component` is always `child_frame_id` for either `Transform3D` or `Pinhole`. -#[derive(Copy, Clone)] -struct AtomicLatestAtFrameFilter { - condition_frame_id_component: ComponentIdentifier, - requested_frame_id: TransformFrameIdHash, -} - -/// Finds a unit chunk/row that has the latest changes for the given set of components and optionally matches for a frame id. -/// -/// Since everything has the same row-id, everything has to be on the same chunk -> we return a unit chunk! -/// -/// Does **not** handle clears. Our transform cache already handles clear events separately, -/// since we eagerly create events whenever a change occurs. 
-/// (Unlike transform components, we immediately read out clears and add those clear events to our event book-keeping) -fn atomic_latest_at_query( - entity_db: &EntityDb, +fn lookup_chunk_row<'a>( + storage_engine: &'a StorageEngineReadGuard<'a>, missing_chunk_reporter: &MissingChunkReporter, - query: &LatestAtQuery, - entity_path: &EntityPath, - frame_filter: Option, - atomic_component_set: &[ComponentIdentifier], -) -> Option { - let storage_engine = entity_db.storage_engine(); + chunk_id: ChunkId, + row_id: RowId, +) -> Option<(&'a ChunkShared, usize)> { let store = storage_engine.store(); - let chunks = atomic_component_set - .iter() - .flat_map(|comp| { - let re_chunk_store::QueryResults { - chunks, - missing_virtual, - } = store.latest_at_relevant_chunks( - ChunkTrackingMode::Report, - query, - entity_path, - *comp, - ); - if !missing_virtual.is_empty() { - missing_chunk_reporter.report_missing_chunk(); - } - - chunks - }) - // An optimization to avoid duplicates early. - .unique_by(|c| c.id()); + let Some(chunk) = store.physical_chunk(&chunk_id) else { + missing_chunk_reporter.report_missing_chunk(); + return None; + }; - let entity_path_derived_frame_id = TransformFrameIdHash::from_entity_path(entity_path); + let index = if chunk.is_sorted() { + chunk.row_ids_slice().binary_search(&row_id).ok()? + } else { + chunk.row_ids_slice().iter().position(|r| *r == row_id)? + }; - let mut unit_chunk: Option = None; + Some((chunk, index)) +} - let query_time = query.at().as_i64(); +pub fn atomic_component_set_for_tree_transforms() -> &'static [ComponentIdentifier] { + static ATOMIC_COMPONENTS_FOR_TREE_TRANSFORMS: OnceLock<[ComponentIdentifier; 8]> = + OnceLock::new(); - for chunk in chunks { - // Make sure the chunk is sorted (they usually are) in order to ensure we're getting the last relevant row. 
- let chunk = if chunk.is_sorted() { - chunk - } else { - let mut sorted_chunk = (*chunk).clone(); - sorted_chunk.sort_if_unsorted(); - std::sync::Arc::new(sorted_chunk) - }; - - let mut row_indices_with_queried_time_from_new_to_old = if let Some(time_column) = - chunk.timelines().get(&query.timeline()) - && query_time != TimeInt::STATIC.as_i64() - { - if time_column.is_sorted() { - let partition_point = time_column - .times_raw() - .partition_point(|time| *time <= query_time); - Either::Left((0..partition_point).rev()) - } else { - Either::Right( - time_column - .times_raw() - .iter() - .enumerate() - .filter(|(_row_index, time)| **time <= query_time) - .sorted_by_key(|(_row_index, time)| *time) - // Do *not* sort by negative time instead. - // This gives a subtly different outcome since sorting is stable it would mean that runs of equal times wouldn't be reversed then. - .rev() - .map(|(row_index, _time)| row_index), - ) - } - } else { - Either::Left((0..chunk.num_rows()).rev()) - }; + ATOMIC_COMPONENTS_FOR_TREE_TRANSFORMS.get_or_init(|| { + [ + // Topology + archetypes::Transform3D::descriptor_parent_frame().component, + archetypes::Transform3D::descriptor_child_frame().component, + archetypes::Transform3D::descriptor_relation().component, + // Geometry + archetypes::Transform3D::descriptor_translation().component, + archetypes::Transform3D::descriptor_rotation_axis_angle().component, + archetypes::Transform3D::descriptor_quaternion().component, + archetypes::Transform3D::descriptor_scale().component, + archetypes::Transform3D::descriptor_mat3x3().component, + ] + }) +} - // Finds the last row index with time <= the query time and a matching frame id. 
- let highest_row_index_with_expected_frame_id = if let Some(AtomicLatestAtFrameFilter { - condition_frame_id_component, - requested_frame_id, - }) = frame_filter - { - if let Some(frame_id_column) = - chunk.components().get_array(condition_frame_id_component) - { - row_indices_with_queried_time_from_new_to_old.find(|index| { - let frame_id_row_untyped = frame_id_column.value(*index); - let Some(frame_id_row) = - frame_id_row_untyped.downcast_array_ref::() - else { - re_log::error_once!("Expected at {condition_frame_id_component:?} @ {entity_path:?} to be a string array, but its type is instead {}", frame_id_row_untyped.data_type()); - return false; - }; - // Right now everything is singular on a single row, so check only the first element of this string array. - let frame_id = if frame_id_row.is_empty() || frame_id_row.is_null(0) { - // *Something* on this row has to be non-empty & non-null! - // Example where this is not the case: - // - // ┌────────────────┬─────────────┬────────────┐ - // │ child_frame_id │ translation │ color │ - // ├────────────────┼─────────────┼────────────┤ - // │ ["myframe"] │ [[1,2,3]] │ null │ - // │ null │ null │ 0xFF00FFFF │ - // │ null │ [] │ null │ - // └────────────────┴─────────────┴────────────┘ - // - // The second row doesn't have any of the components of our atomic set. - // It is therefore not relevant for what we're looking for! - // The last row *is* relevant, because it clears out the translation for the - // entity derived child_frame_id, thus setting it to an identity transform. 
- if !has_row_any_component(&chunk, *index, atomic_component_set) { - return false; - } - entity_path_derived_frame_id - } else { - TransformFrameIdHash::from_str(frame_id_row.value(0)) - }; +pub fn atomic_component_set_for_instance_poses() -> &'static [ComponentIdentifier] { + static ATOMIC_COMPONENTS_FOR_INSTANCE_POSES: OnceLock<[ComponentIdentifier; 5]> = + OnceLock::new(); - frame_id == requested_frame_id - }) - } else if entity_path_derived_frame_id == requested_frame_id { - // Pick the last where any relevant component is non-null & non-empty. - row_indices_with_queried_time_from_new_to_old - .find(|index| has_row_any_component(&chunk, *index, atomic_component_set)) - } else { - // There's no child_frame id and we're also not looking for the entity-path derived frame, - // so this chunk doesn't have any information about the transform we're looking for. - continue; - } - } else { - // Pick the last where any relevant component is non-null & non-empty. - row_indices_with_queried_time_from_new_to_old - .find(|index| has_row_any_component(&chunk, *index, atomic_component_set)) - }; + ATOMIC_COMPONENTS_FOR_INSTANCE_POSES.get_or_init(|| { + [ + InstancePoses3D::descriptor_translations().component, + InstancePoses3D::descriptor_rotation_axis_angles().component, + InstancePoses3D::descriptor_quaternions().component, + InstancePoses3D::descriptor_scales().component, + InstancePoses3D::descriptor_mat3x3().component, + ] + }) +} - if let Some(row_index) = highest_row_index_with_expected_frame_id { - re_log::debug_assert!(!chunk.is_empty()); - let new_unit_chunk = chunk.row_sliced_unit_shallow(row_index); +pub fn atomic_component_set_for_pinhole_projection() -> &'static [ComponentIdentifier] { + static ATOMIC_COMPONENTS_FOR_PINHOLE_PROJECTION: OnceLock<[ComponentIdentifier; 4]> = + OnceLock::new(); - if let Some(previous_chunk) = &unit_chunk - && previous_chunk.row_id() > new_unit_chunk.row_id() - { - // This should be rare: there's another chunk that also fits the 
exact same child id and the exact same time. - // Have to use row id as the tie breaker - if we failed that we're in here. - } else { - unit_chunk = Some(chunk.row_sliced_unit_shallow(row_index)); - } - } - } - - unit_chunk + ATOMIC_COMPONENTS_FOR_PINHOLE_PROJECTION.get_or_init(|| { + [ + // Topology + archetypes::Pinhole::descriptor_parent_frame().component, + archetypes::Pinhole::descriptor_child_frame().component, + // Geometry + archetypes::Pinhole::descriptor_image_from_camera().component, + archetypes::Pinhole::descriptor_resolution().component, + ] + }) } /// Queries & processes all components that are part of a transform, returning the transform from child to parent. @@ -222,12 +122,11 @@ pub fn query_and_resolve_tree_transform_at_entity( entity_db: &EntityDb, missing_chunk_reporter: &MissingChunkReporter, entity_path: &EntityPath, - child_frame_id: TransformFrameIdHash, - query: &LatestAtQuery, + chunk_id: ChunkId, + row_id: RowId, ) -> Result { // Topology let identifier_parent_frame = archetypes::Transform3D::descriptor_parent_frame().component; - let identifier_child_frame = archetypes::Transform3D::descriptor_child_frame().component; let identifier_relation = archetypes::Transform3D::descriptor_relation().component; // Geometry @@ -238,18 +137,6 @@ pub fn query_and_resolve_tree_transform_at_entity( let identifier_scales = archetypes::Transform3D::descriptor_scale().component; let identifier_mat3x3 = archetypes::Transform3D::descriptor_mat3x3().component; - let all_components_of_transaction = [ - identifier_parent_frame, - identifier_child_frame, - identifier_relation, - // Geometry - identifier_translations, - identifier_rotation_axis_angles, - identifier_quaternions, - identifier_scales, - identifier_mat3x3, - ]; - // We're querying for transactional/atomic transform state: // If any of the topology or geometry components change, we reset the entire transform. 
// @@ -260,18 +147,10 @@ pub fn query_and_resolve_tree_transform_at_entity( // * we're already doing special caching anyways // * we don't want to merge over row-ids *at all* since our query handling here is a little bit different. The query cache is geared towards "regular Rerun semantics" // * we already handled `Clear`/`ClearRecursive` upon pre-population of our cache entries (we know when a clear occurs on this entity!) - let unit_chunk: Option = atomic_latest_at_query( - entity_db, - missing_chunk_reporter, - query, - entity_path, - Some(AtomicLatestAtFrameFilter { - condition_frame_id_component: identifier_child_frame, - requested_frame_id: child_frame_id, - }), - &all_components_of_transaction, - ); - let Some(unit_chunk) = unit_chunk else { + let storage_engine = entity_db.storage_engine(); + let Some((chunk, row_index)) = + lookup_chunk_row(&storage_engine, missing_chunk_reporter, chunk_id, row_id) + else { return Err(TransformError::MissingTransform { entity_path: entity_path.clone(), }); @@ -279,20 +158,20 @@ pub fn query_and_resolve_tree_transform_at_entity( // TODO(andreas): silently ignores deserialization error right now. - let parent = get_parent_frame(&unit_chunk, entity_path, identifier_parent_frame)?; + let parent = get_parent_frame(chunk, row_index, entity_path, identifier_parent_frame)?; #[expect(clippy::useless_let_if_seq)] let mut transform = DAffine3::IDENTITY; // The order of the components here is important. 
- if let Some(translation) = unit_chunk - .component_mono::(identifier_translations) + if let Some(translation) = chunk + .component_mono::(identifier_translations, row_index) .and_then(|v| v.ok()) { transform = convert::translation_3d_to_daffine3(translation); } - if let Some(axis_angle) = unit_chunk - .component_mono::(identifier_rotation_axis_angles) + if let Some(axis_angle) = chunk + .component_mono::(identifier_rotation_axis_angles, row_index) .and_then(|v| v.ok()) { let axis_angle = convert::rotation_axis_angle_to_daffine3(axis_angle).map_err(|_err| { @@ -303,8 +182,8 @@ pub fn query_and_resolve_tree_transform_at_entity( })?; transform *= axis_angle; } - if let Some(quaternion) = unit_chunk - .component_mono::(identifier_quaternions) + if let Some(quaternion) = chunk + .component_mono::(identifier_quaternions, row_index) .and_then(|v| v.ok()) { let quaternion = convert::rotation_quat_to_daffine3(quaternion).map_err(|_err| { @@ -315,8 +194,8 @@ pub fn query_and_resolve_tree_transform_at_entity( })?; transform *= quaternion; } - if let Some(scale) = unit_chunk - .component_mono::(identifier_scales) + if let Some(scale) = chunk + .component_mono::(identifier_scales, row_index) .and_then(|v| v.ok()) { if scale.x() == 0.0 && scale.y() == 0.0 && scale.z() == 0.0 { @@ -327,8 +206,8 @@ pub fn query_and_resolve_tree_transform_at_entity( } transform *= convert::scale_3d_to_daffine3(scale); } - if let Some(mat3x3) = unit_chunk - .component_mono::(identifier_mat3x3) + if let Some(mat3x3) = chunk + .component_mono::(identifier_mat3x3, row_index) .and_then(|v| v.ok()) { let affine_transform = convert::transform_mat3x3_to_daffine3(mat3x3); @@ -341,8 +220,8 @@ pub fn query_and_resolve_tree_transform_at_entity( transform *= affine_transform; } - if unit_chunk - .component_mono::(identifier_relation) + if chunk + .component_mono::(identifier_relation, row_index) .and_then(|v| v.ok()) == Some(components::TransformRelation::ChildFromParent) { @@ -364,14 +243,14 @@ pub fn 
query_and_resolve_tree_transform_at_entity( /// Queries all components that are part of pose transforms, returning the transform from child to parent. /// -/// If any of the components yields an invalid transform, returns a `glam::DAffine3::ZERO` for that instance. -/// (this effectively ignores the instance for most visualizations!) -// TODO(#3849): There's no way to discover invalid transforms right now (they can be intentional but often aren't). +// TODO(#3849): There's no uniform way to discover invalid transforms right now (they can be intentional but often aren't). +// Here, we only detect and ignore invalid rotations and log an error. pub fn query_and_resolve_instance_poses_at_entity( entity_db: &EntityDb, missing_chunk_reporter: &MissingChunkReporter, entity_path: &EntityPath, - query: &LatestAtQuery, + chunk_id: ChunkId, + row_id: RowId, ) -> Vec { let identifier_translations = InstancePoses3D::descriptor_translations().component; let identifier_rotation_axis_angles = @@ -380,13 +259,7 @@ pub fn query_and_resolve_instance_poses_at_entity( let identifier_scales = InstancePoses3D::descriptor_scales().component; let identifier_mat3x3 = InstancePoses3D::descriptor_mat3x3().component; - let all_components_of_transaction = [ - identifier_translations, - identifier_rotation_axis_angles, - identifier_quaternions, - identifier_scales, - identifier_mat3x3, - ]; + let all_components_of_transaction = atomic_component_set_for_instance_poses(); // We're querying for transactional/atomic pose state: // If any of the topology or geometry components change, we reset all poses. @@ -398,23 +271,19 @@ pub fn query_and_resolve_instance_poses_at_entity( // * we're already doing special caching anyways // * we don't want to merge over row-ids *at all* since our query handling here is a little bit different. 
The query cache is geared towards "regular Rerun semantics" // * we already handled `Clear`/`ClearRecursive` upon pre-population of our cache entries (we know when a clear occurs on this entity!) - let unit_chunk: Option = atomic_latest_at_query( - entity_db, - missing_chunk_reporter, - query, - entity_path, - None, - &all_components_of_transaction, - ); - let Some(unit_chunk) = unit_chunk else { + let storage_engine = entity_db.storage_engine(); + let Some((chunk, row_index)) = + lookup_chunk_row(&storage_engine, missing_chunk_reporter, chunk_id, row_id) + else { return Vec::new(); }; let max_num_instances = all_components_of_transaction .iter() .map(|component| { - unit_chunk - .component_batch_raw(*component) + chunk + .component_batch_raw(*component, row_index) + .and_then(|batch| batch.ok()) .map_or(0, |batch| batch.len()) }) .max() @@ -441,24 +310,27 @@ pub fn query_and_resolve_instance_poses_at_entity( ) } - let batch_translation = unit_chunk - .component_batch::(identifier_translations) + let batch_translation = chunk + .component_batch::(identifier_translations, row_index) .and_then(|v| v.ok()) .unwrap_or_default(); - let batch_rotation_axis_angle = unit_chunk - .component_batch::(identifier_rotation_axis_angles) + let batch_rotation_axis_angle = chunk + .component_batch::( + identifier_rotation_axis_angles, + row_index, + ) .and_then(|v| v.ok()) .unwrap_or_default(); - let batch_rotation_quat = unit_chunk - .component_batch::(identifier_quaternions) + let batch_rotation_quat = chunk + .component_batch::(identifier_quaternions, row_index) .and_then(|v| v.ok()) .unwrap_or_default(); - let batch_scale = unit_chunk - .component_batch::(identifier_scales) + let batch_scale = chunk + .component_batch::(identifier_scales, row_index) .and_then(|v| v.ok()) .unwrap_or_default(); - let batch_mat3x3 = unit_chunk - .component_batch::(identifier_mat3x3) + let batch_mat3x3 = chunk + .component_batch::(identifier_mat3x3, row_index) .and_then(|v| v.ok()) 
.unwrap_or_default(); @@ -477,7 +349,11 @@ pub fn query_and_resolve_instance_poses_at_entity( let mut iter_scale = clamped_or_nothing(batch_scale, max_num_instances); let mut iter_mat3x3 = clamped_or_nothing(batch_mat3x3, max_num_instances); - (0..max_num_instances) + // Gracefully ignore invalid rotations (e.g. an accidentally unnormalized quaternion like [0, 0, 0, 0]), + // but log an error about it to inform the user. + let mut has_invalid_rotation = false; + + let transforms = (0..max_num_instances) .map(|_| { // We apply these in a specific order. #[expect(clippy::useless_let_if_seq)] @@ -492,14 +368,14 @@ pub fn query_and_resolve_instance_poses_at_entity( { transform *= axis_angle; } else { - transform = DAffine3::ZERO; + has_invalid_rotation = true; } } if let Some(rotation_quat) = iter_rotation_quat.next() { if let Ok(rotation_quat) = convert::rotation_quat_to_daffine3(rotation_quat) { transform *= rotation_quat; } else { - transform = DAffine3::ZERO; + has_invalid_rotation = true; } } if let Some(scale) = iter_scale.next() { @@ -510,51 +386,43 @@ pub fn query_and_resolve_instance_poses_at_entity( } transform }) - .collect() + .collect(); + + if has_invalid_rotation { + re_log::warn_once!( + "Detected an invalid rotation in the instance poses at {}. 
Ignoring it and treating it as an identity rotation.", + entity_path + ); + } + + transforms } pub fn query_and_resolve_pinhole_projection_at_entity( entity_db: &EntityDb, missing_chunk_reporter: &MissingChunkReporter, entity_path: &EntityPath, - child_frame_id: TransformFrameIdHash, - query: &LatestAtQuery, -) -> Result { + chunk_id: ChunkId, + row_id: RowId, +) -> Result { // Topology let identifier_parent_frame = archetypes::Pinhole::descriptor_parent_frame().component; - let identifier_child_frame = archetypes::Pinhole::descriptor_child_frame().component; // Geometry let identifier_image_from_camera = archetypes::Pinhole::descriptor_image_from_camera().component; let identifier_resolution = archetypes::Pinhole::descriptor_resolution().component; - let all_components_of_transaction = [ - identifier_parent_frame, - identifier_child_frame, - // Geometry - identifier_image_from_camera, - identifier_resolution, - ]; - - let unit_chunk = atomic_latest_at_query( - entity_db, - missing_chunk_reporter, - query, - entity_path, - Some(AtomicLatestAtFrameFilter { - condition_frame_id_component: identifier_child_frame, - requested_frame_id: child_frame_id, - }), - &all_components_of_transaction, - ); - let Some(unit_chunk) = unit_chunk else { + let storage_engine = entity_db.storage_engine(); + let Some((chunk, row_index)) = + lookup_chunk_row(&storage_engine, missing_chunk_reporter, chunk_id, row_id) + else { return Err(TransformError::MissingTransform { entity_path: entity_path.clone(), }); }; - let Some(image_from_camera) = unit_chunk - .component_mono::(identifier_image_from_camera) + let Some(image_from_camera) = chunk + .component_mono::(identifier_image_from_camera, row_index) .and_then(|v| v.ok()) else { // Intrinsics are required. 
@@ -562,34 +430,27 @@ pub fn query_and_resolve_pinhole_projection_at_entity( entity_path: entity_path.clone(), }); }; - let resolution = unit_chunk - .component_mono::(identifier_resolution) + let resolution = chunk + .component_mono::(identifier_resolution, row_index) .and_then(|v| v.ok()); - let parent = get_parent_frame(&unit_chunk, entity_path, identifier_parent_frame)?; + let parent = get_parent_frame(chunk, row_index, entity_path, identifier_parent_frame)?; - Ok(ResolvedPinholeProjection { + Ok(ResolvedPinholeProjectionCached { parent, image_from_camera, resolution, - - // TODO(andreas): view coordinates are in a weird limbo state in more than one way. - // Not only are they only _partially_ relevant for the camera's transform (they both name axis & orient cameras), - // we also rely on them too much being latest-at driven and to make matters worse query them from two different archetypes. - view_coordinates: { - query_view_coordinates(entity_path, entity_db, query) - .unwrap_or(archetypes::Pinhole::DEFAULT_CAMERA_XYZ) - }, }) } fn get_parent_frame( - unit_chunk: &UnitChunkShared, + chunk: &ChunkShared, + row_index: usize, entity_path: &EntityPath, identifier_parent_frame: ComponentIdentifier, ) -> Result { - unit_chunk - .component_mono::(identifier_parent_frame) + chunk + .component_mono::(identifier_parent_frame, row_index) .and_then(|v| v.ok()) .map_or_else( || { @@ -598,7 +459,16 @@ fn get_parent_frame( .ok_or(TransformError::ImplicitRootParentFrame) .map(|parent| TransformFrameIdHash::from_entity_path(&parent)) }, - |frame_id| Ok(TransformFrameIdHash::new(&frame_id)), + |frame_id| { + if frame_id.as_str().is_empty() { + Err(TransformError::EmptyParentFrame { + entity_path: entity_path.clone(), + component: identifier_parent_frame, + }) + } else { + Ok(TransformFrameIdHash::new(&frame_id)) + } + }, ) } @@ -657,500 +527,45 @@ pub fn query_view_coordinates_at_closest_ancestor( mod tests { use std::sync::Arc; - use re_chunk_store::{Chunk, LatestAtQuery}; + 
use re_chunk_store::Chunk; use re_entity_db::{EntityDb, EntityPath}; - use re_log_types::example_components::{MyColor, MyIndex, MyLabel, MyPoint, MyPoints}; - use re_log_types::{TimePoint, Timeline}; - use re_sdk_types::RowId; + use re_log_types::Timeline; + use re_sdk_types::{archetypes::InstancePoses3D, components::RotationQuat}; use super::*; - fn atomic_latest_at_query_test( - entity_db: &EntityDb, - query: &LatestAtQuery, - entity_path: &EntityPath, - frame_filter: Option, - atomic_component_set: &[ComponentIdentifier], - ) -> Option { - let missing_chunk_reporter = MissingChunkReporter::default(); - let result = atomic_latest_at_query( - entity_db, - &missing_chunk_reporter, - query, - entity_path, - frame_filter, - atomic_component_set, - ); - assert!( - missing_chunk_reporter.is_empty(), - "Test expected no missing chunks, but some were missing. This likely means the test is not properly populating the store with all relevant chunks." - ); - result - } - - fn timeline() -> Timeline { - Timeline::new("test_timeline", re_log_types::TimeType::Sequence) - } - - fn tp(tick: i64) -> TimePoint { - TimePoint::from([(timeline(), tick)]) - } - - fn atomic_component_set() -> [ComponentIdentifier; 3] { - [ - MyPoints::descriptor_points().component, - MyPoints::descriptor_colors().component, - MyPoints::descriptor_labels().component, - ] - } - - fn frame_condition_component() -> ComponentIdentifier { - // We stick with `MyPoints` all the way and its labels happen to be compatible with frame ids (it's just utf8!) - MyPoints::descriptor_labels().component - } - - fn atomic_latest_at_temporal_only_no_frames_present( - out_of_order: bool, - ) -> Result<(), Box> { - let mut entity_db = EntityDb::new(re_log_types::StoreInfo::testing().store_id); - - // Populate store. 
- let entity_path = EntityPath::from("my_entity"); - let row_id_temp0 = RowId::new(); - let row_id_temp1 = RowId::new(); - let row_id_irrelevant = RowId::new(); - let row_id_temp2 = RowId::new(); - let chunk = Chunk::builder(entity_path.clone()) - .with_archetype( - row_id_temp0, - if out_of_order { tp(30) } else { tp(10) }, - &MyPoints::new([MyPoint::new(1.0, 1.0)]).with_colors([MyColor(1)]), - ) - .with_archetype( - row_id_temp1, - tp(20), - &MyPoints::update_fields().with_colors([MyColor(2)]), - ) - .with_component( - row_id_irrelevant, - tp(25), - // Some random components that aren't of interest to us! - MyIndex::partial_descriptor(), - &MyIndex(123), - )? - .with_archetype( - row_id_temp2, - if out_of_order { tp(10) } else { tp(30) }, - &MyPoints::new([MyPoint::new(2.0, 2.0)]), - ) - .build()?; - entity_db.add_chunk(&Arc::new(chunk))?; - - let requested_frame_id = TransformFrameIdHash::from_entity_path(&entity_path); - - let query_row_at_time = |t| { - atomic_latest_at_query_test( - &entity_db, - &LatestAtQuery::new(*timeline().name(), t), - &entity_path, - Some(AtomicLatestAtFrameFilter { - condition_frame_id_component: frame_condition_component(), - requested_frame_id, - }), - &atomic_component_set(), - )? 
- .row_id() - }; - - assert_eq!(query_row_at_time(0), None); - if out_of_order { - assert_eq!(query_row_at_time(10), Some(row_id_temp2)); - assert_eq!(query_row_at_time(15), Some(row_id_temp2)); - assert_eq!(query_row_at_time(20), Some(row_id_temp1)); - assert_eq!(query_row_at_time(25), Some(row_id_temp1)); - assert_eq!(query_row_at_time(30), Some(row_id_temp0)); - assert_eq!(query_row_at_time(35), Some(row_id_temp0)); - } else { - assert_eq!(query_row_at_time(10), Some(row_id_temp0)); - assert_eq!(query_row_at_time(15), Some(row_id_temp0)); - assert_eq!(query_row_at_time(20), Some(row_id_temp1)); - assert_eq!(query_row_at_time(25), Some(row_id_temp1)); - assert_eq!(query_row_at_time(30), Some(row_id_temp2)); - assert_eq!(query_row_at_time(35), Some(row_id_temp2)); - } - - // The condition should not make any difference in this scenario! - for t in [0, 10, 15, 20, 25, 30, 35] { - assert_eq!( - query_row_at_time(t), - atomic_latest_at_query_test( - &entity_db, - &LatestAtQuery::new(*timeline().name(), t), - &entity_path, - None, - &atomic_component_set(), - ) - .and_then(|chunk| chunk.row_id()) - ); - } - - // Any query with another frame should fail - for t in [0, 15, 30, 40] { - assert!( - atomic_latest_at_query_test( - &entity_db, - &LatestAtQuery::new(*timeline().name(), t), - &entity_path, - Some(AtomicLatestAtFrameFilter { - condition_frame_id_component: frame_condition_component(), - requested_frame_id: TransformFrameIdHash::from_str("nope"), - }), - &atomic_component_set(), - ) - .is_none() - ); - } - - Ok(()) - } - #[test] - fn atomic_latest_at_temporal_only_no_frame_cond_in_order() - -> Result<(), Box> { - atomic_latest_at_temporal_only_no_frames_present(false) - } - + /// Test that an invalid instance pose quaternion is ignored while still keeping the translation. 
#[test] - fn atomic_latest_at_temporal_only_no_frame_cond_out_of_order() - -> Result<(), Box> { - atomic_latest_at_temporal_only_no_frames_present(true) - } - - #[test] - fn atomic_latest_at_static_and_temporal_no_frames_present() + fn invalid_instance_pose_quaternion_preserves_translation() -> Result<(), Box> { let mut entity_db = EntityDb::new(re_log_types::StoreInfo::testing().store_id); - // Populate store. + let timeline = Timeline::new_sequence("t"); let entity_path = EntityPath::from("my_entity"); - let row_id_static0 = RowId::new(); - let row_id_static1 = RowId::new(); - let row_id_irrelevant = RowId::new(); - let row_id_temp = RowId::new(); - let chunk = Chunk::builder(entity_path.clone()) - .with_archetype( - row_id_static0, - TimePoint::STATIC, - &MyPoints::new([MyPoint::new(1.0, 1.0)]), - ) - .with_archetype( - row_id_static1, - TimePoint::STATIC, - &MyPoints::new([MyPoint::new(2.0, 2.0)]), - ) - .with_component( - row_id_irrelevant, - TimePoint::STATIC, - // Some random components that aren't of interest to us! - MyIndex::partial_descriptor(), - &MyIndex(123), - )? - .build()?; - entity_db.add_chunk(&Arc::new(chunk))?; - let chunk = Chunk::builder(entity_path.clone()) - .with_archetype( - row_id_temp, - tp(10), - // Not allowed to write position & index, but color is fine since it wasn't written statically. 
- &MyPoints::update_fields().with_colors([MyColor(1)]), + .with_archetype_auto_row( + [(timeline, 1)], + &InstancePoses3D::new() + .with_translations([[1.0, 2.0, 3.0]]) + .with_quaternions([RotationQuat::INVALID]), ) .build()?; + let chunk_id = chunk.id(); + let row_id = chunk.row_ids_slice()[0]; entity_db.add_chunk(&Arc::new(chunk))?; - let requested_frame_id = TransformFrameIdHash::from_entity_path(&entity_path); - - let query_row_at_time = |t| { - atomic_latest_at_query_test( - &entity_db, - &LatestAtQuery::new(*timeline().name(), t), - &entity_path, - Some(AtomicLatestAtFrameFilter { - condition_frame_id_component: frame_condition_component(), - requested_frame_id, - }), - &atomic_component_set(), - )? - .row_id() - }; - - assert_eq!(query_row_at_time(0), Some(row_id_static1)); - assert_eq!(query_row_at_time(10), Some(row_id_temp)); - assert_eq!(query_row_at_time(123), Some(row_id_temp)); - - // Any query with another frame should fail - assert!( - atomic_latest_at_query_test( - &entity_db, - &LatestAtQuery::new(*timeline().name(), 0), - &entity_path, - Some(AtomicLatestAtFrameFilter { - condition_frame_id_component: frame_condition_component(), - requested_frame_id: TransformFrameIdHash::from_str("nope"), - }), - &atomic_component_set(), - ) - .is_none() + let poses = query_and_resolve_instance_poses_at_entity( + &entity_db, + &MissingChunkReporter::default(), + &entity_path, + chunk_id, + row_id, ); - Ok(()) - } - - fn atomic_latest_at_temporal_only_with_frames( - out_of_order: bool, - ) -> Result<(), Box> { - let mut entity_db = EntityDb::new(re_log_types::StoreInfo::testing().store_id); - - // Populate store. 
- let entity_path = EntityPath::from("my_entity"); - let row_id_temp0 = RowId::new(); - let row_id_temp1 = RowId::new(); - let row_id_irrelevant = RowId::new(); - let row_id_temp2 = RowId::new(); - let chunk = Chunk::builder(entity_path.clone()) - .with_archetype( - row_id_temp0, - if out_of_order { tp(30) } else { tp(10) }, - &MyPoints::new([MyPoint::new(1.0, 1.0)]) - .with_colors([MyColor(1)]) - .with_labels([MyLabel("first".to_owned())]), - ) - .with_archetype( - row_id_temp1, - tp(20), - &MyPoints::update_fields() - .with_colors([MyColor(2)]) - .with_labels([MyLabel("second!".to_owned())]), - ) - .with_component( - row_id_irrelevant, - tp(25), - // Some random components that aren't of interest to us! - MyIndex::partial_descriptor(), - &MyIndex(123), - )? - .with_archetype( - row_id_temp2, - if out_of_order { tp(10) } else { tp(30) }, - &MyPoints::new([MyPoint::new(2.0, 2.0)]), - ) - .build()?; - entity_db.add_chunk(&Arc::new(chunk))?; - - let query_row = |t, label: &str| { - atomic_latest_at_query_test( - &entity_db, - &LatestAtQuery::new(*timeline().name(), t), - &entity_path, - Some(AtomicLatestAtFrameFilter { - condition_frame_id_component: frame_condition_component(), - requested_frame_id: TransformFrameIdHash::from_str(label), - }), - &atomic_component_set(), - )? - .row_id() - }; - - let query_row_no_cond = |t| { - atomic_latest_at_query_test( - &entity_db, - &LatestAtQuery::new(*timeline().name(), t), - &entity_path, - None, - &atomic_component_set(), - )? 
- .row_id() - }; - - assert_eq!(query_row(0, "first"), None); - assert_eq!(query_row(0, "second!"), None); - assert_eq!(query_row(0, "tf#/my_entity"), None); - if out_of_order { - assert_eq!(query_row(10, "first"), None); - assert_eq!(query_row(20, "first"), None); - assert_eq!(query_row(25, "first"), None); - assert_eq!(query_row(35, "first"), Some(row_id_temp0)); - - assert_eq!(query_row(10, "second!"), None); - assert_eq!(query_row(20, "second!"), Some(row_id_temp1)); - assert_eq!(query_row(25, "second!"), Some(row_id_temp1)); - assert_eq!(query_row(35, "second!"), Some(row_id_temp1)); - - assert_eq!(query_row(10, "tf#/my_entity"), Some(row_id_temp2)); - assert_eq!(query_row(20, "tf#/my_entity"), Some(row_id_temp2)); - assert_eq!(query_row(25, "tf#/my_entity"), Some(row_id_temp2)); - assert_eq!(query_row(35, "tf#/my_entity"), Some(row_id_temp2)); - - assert_eq!(query_row_no_cond(10), Some(row_id_temp2)); - assert_eq!(query_row_no_cond(20), Some(row_id_temp1)); - assert_eq!(query_row_no_cond(25), Some(row_id_temp1)); - assert_eq!(query_row_no_cond(35), Some(row_id_temp0)); - } else { - assert_eq!(query_row(10, "first"), Some(row_id_temp0)); - assert_eq!(query_row(20, "first"), Some(row_id_temp0)); - assert_eq!(query_row(25, "first"), Some(row_id_temp0)); - assert_eq!(query_row(35, "first"), Some(row_id_temp0)); - - assert_eq!(query_row(10, "second!"), None); - assert_eq!(query_row(20, "second!"), Some(row_id_temp1)); - assert_eq!(query_row(25, "second!"), Some(row_id_temp1)); - assert_eq!(query_row(35, "second!"), Some(row_id_temp1)); - - assert_eq!(query_row(10, "tf#/my_entity"), None); - assert_eq!(query_row(20, "tf#/my_entity"), None); - assert_eq!(query_row(25, "tf#/my_entity"), None); - assert_eq!(query_row(35, "tf#/my_entity"), Some(row_id_temp2)); - - assert_eq!(query_row_no_cond(10), Some(row_id_temp0)); - assert_eq!(query_row_no_cond(20), Some(row_id_temp1)); - assert_eq!(query_row_no_cond(25), Some(row_id_temp1)); - assert_eq!(query_row_no_cond(35), 
Some(row_id_temp2)); - } - - Ok(()) - } - #[test] - fn atomic_latest_at_temporal_only_with_frames_in_order() - -> Result<(), Box> { - atomic_latest_at_temporal_only_with_frames(false) - } - - #[test] - fn atomic_latest_at_temporal_only_with_frames_out_of_order() - -> Result<(), Box> { - atomic_latest_at_temporal_only_with_frames(true) - } - - #[test] - fn atomic_latest_at_handle_simultaneous_events() -> Result<(), Box> { - let mut entity_db = EntityDb::new(re_log_types::StoreInfo::testing().store_id); - - // Populate store. - let entity_path = EntityPath::from("my_entity"); - let row_id_temp0 = RowId::new(); - let row_id_temp1 = RowId::new(); - let row_id_irrelevant = RowId::new(); - let row_id_temp2 = RowId::new(); - - let time = tp(10); - - let chunk = Chunk::builder(entity_path.clone()) - .with_archetype( - row_id_temp0, - time.clone(), - &MyPoints::new([MyPoint::new(1.0, 1.0)]) - .with_colors([MyColor(1)]) - .with_labels([MyLabel("first".to_owned())]), - ) - .with_archetype( - row_id_temp1, - time.clone(), - &MyPoints::update_fields() - .with_colors([MyColor(2)]) - .with_labels([MyLabel("second!".to_owned())]), - ) - .with_component( - row_id_irrelevant, - time.clone(), - // Some random components that aren't of interest to us! - MyIndex::partial_descriptor(), - &MyIndex(123), - )? - .with_archetype( - row_id_temp2, - time.clone(), - &MyPoints::new([MyPoint::new(2.0, 2.0)]), - ) - .build()?; - entity_db.add_chunk(&Arc::new(chunk))?; - - let query_row = |t, label: &str| { - atomic_latest_at_query_test( - &entity_db, - &LatestAtQuery::new(*timeline().name(), t), - &entity_path, - Some(AtomicLatestAtFrameFilter { - condition_frame_id_component: frame_condition_component(), - requested_frame_id: TransformFrameIdHash::from_str(label), - }), - &atomic_component_set(), - )? 
- .row_id() - }; - - assert_eq!(query_row(0, "first"), None); - assert_eq!(query_row(0, "second!"), None); - assert_eq!(query_row(0, "tf#/my_entity"), None); - assert_eq!(query_row(10, "first"), Some(row_id_temp0)); - assert_eq!(query_row(10, "second!"), Some(row_id_temp1)); - assert_eq!(query_row(10, "tf#/my_entity"), Some(row_id_temp2)); - - Ok(()) - } - - #[test] - fn atomic_latest_at_handle_empty_arrays() -> Result<(), Box> { - let mut entity_db = EntityDb::new(re_log_types::StoreInfo::testing().store_id); - - // Populate store. - let entity_path = EntityPath::from("my_entity"); - let row_id_temp0 = RowId::new(); - let row_id_irrelevant = RowId::new(); - let row_id_temp1 = RowId::new(); - - let chunk = Chunk::builder(entity_path.clone()) - .with_archetype( - row_id_temp0, - tp(10), - &MyPoints::new([MyPoint::new(1.0, 1.0)]) - .with_labels([MyLabel("myframe".to_owned())]), - ) - .with_component( - row_id_irrelevant, - tp(20), - // Some random components that aren't of interest to us! - MyIndex::partial_descriptor(), - &MyIndex(123), - )? - .with_archetype( - row_id_temp1, - tp(30), - &MyPoints::update_fields().with_colors(std::iter::empty::()), // Empty array on a relevant component, still clears out things| - ) - .build()?; - entity_db.add_chunk(&Arc::new(chunk))?; - - let query_row = |t, label: &str| { - atomic_latest_at_query_test( - &entity_db, - &LatestAtQuery::new(*timeline().name(), t), - &entity_path, - Some(AtomicLatestAtFrameFilter { - condition_frame_id_component: frame_condition_component(), - requested_frame_id: TransformFrameIdHash::from_str(label), - }), - &atomic_component_set(), - )? 
- .row_id() - }; - - assert_eq!(query_row(0, "myframe"), None); - assert_eq!(query_row(0, "tf#/my_entity"), None); - assert_eq!(query_row(10, "myframe"), Some(row_id_temp0)); - assert_eq!(query_row(10, "tf#/my_entity"), None); - assert_eq!(query_row(20, "myframe"), Some(row_id_temp0)); - assert_eq!(query_row(20, "tf#/my_entity"), None); - assert_eq!(query_row(30, "myframe"), Some(row_id_temp0)); - assert_eq!(query_row(30, "tf#/my_entity"), Some(row_id_temp1)); + assert_eq!( + poses, + vec![DAffine3::from_translation(glam::dvec3(1.0, 2.0, 3.0))] + ); Ok(()) } diff --git a/crates/store/re_tf/src/transform_resolution_cache/cache.rs b/crates/store/re_tf/src/transform_resolution_cache/cache.rs index 3d848e8c8286..0d7dde0b9fcc 100644 --- a/crates/store/re_tf/src/transform_resolution_cache/cache.rs +++ b/crates/store/re_tf/src/transform_resolution_cache/cache.rs @@ -6,14 +6,20 @@ use re_byte_size::SizeBytes; use re_chunk_store::ChunkStore; use re_entity_db::EntityDb; use re_log::{debug_assert, debug_assert_eq}; +use re_log_encoding::RrdManifest; use re_log_types::{TimeInt, TimelineName}; use re_sdk_types::archetypes; use crate::frame_id_registry::FrameIdRegistry; use crate::transform_aspect::TransformAspect; +use crate::transform_queries::{ + atomic_component_set_for_instance_poses, atomic_component_set_for_pinhole_projection, + atomic_component_set_for_tree_transforms, +}; +use crate::transform_resolution_cache::iter_relevant_rows_in_chunk; use super::cached_transforms_for_timeline::CachedTransformsForTimeline; -use super::iter_child_frames_in_chunk; +use super::iter_relevant_rows_in_chunk_with_child_frames; type ArcRwLock = Arc>; @@ -65,6 +71,10 @@ impl TransformResolutionCache { let mut cache = Self::default(); + if let Some(manifest) = entity_db.rrd_manifest_index().manifest() { + cache.register_manifest(manifest); + } + for chunk in entity_db.storage_engine().store().iter_physical_chunks() { // Register all frames even if this chunk doesn't have transform data. 
cache @@ -189,7 +199,7 @@ impl TransformResolutionCache { /// /// This will internally… /// * keep track of which child frames are influenced by which entity - /// * create empty entries for where transforms may change over time (may happen conservatively - creating more entries than needed) + /// * create empty entries for where transforms may change over time /// * this may invalidate previous entries at the same position /// * remove cached entries if chunks were GC'ed pub fn process_store_events<'a>( @@ -199,34 +209,55 @@ impl TransformResolutionCache { re_tracing::profile_function!(); for event in events { - // This doesn't maintain a collection of chunks that needs to be kept in sync 1:1 with - // the store, rather it just keeps track of what entities have what properties, and for - // that a delta chunk is all we need. - let Some(delta_chunk) = event.delta_chunk() else { - continue; // virtual event, we don't care - }; + match &**event { + re_chunk_store::ChunkStoreDiff::Addition(addition) => { + // Since entity paths lead to implicit frames, we have to prime our lookup table + // with them even if this chunk doesn't have transform data. + // Note that here we can use the delta chunk, since we're only interested in newly added entity paths & transform ids. + self.frame_id_registry + .write() + .register_all_frames_in_chunk(addition.delta_chunk()); + + // We always keep track of the latest physical chunks. + // By overriding with the latest chunk after processing we never leave references to virtual chunks. + let chunk = &addition.chunk_after_processing; + + let aspects = TransformAspect::transform_aspects_of(chunk); + if !aspects.is_empty() { + if chunk.is_static() { + self.add_static_chunk(chunk, aspects); + } else { + self.add_temporal_chunk(chunk, aspects); + } + } + } - // Since entity paths lead to implicit frames, we have to prime our lookup table - // with them even if this chunk doesn't have transform data. 
- self.frame_id_registry - .write() - .register_all_frames_in_chunk(delta_chunk); + re_chunk_store::ChunkStoreDiff::VirtualAddition(addition) => { + self.register_manifest(&addition.rrd_manifest); + } - let aspects = TransformAspect::transform_aspects_of(delta_chunk); - if aspects.is_empty() { - continue; - } + re_chunk_store::ChunkStoreDiff::Deletion(deletion) => { + let aspects = TransformAspect::transform_aspects_of(&deletion.chunk); + if !aspects.is_empty() { + self.remove_chunk(&deletion.chunk, aspects); + } + } - if event.is_deletion() { - self.remove_chunk(delta_chunk, aspects); - } else if delta_chunk.is_static() { - self.add_static_chunk(delta_chunk, aspects); - } else { - self.add_temporal_chunk(delta_chunk, aspects); + re_chunk_store::ChunkStoreDiff::SchemaAddition(_) => {} } } } + fn register_manifest(&self, manifest: &RrdManifest) { + re_tracing::profile_function!(); + + // Make all the entity paths known as potential transform paths. + let mut frame_id_registry = self.frame_id_registry.write(); + for entity_path in manifest.recording_schema().all_entities() { + frame_id_registry.register_frame_id_from_entity_path(entity_path); + } + } + fn add_temporal_chunk(&self, chunk: &re_chunk_store::Chunk, aspects: TransformAspect) { re_tracing::profile_function!(format!( "{} rows, {}", @@ -248,6 +279,7 @@ impl TransformResolutionCache { per_timeline.add_temporal_chunk( chunk, + chunk.id(), aspects, *timeline, &static_timeline, @@ -274,10 +306,11 @@ impl TransformResolutionCache { // Add a static transform invalidation to affected child frames on ALL timelines. 
if aspects.contains(TransformAspect::Frame) { - for (time, frame) in iter_child_frames_in_chunk( + for ((time, row_id), frame) in iter_relevant_rows_in_chunk_with_child_frames( chunk, place_holder_timeline, transform_child_frame_component, + atomic_component_set_for_tree_transforms(), ) { debug_assert_eq!(time, TimeInt::STATIC); @@ -286,7 +319,7 @@ impl TransformResolutionCache { frame, &frame_id_registry, ); - frame_transforms.invalidate_transform_at(TimeInt::STATIC); + frame_transforms.invalidate_transform_at(TimeInt::STATIC, chunk.id(), row_id); #[cfg_attr(not(debug_assertions), expect(clippy::for_kv_map))] for (_timeline, per_timeline) in &mut self.per_timeline { @@ -298,7 +331,7 @@ impl TransformResolutionCache { frame, &frame_id_registry, ); - transforms.invalidate_transform_at(TimeInt::STATIC); + transforms.invalidate_transform_at(TimeInt::STATIC, chunk.id(), row_id); // Entry might have been newly created. Have to ensure that its associated with the right timeline. #[cfg(debug_assertions)] @@ -311,20 +344,36 @@ impl TransformResolutionCache { if aspects.contains(TransformAspect::Pose) { let frame_transforms = static_timeline.get_or_create_pose_transforms_static(entity_path); - frame_transforms.invalidate_at(TimeInt::STATIC); + + for (time, row_id) in iter_relevant_rows_in_chunk( + chunk, + place_holder_timeline, + atomic_component_set_for_instance_poses(), + ) { + debug_assert_eq!(time, TimeInt::STATIC); + frame_transforms.invalidate_at(time, chunk.id(), row_id); + } for per_timeline in self.per_timeline.values_mut() { - per_timeline - .write() - .get_or_create_pose_transforms_temporal(entity_path, &static_timeline) - .invalidate_at(TimeInt::STATIC); + for (time, row_id) in iter_relevant_rows_in_chunk( + chunk, + place_holder_timeline, + atomic_component_set_for_instance_poses(), + ) { + debug_assert_eq!(time, TimeInt::STATIC); + per_timeline + .write() + .get_or_create_pose_transforms_temporal(entity_path, &static_timeline) + .invalidate_at(time, 
chunk.id(), row_id); + } } } - if aspects.contains(TransformAspect::PinholeOrViewCoordinates) { - for (time, frame) in iter_child_frames_in_chunk( + if aspects.contains(TransformAspect::Pinhole) { + for ((time, row_id), frame) in iter_relevant_rows_in_chunk_with_child_frames( chunk, place_holder_timeline, pinhole_child_frame_component, + atomic_component_set_for_pinhole_projection(), ) { debug_assert_eq!(time, TimeInt::STATIC); @@ -333,7 +382,11 @@ impl TransformResolutionCache { frame, &frame_id_registry, ); - frame_transforms.invalidate_pinhole_projection_at(TimeInt::STATIC); + frame_transforms.invalidate_pinhole_projection_at( + TimeInt::STATIC, + chunk.id(), + row_id, + ); #[cfg_attr(not(debug_assertions), expect(clippy::for_kv_map))] for (_timeline, per_timeline) in &mut self.per_timeline { @@ -345,7 +398,11 @@ impl TransformResolutionCache { frame, &frame_id_registry, ); - transforms.invalidate_pinhole_projection_at(TimeInt::STATIC); + transforms.invalidate_pinhole_projection_at( + TimeInt::STATIC, + chunk.id(), + row_id, + ); // Entry might have been newly created. Have to ensure that its associated with the right timeline. #[cfg(debug_assertions)] diff --git a/crates/store/re_tf/src/transform_resolution_cache/cached_transform_value.rs b/crates/store/re_tf/src/transform_resolution_cache/cached_transform_value.rs index f74cb9aeb85a..cfd593f5e551 100644 --- a/crates/store/re_tf/src/transform_resolution_cache/cached_transform_value.rs +++ b/crates/store/re_tf/src/transform_resolution_cache/cached_transform_value.rs @@ -1,23 +1,36 @@ use re_byte_size::{BookkeepingBTreeMap, SizeBytes}; use re_log_types::TimeInt; +use re_sdk_types::{ChunkId, RowId}; #[derive(Clone, Debug, PartialEq, Eq)] pub enum CachedTransformValue { /// Cache is invalidated, we don't know what state we're in. - Invalidated, + Invalidated { + chunk_id: ChunkId, // TODO(RR-4439): rows are allowed to be distributed across several chunks. + row_id: RowId, + }, /// There's a transform at this time. 
- Resident(T), + Resident { value: T, row_id: RowId }, /// The value has been cleared out at this time. Cleared, } +impl CachedTransformValue { + pub fn row_id(&self) -> Option { + match self { + Self::Resident { row_id, .. } | Self::Invalidated { row_id, .. } => Some(*row_id), + Self::Cleared => None, + } + } +} + impl SizeBytes for CachedTransformValue { fn heap_size_bytes(&self) -> u64 { match self { - Self::Resident(item) => item.heap_size_bytes(), - Self::Invalidated | Self::Cleared => 0, + Self::Resident { value, .. } => value.heap_size_bytes(), + Self::Invalidated { .. } | Self::Cleared => 0, } } } @@ -25,10 +38,46 @@ impl SizeBytes for CachedTransformValue { pub fn add_invalidated_entry_if_not_already_cleared( transforms: &mut BookkeepingBTreeMap>, time: TimeInt, + new_chunk_id: ChunkId, + new_row_id: RowId, ) { - transforms.mutate_entry(time, CachedTransformValue::Invalidated, |value| { - if *value != CachedTransformValue::Cleared { - *value = CachedTransformValue::Invalidated; - } - }); + transforms.mutate_entry( + time, + CachedTransformValue::Invalidated { + chunk_id: new_chunk_id, + row_id: new_row_id, + }, + |value| { + match value { + CachedTransformValue::Invalidated { chunk_id, row_id } => { + // Update to the latest row id. + // + // There are two reasons why the row id may be equal: + // * there has been a compaction/split event and we have to update the chunk id now + // * the row is distributed across many chunks + // TODO(RR-4439): this is not yet supported + // TODO(RR-4441): we should at least warn if we hit that case. Surprisingly hard since we have to distinguish whether this is a new chunk or just a replacement. + if new_row_id >= *row_id { + *row_id = new_row_id; + *chunk_id = new_chunk_id; + } + } + CachedTransformValue::Resident { row_id, .. } => { + // If this is the same row id as before, we don't have to invalidate the cached value. + // However, if there's a new, higher row id, new (uncalculated) value wins over the previous one. 
+ // TODO(RR-4439): to support rows distributed across several chunks, we need to invalidate. + // TODO(RR-4441): we should at least warn if we hit that case. Surprisingly hard since we have to distinguish whether this is a new chunk or just a replacement. + if new_row_id > *row_id { + *value = CachedTransformValue::Invalidated { + chunk_id: new_chunk_id, + row_id: new_row_id, + }; + } + } + CachedTransformValue::Cleared => { + // Always keep. + } + } + }, + ); } diff --git a/crates/store/re_tf/src/transform_resolution_cache/cached_transforms_for_timeline.rs b/crates/store/re_tf/src/transform_resolution_cache/cached_transforms_for_timeline.rs index f646854cedf5..e7793eef7008 100644 --- a/crates/store/re_tf/src/transform_resolution_cache/cached_transforms_for_timeline.rs +++ b/crates/store/re_tf/src/transform_resolution_cache/cached_transforms_for_timeline.rs @@ -4,12 +4,18 @@ use nohash_hasher::IntMap; use re_byte_size::SizeBytes; use re_chunk_store::ChunkStore; use re_log_types::{EntityPath, EntityPathHash, TimeInt, TimelineName}; +use re_sdk_types::ChunkId; use crate::TransformFrameIdHash; use crate::frame_id_registry::FrameIdRegistry; use crate::transform_aspect::TransformAspect; +use crate::transform_queries::{ + atomic_component_set_for_instance_poses, atomic_component_set_for_pinhole_projection, + atomic_component_set_for_tree_transforms, +}; +use crate::transform_resolution_cache::iter_relevant_rows_in_chunk; -use super::iter_child_frames_in_chunk; +use super::iter_relevant_rows_in_chunk_with_child_frames; use super::pose_transform_for_entity::PoseTransformForEntity; use super::tree_transforms_for_child_frame::TreeTransformsForChildFrame; @@ -91,6 +97,7 @@ impl CachedTransformsForTimeline { result.add_temporal_chunk( chunk, + chunk.id(), aspects, timeline, static_transforms, @@ -105,6 +112,7 @@ impl CachedTransformsForTimeline { pub fn add_temporal_chunk( &mut self, chunk: &re_chunk_store::Chunk, + physical_chunk_id: ChunkId, aspects: TransformAspect, 
timeline: TimelineName, static_timeline: &Self, @@ -120,9 +128,12 @@ impl CachedTransformsForTimeline { re_sdk_types::archetypes::Pinhole::descriptor_child_frame().component; if aspects.contains(TransformAspect::Frame) { - for (time, frame) in - iter_child_frames_in_chunk(chunk, timeline, transform_child_frame_component) - { + for ((time, row_id), frame) in iter_relevant_rows_in_chunk_with_child_frames( + chunk, + timeline, + transform_child_frame_component, + atomic_component_set_for_tree_transforms(), + ) { self.get_or_create_tree_transforms_temporal( entity_path, frame, @@ -130,19 +141,26 @@ impl CachedTransformsForTimeline { static_timeline, frame_id_registry, ) - .invalidate_transform_at(time); + .invalidate_transform_at(time, physical_chunk_id, row_id); } } if aspects.contains(TransformAspect::Pose) { let poses = self.get_or_create_pose_transforms_temporal(entity_path, static_timeline); - for (time, _) in chunk.iter_indices(&timeline) { - poses.invalidate_at(time); + for (time, row_id) in iter_relevant_rows_in_chunk( + chunk, + timeline, + atomic_component_set_for_instance_poses(), + ) { + poses.invalidate_at(time, physical_chunk_id, row_id); } } - if aspects.contains(TransformAspect::PinholeOrViewCoordinates) { - for (time, frame) in - iter_child_frames_in_chunk(chunk, timeline, pinhole_child_frame_component) - { + if aspects.contains(TransformAspect::Pinhole) { + for ((time, row_id), frame) in iter_relevant_rows_in_chunk_with_child_frames( + chunk, + timeline, + pinhole_child_frame_component, + atomic_component_set_for_pinhole_projection(), + ) { self.get_or_create_tree_transforms_temporal( entity_path, frame, @@ -150,7 +168,7 @@ impl CachedTransformsForTimeline { static_timeline, frame_id_registry, ) - .invalidate_pinhole_projection_at(time); + .invalidate_pinhole_projection_at(time, physical_chunk_id, row_id); } } @@ -209,9 +227,12 @@ impl CachedTransformsForTimeline { // Remove existing data. 
if aspects.contains(TransformAspect::Frame) { - for (time, frame) in - iter_child_frames_in_chunk(chunk, timeline, transform_child_frame_component) - { + for ((time, _row_id), frame) in iter_relevant_rows_in_chunk_with_child_frames( + chunk, + timeline, + transform_child_frame_component, + atomic_component_set_for_tree_transforms(), + ) { if let Some(transforms) = self.per_child_frame_transforms.get_mut(&frame) { transforms.events.get_mut().frame_transforms.remove(&time); } @@ -220,14 +241,21 @@ impl CachedTransformsForTimeline { if aspects.contains(TransformAspect::Pose) && let Some(poses) = self.per_entity_poses.get_mut(&entity_path.hash()) { - for (time, _) in chunk.iter_indices(&timeline) { + for (time, _) in iter_relevant_rows_in_chunk( + chunk, + timeline, + atomic_component_set_for_instance_poses(), + ) { poses.poses_per_time.get_mut().remove(&time); } } - if aspects.contains(TransformAspect::PinholeOrViewCoordinates) { - for (time, frame) in - iter_child_frames_in_chunk(chunk, timeline, pinhole_child_frame_component) - { + if aspects.contains(TransformAspect::Pinhole) { + for ((time, _row_id), frame) in iter_relevant_rows_in_chunk_with_child_frames( + chunk, + timeline, + pinhole_child_frame_component, + atomic_component_set_for_pinhole_projection(), + ) { if let Some(transforms) = self.per_child_frame_transforms.get_mut(&frame) { transforms .events @@ -260,7 +288,7 @@ impl CachedTransformsForTimeline { Some(existing_path) => { if existing_path != entity_path { re_log::error_once!( - "The entity path associated with a child frame mustn't change except for static vs temporal data. The frame {:?} was previously logged temporally at the path {existing_path:?} and was now logged on {entity_path:?}.", + "Two entities define the same child frame. This is only allowed if one of them is static and the other is temporal. 
The frame {:?} was already temporally associated with {existing_path:?}, but is now also being temporally logged on {entity_path:?}.", frame_registry.lookup_frame_id(child_frame).map_or_else( || format!("{child_frame:?}"), ToString::to_string @@ -303,7 +331,7 @@ impl CachedTransformsForTimeline { Some(existing_path) => { if existing_path != entity_path { re_log::error_once!( - "The entity path associated with a child frame mustn't change except for static vs temporal data. The frame {:?} was previously logged statically at the path {existing_path:?} and was now logged on {entity_path:?}.", + "Two entities define the same child frame. This is only allowed if one of them is static and the other is temporal. The frame {:?} was already statically associated with {existing_path:?}, but is now also being statically logged on {entity_path:?}.", frame_registry.lookup_frame_id(child_frame).map_or_else( || format!("{child_frame:?}"), ToString::to_string diff --git a/crates/store/re_tf/src/transform_resolution_cache/mod.rs b/crates/store/re_tf/src/transform_resolution_cache/mod.rs index cc5d835bcc17..4c266d809762 100644 --- a/crates/store/re_tf/src/transform_resolution_cache/mod.rs +++ b/crates/store/re_tf/src/transform_resolution_cache/mod.rs @@ -13,28 +13,33 @@ mod tests; pub use self::cache::TransformResolutionCache; pub use self::cached_transforms_for_timeline::CachedTransformsForTimeline; pub use self::parent_from_child_transform::ParentFromChildTransform; -pub use self::resolved_pinhole_projection::ResolvedPinholeProjection; +pub use self::resolved_pinhole_projection::{ + ResolvedPinholeProjection, ResolvedPinholeProjectionCached, +}; +use arrow::array::Array as _; use itertools::{Either, izip}; use re_arrow_util::ArrowArrayDowncastRef as _; use re_chunk_store::Chunk; use re_chunk_store::external::arrow; use re_log_types::{TimeInt, TimelineName}; -use re_sdk_types::ComponentIdentifier; +use re_sdk_types::{ComponentIdentifier, RowId}; use crate::TransformFrameIdHash; 
-/// Iterates over all frames of a given component type that are in a chunk. +/// Iterates over all relevant rows in a chunk in a given timeline, resolving the child frames for each. /// /// If the chunk is static, `timeline` will be ignored. /// -/// Yields an entry for every row. Note that there may be many entries per time though. +/// Yields an entry for every row where at least one out of `relevant_components` is non-null (even if the `frame_component` is null on that row). +/// Note that there may be many entries per time though. /// (Currently, there can be only a single frame id per row) -fn iter_child_frames_in_chunk( - chunk: &Chunk, +fn iter_relevant_rows_in_chunk_with_child_frames<'a>( + chunk: &'a Chunk, timeline: TimelineName, frame_component: ComponentIdentifier, -) -> impl Iterator { + relevant_components: &'static [ComponentIdentifier], +) -> impl Iterator + 'a { let implicit_frame = TransformFrameIdHash::from_entity_path(chunk.entity_path()); // This is similar to `iter_slices` but it also yields elements for rows where the component is null. @@ -54,20 +59,190 @@ fn iter_child_frames_in_chunk( let offsets = list_array.offsets().iter().map(|idx| *idx as usize); let lengths = list_array.offsets().lengths(); - Either::Right(izip!(offsets, lengths).map(move |(offset, length)| { + Either::Right(izip!(offsets, lengths).filter_map(move |(offset, length)| { // No need to check for nulls since we treat nulls and empty arrays both as the implicit frame. if length == 0 { - implicit_frame + Some(implicit_frame) } else { // There can only be a single frame id per row today, so only look at the first element. - TransformFrameIdHash::from_str(values.value(offset)) + let frame_id = values.value(offset); + if frame_id.is_empty() { + // Special case: we have a frame id value, but it's an empty string. + // Empty explicit frame names are undefined and thus ignored here. 
+ // (see related errors / warnings that are shown in this case elsewhere) + None + } else { + Some(TransformFrameIdHash::from_str(frame_id)) + } } })) } ); - izip!( - chunk.iter_indices(&timeline).map(|(t, _)| t), - frame_ids_per_row - ) + let relevant_chunk_chunk_arrays = relevant_components + .iter() + .filter_map(|component| chunk.components().get_array(*component)) + .collect::>(); + + izip!(chunk.iter_indices(&timeline), frame_ids_per_row) + .enumerate() + .filter(move |(index, _)| { + // *Something* on this row has to be non-empty & non-null! + // Example where this is not the case: + // + // ┌────────────────┬─────────────┬────────────┐ + // │ child_frame_id │ translation │ color │ + // ├────────────────┼─────────────┼────────────┤ + // │ ["myframe"] │ [[1,2,3]] │ null │ + // │ null │ null │ 0xFF00FFFF │ + // │ null │ [] │ null │ + // └────────────────┴─────────────┴────────────┘ + // + // The second row doesn't have any of the components of our atomic set. + // It is therefore not relevant for what we're looking for! + // The last row *is* relevant, because it clears out the translation for the + // entity derived child_frame_id, thus setting it to an identity transform. + relevant_chunk_chunk_arrays + .iter() + .any(|array| !array.is_null(*index)) + }) + .map(|(_, values)| values) +} + +/// Iterates over relevant rows of a chunk in a given timeline. +/// +/// If the chunk is static, `timeline` will be ignored. +/// +/// Yields an entry for every row where at least one out of `relevant_components` is non-null. +/// Note that there may be many entries per time though. 
+fn iter_relevant_rows_in_chunk<'a>( + chunk: &'a Chunk, + timeline: TimelineName, + relevant_components: &'static [ComponentIdentifier], +) -> impl Iterator + 'a { + let relevant_chunk_chunk_arrays = relevant_components + .iter() + .filter_map(|component| chunk.components().get_array(*component)) + .collect::>(); + + chunk + .iter_indices(&timeline) + .enumerate() + .filter(move |(index, _)| { + // *Something* on this row has to be non-empty & non-null! + // Example where this is not the case: + // + // ┌────────────────┬─────────────┬────────────┐ + // │ child_frame_id │ translation │ color │ + // ├────────────────┼─────────────┼────────────┤ + // │ ["myframe"] │ [[1,2,3]] │ null │ + // │ null │ null │ 0xFF00FFFF │ + // │ null │ [] │ null │ + // └────────────────┴─────────────┴────────────┘ + // + // The second row doesn't have any of the components of our atomic set. + // It is therefore not relevant for what we're looking for! + // The last row *is* relevant, because it clears out the translation for the + // entity derived child_frame_id, thus setting it to an identity transform. 
+ relevant_chunk_chunk_arrays + .iter() + .any(|array| !array.is_null(*index)) + }) + .map(|(_, values)| values) +} + +#[cfg(test)] +mod iterator_tests { + use re_chunk_store::Chunk; + use re_log_types::{ + EntityPath, TimeInt, Timeline, + example_components::{MyPoint, MyPoints}, + }; + use re_sdk_types::{ + archetypes::{self, Pinhole, Transform3D}, + components::PinholeProjection, + }; + + use super::{iter_relevant_rows_in_chunk, iter_relevant_rows_in_chunk_with_child_frames}; + use crate::{TransformFrameIdHash, transform_queries}; + + #[test] + fn iter_relevant_rows_in_chunk_with_child_frames_skips_unrelated_rows_and_uses_implicit_frame() + -> Result<(), Box> { + let timeline = Timeline::new_sequence("t"); + let entity_path = EntityPath::from("my_entity"); + let chunk = Chunk::builder(entity_path.clone()) + .with_archetype_auto_row( + [(timeline, 1)], + &Transform3D::from_translation([1.0, 2.0, 3.0]).with_child_frame("explicit_frame"), + ) + .with_archetype_auto_row([(timeline, 2)], &MyPoints::new([MyPoint::new(1.0, 2.0)])) + .with_archetype_auto_row([(timeline, 3)], &Transform3D::clear_fields()) + .with_archetype_auto_row([(timeline, 4)], &Transform3D::from_scale([2.0, 3.0, 4.0])) + .build()?; + + let row_ids = chunk.row_ids_slice().to_vec(); + let relevant_rows = iter_relevant_rows_in_chunk_with_child_frames( + &chunk, + *timeline.name(), + archetypes::Transform3D::descriptor_child_frame().component, + transform_queries::atomic_component_set_for_tree_transforms(), + ) + .collect::>(); + + assert_eq!( + relevant_rows, + vec![ + ( + (TimeInt::new_temporal(1), row_ids[0]), + TransformFrameIdHash::from_str("explicit_frame"), + ), + ( + (TimeInt::new_temporal(3), row_ids[2]), + TransformFrameIdHash::from_entity_path(&entity_path), + ), + ( + (TimeInt::new_temporal(4), row_ids[3]), + TransformFrameIdHash::from_entity_path(&entity_path), + ), + ] + ); + + Ok(()) + } + + #[test] + fn iter_relevant_rows_in_chunk_skips_unrelated_rows() -> Result<(), Box> + { + let 
timeline = Timeline::new_sequence("t"); + let chunk = Chunk::builder(EntityPath::from("my_entity")) + .with_archetype_auto_row( + [(timeline, 1)], + &Pinhole::new(PinholeProjection::from_focal_length_and_principal_point( + [1.0, 2.0], + [3.0, 4.0], + )), + ) + .with_archetype_auto_row([(timeline, 2)], &MyPoints::new([MyPoint::new(1.0, 2.0)])) + .with_archetype_auto_row([(timeline, 3)], &Pinhole::clear_fields()) + .build()?; + + let row_ids = chunk.row_ids_slice().to_vec(); + let relevant_rows = iter_relevant_rows_in_chunk( + &chunk, + *timeline.name(), + transform_queries::atomic_component_set_for_pinhole_projection(), + ) + .collect::>(); + + assert_eq!( + relevant_rows, + vec![ + (TimeInt::new_temporal(1), row_ids[0]), + (TimeInt::new_temporal(3), row_ids[2]), + ] + ); + + Ok(()) + } } diff --git a/crates/store/re_tf/src/transform_resolution_cache/pose_transform_for_entity.rs b/crates/store/re_tf/src/transform_resolution_cache/pose_transform_for_entity.rs index 162ba74cccc4..7543b4efd07e 100644 --- a/crates/store/re_tf/src/transform_resolution_cache/pose_transform_for_entity.rs +++ b/crates/store/re_tf/src/transform_resolution_cache/pose_transform_for_entity.rs @@ -92,20 +92,23 @@ impl PoseTransformForEntity { poses_per_time .mutate_latest_at(&query.at(), |_t, pose_transform| { // Separate check to work around borrow checker issues. - if pose_transform == &CachedTransformValue::Invalidated { - *pose_transform = - CachedTransformValue::Resident(query_and_resolve_instance_poses_at_entity( + if let CachedTransformValue::Invalidated { row_id, chunk_id } = pose_transform { + *pose_transform = CachedTransformValue::Resident { + value: query_and_resolve_instance_poses_at_entity( entity_db, missing_chunk_reporter, &self.entity_path, - query, - )); + *chunk_id, + *row_id, + ), + row_id: *row_id, + }; } match pose_transform { - CachedTransformValue::Resident(transform) => transform.clone(), + CachedTransformValue::Resident { value, .. 
} => value.clone(), CachedTransformValue::Cleared => Vec::new(), - CachedTransformValue::Invalidated => { + CachedTransformValue::Invalidated { .. } => { unreachable!("Just made transform cache-resident") } } @@ -128,7 +131,17 @@ impl PoseTransformForEntity { } /// Inserts an invalidation point for poses. - pub fn invalidate_at(&mut self, time: TimeInt) { - add_invalidated_entry_if_not_already_cleared(self.poses_per_time.get_mut(), time); + pub fn invalidate_at( + &mut self, + time: TimeInt, + chunk_id: re_sdk_types::ChunkId, + row_id: re_sdk_types::RowId, + ) { + add_invalidated_entry_if_not_already_cleared( + self.poses_per_time.get_mut(), + time, + chunk_id, + row_id, + ); } } diff --git a/crates/store/re_tf/src/transform_resolution_cache/resolved_pinhole_projection.rs b/crates/store/re_tf/src/transform_resolution_cache/resolved_pinhole_projection.rs index ab1a437688d8..e6d8e1d0de21 100644 --- a/crates/store/re_tf/src/transform_resolution_cache/resolved_pinhole_projection.rs +++ b/crates/store/re_tf/src/transform_resolution_cache/resolved_pinhole_projection.rs @@ -1,3 +1,5 @@ +use std::ops::Deref; + use re_byte_size::SizeBytes; use re_sdk_types::components; @@ -5,12 +7,8 @@ use crate::TransformFrameIdHash; #[derive(Clone, Debug, PartialEq)] pub struct ResolvedPinholeProjection { - /// The parent frame of the pinhole projection. - pub parent: TransformFrameIdHash, - - pub image_from_camera: components::PinholeProjection, - - pub resolution: Option, + /// All components that are updated atomically are cached. + pub(crate) cached: ResolvedPinholeProjectionCached, /// View coordinates at this pinhole camera. 
/// @@ -20,20 +18,41 @@ pub struct ResolvedPinholeProjection { pub view_coordinates: components::ViewCoordinates, } +impl Deref for ResolvedPinholeProjection { + type Target = ResolvedPinholeProjectionCached; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.cached + } +} + impl SizeBytes for ResolvedPinholeProjection { + fn is_pod() -> bool { + true + } + + fn heap_size_bytes(&self) -> u64 { + 0 + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct ResolvedPinholeProjectionCached { + /// The parent frame of the pinhole projection. + pub parent: TransformFrameIdHash, + + pub image_from_camera: components::PinholeProjection, + + pub resolution: Option, +} + +impl SizeBytes for ResolvedPinholeProjectionCached { + fn is_pod() -> bool { + true + } + fn heap_size_bytes(&self) -> u64 { - re_tracing::profile_function!(); - - let Self { - parent, - image_from_camera, - resolution, - view_coordinates, - } = self; - - parent.heap_size_bytes() - + image_from_camera.heap_size_bytes() - + resolution.heap_size_bytes() - + view_coordinates.heap_size_bytes() + 0 } } diff --git a/crates/store/re_tf/src/transform_resolution_cache/tests.rs b/crates/store/re_tf/src/transform_resolution_cache/tests.rs index be47415fed7f..ec824655161f 100644 --- a/crates/store/re_tf/src/transform_resolution_cache/tests.rs +++ b/crates/store/re_tf/src/transform_resolution_cache/tests.rs @@ -11,13 +11,12 @@ use re_log_types::{ example_components::{MyPoint, MyPoints}, }; use re_sdk_types::{ - ChunkId, archetypes::{self, InstancePoses3D, Pinhole, Transform3D}, components::{self, PinholeProjection}, }; -use crate::TransformFrameIdHash; use crate::convert; +use crate::{TransformFrameIdHash, transform_resolution_cache::ResolvedPinholeProjectionCached}; use super::pose_transform_for_entity::PoseTransformForEntity; use super::tree_transforms_for_child_frame::TreeTransformsForChildFrame; @@ -127,6 +126,12 @@ impl TestStoreSubscriber { } } +impl re_byte_size::MemUsageTreeCapture for TestStoreSubscriber 
{ + fn capture_mem_usage_tree(&self) -> re_byte_size::MemUsageTree { + re_byte_size::MemUsageTree::Bytes(0) + } +} + impl PerStoreChunkSubscriber for TestStoreSubscriber { fn name() -> String { "TestStoreSubscriber".to_owned() @@ -513,9 +518,11 @@ fn test_static_pinhole_projection() -> Result<(), Box> { &LatestAtQuery::new(*timeline.name(), TimeInt::MIN) ), Some(ResolvedPinholeProjection { - parent: TransformFrameIdHash::entity_path_hierarchy_root(), - image_from_camera: image_from_camera_final, - resolution: Some([2.0, 2.0].into()), + cached: ResolvedPinholeProjectionCached { + parent: TransformFrameIdHash::entity_path_hierarchy_root(), + image_from_camera: image_from_camera_final, + resolution: Some([2.0, 2.0].into()), + }, view_coordinates: archetypes::Pinhole::DEFAULT_CAMERA_XYZ, }) ); @@ -538,9 +545,11 @@ fn test_static_pinhole_projection() -> Result<(), Box> { &LatestAtQuery::new(*timeline.name(), 1) ), Some(ResolvedPinholeProjection { - parent: TransformFrameIdHash::entity_path_hierarchy_root(), - image_from_camera: image_from_camera_final, - resolution: Some([2.0, 2.0].into()), + cached: ResolvedPinholeProjectionCached { + parent: TransformFrameIdHash::entity_path_hierarchy_root(), + image_from_camera: image_from_camera_final, + resolution: Some([2.0, 2.0].into()), + }, view_coordinates: components::ViewCoordinates::BLU, }) ); @@ -559,9 +568,11 @@ fn test_static_pinhole_projection() -> Result<(), Box> { &LatestAtQuery::new(TimelineName::new("other"), 123) ), Some(ResolvedPinholeProjection { - parent: TransformFrameIdHash::entity_path_hierarchy_root(), - image_from_camera: image_from_camera_final, - resolution: Some([2.0, 2.0].into()), + cached: ResolvedPinholeProjectionCached { + parent: TransformFrameIdHash::entity_path_hierarchy_root(), + image_from_camera: image_from_camera_final, + resolution: Some([2.0, 2.0].into()), + }, view_coordinates: archetypes::Pinhole::DEFAULT_CAMERA_XYZ, }) ); @@ -635,9 +646,11 @@ fn test_static_view_coordinates_projection() 
-> Result<(), Box Result<(), Box> { assert_eq!( latest_at_pinhole_test(transforms, &entity_db, &LatestAtQuery::new(timeline, t)), pinhole_view_coordinates.map(|view_coordinates| ResolvedPinholeProjection { - parent: TransformFrameIdHash::entity_path_hierarchy_root(), - image_from_camera, - resolution: None, + cached: ResolvedPinholeProjectionCached { + parent: TransformFrameIdHash::entity_path_hierarchy_root(), + image_from_camera, + resolution: None, + }, view_coordinates, }), "Unexpected result at time {t}" @@ -1194,17 +1209,13 @@ fn test_single_child_and_parent_over_time( } ChildParentFrameChangesOverTimeTestMode::MultipleChunksInOrder => { for row_idx in 0..chunk.num_rows() { - entity_db.add_chunk(&Arc::new( - chunk.row_sliced_shallow(row_idx, 1).with_id(ChunkId::new()), - ))?; + entity_db.add_chunk(&Arc::new(chunk.row_sliced_shallow(row_idx, 1)))?; apply_store_subscriber_events(&mut cache, &entity_db); } } ChildParentFrameChangesOverTimeTestMode::MultipleChunksReverseOrder => { for row_idx in (0..chunk.num_rows()).rev() { - entity_db.add_chunk(&Arc::new( - chunk.row_sliced_shallow(row_idx, 1).with_id(ChunkId::new()), - ))?; + entity_db.add_chunk(&Arc::new(chunk.row_sliced_shallow(row_idx, 1)))?; apply_store_subscriber_events(&mut cache, &entity_db); } } @@ -1756,9 +1767,11 @@ fn test_pinhole_with_explicit_frames() -> Result<(), Box> &LatestAtQuery::new(timeline_name, t) ), Some(ResolvedPinholeProjection { - parent: TransformFrameIdHash::from_str("parent_frame"), - image_from_camera, - resolution: None, + cached: ResolvedPinholeProjectionCached { + parent: TransformFrameIdHash::from_str("parent_frame"), + image_from_camera, + resolution: None, + }, view_coordinates: archetypes::Pinhole::DEFAULT_CAMERA_XYZ, }), "Unexpected pinhole for child_frame at time t={t}" @@ -1815,9 +1828,11 @@ fn test_pinhole_with_explicit_frames() -> Result<(), Box> &LatestAtQuery::new(timeline_name, t) ), Some(ResolvedPinholeProjection { - parent: 
TransformFrameIdHash::from_str("parent_frame"), - image_from_camera, - resolution: Some([1.0, 2.0].into()), + cached: ResolvedPinholeProjectionCached { + parent: TransformFrameIdHash::from_str("parent_frame"), + image_from_camera, + resolution: Some([1.0, 2.0].into()), + }, view_coordinates: archetypes::Pinhole::DEFAULT_CAMERA_XYZ, }), "Unexpected pinhole for other_frame at time t={t}" diff --git a/crates/store/re_tf/src/transform_resolution_cache/transforms_for_child_frame_events.rs b/crates/store/re_tf/src/transform_resolution_cache/transforms_for_child_frame_events.rs index 7d1b263625dd..ff328b41796e 100644 --- a/crates/store/re_tf/src/transform_resolution_cache/transforms_for_child_frame_events.rs +++ b/crates/store/re_tf/src/transform_resolution_cache/transforms_for_child_frame_events.rs @@ -5,7 +5,7 @@ use re_log_types::TimeInt; use super::cached_transform_value::CachedTransformValue; use super::parent_from_child_transform::ParentFromChildTransform; -use super::resolved_pinhole_projection::ResolvedPinholeProjection; +use super::resolved_pinhole_projection::ResolvedPinholeProjectionCached; // TODO(RR-3539): replace this with a range-map, mapping non-overlapping // time ranges to transforms. That way we can avoid storing the same value multiple times, saving a lot of memory. @@ -17,7 +17,7 @@ pub type FrameTransformTimeMap = // time ranges to transforms. That way we can avoid storing the same value multiple times, saving a lot of memory. // Then we probably wouldn't need the BookkeepingBTreeMap either. 
pub type PinholeProjectionMap = - BookkeepingBTreeMap>; + BookkeepingBTreeMap>; #[derive(Clone, Debug, PartialEq)] pub struct TransformsForChildFrameEvents { diff --git a/crates/store/re_tf/src/transform_resolution_cache/tree_transforms_for_child_frame.rs b/crates/store/re_tf/src/transform_resolution_cache/tree_transforms_for_child_frame.rs index d1dcbe533ca2..8b318182d599 100644 --- a/crates/store/re_tf/src/transform_resolution_cache/tree_transforms_for_child_frame.rs +++ b/crates/store/re_tf/src/transform_resolution_cache/tree_transforms_for_child_frame.rs @@ -7,29 +7,23 @@ use re_chunk_store::{LatestAtQuery, MissingChunkReporter}; use re_entity_db::EntityDb; use re_log::debug_assert; use re_log_types::{EntityPath, TimeInt, TimelineName}; +use re_sdk_types::{ChunkId, RowId}; -use crate::TransformFrameIdHash; use crate::transform_queries::{ query_and_resolve_pinhole_projection_at_entity, query_and_resolve_tree_transform_at_entity, }; +use crate::{ResolvedPinholeProjection, TransformFrameIdHash, query_view_coordinates}; use super::cached_transform_value::{ CachedTransformValue, add_invalidated_entry_if_not_already_cleared, }; use super::cached_transforms_for_timeline::CachedTransformsForTimeline; use super::parent_from_child_transform::ParentFromChildTransform; -use super::resolved_pinhole_projection::ResolvedPinholeProjection; use super::transforms_for_child_frame_events::TransformsForChildFrameEvents; /// Cached transforms from a single child frame to a (potentially changing) parent frame over time. /// /// Incorporates any static transforms that may apply to this entity. -/// -/// Time points are conservative: it can happen that we generate new events (==cache slots) despite no change -/// occurring for this child frame. -/// However, we mustn't ever note down timepoints at which the given child frame is not "active" on its entity. 
-/// Doing so would mean that queries using `re_query` yield information about a _different_ child frame -/// which we then can't add to the cache entries of the current frame. #[derive(Debug)] pub struct TreeTransformsForChildFrame { // Is None if this is about static time. @@ -189,15 +183,30 @@ impl TreeTransformsForChildFrame { } /// Inserts an invalidation point for transforms. - pub fn invalidate_transform_at(&mut self, time: TimeInt) { + pub fn invalidate_transform_at(&mut self, time: TimeInt, chunk_id: ChunkId, row_id: RowId) { let events = self.events.get_mut(); - add_invalidated_entry_if_not_already_cleared(&mut events.frame_transforms, time); + add_invalidated_entry_if_not_already_cleared( + &mut events.frame_transforms, + time, + chunk_id, + row_id, + ); } /// Inserts an invalidation point for pinhole projections. - pub fn invalidate_pinhole_projection_at(&mut self, time: TimeInt) { + pub fn invalidate_pinhole_projection_at( + &mut self, + time: TimeInt, + chunk_id: ChunkId, + row_id: RowId, + ) { let events = self.events.get_mut(); - add_invalidated_entry_if_not_already_cleared(&mut events.pinhole_projections, time); + add_invalidated_entry_if_not_already_cleared( + &mut events.pinhole_projections, + time, + chunk_id, + row_id, + ); } #[inline] @@ -218,29 +227,22 @@ impl TreeTransformsForChildFrame { &query.at(), |time_of_last_update_to_this_frame, frame_transform| { // Separate check to work around borrow checker issues. - if frame_transform == &CachedTransformValue::Invalidated { + if let CachedTransformValue::Invalidated { row_id, chunk_id } = frame_transform + { let transform = query_and_resolve_tree_transform_at_entity( entity_db, missing_chunk_reporter, self.associated_entity_path(*time_of_last_update_to_this_frame), - self.child_frame, - // Do NOT use the original query time since that may give us information about a different child frame! 
- &LatestAtQuery::new( - query.timeline(), - *time_of_last_update_to_this_frame, - ), + *chunk_id, + *row_id, ); // First, we update the cache value. *frame_transform = match &transform { - Ok(transform) => CachedTransformValue::Resident(transform.clone()), - - Err(crate::transform_queries::TransformError::MissingTransform { - .. - }) => { - // This can happen if we conservatively added a timepoint before any transform event happened. - CachedTransformValue::Cleared - } + Ok(transform) => CachedTransformValue::Resident { + value: transform.clone(), + row_id: *row_id, + }, Err(err) => { // Only warn since we can still work just fine if a transform didn't work. @@ -251,9 +253,9 @@ impl TreeTransformsForChildFrame { } match frame_transform { - CachedTransformValue::Resident(transform) => Some(transform.clone()), + CachedTransformValue::Resident { value, .. } => Some(value.clone()), CachedTransformValue::Cleared => None, - CachedTransformValue::Invalidated => { + CachedTransformValue::Invalidated { .. } => { unreachable!("Just made transform cache-resident") } } @@ -279,29 +281,26 @@ impl TreeTransformsForChildFrame { .mutate_latest_at( &query.at(), |time_of_last_update_to_this_frame, pinhole_projection| { + let entity_path = + self.associated_entity_path(*time_of_last_update_to_this_frame); + // Separate check to work around borrow checker issues. - if pinhole_projection == &CachedTransformValue::Invalidated { + if let CachedTransformValue::Invalidated { row_id, chunk_id } = + pinhole_projection + { let transform = query_and_resolve_pinhole_projection_at_entity( entity_db, missing_chunk_reporter, - self.associated_entity_path(*time_of_last_update_to_this_frame), - self.child_frame, - // Do NOT use the original query time since that may give us information about a different child frame! 
- &LatestAtQuery::new( - query.timeline(), - *time_of_last_update_to_this_frame, - ), + entity_path, + *chunk_id, + *row_id, ); *pinhole_projection = match &transform { - Ok(transform) => CachedTransformValue::Resident(transform.clone()), - - Err(crate::transform_queries::TransformError::MissingTransform { - .. - }) => { - // This can happen if we conservatively added a timepoint before any transform event happened. - CachedTransformValue::Cleared - } + Ok(transform) => CachedTransformValue::Resident { + value: transform.clone(), + row_id: *row_id, + }, Err(err) => { // Only warn since we can still work just fine if a transform didn't work. @@ -312,9 +311,22 @@ impl TreeTransformsForChildFrame { } match pinhole_projection { - CachedTransformValue::Resident(transform) => Some(transform.clone()), + CachedTransformValue::Resident { value, .. } => { + Some(ResolvedPinholeProjection { + cached: value.clone(), + + // TODO(andreas): view coordinates are in a weird limbo state in more than one way. + // Not only are they only _partially_ relevant for the camera's transform (they both name axis & orient cameras), + // we also rely on them too much being latest-at driven and to make matters worse query them from two different archetypes. + view_coordinates: { + query_view_coordinates(entity_path, entity_db, query).unwrap_or( + re_sdk_types::archetypes::Pinhole::DEFAULT_CAMERA_XYZ, + ) + }, + }) + } CachedTransformValue::Cleared => None, - CachedTransformValue::Invalidated => { + CachedTransformValue::Invalidated { .. 
} => { unreachable!("Just made transform cache-resident") } } diff --git a/crates/store/re_types_core/Cargo.toml b/crates/store/re_types_core/Cargo.toml index b9e9aebb9f89..9dc897363e2b 100644 --- a/crates/store/re_types_core/Cargo.toml +++ b/crates/store/re_types_core/Cargo.toml @@ -19,12 +19,6 @@ workspace = true [package.metadata.docs.rs] all-features = true -[package.metadata.cargo-shear] -ignored = [ - "serde", # Needed to make `ComponentType` (an interned string) serializable. -] - - [features] default = [] diff --git a/crates/store/re_types_core/src/as_components.rs b/crates/store/re_types_core/src/as_components.rs index 8709cfbce254..bff1f204b2fa 100644 --- a/crates/store/re_types_core/src/as_components.rs +++ b/crates/store/re_types_core/src/as_components.rs @@ -11,22 +11,22 @@ use crate::{SerializationResult, SerializedComponentBatch}; /// [`AsComponents::as_serialized_batches`], which describes how the bundle can be interpreted /// as a set of [`SerializedComponentBatch`]es: serialized component data. /// -/// Have a look at our [Custom Data Loader] example to learn more about handwritten bundles. +/// Have a look at our [Custom Data Importer] example to learn more about handwritten bundles. /// /// [IDL definitions]: https://github.com/rerun-io/rerun/tree/latest/crates/store/re_sdk_types/definitions/rerun -/// [Custom Data Loader]: https://github.com/rerun-io/rerun/blob/latest/examples/rust/custom_data_loader +/// [Custom Data Importer]: https://github.com/rerun-io/rerun/blob/latest/examples/rust/custom_importer /// [`Component`]: [crate::Component] pub trait AsComponents { /// Exposes the object's contents as a set of [`SerializedComponentBatch`]es. /// /// This is the main mechanism for easily extending builtin archetypes or even writing /// fully custom ones. - /// Have a look at our [Custom Data Loader] example to learn more about extending archetypes. + /// Have a look at our [Custom Data Importer] example to learn more about extending archetypes. 
/// /// Implementers of [`AsComponents`] get one last chance to override the tags in the /// [`ComponentDescriptor`], see [`SerializedComponentBatch::with_descriptor_override`]. /// - /// [Custom Data Loader]: https://github.com/rerun-io/rerun/blob/latest/docs/snippets/all/tutorials/custom_data.rs + /// [Custom Data Importer]: https://github.com/rerun-io/rerun/blob/latest/docs/snippets/all/tutorials/custom_data.rs /// [`ComponentDescriptor`]: [crate::ComponentDescriptor] // // NOTE: Don't bother returning a CoW here: we need to dynamically discard optional components diff --git a/crates/store/re_types_core/src/chunk_id.rs b/crates/store/re_types_core/src/chunk_id.rs index 495f1a646e52..64dc4b7b6bea 100644 --- a/crates/store/re_types_core/src/chunk_id.rs +++ b/crates/store/re_types_core/src/chunk_id.rs @@ -105,25 +105,15 @@ impl ChunkId { /// /// Beware: wrong usage can easily lead to conflicts. /// Prefer [`ChunkId::new`] when unsure. + /// + /// Only available in debug builds (tests). Use [`ChunkId::new`] in production. + #[cfg(debug_assertions)] #[must_use] #[inline] pub fn next(&self) -> Self { Self(self.0.next()) } - /// Returns the `n`-next logical [`ChunkId`]. - /// - /// This is equivalent to calling [`ChunkId::next`] `n` times. - /// Wraps the monotonically increasing back to zero on overflow. - /// - /// Beware: wrong usage can easily lead to conflicts. - /// Prefer [`ChunkId::new`] when unsure. 
- #[must_use] - #[inline] - pub fn incremented_by(&self, n: u64) -> Self { - Self(self.0.incremented_by(n)) - } - #[inline] pub fn from_u128(id: u128) -> Self { Self(re_tuid::Tuid::from_u128(id)) diff --git a/crates/store/re_types_core/src/datatypes/time_range_boundary.rs b/crates/store/re_types_core/src/datatypes/time_range_boundary.rs index 73444385beda..94277df97567 100644 --- a/crates/store/re_types_core/src/datatypes/time_range_boundary.rs +++ b/crates/store/re_types_core/src/datatypes/time_range_boundary.rs @@ -41,7 +41,7 @@ impl crate::Loggable for TimeRangeBoundary { fn arrow_datatype() -> arrow::datatypes::DataType { use arrow::datatypes::*; DataType::Union( - UnionFields::new( + UnionFields::try_new( vec![0, 1, 2, 3], vec![ Field::new("_null_markers", DataType::Null, true), @@ -57,7 +57,8 @@ impl crate::Loggable for TimeRangeBoundary { ), Field::new("Infinite", DataType::Null, true), ], - ), + ) + .expect("UnionFields::try_new should be infallible"), UnionMode::Dense, ) } @@ -183,7 +184,7 @@ impl crate::Loggable for TimeRangeBoundary { re_log::debug_assert_eq!(field_type_ids.len(), fields.len()); re_log::debug_assert_eq!(fields.len(), children.len()); as_array_ref(UnionArray::try_new( - UnionFields::new(field_type_ids, fields), + UnionFields::try_new(field_type_ids, fields)?, ScalarBuffer::from(type_ids), Some(offsets), children, diff --git a/crates/store/re_types_core/src/reflection.rs b/crates/store/re_types_core/src/reflection.rs index 739742e8a561..7476cd830f0a 100644 --- a/crates/store/re_types_core/src/reflection.rs +++ b/crates/store/re_types_core/src/reflection.rs @@ -11,11 +11,26 @@ use crate::{ArchetypeName, ComponentDescriptor, ComponentIdentifier, ComponentTy pub trait Enum: Sized + Copy + Clone + std::hash::Hash + PartialEq + Eq + std::fmt::Display + 'static { + /// The underlying integer type used to represent this enum (e.g. `u8`, `u32`). + type Repr: Copy; + /// All variants, in the order they appear in the enum. 
fn variants() -> &'static [Self]; /// Markdown docstring for the given enum variant. fn docstring_md(self) -> &'static str; + + /// Create from the underlying integer repr, returning `None` if + /// the value does not match any known variant. + fn try_from_integer(value: Self::Repr) -> Option; + + /// Convert a slice of repr integers to an iterator of optional enum values. + /// + /// Values that don't correspond to a known variant yield `None`. + #[inline] + fn from_integer_slice(slice: &[Self::Repr]) -> impl Iterator> + '_ { + slice.iter().map(|&v| Self::try_from_integer(v)) + } } /// Runtime reflection about components and archetypes. @@ -383,7 +398,7 @@ bitflags::bitflags! { /// The field should be editable through the UI. /// - /// By default, required components are non-editable and all other components are editable. + /// By default all components are editable. const UI_EDITABLE = 1 << 1; } } diff --git a/crates/store/re_types_core/src/result.rs b/crates/store/re_types_core/src/result.rs index ddfc90d6d914..5b3f87294ccc 100644 --- a/crates/store/re_types_core/src/result.rs +++ b/crates/store/re_types_core/src/result.rs @@ -2,7 +2,7 @@ use std::any; use std::fmt::Display; use std::ops::Deref; -use re_arrow_util::DisplayDataType; +use arrow::datatypes::DataType; // --- @@ -145,7 +145,7 @@ pub enum DeserializationError { #[error("Expected field {field_name:?} to be present in {datatype}")] MissingStructField { - datatype: DisplayDataType, + datatype: DataType, field_name: String, backtrace: Box<_Backtrace>, }, @@ -163,7 +163,7 @@ pub enum DeserializationError { #[error("Expected union arm {arm_name:?} (#{arm_index}) to be present in {datatype}")] MissingUnionArm { - datatype: DisplayDataType, + datatype: DataType, arm_name: String, arm_index: usize, backtrace: Box<_Backtrace>, @@ -171,8 +171,8 @@ pub enum DeserializationError { #[error("Expected {expected} but found {got} instead")] DatatypeMismatch { - expected: DisplayDataType, - got: DisplayDataType, + 
expected: DataType, + got: DataType, backtrace: Box<_Backtrace>, }, @@ -239,7 +239,7 @@ impl DeserializationError { field_name: impl AsRef, ) -> Self { Self::MissingStructField { - datatype: datatype.into().into(), + datatype: datatype.into(), field_name: field_name.as_ref().into(), backtrace: Box::new(std::backtrace::Backtrace::capture()), } @@ -268,7 +268,7 @@ impl DeserializationError { arm_index: usize, ) -> Self { Self::MissingUnionArm { - datatype: datatype.into().into(), + datatype: datatype.into(), arm_name: arm_name.as_ref().into(), arm_index, backtrace: Box::new(std::backtrace::Backtrace::capture()), @@ -281,8 +281,8 @@ impl DeserializationError { got: impl Into, ) -> Self { Self::DatatypeMismatch { - expected: expected.into().into(), - got: got.into().into(), + expected: expected.into(), + got: got.into(), backtrace: Box::new(std::backtrace::Backtrace::capture()), } } diff --git a/crates/store/re_types_core/src/row_id.rs b/crates/store/re_types_core/src/row_id.rs index ab0972b8c179..5ee6551bf424 100644 --- a/crates/store/re_types_core/src/row_id.rs +++ b/crates/store/re_types_core/src/row_id.rs @@ -13,13 +13,15 @@ use crate::Loggable as _; /// /// ### Uniqueness /// -/// Duplicated [`RowId`]s within a single recording is considered undefined behavior. +/// [`RowId::new`] generates fresh monotonic IDs, so each logged row normally has a unique +/// [`RowId`]. Multiple chunks may share a [`RowId`] though, and queries handle that correctly. /// -/// While it is benign in most cases, care has to be taken when manually crafting [`RowId`]s. -/// Ideally: don't do so and stick to [`RowId::new`] instead to avoid bad surprises. +/// But there are secondary caches that use row id in a way that requires uniqueness for a +/// specific component. /// -/// This makes it easy to build and maintain secondary indices around [`RowId`]s with few to no -/// extraneous state tracking. 
+/// ### Immutability +/// +/// Because of secondary caches, a row id & component pair should never be mutated. /// /// ### Query /// diff --git a/crates/store/re_uri/Cargo.toml b/crates/store/re_uri/Cargo.toml index 89601d568f96..1fcb75f144aa 100644 --- a/crates/store/re_uri/Cargo.toml +++ b/crates/store/re_uri/Cargo.toml @@ -20,6 +20,7 @@ re_log_types = { workspace = true, features = ["serde"] } re_tuid.workspace = true # External +percent-encoding.workspace = true serde.workspace = true static_assertions.workspace = true thiserror.workspace = true diff --git a/crates/store/re_uri/src/dataset_hierarchy.rs b/crates/store/re_uri/src/dataset_hierarchy.rs new file mode 100644 index 000000000000..cbf4bc9a7dd7 --- /dev/null +++ b/crates/store/re_uri/src/dataset_hierarchy.rs @@ -0,0 +1,56 @@ +/// Separator used in folder path prefixes and in dataset names to denote hierarchy +/// levels (e.g. `"project.subdir.leaf"`). +pub const DATASET_HIERARCHY_SEPARATOR: char = '.'; + +/// Split an entry name into hierarchy path segments. +/// +/// Trailing separators are part of the leaf name, not hierarchy delimiters: +/// `"a.b."` becomes `["a", "b."]`, and `"a."` becomes `["a."]`. +pub fn split_dataset_hierarchy_path(path: &str) -> impl Iterator { + let hierarchy_path = path.trim_end_matches(DATASET_HIERARCHY_SEPARATOR); + + let (parents, leaf) = if let Some((parents, _leaf_without_trailing_separators)) = + hierarchy_path.rsplit_once(DATASET_HIERARCHY_SEPARATOR) + { + let leaf_start = parents.len() + DATASET_HIERARCHY_SEPARATOR.len_utf8(); + (Some(parents), &path[leaf_start..]) + } else { + (None, path) + }; + + parents + .into_iter() + .flat_map(|parents| { + parents + .split(DATASET_HIERARCHY_SEPARATOR) + .filter(|s| !s.is_empty()) + }) + .chain(std::iter::once(leaf)) +} + +/// Returns the leaf segment of an entry name using [`split_dataset_hierarchy_path`] semantics. 
+pub fn dataset_hierarchy_leaf_name(path: &str) -> &str { + split_dataset_hierarchy_path(path).last().unwrap_or(path) +} + +#[cfg(test)] +mod tests { + use super::{dataset_hierarchy_leaf_name, split_dataset_hierarchy_path}; + + #[test] + fn split_dataset_hierarchy_path_keeps_trailing_dots_in_leaf() { + let split = |path| split_dataset_hierarchy_path(path).collect::>(); + assert_eq!(split("a.b.c"), vec!["a", "b", "c"]); + assert_eq!(split("a.b."), vec!["a", "b."]); + assert_eq!(split("a."), vec!["a."]); + assert_eq!(split("a.b.."), vec!["a", "b.."]); + } + + #[test] + fn dataset_hierarchy_leaf_name_keeps_trailing_dots() { + assert_eq!(dataset_hierarchy_leaf_name("a.b.c"), "c"); + assert_eq!(dataset_hierarchy_leaf_name("a.b."), "b."); + assert_eq!(dataset_hierarchy_leaf_name("a."), "a."); + assert_eq!(dataset_hierarchy_leaf_name("a.b.."), "b.."); + } +} diff --git a/crates/store/re_uri/src/endpoints/folder.rs b/crates/store/re_uri/src/endpoints/folder.rs new file mode 100644 index 000000000000..d45754d5c664 --- /dev/null +++ b/crates/store/re_uri/src/endpoints/folder.rs @@ -0,0 +1,71 @@ +use crate::{Error, Origin, RedapUri}; + +/// `scheme://hostname:port/folder/` +/// +/// `path` is a dataset-name prefix using the dataset hierarchy separator (`.`). +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct FolderUri { + pub origin: Origin, + pub path: String, +} + +impl std::fmt::Display for FolderUri { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { origin, path } = self; + + // Use `url::Url` to handle percent-encoding of the path segment, + // so unusual characters in dataset names round-trip safely. + // We could use `percent-encoding` directly, but then we have to hardcode the set of allowed characters ourselves. 
+ let mut tmp = url::Url::parse("http://x/").expect("static URL is valid"); + tmp.path_segments_mut() + .expect("absolute URL has a path") + .clear() + .push("folder") + .push(path); + let encoded_path = tmp.path(); // e.g. "/folder/perception.detection" + + write!(f, "{origin}{encoded_path}") + } +} + +impl FolderUri { + pub fn new(origin: Origin, path: impl Into) -> Self { + Self { + origin, + path: path.into(), + } + } +} + +impl std::str::FromStr for FolderUri { + type Err = Error; + + fn from_str(s: &str) -> Result { + if let RedapUri::Folder(uri) = RedapUri::from_str(s)? { + Ok(uri) + } else { + Err(Error::UnexpectedUri(s.to_owned())) + } + } +} + +// Serialize as string: +impl serde::Serialize for FolderUri { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.to_string().serialize(serializer) + } +} + +impl<'de> serde::Deserialize<'de> for FolderUri { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + String::deserialize(deserializer)? + .parse::() + .map_err(|err| serde::de::Error::custom(err.to_string())) + } +} diff --git a/crates/store/re_uri/src/endpoints/mod.rs b/crates/store/re_uri/src/endpoints/mod.rs index daaf99c05bfc..308c17131894 100644 --- a/crates/store/re_uri/src/endpoints/mod.rs +++ b/crates/store/re_uri/src/endpoints/mod.rs @@ -1,4 +1,5 @@ pub mod catalog; pub mod dataset; pub mod entry; +pub mod folder; pub mod proxy; diff --git a/crates/store/re_uri/src/lib.rs b/crates/store/re_uri/src/lib.rs index ab6c6f5692b1..20d7ebdd8844 100644 --- a/crates/store/re_uri/src/lib.rs +++ b/crates/store/re_uri/src/lib.rs @@ -27,12 +27,16 @@ //! //! // Links to recording on the Data Platform (optionally with timestamp). //! "rerun://127.0.0.1:1234/dataset/1830B33B45B963E7774455beb91701ae/data?segment_id=sid&time_range=timeline@1.23s..72s", +//! +//! // Links to a folder (dataset-name prefix) within the catalog. +//! "rerun://rerun.io/folder/perception.detection", //! ] { //! 
assert!(uri.parse::().is_ok()); //! } //! //! ``` +mod dataset_hierarchy; mod endpoints; mod error; mod fragment; @@ -41,9 +45,13 @@ mod redap_uri; mod scheme; mod time_selection; +pub use self::dataset_hierarchy::{ + DATASET_HIERARCHY_SEPARATOR, dataset_hierarchy_leaf_name, split_dataset_hierarchy_path, +}; pub use self::endpoints::catalog::CatalogUri; pub use self::endpoints::dataset::DatasetSegmentUri; pub use self::endpoints::entry::EntryUri; +pub use self::endpoints::folder::FolderUri; pub use self::endpoints::proxy::ProxyUri; pub use self::error::Error; pub use self::fragment::Fragment; diff --git a/crates/store/re_uri/src/origin.rs b/crates/store/re_uri/src/origin.rs index 80b2a896519f..04c61ea0fcbb 100644 --- a/crates/store/re_uri/src/origin.rs +++ b/crates/store/re_uri/src/origin.rs @@ -60,14 +60,21 @@ impl Origin { input: &str, default_localhost_port: Option, ) -> Result<(Self, url::Url), Error> { - let (scheme, rewritten) = if !input.contains("://") - && (input.contains("localhost") || input.contains("127.0.0.1")) - { - // Assume `rerun+http://`, because that is the default for localhost - (Scheme::RerunHttp, format!("http://{input}")) - } else { + let has_scheme = input.contains("://"); + let (scheme, rewritten) = if has_scheme { let scheme: Scheme = input.parse()?; (scheme, scheme.canonical_url(input)) + } else { + // No scheme - make a guess: + if input.contains("localhost") || input.contains("127.0.0.1") { + // Assume `rerun+http://`, because that is the default for localhost + (Scheme::RerunHttp, format!("http://{input}")) + } else if input.contains("rerun.io") { + // Default to `rerun://` (gRPC over TLS) + (Scheme::RerunHttps, format!("https://{input}")) + } else { + return Err(Error::InvalidScheme); + } }; // We have to first rewrite the endpoint, because `Url` does not allow @@ -163,16 +170,34 @@ fn is_host_localhost(host: &url::Host) -> bool { } } -#[test] -fn test_origin_format() { - assert_eq!( - 
Origin::from_scheme_and_socket_addr(Scheme::Rerun, "192.168.0.2:1234".parse().unwrap()) +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_origin_format() { + assert_eq!( + Origin::from_scheme_and_socket_addr( + Scheme::RerunHttps, + "192.168.0.2:1234".parse().unwrap() + ) .to_string(), - "rerun://192.168.0.2:1234" - ); - assert_eq!( - Origin::from_scheme_and_socket_addr(Scheme::Rerun, "0.0.0.0:1234".parse().unwrap()) + "rerun://192.168.0.2:1234" + ); + assert_eq!( + Origin::from_scheme_and_socket_addr( + Scheme::RerunHttps, + "0.0.0.0:1234".parse().unwrap() + ) .to_string(), - "rerun://127.0.0.1:1234" - ); + "rerun://127.0.0.1:1234" + ); + } + + #[test] + fn test_rerun_alias() { + let https = "rerun+https://some.url.io:443".parse::().unwrap(); + let rerun = "rerun://some.url.io:443".parse::().unwrap(); + assert_eq!(https, rerun); + } } diff --git a/crates/store/re_uri/src/redap_uri.rs b/crates/store/re_uri/src/redap_uri.rs index 6334ce140c58..2c2aa638506a 100644 --- a/crates/store/re_uri/src/redap_uri.rs +++ b/crates/store/re_uri/src/redap_uri.rs @@ -2,7 +2,7 @@ use re_log_types::StoreId; use crate::{ CatalogUri, DEFAULT_PROXY_PORT, DEFAULT_REDAP_PORT, DatasetSegmentUri, EntryUri, Error, - Fragment, Origin, ProxyUri, + FolderUri, Fragment, Origin, ProxyUri, }; /// Parsed from `rerun://addr:port/recording/12345` or `rerun://addr:port/catalog` @@ -14,6 +14,9 @@ pub enum RedapUri { /// `/entry` Entry(EntryUri), + /// `/folder/` — a dataset-name prefix grouping. + Folder(FolderUri), + /// `/dataset` DatasetData(DatasetSegmentUri), @@ -26,6 +29,7 @@ impl RedapUri { match self { Self::Catalog(uri) => &uri.origin, Self::Entry(uri) => &uri.origin, + Self::Folder(uri) => &uri.origin, Self::DatasetData(uri) => &uri.origin, Self::Proxy(uri) => &uri.origin, } @@ -34,14 +38,14 @@ impl RedapUri { /// Return the parsed `#fragment` of the URI, if any. 
pub fn fragment(&self) -> Option<&Fragment> { match self { - Self::Catalog(_) | Self::Proxy(_) | Self::Entry(_) => None, + Self::Catalog(_) | Self::Proxy(_) | Self::Entry(_) | Self::Folder(_) => None, Self::DatasetData(dataset_data_endpoint) => Some(&dataset_data_endpoint.fragment), } } pub fn store_id(&self) -> Option { match self { - Self::Catalog(_) | Self::Entry(_) | Self::Proxy(_) => None, + Self::Catalog(_) | Self::Entry(_) | Self::Folder(_) | Self::Proxy(_) => None, Self::DatasetData(dataset_data_uri) => Some(dataset_data_uri.store_id()), } } @@ -52,6 +56,7 @@ impl std::fmt::Display for RedapUri { match self { Self::Catalog(uri) => write!(f, "{uri}",), Self::Entry(uri) => write!(f, "{uri}",), + Self::Folder(uri) => write!(f, "{uri}",), Self::DatasetData(uri) => write!(f, "{uri}",), Self::Proxy(uri) => write!(f, "{uri}",), } @@ -61,21 +66,27 @@ impl std::fmt::Display for RedapUri { impl std::str::FromStr for RedapUri { type Err = Error; - fn from_str(value: &str) -> Result { + fn from_str(input: &str) -> Result { + // If someone manually visits `https://rerun.io/viewer?url=rerun+https://…` then + // that `+` will be turned into a space. So let's gracefully handle that here: + let input = &input + .replace("rerun http", "rerun+http") + .replace("rerun https", "rerun+https"); + // Hacky, but I don't want to have to memorize ports. - let default_localhost_port = if value.contains("/proxy") { + let default_localhost_port = if input.contains("/proxy") { DEFAULT_PROXY_PORT } else { DEFAULT_REDAP_PORT }; - let (origin, http_url) = Origin::replace_and_parse(value, Some(default_localhost_port))?; + let (origin, http_url) = Origin::replace_and_parse(input, Some(default_localhost_port))?; // :warning: We limit the amount of segments, which might need to be // adjusted when adding additional resources. let segments = http_url .path_segments() - .ok_or_else(|| Error::UnexpectedBaseUrl(value.to_owned()))? + .ok_or_else(|| Error::UnexpectedBaseUrl(input.to_owned()))? 
.take(2) .filter(|s| !s.is_empty()) // handle trailing slashes .collect::>(); @@ -91,6 +102,16 @@ impl std::str::FromStr for RedapUri { Ok(Self::Entry(EntryUri::new(origin, entry_id))) } + ["folder", path] => { + let decoded = percent_encoding::percent_decode_str(path) + .decode_utf8() + .map_err(|_err| Error::UnexpectedUri(format!("folder/{path}")))?; + if decoded.is_empty() { + return Err(Error::UnexpectedUri("folder/".to_owned())); + } + Ok(Self::Folder(FolderUri::new(origin, decoded.into_owned()))) + } + ["dataset", dataset_id] => { let dataset_id = re_tuid::Tuid::from_str(dataset_id).map_err(Error::InvalidTuid)?; @@ -137,15 +158,14 @@ mod tests { #[test] fn scheme_conversion() { - assert_eq!(Scheme::Rerun.as_http_scheme(), "https"); - assert_eq!(Scheme::RerunHttp.as_http_scheme(), "http"); assert_eq!(Scheme::RerunHttps.as_http_scheme(), "https"); + assert_eq!(Scheme::RerunHttp.as_http_scheme(), "http"); } #[test] fn origin_conversion() { let origin = crate::Origin { - scheme: Scheme::Rerun, + scheme: Scheme::RerunHttps, host: url::Host::Ipv4(Ipv4Addr::LOCALHOST), port: 1234, }; @@ -175,7 +195,7 @@ mod tests { panic!("Expected recording"); }; - assert_eq!(origin.scheme, Scheme::Rerun); + assert_eq!(origin.scheme, Scheme::RerunHttps); assert_eq!(origin.host, url::Host::::Ipv4(Ipv4Addr::LOCALHOST)); assert_eq!(origin.port, 1234); assert_eq!( @@ -200,7 +220,7 @@ mod tests { panic!("Expected recording"); }; - assert_eq!(origin.scheme, Scheme::Rerun); + assert_eq!(origin.scheme, Scheme::RerunHttps); assert_eq!(origin.host, url::Host::::Ipv4(Ipv4Addr::LOCALHOST)); assert_eq!(origin.port, 1234); assert_eq!( @@ -250,7 +270,7 @@ mod tests { panic!("Expected recording"); }; - assert_eq!(origin.scheme, Scheme::Rerun); + assert_eq!(origin.scheme, Scheme::RerunHttps); assert_eq!(origin.host, url::Host::::Ipv4(Ipv4Addr::LOCALHOST)); assert_eq!(origin.port, 1234); assert_eq!( @@ -286,7 +306,7 @@ mod tests { panic!("Expected recording"); }; - assert_eq!(origin.scheme, 
Scheme::Rerun); + assert_eq!(origin.scheme, Scheme::RerunHttps); assert_eq!(origin.host, url::Host::::Ipv4(Ipv4Addr::LOCALHOST)); assert_eq!(origin.port, 1234); assert_eq!( @@ -380,7 +400,7 @@ mod tests { let expected = RedapUri::Proxy(ProxyUri { origin: Origin { - scheme: Scheme::Rerun, + scheme: Scheme::RerunHttps, host: url::Host::Domain("localhost".to_owned()), port: 51234, }, @@ -394,6 +414,22 @@ mod tests { assert_eq!(address.unwrap(), expected); } + #[test] + fn test_proxy_endpoint_with_space() { + let url = "rerun http://127.0.0.1:9876/proxy"; + let address: Result = url.parse(); + + let expected = RedapUri::Proxy(ProxyUri { + origin: Origin { + scheme: Scheme::RerunHttp, + host: url::Host::Ipv4(Ipv4Addr::LOCALHOST), + port: 9876, + }, + }); + + assert_eq!(address.unwrap(), expected); + } + #[test] fn test_parsing() { let test_cases = [ @@ -401,7 +437,7 @@ mod tests { "rerun://localhost/catalog", RedapUri::Catalog(CatalogUri { origin: Origin { - scheme: Scheme::Rerun, + scheme: Scheme::RerunHttps, host: url::Host::Domain("localhost".to_owned()), port: DEFAULT_REDAP_PORT, }, @@ -461,7 +497,7 @@ mod tests { "rerun://example.com", RedapUri::Catalog(CatalogUri { origin: Origin { - scheme: Scheme::Rerun, + scheme: Scheme::RerunHttps, host: url::Host::Domain("example.com".to_owned()), port: 443, }, @@ -471,7 +507,7 @@ mod tests { "rerun://example.com:420/catalog", RedapUri::Catalog(CatalogUri { origin: Origin { - scheme: Scheme::Rerun, + scheme: Scheme::RerunHttps, host: url::Host::Domain("example.com".to_owned()), port: 420, }, @@ -496,7 +532,7 @@ mod tests { let expected = RedapUri::Catalog(CatalogUri { origin: Origin { - scheme: Scheme::Rerun, + scheme: Scheme::RerunHttps, host: url::Host::Domain("localhost".to_owned()), port: 51234, }, @@ -516,7 +552,7 @@ mod tests { let expected = RedapUri::Catalog(CatalogUri { origin: Origin { - scheme: Scheme::Rerun, + scheme: Scheme::RerunHttps, host: url::Host::Domain("localhost".to_owned()), port: 123, }, @@ -524,4 
+560,44 @@ mod tests { assert_eq!(url.parse::().unwrap(), expected); } + + #[test] + fn test_folder_endpoint_roundtrip() { + let url = "rerun://localhost:51234/folder/perception.detection"; + let parsed: RedapUri = url.parse().unwrap(); + + let RedapUri::Folder(folder_uri) = &parsed else { + panic!("expected Folder variant, got {parsed:?}"); + }; + assert_eq!(folder_uri.path, "perception.detection"); + assert_eq!(folder_uri.origin.host.to_string(), "localhost"); + assert_eq!(folder_uri.origin.port, 51234); + + // Display → parse roundtrips back to the same URI. + let displayed = parsed.to_string(); + let reparsed: RedapUri = displayed.parse().unwrap(); + assert_eq!(parsed, reparsed); + } + + #[test] + fn test_folder_endpoint_percent_encoded() { + // Path containing a `/` must be percent-encoded as `%2F` to survive a roundtrip. + let url = "rerun://localhost:51234/folder/odd%2Fname"; + let parsed: RedapUri = url.parse().unwrap(); + + let RedapUri::Folder(folder_uri) = &parsed else { + panic!("expected Folder variant, got {parsed:?}"); + }; + assert_eq!(folder_uri.path, "odd/name"); + + let reparsed: RedapUri = parsed.to_string().parse().unwrap(); + assert_eq!(parsed, reparsed); + } + + #[test] + fn test_folder_endpoint_empty_path_rejected() { + let url = "rerun://localhost:51234/folder/"; + let address: Result = url.parse(); + assert!(address.is_err()); + } } diff --git a/crates/store/re_uri/src/scheme.rs b/crates/store/re_uri/src/scheme.rs index c1a539c79a6b..da90867a6040 100644 --- a/crates/store/re_uri/src/scheme.rs +++ b/crates/store/re_uri/src/scheme.rs @@ -5,11 +5,11 @@ use crate::Error; /// The different schemes supported by Rerun. /// /// We support `rerun`, `rerun+http`, and `rerun+https`. +/// `rerun` and `rerun+https` parses to the same thing, but we prefer to display just `rerun`. 
#[derive( Debug, PartialEq, Eq, Copy, Clone, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize, )] pub enum Scheme { - Rerun, RerunHttp, RerunHttps, } @@ -17,9 +17,8 @@ pub enum Scheme { impl std::fmt::Display for Scheme { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Rerun => write!(f, "rerun"), Self::RerunHttp => write!(f, "rerun+http"), - Self::RerunHttps => write!(f, "rerun+https"), + Self::RerunHttps => write!(f, "rerun"), } } } @@ -28,7 +27,7 @@ impl Scheme { /// Converts a [`Scheme`] to either `http` or `https`. pub(crate) fn as_http_scheme(&self) -> &str { match self { - Self::Rerun | Self::RerunHttps => "https", + Self::RerunHttps => "https", Self::RerunHttp => "http", } } @@ -36,17 +35,19 @@ impl Scheme { /// Converts a rerun url into a canonical http or https url. pub(crate) fn canonical_url(&self, url: &str) -> String { match self { - Self::Rerun => { - debug_assert!(url.starts_with("rerun://")); - url.replace("rerun://", "https://") - } Self::RerunHttp => { debug_assert!(url.starts_with("rerun+http://")); url.replace("rerun+http://", "http://") } Self::RerunHttps => { - debug_assert!(url.starts_with("rerun+https://")); - url.replace("rerun+https://", "https://") + if url.starts_with("rerun://") { + url.replace("rerun://", "https://") + } else if url.starts_with("rerun+https://") { + url.replace("rerun+https://", "https://") + } else { + debug_assert!(false, "unexpected url format: {url}"); + url.to_owned() + } } } } @@ -56,11 +57,9 @@ impl std::str::FromStr for Scheme { type Err = Error; fn from_str(url: &str) -> Result { - if url.starts_with("rerun://") { - Ok(Self::Rerun) - } else if url.starts_with("rerun+http://") { + if url.starts_with("rerun+http://") { Ok(Self::RerunHttp) - } else if url.starts_with("rerun+https://") { + } else if url.starts_with("rerun://") || url.starts_with("rerun+https://") { Ok(Self::RerunHttps) } else { Err(crate::Error::InvalidScheme) diff --git 
a/crates/top/re_sdk/Cargo.toml b/crates/top/re_sdk/Cargo.toml index 75102ee1dd48..91c13fb11cd5 100644 --- a/crates/top/re_sdk/Cargo.toml +++ b/crates/top/re_sdk/Cargo.toml @@ -24,11 +24,11 @@ all-features = true [features] default = [] -## Support for using Rerun's data-loaders directly from the SDK. +## Support for using Rerun's importers directly from the SDK. ## -## See our `log_file` example and +## See our `log_file` example and ## for more information. -data_loaders = ["dep:re_data_loader", "dep:re_log_channel"] +importers = ["dep:re_importer", "dep:re_log_channel"] ## Support serving a web viewer over HTTP. ## @@ -44,7 +44,7 @@ web_viewer = ["dep:re_log_channel", "dep:re_web_viewer_server", "dep:tokio", "de server = ["dep:re_grpc_server", "dep:re_memory", "dep:re_log_channel", "dep:tokio"] [dependencies] -re_arrow_combinators.workspace = true +re_lenses_core.workspace = true re_build_info.workspace = true re_byte_size.workspace = true re_chunk.workspace = true @@ -60,7 +60,6 @@ re_tracing.workspace = true re_uri.workspace = true ahash.workspace = true -arrow.workspace = true const_format.workspace = true crossbeam.workspace = true document-features.workspace = true @@ -73,7 +72,7 @@ uuid = { workspace = true, features = ["v4"] } # Optional dependencies -re_data_loader = { workspace = true, optional = true } +re_importer = { workspace = true, optional = true } re_grpc_server = { workspace = true, optional = true } re_log_channel = { workspace = true, optional = true } re_memory = { workspace = true, optional = true } diff --git a/crates/top/re_sdk/src/binary_stream_sink.rs b/crates/top/re_sdk/src/binary_stream_sink.rs index 71bc18082c3a..ec2ba23a4f3e 100644 --- a/crates/top/re_sdk/src/binary_stream_sink.rs +++ b/crates/top/re_sdk/src/binary_stream_sink.rs @@ -92,6 +92,13 @@ impl BinaryStreamSink { storage, ) } + + /// Create a [`BinaryStreamSink`] that shares the storage's buffer. 
+ pub fn with_shared_storage(storage: &BinaryStreamStorage) -> Self { + Self { + buffer: storage.inner.clone(), + } + } } impl LogSink for BinaryStreamSink { diff --git a/crates/top/re_sdk/src/lenses/mod.rs b/crates/top/re_sdk/src/lenses/mod.rs index 8f795f379cc5..bd46f4ac5c06 100644 --- a/crates/top/re_sdk/src/lenses/mod.rs +++ b/crates/top/re_sdk/src/lenses/mod.rs @@ -1,6 +1,5 @@ //! Lenses allow you to extract, transform, and restructure component data. They -//! are applied to chunks that match the specified entity path filter and contain -//! the target component. +//! are applied to chunks that contain the target component. //! //! See [`crate::lenses::Lens`] for more details and assumptions. One way to make use of lenses is //! by using the [`crate::lenses::LensesSink`]. @@ -10,11 +9,11 @@ mod sink; // Re-exports from re_lenses. // We should be careful not to expose too much implementation details here. pub use re_lenses::{ - ColumnsBuilder, Lens, LensBuilder, LensError, Lenses, Op, OpError, OutputMode, PartialChunk, - ScatterColumnsBuilder, StaticColumnsBuilder, + ChunkExt, Lens, LensBuilder, LensBuilderError, LensRuntimeError, Lenses, OutputBuilder, + OutputMode, PartialChunk, op, }; -pub use re_arrow_combinators::Selector; +pub use re_lenses_core::Selector; // We keep the sink in re_sdk since it depends on LogSink. pub use self::sink::LensesSink; diff --git a/crates/top/re_sdk/src/lenses/sink.rs b/crates/top/re_sdk/src/lenses/sink.rs index 98cd016669a4..bae3db3dad5a 100644 --- a/crates/top/re_sdk/src/lenses/sink.rs +++ b/crates/top/re_sdk/src/lenses/sink.rs @@ -1,12 +1,12 @@ use re_chunk::Chunk; -use re_lenses::{Lens, Lenses, OutputMode}; +use re_lenses::Lenses; use re_log_types::{LogMsg, StoreId}; use crate::sink::LogSink; /// A sink which can transform a [`LogMsg`] and forward the result to an underlying backing [`LogSink`]. /// -/// The sink will only forward components that are matched by a lens specified via [`Self::with_lens`]. 
+/// The sink will only forward components that are matched by the provided [`Lenses`]. pub struct LensesSink { sink: S, lenses: Lenses, @@ -14,34 +14,18 @@ pub struct LensesSink { } impl LensesSink { - /// Creates a new sink without any lenses attached. - /// - /// Use [`Self::with_lens`] to add an additional lens to this sink. + /// Creates a new sink with the given lenses. /// /// By default, the sink will do its best effort to produce chunks despite /// of errors in Lenses that it might encounter. - pub fn new(sink: S) -> Self { + pub fn new(sink: S, lenses: Lenses) -> Self { Self { sink, - lenses: Lenses::new(OutputMode::DropUnmatched), + lenses, strict: false, } } - /// Adds a [`Lens`] to this sink. - pub fn with_lens(mut self, lens: Lens) -> Self { - self.lenses.add_lens(lens); - self - } - - /// Configure how to handle matched and unmatched data. - /// - /// See [`OutputMode`] for more details. - pub fn output_mode(mut self, mode: OutputMode) -> Self { - self.lenses.set_output_mode(mode); - self - } - /// When `strict` is `true` Lenses that encounter an error will not emit partial chunks. 
pub fn strict(mut self, strict: bool) -> Self { self.strict = strict; diff --git a/crates/top/re_sdk/src/lib.rs b/crates/top/re_sdk/src/lib.rs index 4d7002c44da2..49b4144eea33 100644 --- a/crates/top/re_sdk/src/lib.rs +++ b/crates/top/re_sdk/src/lib.rs @@ -117,8 +117,25 @@ pub use time::{TimeCell, TimePoint, Timeline}; pub mod lenses; pub use re_byte_size::SizeBytes; -#[cfg(feature = "data_loaders")] -pub use re_data_loader::{DataLoader, DataLoaderError, DataLoaderSettings, LoadedData}; +#[cfg(feature = "importers")] +pub use re_importer::{ImportedData, Importer, ImporterError, ImporterSettings}; + +#[cfg(feature = "importers")] +#[deprecated(since = "0.32.0", note = "Renamed to `Importer`.")] +#[doc(hidden)] +pub use re_importer::Importer as DataLoader; +#[cfg(feature = "importers")] +#[deprecated(since = "0.32.0", note = "Renamed to `ImporterError`.")] +#[doc(hidden)] +pub type DataLoaderError = re_importer::ImporterError; +#[cfg(feature = "importers")] +#[deprecated(since = "0.32.0", note = "Renamed to `ImporterSettings`.")] +#[doc(hidden)] +pub type DataLoaderSettings = re_importer::ImporterSettings; +#[cfg(feature = "importers")] +#[deprecated(since = "0.32.0", note = "Renamed to `ImportedData`.")] +#[doc(hidden)] +pub type LoadedData = re_importer::ImportedData; /// Methods for spawning the web viewer and streaming the SDK log stream to it. #[cfg(feature = "web_viewer")] @@ -134,10 +151,10 @@ pub use re_grpc_server::{MemoryLimit, PlaybackBehavior, ServerOptions}; /// Re-exports of other crates. 
pub mod external { pub use re_chunk::external::*; - #[cfg(feature = "data_loaders")] - pub use re_data_loader::{self, external::*}; #[cfg(feature = "server")] pub use re_grpc_server; + #[cfg(feature = "importers")] + pub use re_importer::{self, external::*}; pub use re_log::external::*; pub use re_log_types::external::*; pub use {re_grpc_client, re_log, re_log_encoding, re_log_types, re_uri}; diff --git a/crates/top/re_sdk/src/log_sink.rs b/crates/top/re_sdk/src/log_sink.rs index 0586cc285ede..5973eb90007f 100644 --- a/crates/top/re_sdk/src/log_sink.rs +++ b/crates/top/re_sdk/src/log_sink.rs @@ -239,6 +239,10 @@ impl private::Sealed for crate::sink::GrpcSink {} impl MultiSinkCompatible for crate::sink::GrpcSink {} +impl private::Sealed for crate::binary_stream_sink::BinaryStreamSink {} + +impl MultiSinkCompatible for crate::binary_stream_sink::BinaryStreamSink {} + // ---------------------------------------------------------------------------- /// Store log messages in memory until you call [`LogSink::drain_backlog`]. @@ -433,9 +437,7 @@ impl MemorySinkStorage { let mut inner = sink.inner.lock(); inner.has_been_used = true; - for message in &inner.msgs { - encoder.append(message)?; - } + encoder.extend(inner.msgs.iter().map(Ok))?; } encoder.finish()?; diff --git a/crates/top/re_sdk/src/recording_stream.rs b/crates/top/re_sdk/src/recording_stream.rs index 2477305bed15..452d617724f3 100644 --- a/crates/top/re_sdk/src/recording_stream.rs +++ b/crates/top/re_sdk/src/recording_stream.rs @@ -82,10 +82,10 @@ pub enum RecordingStreamError { #[error(transparent)] WebSink(#[from] crate::web_viewer::WebViewerSinkError), - /// An error occurred while attempting to use a [`re_data_loader::DataLoader`]. - #[cfg(feature = "data_loaders")] + /// An error occurred while attempting to use a [`re_importer::Importer`]. 
+ #[cfg(feature = "importers")] #[error(transparent)] - DataLoaderError(#[from] re_data_loader::DataLoaderError), + ImporterError(#[from] re_importer::ImporterError), /// Invalid gRPC server address. #[error(transparent)] @@ -579,18 +579,21 @@ impl RecordingStreamBuilder { return Ok(RecordingStream::disabled()); } - let url = format!("rerun+http://{}/proxy", opts.connect_addr()); - // NOTE: If `_RERUN_TEST_FORCE_SAVE` is set, all recording streams will write to disk no matter // what, thus spawning a viewer is pointless (and probably not intended). if forced_sink_path().is_some() { + let url = format!("rerun+http://{}/proxy", opts.connect_addr()); return self.connect_grpc_opts(url); } - // Spawn viewer and connect normally - crate::spawn(opts)?; - - self.connect_grpc_opts(url) + // Spawn viewer and connect normally. + // spawn() returns the actual port used, which may differ from opts.port when --new picks a free port. + let actual_port = crate::spawn(opts)?; + let addr = std::net::SocketAddr::new( + std::net::IpAddr::V4(std::net::Ipv4Addr::LOCALHOST), + actual_port, + ); + self.connect_grpc_opts(format!("rerun+http://{addr}/proxy")) } /// Returns whether or not logging is enabled, a [`StoreInfo`], the associated batcher @@ -766,18 +769,18 @@ impl Drop for RecordingStream { #[inline] fn drop(&mut self) { // If this holds the last strong handle to the recording, make sure that all pending - // `DataLoader` threads that were started from the SDK actually run to completion (they + // importer threads that were started from the SDK actually run to completion (they // all hold a weak handle to this very recording!). 
// // NOTE: It's very important to do so from the `Drop` implementation of `RecordingStream` - // itself, because the dataloader threads -- by definition -- will have to send data into + // itself, because the importer threads -- by definition -- will have to send data into // this very recording, therefore we must make sure that at least one strong handle still lives // on until they are all finished. if let Either::Left(strong) = &mut self.inner && Arc::strong_count(strong) == 1 { - // Keep the recording alive until all dataloaders are finished. - self.with(|inner| inner.wait_for_dataloaders()); + // Keep the recording alive until all importers are finished. + self.with(|inner| inner.wait_for_importers()); } } } @@ -798,11 +801,11 @@ struct RecordingStreamInner { /// It true, any new sink will update the batcher's configuration (as far as possible). sink_dependent_batcher_config: bool, - /// Keeps track of the top-level threads that were spawned in order to execute the `DataLoader` + /// Keeps track of the top-level threads that were spawned in order to execute the importer /// machinery in the context of this `RecordingStream`. /// /// See [`RecordingStream::log_file_from_path`] and [`RecordingStream::log_file_from_contents`]. - dataloader_handles: Mutex>>, + importer_handles: Mutex>>, pid_at_creation: u32, } @@ -812,7 +815,7 @@ impl fmt::Debug for RecordingStreamInner { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RecordingStreamInner") .field("store_id", &self.store_info.store_id) - .finish() + .finish_non_exhaustive() } } @@ -825,7 +828,7 @@ impl Drop for RecordingStreamInner { return; } - self.wait_for_dataloaders(); + self.wait_for_importers(); // NOTE: The command channel is private, if we're here, nothing is currently capable of // sending data down the pipeline. 
@@ -925,7 +928,7 @@ impl RecordingStreamInner { batcher, batcher_to_sink_handle: Some(batcher_to_sink_handle), sink_dependent_batcher_config, - dataloader_handles: Mutex::new(Vec::new()), + importer_handles: Mutex::new(Vec::new()), pid_at_creation: std::process::id(), }) } @@ -935,14 +938,14 @@ impl RecordingStreamInner { self.pid_at_creation != std::process::id() } - /// Make sure all pending top-level `DataLoader` threads that were started from the SDK run to completion. + /// Make sure all pending top-level importer threads that were started from the SDK run to completion. // // TODO(cmc): At some point we might want to make it configurable, though I cannot really // think of a use case where you'd want to drop those threads immediately upon // disconnection. - fn wait_for_dataloaders(&self) { - let dataloader_handles = std::mem::take(&mut *self.dataloader_handles.lock()); - for handle in dataloader_handles { + fn wait_for_importers(&self) { + let importer_handles = std::mem::take(&mut *self.importer_handles.lock()); + for handle in importer_handles { handle.join().ok(); } } @@ -1074,7 +1077,7 @@ impl RecordingStream { /// The entity path can either be a string /// (with special characters escaped, split on unescaped slashes) /// or an [`EntityPath`] constructed with [`crate::entity_path`]. - /// See for more on entity paths. + /// See for more on entity paths. /// /// See also: [`Self::log_static`] for logging static data. /// @@ -1205,7 +1208,7 @@ impl RecordingStream { /// The entity path can either be a string /// (with special characters escaped, split on unescaped slashes) /// or an [`EntityPath`] constructed with [`crate::entity_path`]. - /// See for more on entity paths. + /// See for more on entity paths. /// /// Internally, the stream will automatically micro-batch multiple log calls to optimize /// transport. 
@@ -1245,7 +1248,7 @@ impl RecordingStream { /// The entity path can either be a string /// (with special characters escaped, split on unescaped slashes) /// or an [`EntityPath`] constructed with [`crate::entity_path`]. - /// See for more on entity paths. + /// See for more on entity paths. /// /// Internally, the stream will automatically micro-batch multiple log calls to optimize /// transport. @@ -1329,15 +1332,15 @@ impl RecordingStream { Ok(()) } - /// Logs the file at the given `path` using all [`re_data_loader::DataLoader`]s available. + /// Logs the file at the given `path` using all [`re_importer::Importer`]s available. /// - /// A single `path` might be handled by more than one loader. + /// A single `path` might be handled by more than one importer. /// - /// This method blocks until either at least one [`re_data_loader::DataLoader`] starts + /// This method blocks until either at least one [`re_importer::Importer`] starts /// streaming data in or all of them fail. /// - /// See for more information. - #[cfg(feature = "data_loaders")] + /// See for more information. + #[cfg(feature = "importers")] pub fn log_file_from_path( &self, filepath: impl AsRef, @@ -1347,15 +1350,15 @@ impl RecordingStream { self.log_file(filepath, None, entity_path_prefix, static_, true) } - /// Logs the given `contents` using all [`re_data_loader::DataLoader`]s available. + /// Logs the given `contents` using all [`re_importer::Importer`]s available. /// - /// A single `path` might be handled by more than one loader. + /// A single `path` might be handled by more than one importer. /// - /// This method blocks until either at least one [`re_data_loader::DataLoader`] starts + /// This method blocks until either at least one [`re_importer::Importer`] starts /// streaming data in or all of them fail. /// - /// See for more information. - #[cfg(feature = "data_loaders")] + /// See for more information. 
+ #[cfg(feature = "importers")] pub fn log_file_from_contents( &self, filepath: impl AsRef, @@ -1366,10 +1369,10 @@ impl RecordingStream { self.log_file(filepath, Some(contents), entity_path_prefix, static_, true) } - /// If `prefer_current_recording` is set (which is always the case for now), the dataloader settings + /// If `prefer_current_recording` is set (which is always the case for now), the importer settings /// will be configured as if the current SDK recording is the currently opened recording. - /// Most dataloaders prefer logging to the currently opened recording if one is set. - #[cfg(feature = "data_loaders")] + /// Most importers prefer logging to the currently opened recording if one is set. + #[cfg(feature = "importers")] #[expect(clippy::fn_params_excessive_bools)] // private function 🤷‍♂️ fn log_file( &self, @@ -1394,7 +1397,7 @@ impl RecordingStream { follow: false, }); - let mut settings = crate::DataLoaderSettings { + let mut settings = crate::ImporterSettings { application_id: Some(store_info.application_id().clone()), recording_id: store_info.recording_id().clone(), opened_store_id: None, @@ -1417,6 +1420,8 @@ impl RecordingStream { }) .unwrap_or_default() }), + timestamp_offset_ns: None, + timeline_type: re_log_types::TimeType::TimestampNs, }; if prefer_current_recording { @@ -1424,7 +1429,7 @@ impl RecordingStream { } if let Some(contents) = contents { - re_data_loader::load_from_file_contents( + re_importer::import_from_file_contents( &settings, re_log_types::FileSource::Sdk, filepath, @@ -1432,12 +1437,7 @@ impl RecordingStream { &tx, )?; } else { - re_data_loader::load_from_path( - &settings, - re_log_types::FileSource::Sdk, - filepath, - &tx, - )?; + re_importer::import_from_path(&settings, re_log_types::FileSource::Sdk, filepath, &tx)?; } drop(tx); @@ -1473,7 +1473,7 @@ impl RecordingStream { err, })?; - self.with(|inner| inner.dataloader_handles.lock().push(handle)); + self.with(|inner| inner.importer_handles.lock().push(handle)); 
Ok(()) } @@ -1899,11 +1899,11 @@ impl RecordingStream { } let f = move |inner: &RecordingStreamInner| -> Result<(), SinkFlushError> { - // 0. Wait for all pending data loader threads to complete + // 0. Wait for all pending importer threads to complete // // This ensures that data from `log_file_from_path` and `log_file_from_contents` // is fully loaded before we flush the batcher and sink. - inner.wait_for_dataloaders(); + inner.wait_for_importers(); // 1. Synchronously flush the batcher down the chunk channel // @@ -2129,9 +2129,12 @@ impl RecordingStream { return Ok(()); } - crate::spawn(opts)?; - - self.connect_grpc_opts(format!("rerun+http://{}/proxy", opts.connect_addr()))?; + let actual_port = crate::spawn(opts)?; + let addr = std::net::SocketAddr::new( + std::net::IpAddr::V4(std::net::Ipv4Addr::LOCALHOST), + actual_port, + ); + self.connect_grpc_opts(format!("rerun+http://{addr}/proxy"))?; Ok(()) } @@ -2246,9 +2249,9 @@ impl RecordingStream { /// See [`Self::set_sink`] for more information. pub fn disconnect(&self) { let f = move |inner: &RecordingStreamInner| { - // When disconnecting, we need to make sure that pending top-level `DataLoader` threads that + // When disconnecting, we need to make sure that pending top-level importer threads that // were started from the SDK run to completion. 
- inner.wait_for_dataloaders(); + inner.wait_for_importers(); self.set_sink(Box::new(crate::sink::BufferedSink::new())); }; @@ -2310,7 +2313,7 @@ impl fmt::Debug for RecordingStream { batcher: _, batcher_to_sink_handle: _, sink_dependent_batcher_config, - dataloader_handles, + importer_handles, pid_at_creation, } = inner; @@ -2322,7 +2325,7 @@ impl fmt::Debug for RecordingStream { "sink_dependent_batcher_config", &sink_dependent_batcher_config, ) - .field("pending_dataloaders", &dataloader_handles.lock().len()) + .field("pending_importers", &importer_handles.lock().len()) .field("pid_at_creation", &pid_at_creation) .finish_non_exhaustive() }; diff --git a/crates/top/re_sdk/src/spawn.rs b/crates/top/re_sdk/src/spawn.rs index 773e51c79ef4..590ac0737d87 100644 --- a/crates/top/re_sdk/src/spawn.rs +++ b/crates/top/re_sdk/src/spawn.rs @@ -44,7 +44,7 @@ pub struct SpawnOptions { /// Defaults to `rerun`. pub executable_name: String, - /// Enforce a specific executable to use instead of searching though PATH + /// Enforce a specific executable to use instead of searching through PATH /// for [`Self::executable_name`]. /// /// Unspecified by default. @@ -56,6 +56,11 @@ pub struct SpawnOptions { /// Extra environment variables that will be passed as-is to the Rerun Viewer process. pub extra_env: Vec<(String, String)>, + /// Always start a new viewer. If the port is already in use, a free port will be picked automatically. + /// + /// Equivalent to using `--port auto` on the CLI. + pub new: bool, + /// Hide the welcome screen. 
pub hide_welcome_screen: bool, @@ -77,6 +82,7 @@ impl Default for SpawnOptions { executable_path: None, extra_args: Vec::new(), extra_env: Vec::new(), + new: false, hide_welcome_screen: false, detach_process: true, } @@ -179,7 +185,7 @@ impl std::fmt::Debug for SpawnError { /// /// This only starts a Viewer process: if you'd like to connect to it and start sending data, refer /// to [`crate::RecordingStream::connect_grpc`] or use [`crate::RecordingStream::spawn`] directly. -pub fn spawn(opts: &SpawnOptions) -> Result<(), SpawnError> { +pub fn spawn(opts: &SpawnOptions) -> Result { use std::net::TcpStream; #[cfg(target_family = "unix")] use std::os::unix::process::CommandExt as _; @@ -196,7 +202,7 @@ pub fn spawn(opts: &SpawnOptions) -> Result<(), SpawnError> { * Using `pip`: `pip3 install rerun-sdk` For more information, refer to our complete install documentation over at: - https://rerun.io/docs/getting-started/installing-viewer + https://rerun.io/docs/overview/installing-rerun/viewer "; const MSG_INSTALL_HOW_TO_VERSIONED: &str = // @@ -207,7 +213,7 @@ pub fn spawn(opts: &SpawnOptions) -> Result<(), SpawnError> { * Using `pip`: `pip3 install rerun-sdk==__VIEWER_VERSION__` For more information, refer to our complete install documentation over at: - https://rerun.io/docs/getting-started/installing-viewer + https://rerun.io/docs/overview/installing-rerun/viewer "; const MSG_VERSION_MISMATCH: &str = // @@ -219,6 +225,14 @@ pub fn spawn(opts: &SpawnOptions) -> Result<(), SpawnError> { > Rerun Viewer: v__VIEWER_VERSION__ (executable: \"__VIEWER_PATH__\") > Rerun SDK: v__SDK_VERSION__"; + if std::env::var_os("CI").is_some() { + re_log::warn!( + "Spawning a Rerun Viewer while the `CI` environment variable is set. \ + This is almost certainly unintended and will hang or fail on most CI runners. \ + Consider removing `spawn=True` from this code path." 
+ ); + } + let port = opts.port; let connect_addr = opts.connect_addr(); let memory_limit = &opts.memory_limit; @@ -226,14 +240,33 @@ pub fn spawn(opts: &SpawnOptions) -> Result<(), SpawnError> { let executable_path = opts.executable_path(); // TODO(#4019): application-level handshake - if TcpStream::connect_timeout(&connect_addr, Duration::from_secs(1)).is_ok() { + if !opts.new && TcpStream::connect_timeout(&connect_addr, Duration::from_secs(1)).is_ok() { re_log::info!( addr = %opts.listen_addr(), - "A process is already listening at this address. Assuming it's a Rerun Viewer." + "A process is already listening at this address. Assuming it's a Rerun Viewer. \ + Use `new: true` in SpawnOptions or `--port auto` on the CLI to force a new viewer." ); - return Ok(()); + return Ok(port); } + // When --new is requested and the default port is already taken, find a free one. + let port = if opts.new + && TcpStream::connect_timeout(&connect_addr, Duration::from_secs(1)).is_ok() + { + let listener = std::net::TcpListener::bind(std::net::SocketAddr::new( + std::net::IpAddr::V4(std::net::Ipv4Addr::LOCALHOST), + 0, + ))?; + let free_port = listener.local_addr()?.port(); + drop(listener); + re_log::info!( + "Default port {port} is already in use, spawning viewer on port {free_port} instead." + ); + free_port + } else { + port + }; + let map_err = |err: std::io::Error| -> SpawnError { if err.kind() == std::io::ErrorKind::NotFound { if let Some(executable_path) = opts.executable_path.as_ref() { @@ -340,17 +373,26 @@ pub fn spawn(opts: &SpawnOptions) -> Result<(), SpawnError> { // NOTE: The timeout only covers the TCP handshake: if no process is bound to that address // at all, the connection will fail immediately, irrelevant of the timeout configuration. // For that reason we use an extra loop. 
+ let bind_addr = + std::net::SocketAddr::new(std::net::IpAddr::V4(std::net::Ipv4Addr::LOCALHOST), port); + let mut bound = false; for i in 0..5 { re_log::debug!("connection attempt {}", i + 1); - if TcpStream::connect_timeout(&connect_addr, Duration::from_secs(1)).is_ok() { + if TcpStream::connect_timeout(&bind_addr, Duration::from_secs(1)).is_ok() { + bound = true; break; } std::thread::sleep(Duration::from_millis(100)); } + + re_log::debug_assert!( + bound, + "Spawned Rerun Viewer did not bind to port {port} in time" + ); } // Simply forget about the child process, we want it to outlive the parent process if needed. _ = rerun_bin; - Ok(()) + Ok(port) } diff --git a/crates/top/re_sdk/tests/lenses/operations.rs b/crates/top/re_sdk/tests/lenses/operations.rs index 0533b08e234f..a87360f4655a 100644 --- a/crates/top/re_sdk/tests/lenses/operations.rs +++ b/crates/top/re_sdk/tests/lenses/operations.rs @@ -1,12 +1,11 @@ -#![expect(clippy::cast_possible_wrap)] #![expect(clippy::unwrap_used)] use std::sync::Arc; -use arrow::array::{AsArray as _, Int32Builder, ListArray, ListBuilder, StringBuilder}; +use arrow::array::{AsArray as _, Int32Builder, ListArray, ListBuilder}; use arrow::datatypes::{DataType, Field}; use re_chunk::{ArrowArray as _, Chunk, ChunkId, TimeColumn, TimelineName}; -use re_sdk::lenses::{Lens, Lenses, Op, OutputMode}; +use re_sdk::lenses::{Lens, Lenses, OutputMode, Selector, op}; use re_sdk_types::ComponentDescriptor; use re_sdk_types::archetypes::Scalars; @@ -131,21 +130,20 @@ fn test_destructure_cast() { let original_chunk = nullability_chunk(); println!("{original_chunk}"); - let destructure = Lens::for_input_column( - re_log_types::EntityPathFilter::parse_forgiving("nullability"), - "structs", - ) - .output_columns_at("nullability/a", |out| { - out.component( - Scalars::descriptor_scalars(), - [Op::selector(".a"), Op::cast(DataType::Float64)], - ) - }) - .unwrap() - .build(); + let destructure = Lens::for_input_column("structs") + 
.output_columns_at("nullability/a", |out| { + out.component( + Scalars::descriptor_scalars(), + Selector::parse(".a")?.pipe(op::cast(DataType::Float64)), + ) + }) + .unwrap() + .build(); - let mut lenses = Lenses::new(OutputMode::DropUnmatched); - lenses.add_lens(destructure); + let lenses = Lenses::new(OutputMode::DropUnmatched).add_lens_with_filter( + re_log_types::EntityPathFilter::parse_forgiving("nullability"), + destructure, + ); let res: Vec = lenses .apply(&original_chunk) @@ -163,113 +161,17 @@ fn test_destructure() { let original_chunk = nullability_chunk(); println!("{original_chunk}"); - let destructure = Lens::for_input_column( - re_log_types::EntityPathFilter::parse_forgiving("nullability"), - "structs", - ) - .output_columns_at("nullability/b", |out| { - out.component(Scalars::descriptor_scalars(), [Op::selector(".b")]) - }) - .unwrap() - .build(); - - let mut lenses = Lenses::new(OutputMode::DropUnmatched); - lenses.add_lens(destructure); - - let res: Vec = lenses - .apply(&original_chunk) - .collect::>() - .unwrap(); - assert_eq!(res.len(), 1); - - let chunk = &res[0]; - insta::assert_snapshot!("destructure_only", format!("{chunk:-240}")); -} - -#[test] -fn test_inner_count() { - use re_sdk::lenses::OpError; - - let original_chunk = nullability_chunk(); - println!("{original_chunk}"); - - let count_fn = |list_array: &ListArray| -> Result { - let mut builder = ListBuilder::new(Int32Builder::new()); - - for maybe_array in list_array.iter() { - match maybe_array { - None => builder.append_null(), - Some(component_batch_array) => { - builder - .values() - .append_value(component_batch_array.len() as i32); - builder.append(true); - } - } - } - - Ok(builder.finish()) - }; - - let count = Lens::for_input_column( - re_log_types::EntityPathFilter::parse_forgiving("nullability"), - "strings", - ) - .output_columns(|out| { - out.component(ComponentDescriptor::partial("counts"), [Op::func(count_fn)]) - .component(ComponentDescriptor::partial("original"), []) - 
}) - .unwrap() - .build(); - - let mut lenses = Lenses::new(OutputMode::DropUnmatched); - lenses.add_lens(count); - - let res: Vec = lenses - .apply(&original_chunk) - .collect::>() - .unwrap(); - assert_eq!(res.len(), 1); - - let chunk = &res[0]; - insta::assert_snapshot!("inner_count", format!("{chunk:-240}")); -} - -#[test] -fn test_static_chunk_creation() { - let original_chunk = nullability_chunk(); - - let mut metadata_builder_a = ListBuilder::new(StringBuilder::new()); - metadata_builder_a - .values() - .append_value("static_metadata_a"); - metadata_builder_a.append(true); - - let mut metadata_builder_b = ListBuilder::new(StringBuilder::new()); - metadata_builder_b - .values() - .append_value("static_metadata_b"); - metadata_builder_b.append(true); + let destructure = Lens::for_input_column("structs") + .output_columns_at("nullability/b", |out| { + out.component(Scalars::descriptor_scalars(), Selector::parse(".b")?) + }) + .unwrap() + .build(); - let static_lens = Lens::for_input_column( + let lenses = Lenses::new(OutputMode::DropUnmatched).add_lens_with_filter( re_log_types::EntityPathFilter::parse_forgiving("nullability"), - "strings", - ) - .output_static_columns_at("nullability/static", |out| { - out.component( - ComponentDescriptor::partial("static_metadata_a"), - [Op::constant(metadata_builder_a.finish())], - ) - .component( - ComponentDescriptor::partial("static_metadata_b"), - [Op::constant(metadata_builder_b.finish())], - ) - }) - .unwrap() - .build(); - - let mut lenses = Lenses::new(OutputMode::DropUnmatched); - lenses.add_lens(static_lens); + destructure, + ); let res: Vec = lenses .apply(&original_chunk) @@ -278,7 +180,7 @@ fn test_static_chunk_creation() { assert_eq!(res.len(), 1); let chunk = &res[0]; - insta::assert_snapshot!("single_static", format!("{chunk:-240}")); + insta::assert_snapshot!("destructure_only", format!("{chunk:-240}")); } #[test] @@ -324,19 +226,21 @@ fn test_time_column_extraction() { println!("{original_chunk}"); // 
Create a lens that extracts the timestamp as a time column and keeps the original timestamp as a component - let time_lens = Lens::for_input_column( - re_log_types::EntityPathFilter::parse_forgiving("timestamped"), - "my_timestamp", - ) - .output_columns(|out| { - out.time("my_timeline", TimeType::Sequence, []) - .component(ComponentDescriptor::partial("extracted_time"), []) - }) - .unwrap() - .build(); + let time_lens = Lens::for_input_column("my_timestamp") + .output_columns(|out| { + out.time("my_timeline", TimeType::Sequence, Selector::parse(".")?)? + .component( + ComponentDescriptor::partial("extracted_time"), + Selector::parse(".")?, + ) + }) + .unwrap() + .build(); - let mut lenses = Lenses::new(OutputMode::DropUnmatched); - lenses.add_lens(time_lens); + let lenses = Lenses::new(OutputMode::DropUnmatched).add_lens_with_filter( + re_log_types::EntityPathFilter::parse_forgiving("timestamped"), + time_lens, + ); let res: Vec = lenses .apply(&original_chunk) @@ -418,16 +322,26 @@ fn create_test_struct_list() -> arrow::array::ListArray { #[test] fn test_scatter_columns() { - use re_arrow_combinators::{Selector, Transform as _}; use re_log_types::TimeType; - use re_sdk::lenses::OpError; - use std::str::FromStr as _; // Create a chunk with list of structs that should be exploded/scattered // Each element is a struct with {timestamp: i64, value: String} let struct_list = create_test_struct_list(); - let components = std::iter::once((ComponentDescriptor::partial("nested_data"), struct_list)); + // An unrelated `tag` column that should be forwarded and scattered along + // with the existing time columns. 
+ let mut tag_builder = ListBuilder::new(arrow::array::StringBuilder::new()); + for tag in ["a", "b", "c"] { + tag_builder.values().append_value(tag); + tag_builder.append(true); + } + let tag_column = tag_builder.finish(); + + let components = [ + (ComponentDescriptor::partial("nested_data"), struct_list), + (ComponentDescriptor::partial("tag"), tag_column), + ] + .into_iter(); let time_column = TimeColumn::new_sequence("tick", [1, 2, 3]); @@ -442,34 +356,24 @@ fn test_scatter_columns() { println!("Original chunk:"); println!("{original_chunk}"); - // Helper to extract value field from structs: List -> List - let extract_value = |list_array: &ListArray| -> Result { - Ok(Selector::from_str(".value")?.transform(list_array)?) - }; - - // Helper to extract timestamp field from structs: List -> List - let extract_timestamp = |list_array: &ListArray| -> Result { - Ok(Selector::from_str(".timestamp")?.transform(list_array)?) - }; - // Create a scatter lens that explodes the nested lists - let scatter_lens = Lens::for_input_column(re_log_types::EntityPathFilter::all(), "nested_data") - .output_scatter_columns_at("scatter_test/exploded", |out| { + let scatter_lens = Lens::for_input_column("nested_data") + .scatter() + .output_columns_at("scatter_test/exploded", |out| { out.component( ComponentDescriptor::partial("exploded_strings"), - [Op::func(extract_value)], - ) + Selector::parse(".value")?, + )? 
.time( "my_timestamp", TimeType::Sequence, - [Op::func(extract_timestamp)], + Selector::parse(".timestamp")?, ) }) .unwrap() .build(); - let mut lenses = Lenses::new(OutputMode::DropUnmatched); - lenses.add_lens(scatter_lens); + let lenses = Lenses::new(OutputMode::DropUnmatched).add_lens(scatter_lens); let res: Vec = lenses .apply(&original_chunk) @@ -519,10 +423,7 @@ fn test_scatter_columns() { #[test] fn test_scatter_columns_static() { - use re_arrow_combinators::{Selector, Transform as _}; use re_log_types::TimeType; - use re_sdk::lenses::OpError; - use std::str::FromStr as _; // Test scatter with no existing timelines - only exploded timeline outputs let struct_list = create_test_struct_list(); @@ -541,34 +442,24 @@ fn test_scatter_columns_static() { println!("Original chunk (no timelines):"); println!("{original_chunk}"); - // Helper to extract value field from structs: List -> List - let extract_value = |list_array: &ListArray| -> Result { - Ok(Selector::from_str(".value")?.transform(list_array)?) - }; - - // Helper to extract timestamp field from structs: List -> List - let extract_timestamp = |list_array: &ListArray| -> Result { - Ok(Selector::from_str(".timestamp")?.transform(list_array)?) - }; - // Create a scatter lens that explodes the nested lists - let scatter_lens = Lens::for_input_column(re_log_types::EntityPathFilter::all(), "nested_data") - .output_scatter_columns_at("scatter_test/exploded", |out| { + let scatter_lens = Lens::for_input_column("nested_data") + .scatter() + .output_columns_at("scatter_test/exploded", |out| { out.component( ComponentDescriptor::partial("exploded_strings"), - [Op::func(extract_value)], - ) + Selector::parse(".value")?, + )? 
.time( "my_timestamp", TimeType::Sequence, - [Op::func(extract_timestamp)], + Selector::parse(".timestamp")?, ) }) .unwrap() .build(); - let mut lenses = Lenses::new(OutputMode::DropUnmatched); - lenses.add_lens(scatter_lens); + let lenses = Lenses::new(OutputMode::DropUnmatched).add_lens(scatter_lens); let res: Vec = lenses .apply(&original_chunk) @@ -615,3 +506,62 @@ fn test_scatter_columns_static() { insta::assert_snapshot!("scatter_columns_static", format!("{chunk:-240}")); } + +#[test] +fn test_output_overwrites_same_named_component() { + // The input chunk contains a `value` column. The lens declares its own output + // component named `value`. With `DropUnmatched`, only the lens output is + // produced, so the original `value` column is simply not forwarded. + let mut value_builder = ListBuilder::new(arrow::array::StringBuilder::new()); + for v in ["x", "y"] { + value_builder.values().append_value(v); + value_builder.append(true); + } + let value_column = value_builder.finish(); + + let mut input_builder = ListBuilder::new(arrow::array::StringBuilder::new()); + for v in ["alpha", "beta"] { + input_builder.values().append_value(v); + input_builder.append(true); + } + let input_column = input_builder.finish(); + + let components = [ + (ComponentDescriptor::partial("input"), input_column), + (ComponentDescriptor::partial("value"), value_column), + ] + .into_iter(); + + let original_chunk = Chunk::from_auto_row_ids( + ChunkId::new(), + "collision".into(), + std::iter::once(( + TimelineName::new("tick"), + TimeColumn::new_sequence("tick", 0..2), + )) + .collect(), + components.collect(), + ) + .unwrap(); + + let lens = Lens::for_input_column("input") + .output_columns(|out| { + out.component(ComponentDescriptor::partial("value"), Selector::parse(".")?) 
+ }) + .unwrap() + .build(); + + let lenses = Lenses::new(OutputMode::DropUnmatched).add_lens(lens); + + let results: Vec<_> = lenses.apply(&original_chunk).collect(); + assert_eq!(results.len(), 1); + + let chunk = results.into_iter().next().unwrap().unwrap(); + let value = chunk + .components() + .get(ComponentDescriptor::partial("value").component) + .expect("`value` should be present"); + let strings = value.list_array.values().as_string::(); + assert_eq!(strings.value(0), "alpha"); + assert_eq!(strings.value(1), "beta"); +} diff --git a/crates/top/re_sdk/tests/lenses/output_mode.rs b/crates/top/re_sdk/tests/lenses/output_mode.rs index 6a0ac0111a02..0e13ac9cb573 100644 --- a/crates/top/re_sdk/tests/lenses/output_mode.rs +++ b/crates/top/re_sdk/tests/lenses/output_mode.rs @@ -2,7 +2,8 @@ use arrow::array::{ListBuilder, StringBuilder}; use re_chunk::{Chunk, ChunkId, TimeColumn, TimelineName}; -use re_sdk::lenses::{Lens, Lenses, OutputMode}; +use re_log_types::EntityPathFilter; +use re_sdk::lenses::{Lens, Lenses, OutputMode, Selector}; use re_sdk_types::ComponentDescriptor; /// Helper to create a simple chunk with string data for testing @@ -34,18 +35,18 @@ fn test_output_mode_forward_all() { let unmatched_chunk = create_test_chunk("other/entity", "other_component"); // Create a lens that only matches the first chunk - let lens = Lens::for_input_column( - re_log_types::EntityPathFilter::parse_forgiving("matched/**"), - "test_component", - ) - .output_columns_at("matched/output", |out| { - out.component(ComponentDescriptor::partial("transformed"), []) - }) - .unwrap() - .build(); - - let mut lenses = Lenses::new(OutputMode::ForwardAll); - lenses.add_lens(lens); + let lens = Lens::for_input_column("test_component") + .output_columns_at("matched/output", |out| { + out.component( + ComponentDescriptor::partial("transformed"), + Selector::parse(".")?, + ) + }) + .unwrap() + .build(); + + let lenses = Lenses::new(OutputMode::ForwardAll) + 
.add_lens_with_filter(EntityPathFilter::parse_forgiving("matched/**"), lens); // Apply to matching chunk let matching_results: Vec<_> = lenses @@ -53,13 +54,13 @@ fn test_output_mode_forward_all() { .collect::>() .unwrap(); - // Should get both the transformed chunk AND the original chunk + // Should get the original chunk first, then the transformed chunk assert_eq!(matching_results.len(), 2); - assert_eq!(matching_results[0].entity_path(), &"matched/output".into()); assert_eq!( - matching_results[1].entity_path(), + matching_results[0].entity_path(), matching_chunk.entity_path() ); + assert_eq!(matching_results[1].entity_path(), &"matched/output".into()); // Apply to unmatched chunk let unmatched_results: Vec<_> = lenses @@ -82,26 +83,26 @@ fn test_output_mode_forward_unmatched() { let unmatched_chunk = create_test_chunk("other/entity", "other_component"); // Create a lens that only matches the first chunk - let lens = Lens::for_input_column( - re_log_types::EntityPathFilter::parse_forgiving("matched/**"), - "test_component", - ) - .output_columns_at("matched/output", |out| { - out.component(ComponentDescriptor::partial("transformed"), []) - }) - .unwrap() - .build(); - - let mut lenses = Lenses::new(OutputMode::ForwardUnmatched); - lenses.add_lens(lens); - - // Apply to matching chunk + let lens = Lens::for_input_column("test_component") + .output_columns_at("matched/output", |out| { + out.component( + ComponentDescriptor::partial("transformed"), + Selector::parse(".")?, + ) + }) + .unwrap() + .build(); + + let lenses = Lenses::new(OutputMode::ForwardUnmatched) + .add_lens_with_filter(EntityPathFilter::parse_forgiving("matched/**"), lens); + + // Apply to matching chunk (all components are matched, so no untouched remainder) let matching_results: Vec<_> = lenses .apply(&matching_chunk) .collect::>() .unwrap(); - // Should get only the transformed chunk (not the original) + // Should get only the transformed chunk (no empty untouched remainder) 
assert_eq!(matching_results.len(), 1); assert_eq!(matching_results[0].entity_path(), &"matched/output".into()); @@ -126,18 +127,18 @@ fn test_output_mode_drop_unmatched() { let unmatched_chunk = create_test_chunk("other/entity", "other_component"); // Create a lens that only matches the first chunk - let lens = Lens::for_input_column( - re_log_types::EntityPathFilter::parse_forgiving("matched/**"), - "test_component", - ) - .output_columns_at("matched/output", |out| { - out.component(ComponentDescriptor::partial("transformed"), []) - }) - .unwrap() - .build(); - - let mut lenses = Lenses::new(OutputMode::DropUnmatched); - lenses.add_lens(lens); + let lens = Lens::for_input_column("test_component") + .output_columns_at("matched/output", |out| { + out.component( + ComponentDescriptor::partial("transformed"), + Selector::parse(".")?, + ) + }) + .unwrap() + .build(); + + let lenses = Lenses::new(OutputMode::DropUnmatched) + .add_lens_with_filter(EntityPathFilter::parse_forgiving("matched/**"), lens); // Apply to matching chunk let matching_results: Vec<_> = lenses diff --git a/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__destructure_cast.snap b/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__destructure_cast.snap index 1e219ed6d65f..b0eab68c63f6 100644 --- a/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__destructure_cast.snap +++ b/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__destructure_cast.snap @@ -2,33 +2,33 @@ source: crates/top/re_sdk/tests/lenses/operations.rs expression: "format!(\"{chunk:-240}\")" --- -┌────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /nullability/a │ -│ * id: [**REDACTED**] │ -│ * version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 
┌───────────────────────────────────────────────┬────────────────────┬───────────────────────────────────┐ │ -│ │ RowId ┆ tick ┆ Scalars:scalars │ │ -│ │ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable List[nullable f64] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ archetype: Scalars │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: Scalars:scalars │ │ -│ │ is_sorted: true ┆ kind: index ┆ component_type: Scalar │ │ -│ │ kind: control ┆ ┆ kind: data │ │ -│ ╞═══════════════════════════════════════════════╪════════════════════╪═══════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ 0 ┆ [0.0] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 1 ┆ [1.0] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 2 ┆ [] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 3 ┆ null │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 4 ┆ [4.0] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 5 ┆ [null] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 6 ┆ [6.0] │ │ -│ └───────────────────────────────────────────────┴────────────────────┴───────────────────────────────────┘ │ -└────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌───────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /nullability/a │ +│ * id: 
[**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────┬────────────────────────────┐ │ +│ │ RowId ┆ tick ┆ Scalars:scalars │ │ +│ │ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(Float64) │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ archetype: Scalars │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: Scalars:scalars │ │ +│ │ is_sorted: true ┆ kind: index ┆ component_type: Scalar │ │ +│ │ kind: control ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════╪════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 0 ┆ [0.0] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1 ┆ [1.0] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2 ┆ [] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 3 ┆ null │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 4 ┆ [4.0] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 5 ┆ [null] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 6 ┆ [6.0] │ │ +│ └───────────────────────────────────────────────┴──────────────────┴────────────────────────────┘ │ +└───────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__destructure_only.snap 
b/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__destructure_only.snap index a521f247bbde..c8ee9c646387 100644 --- a/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__destructure_only.snap +++ b/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__destructure_only.snap @@ -2,33 +2,33 @@ source: crates/top/re_sdk/tests/lenses/operations.rs expression: "format!(\"{chunk:-240}\")" --- -┌────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /nullability/b │ -│ * id: [**REDACTED**] │ -│ * version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬────────────────────┬───────────────────────────────────┐ │ -│ │ RowId ┆ tick ┆ Scalars:scalars │ │ -│ │ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable List[nullable f64] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ archetype: Scalars │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: Scalars:scalars │ │ -│ │ is_sorted: true ┆ kind: index ┆ component_type: Scalar │ │ -│ │ kind: control ┆ ┆ kind: data │ │ -│ ╞═══════════════════════════════════════════════╪════════════════════╪═══════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ 0 ┆ [0.0] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 1 ┆ [null] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 2 ┆ [] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 3 ┆ null │ │ -│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 4 ┆ [4.0] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 5 ┆ [null] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 6 ┆ [6.0] │ │ -│ └───────────────────────────────────────────────┴────────────────────┴───────────────────────────────────┘ │ -└────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌───────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /nullability/b │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────┬────────────────────────────┐ │ +│ │ RowId ┆ tick ┆ Scalars:scalars │ │ +│ │ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(Float64) │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ archetype: Scalars │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: Scalars:scalars │ │ +│ │ is_sorted: true ┆ kind: index ┆ component_type: Scalar │ │ +│ │ kind: control ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════╪════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 0 ┆ [0.0] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1 ┆ [null] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2 ┆ [] │ │ +│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 3 ┆ null │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 4 ┆ [4.0] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 5 ┆ [null] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 6 ┆ [6.0] │ │ +│ └───────────────────────────────────────────────┴──────────────────┴────────────────────────────┘ │ +└───────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__inner_count.snap b/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__inner_count.snap deleted file mode 100644 index 61b307c3af85..000000000000 --- a/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__inner_count.snap +++ /dev/null @@ -1,34 +0,0 @@ ---- -source: crates/top/re_sdk/tests/lenses/operations.rs -expression: "format!(\"{chunk:-240}\")" ---- -┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /nullability │ -│ * id: [**REDACTED**] │ -│ * version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬────────────────────┬───────────────────────────────────┬────────────────────────────────────┐ │ -│ │ RowId ┆ tick ┆ counts ┆ original │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable List[nullable i32] ┆ type: nullable List[nullable Utf8] │ 
│ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ component: counts ┆ component: original │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ kind: data ┆ kind: data │ │ -│ │ is_sorted: true ┆ kind: index ┆ ┆ │ │ -│ │ kind: control ┆ ┆ ┆ │ │ -│ ╞═══════════════════════════════════════════════╪════════════════════╪═══════════════════════════════════╪════════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ 0 ┆ [1] ┆ [zero] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 1 ┆ [2] ┆ [one, 1] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 2 ┆ [0] ┆ [] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 3 ┆ [1] ┆ [three] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 4 ┆ null ┆ null │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 5 ┆ [1] ┆ [five] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 6 ┆ [1] ┆ [null] │ │ -│ └───────────────────────────────────────────────┴────────────────────┴───────────────────────────────────┴────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git 
a/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__scatter_columns.snap b/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__scatter_columns.snap index 5d1ed8289557..3a501fce2b71 100644 --- a/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__scatter_columns.snap +++ b/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__scatter_columns.snap @@ -1,30 +1,31 @@ --- source: crates/top/re_sdk/tests/lenses/operations.rs +assertion_line: 422 expression: "format!(\"{chunk:-240}\")" --- -┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /scatter_test/exploded │ -│ * id: [**REDACTED**] │ -│ * version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬──────────────────────────┬────────────────────┬────────────────────────────────────┐ │ -│ │ RowId ┆ my_timestamp ┆ tick ┆ exploded_strings │ │ -│ │ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable i64 ┆ type: nullable List[nullable Utf8] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: my_timestamp ┆ index_name: tick ┆ component: exploded_strings │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ is_sorted: true ┆ kind: data │ │ -│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ │ │ -│ │ kind: control ┆ ┆ ┆ │ │ -│ ╞═══════════════════════════════════════════════╪══════════════════════════╪════════════════════╪════════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ 1 ┆ 1 ┆ [one] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 2 ┆ 1 ┆ [two] │ │ -│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 3 ┆ 1 ┆ [three] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 4 ┆ 2 ┆ [four] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 5 ┆ 3 ┆ null │ │ -│ └───────────────────────────────────────────────┴──────────────────────────┴────────────────────┴────────────────────────────────────┘ │ -└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /scatter_test/exploded │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────────────┬──────────────────┬─────────────────────────────┐ │ +│ │ RowId ┆ my_timestamp ┆ tick ┆ exploded_strings │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: Int64 ┆ type: List(Utf8) │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: my_timestamp ┆ index_name: tick ┆ component: exploded_strings │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ is_sorted: true ┆ kind: data │ │ +│ │ is_sorted: true ┆ kind: index ┆ kind: index ┆ │ │ +│ │ kind: control ┆ ┆ ┆ │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════════════╪══════════════════╪═════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 1 ┆ 1 ┆ [one] │ │ +│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2 ┆ 1 ┆ [two] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 3 ┆ 1 ┆ [three] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 4 ┆ 2 ┆ [four] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 5 ┆ 3 ┆ null │ │ +│ └───────────────────────────────────────────────┴──────────────────────────┴──────────────────┴─────────────────────────────┘ │ +└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__scatter_columns_static.snap b/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__scatter_columns_static.snap index 6f3d7e088a8e..7bc9dfa32cc9 100644 --- a/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__scatter_columns_static.snap +++ b/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__scatter_columns_static.snap @@ -2,29 +2,29 @@ source: crates/top/re_sdk/tests/lenses/operations.rs expression: "format!(\"{chunk:-240}\")" --- -┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /scatter_test/exploded │ -│ * id: [**REDACTED**] │ -│ * version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 
┌───────────────────────────────────────────────┬──────────────────────────┬────────────────────────────────────┐ │ -│ │ RowId ┆ my_timestamp ┆ exploded_strings │ │ -│ │ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable i64 ┆ type: nullable List[nullable Utf8] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: my_timestamp ┆ component: exploded_strings │ │ -│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ kind: data │ │ -│ │ is_sorted: true ┆ kind: index ┆ │ │ -│ │ kind: control ┆ ┆ │ │ -│ ╞═══════════════════════════════════════════════╪══════════════════════════╪════════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ 1 ┆ [one] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 2 ┆ [two] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 3 ┆ [three] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 4 ┆ [four] │ │ -│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ -│ │ row_[**REDACTED**] ┆ 5 ┆ null │ │ -│ └───────────────────────────────────────────────┴──────────────────────────┴────────────────────────────────────┘ │ -└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /scatter_test/exploded │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 
┌───────────────────────────────────────────────┬──────────────────────────┬─────────────────────────────┐ │ +│ │ RowId ┆ my_timestamp ┆ exploded_strings │ │ +│ │ --- ┆ --- ┆ --- │ │ +│ │ type: non-null FixedSizeBinary(16) ┆ type: Int64 ┆ type: List(Utf8) │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: my_timestamp ┆ component: exploded_strings │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ kind: data │ │ +│ │ is_sorted: true ┆ kind: index ┆ │ │ +│ │ kind: control ┆ ┆ │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════════════╪═════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 1 ┆ [one] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2 ┆ [two] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 3 ┆ [three] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 4 ┆ [four] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 5 ┆ null │ │ +│ └───────────────────────────────────────────────┴──────────────────────────┴─────────────────────────────┘ │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__single_static.snap b/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__single_static.snap deleted file mode 100644 index d0ff2133ef3f..000000000000 --- a/crates/top/re_sdk/tests/lenses/snapshots/lenses__lenses__operations__single_static.snap +++ /dev/null @@ -1,22 +0,0 @@ ---- -source: crates/top/re_sdk/tests/lenses/operations.rs -expression: "format!(\"{chunk:-240}\")" ---- 
-┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * entity_path: /nullability/static │ -│ * id: [**REDACTED**] │ -│ * version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌───────────────────────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┐ │ -│ │ RowId ┆ static_metadata_a ┆ static_metadata_b │ │ -│ │ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable Utf8] │ │ -│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ component: static_metadata_a ┆ component: static_metadata_b │ │ -│ │ ARROW:extension:name: TUID ┆ kind: data ┆ kind: data │ │ -│ │ is_sorted: true ┆ ┆ │ │ -│ │ kind: control ┆ ┆ │ │ -│ ╞═══════════════════════════════════════════════╪════════════════════════════════════╪════════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ [static_metadata_a] ┆ [static_metadata_b] │ │ -│ └───────────────────────────────────────────────┴────────────────────────────────────┴────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/top/rerun-cli/Cargo.toml b/crates/top/rerun-cli/Cargo.toml index 7292a197241d..9a106f47cfd0 100644 --- a/crates/top/rerun-cli/Cargo.toml +++ b/crates/top/rerun-cli/Cargo.toml @@ -116,7 +116,7 @@ re_log = { workspace = true, features = ["setup"] } re_memory.workspace = true rerun = { workspace = true, default-features = false, features = [ "analytics", - "data_loaders", + "importers", "glam", "image", "log", diff --git a/crates/top/rerun-cli/README.md b/crates/top/rerun-cli/README.md index 3d7e7c2ba75d..1c80c3a912ba 100644 --- a/crates/top/rerun-cli/README.md +++ 
b/crates/top/rerun-cli/README.md @@ -27,9 +27,9 @@ Run `rerun --help` for more. ## What is Rerun? - [Examples](https://github.com/rerun-io/rerun/tree/latest/examples/rust) -- [High-level docs](http://rerun.io/docs) +- [High-level docs](https://rerun.io/docs) - [Rust API docs](https://docs.rs/rerun/) -- [Troubleshooting](https://www.rerun.io/docs/getting-started/troubleshooting) +- [Troubleshooting](https://www.rerun.io/docs/overview/installing-rerun/troubleshooting) ### Running a web viewer diff --git a/crates/top/rerun-cli/src/bin/rerun.rs b/crates/top/rerun-cli/src/bin/rerun.rs index bc0c0f0e4e9f..89c1a9f44d40 100644 --- a/crates/top/rerun-cli/src/bin/rerun.rs +++ b/crates/top/rerun-cli/src/bin/rerun.rs @@ -7,9 +7,9 @@ //! //! ## Links //! - [Examples](https://github.com/rerun-io/rerun/tree/latest/examples/rust) -//! - [High-level docs](http://rerun.io/docs) +//! - [High-level docs](https://rerun.io/docs) //! - [Rust API docs](https://docs.rs/rerun/) -//! - [Troubleshooting](https://www.rerun.io/docs/getting-started/troubleshooting) +//! - [Troubleshooting](https://www.rerun.io/docs/overview/installing-rerun/troubleshooting) use re_memory::AccountingAllocator; #[global_allocator] diff --git a/crates/top/rerun/Cargo.toml b/crates/top/rerun/Cargo.toml index fac3ac8b0d70..ab29a0d2cff7 100644 --- a/crates/top/rerun/Cargo.toml +++ b/crates/top/rerun/Cargo.toml @@ -24,7 +24,7 @@ targets = ["x86_64-unknown-linux-gnu", "wasm32-unknown-unknown"] [features] default = [ "analytics", - "data_loaders", + "importers", "dataframe", "demo", "glam", @@ -49,11 +49,11 @@ analytics = [ ## Integration with `clap`. clap = ["dep:clap"] -## Support for using Rerun's data-loaders directly from the SDK. +## Support for using Rerun's importers directly from the SDK. ## -## See our `log_file` example and +## See our `log_file` example and ## for more information. 
-data_loaders = ["dep:re_mcap", "re_sdk?/data_loaders"] +importers = ["dep:re_mcap", "re_sdk?/importers"] ## Access to Rerun's dataframe API and related types. dataframe = ["dep:re_dataframe"] @@ -171,6 +171,7 @@ document-features.workspace = true indexmap.workspace = true indicatif.workspace = true itertools.workspace = true +parking_lot.workspace = true similar-asserts.workspace = true tokio = { workspace = true, features = ["rt-multi-thread"] } @@ -209,5 +210,4 @@ re_build_tools.workspace = true [package.metadata.cargo-shear] -# We only depend on re_video so we can enable extra features for it -ignored = ["re_video", "puffin"] +ignored = ["puffin"] diff --git a/crates/top/rerun/README.md b/crates/top/rerun/README.md index 81e24bb81ea5..dab7f4e6dd3d 100644 --- a/crates/top/rerun/README.md +++ b/crates/top/rerun/README.md @@ -31,9 +31,9 @@ rec.log("image", &rerun::archetypes::Image::new(image))?; ## Getting started - [Examples](https://github.com/rerun-io/rerun/tree/latest/examples/rust) -- [High-level docs](http://rerun.io/docs) +- [High-level docs](https://rerun.io/docs) - [Rust API docs](https://docs.rs/rerun/) -- [Troubleshooting](https://www.rerun.io/docs/getting-started/troubleshooting) +- [Troubleshooting](https://www.rerun.io/docs/overview/installing-rerun/troubleshooting) ## Library You can add the `rerun` crate to your project with `cargo add rerun`. 
diff --git a/crates/top/rerun/src/clap.rs b/crates/top/rerun/src/clap.rs index e24ec3ac1e38..e03bf168e2cb 100644 --- a/crates/top/rerun/src/clap.rs +++ b/crates/top/rerun/src/clap.rs @@ -202,6 +202,8 @@ impl RerunArgs { memory_limit: re_sdk::MemoryLimit::parse(&self.server_memory_limit) .map_err(|err| anyhow::format_err!("Bad --server-memory-limit: {err}"))?, + + cors_allowed_origins: vec![], }; let rec = builder.serve_grpc_opts( diff --git a/crates/top/rerun/src/commands/download.rs b/crates/top/rerun/src/commands/download.rs new file mode 100644 index 000000000000..67aa00255fc6 --- /dev/null +++ b/crates/top/rerun/src/commands/download.rs @@ -0,0 +1,270 @@ +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; + +use parking_lot::Mutex; + +use re_data_source::{FromUriOptions, LogDataSource}; +use re_log_channel::DataSourceMessage; + +/// Download recordings and save them as `.rrd` files. +/// +/// Supports any URI that Rerun can load: gRPC dataset segment URLs, +/// HTTP URLs to `.rrd` files, local file paths, etc. +#[derive(Debug, Clone, clap::Parser)] +pub struct DownloadCommand { + /// One or more URIs to download. + #[clap(required = true)] + urls: Vec, + + /// Override the output directory for the downloaded `.rrd` files. + /// + /// Defaults to the current working directory. 
+ #[clap(short, long)] + output_dir: Option, +} + +impl DownloadCommand { + pub fn run(self, tokio_runtime: &tokio::runtime::Handle) -> anyhow::Result<()> { + let output_dir = self + .output_dir + .unwrap_or_else(|| std::path::PathBuf::from(".")); + + if !output_dir.exists() { + std::fs::create_dir_all(&output_dir)?; + } + + let connection_registry = + re_redap_client::ConnectionRegistry::new_with_stored_credentials(); + + for url in &self.urls { + let data_source = LogDataSource::from_uri( + re_log_types::FileSource::Cli, + url, + &FromUriOptions { + follow: false, + accept_extensionless_http: true, + }, + ); + + let Some(data_source) = data_source else { + anyhow::bail!("Could not interpret URI: {url}"); + }; + + let output_path = output_dir.join(output_filename(&data_source, url)); + + // For gRPC dataset segments, ensure credentials are valid before streaming. + if let LogDataSource::RedapDatasetSegment { ref uri, .. } = data_source { + ensure_credentials(tokio_runtime, &connection_registry, &uri.origin)?; + } + + let auth_error: Arc>> = Arc::new(Mutex::new(None)); + let auth_error_capture = auth_error.clone(); + + let on_auth_err: re_data_source::AuthErrorHandler = Arc::new(move |uri, err| { + let msg = format!("Authentication failed for {uri}: {err}"); + *auth_error_capture.lock() = Some(msg); + }); + + let downloaded = Arc::new(AtomicU64::new(0)); + let downloaded_for_progress = downloaded.clone(); + + let streaming_options = re_redap_client::StreamingOptions { + force_full_download: true, + on_progress: Some(Arc::new(move |bytes_downloaded, total_bytes| { + downloaded_for_progress.store(bytes_downloaded, Ordering::Relaxed); + match total_bytes { + Some(total) => { + let percent = if 0 < total { + 100.0 * bytes_downloaded as f64 / total as f64 + } else { + 100.0 + }; + eprint!( + "\r {:.1}% ({} / {})", + percent, + re_format::format_bytes(bytes_downloaded as _), + re_format::format_bytes(total as _), + ); + } + None => { + eprint!("\r {}", 
re_format::format_bytes(bytes_downloaded as _),); + } + } + })), + }; + + let readable_url = data_source.as_uri().unwrap_or_else(|| url.clone()); + + let rx = data_source.stream_with_options( + on_auth_err, + &connection_registry, + streaming_options, + )?; + + eprintln!("Downloading {readable_url}…"); + + save_to_rrd(&rx, &output_path)?; + + // Check if the async streaming task encountered an auth error. + if let Some(auth_err_msg) = auth_error.lock().take() { + // Remove the (likely empty/incomplete) output file. + std::fs::remove_file(&output_path).ok(); + anyhow::bail!( + "{auth_err_msg}\n\nRun `rerun auth login` to re-authenticate, then try again." + ); + } + + let total = downloaded.load(Ordering::Relaxed); + if 0 < total { + // Clear the progress line + eprint!("\r\x1b[2K"); + } + + eprintln!("Saved {output_path:?}"); + } + + Ok(()) + } +} + +/// Ensure we have valid credentials for the given origin. +/// +/// If stored credentials exist but are expired, this attempts to refresh them. +/// If the refresh fails (e.g. session ended), it triggers an interactive +/// device-code login flow so the user can re-authenticate. +#[cfg(feature = "auth")] +fn ensure_credentials( + tokio_runtime: &tokio::runtime::Handle, + connection_registry: &re_redap_client::ConnectionRegistryHandle, + origin: &re_uri::Origin, +) -> anyhow::Result<()> { + use re_auth::oauth::login_flow::DeviceCodeFlowState; + + match tokio_runtime.block_on(re_auth::oauth::load_and_refresh_credentials()) { + Ok(Some(_credentials)) => { + // Credentials are valid. + connection_registry.set_credentials(origin, re_redap_client::Credentials::Stored); + } + + Ok(None) => { + // No stored credentials. Proceed without — the server may not require auth. + } + + Err(err) => { + re_log::debug!("Credential refresh failed: {err}"); + eprintln!("Session expired. Logging in again…"); + + // Trigger interactive device-code login flow. 
+ match tokio_runtime.block_on(re_auth::DeviceCodeFlow::init(true)) { + Ok(DeviceCodeFlowState::AlreadyLoggedIn(_)) => { + // Shouldn't happen with force_login=true, but handle it gracefully. + connection_registry + .set_credentials(origin, re_redap_client::Credentials::Stored); + } + + Ok(DeviceCodeFlowState::LoginFlowStarted(mut flow)) => { + let login_url = flow.login_url(); + let user_code = flow.user_code(); + + eprintln!("Open this URL in your browser to log in:\n {login_url}"); + eprintln!("Verify that the code shown in your browser is: {user_code}"); + eprintln!("Waiting for login…"); + + match tokio_runtime.block_on(flow.wait_for_user_confirmation()) { + Ok(credentials) => { + eprintln!("Logged in as {}", credentials.user().email); + // Clear the cached client so a new one is created with fresh credentials. + connection_registry.remove_credentials(origin); + connection_registry + .set_credentials(origin, re_redap_client::Credentials::Stored); + } + Err(err) => { + anyhow::bail!( + "Login failed: {err}\n\nRun `rerun auth login` to authenticate manually." + ); + } + } + } + + Err(err) => { + anyhow::bail!( + "Could not start login flow: {err}\n\nRun `rerun auth login` to authenticate manually." + ); + } + } + } + } + + Ok(()) +} + +#[cfg(not(feature = "auth"))] +fn ensure_credentials( + _tokio_runtime: &tokio::runtime::Handle, + connection_registry: &re_redap_client::ConnectionRegistryHandle, + origin: &re_uri::Origin, +) -> anyhow::Result<()> { + connection_registry.set_credentials(origin, re_redap_client::Credentials::Stored); + Ok(()) +} + +/// Derive an output `.rrd` filename from the data source. +fn output_filename(data_source: &LogDataSource, original_url: &str) -> std::path::PathBuf { + match data_source { + LogDataSource::RedapDatasetSegment { uri, .. } => format!("{}.rrd", uri.segment_id).into(), + + #[cfg(not(target_arch = "wasm32"))] + LogDataSource::FilePath { path, .. 
} => path + .file_name() + .map(Into::into) + .unwrap_or_else(|| "output.rrd".into()), + + LogDataSource::HttpUrl { url, .. } => { + let path = url.path(); + let filename = path.rsplit('/').next().unwrap_or("output.rrd"); + if filename.is_empty() { + "output.rrd".into() + } else if filename.ends_with(".rrd") || filename.ends_with(".rbl") { + filename.into() + } else { + format!("{filename}.rrd").into() + } + } + + _ => { + re_log::warn!("Cannot derive filename from {original_url:?}, using fallback"); + "output.rrd".into() + } + } +} + +/// Receive all messages from the channel and write them to an `.rrd` file. +fn save_to_rrd( + rx: &re_log_channel::LogReceiver, + output_path: &std::path::Path, +) -> anyhow::Result<()> { + let encoding_options = re_log_encoding::rrd::EncodingOptions::PROTOBUF_COMPRESSED; + let file = std::fs::File::create(output_path)?; + let mut encoder = re_log_encoding::Encoder::new_eager( + re_build_info::CrateVersion::LOCAL, + encoding_options, + file, + )?; + + while let Ok(msg) = rx.recv() { + if let Some(payload) = msg.into_data() { + match payload { + DataSourceMessage::LogMsg(log_msg) => { + encoder.append(&log_msg)?; + } + other => { + re_log::trace!("Skipping {} (not storable in .rrd)", other.variant_name()); + } + } + } + } + + encoder.finish()?; + + Ok(()) +} diff --git a/crates/top/rerun/src/commands/entrypoint.rs b/crates/top/rerun/src/commands/entrypoint.rs index 9bc7551fbc1a..a2f68ed94a77 100644 --- a/crates/top/rerun/src/commands/entrypoint.rs +++ b/crates/top/rerun/src/commands/entrypoint.rs @@ -1,3 +1,4 @@ +use std::fmt::Write as _; use std::net::IpAddr; use std::time::Duration; @@ -14,7 +15,8 @@ use super::auth::AuthCommands; use crate::CallSource; #[cfg(feature = "analytics")] use crate::commands::AnalyticsCommands; -#[cfg(feature = "data_loaders")] +use crate::commands::DownloadCommand; +#[cfg(feature = "importers")] use crate::commands::McapCommands; use crate::commands::RrdCommands; @@ -75,6 +77,51 @@ Examples: rerun 
--save new_recording.rrd "#; +/// Port argument that accepts either a port number or `auto`. +/// +/// `auto` will use the default port, but find a free one if it's already in use. +#[derive(Debug, Clone)] +pub enum PortArg { + Port(u16), + Auto, +} + +impl PortArg { + pub fn port(&self) -> u16 { + match self { + Self::Port(port) => *port, + Self::Auto => 9876, + } + } + + pub fn is_auto(&self) -> bool { + matches!(self, Self::Auto) + } +} + +impl std::fmt::Display for PortArg { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Port(port) => write!(f, "{port}"), + Self::Auto => write!(f, "auto"), + } + } +} + +impl std::str::FromStr for PortArg { + type Err = String; + + fn from_str(s: &str) -> Result { + if s.eq_ignore_ascii_case("auto") { + Ok(Self::Auto) + } else { + s.parse::() + .map(Self::Port) + .map_err(|err| format!("invalid port: {err}")) + } + } +} + #[derive(Debug, clap::Parser)] #[clap( long_about = LONG_ABOUT, @@ -97,12 +144,12 @@ pub struct Args { #[clap( long, - default_value = "75%", long_help = r"An upper limit on how much memory the Rerun Viewer should use. When this limit is reached, Rerun will drop the oldest data. -Example: `16GB` or `50%` (of system total)." +Example: `16GB` or `50%` (of system total). +You can also set this in the settings panel." )] - pub memory_limit: String, + pub memory_limit: Option, #[clap( long, @@ -118,6 +165,20 @@ Example: `16GB` or `50%` (of system total)." #[clap(long)] pub newest_first: bool, + /// Additional origin patterns allowed to make CORS requests to the gRPC server. + /// + /// Use this when hosting a custom viewer on a different domain. + /// Patterns are matched against the full Origin header (e.g. `https://example.com:8080`), + /// using glob-style matching where `*` matches any sequence of characters. + /// Can be specified multiple times. 
+ /// + /// Examples: + /// `--cors-allow-origin "https://*.example.com"` + /// `--cors-allow-origin "https://example.com:8080"` + /// `--cors-allow-origin "https://example.com:*"` + #[clap(long)] + cors_allow_origin: Vec, + #[clap( long, default_value_t = true, @@ -130,9 +191,17 @@ When persisted, the state will be stored at the following locations: pub persist_state: bool, /// What port do we listen to for SDKs to connect to over gRPC. + /// + /// Use `auto` to always start a new viewer with a free port if the default is taken. // Default is `re_grpc_server::DEFAULT_SERVER_PORT`, can't use symbollically if `server` feature is disabled - #[clap(long, default_value_t = 9876)] - pub port: u16, + #[clap(long, default_value_t = PortArg::Port(9876))] + pub port: PortArg, + + /// Alias for `--port auto`. Always start a new viewer. + /// + /// If the port is already in use, a free port will be picked automatically. + #[clap(long, conflicts_with = "port")] + pub new: bool, /// Start with the puffin profiler running. 
#[clap(long)] @@ -225,7 +294,7 @@ When persisted, the state will be stored at the following locations: - A path to a Rerun .rrd recording - A path to a Rerun .rbl blueprint - An HTTP(S) URL to an .rrd or .rbl file to load -- A path to an image or mesh, or any other file that Rerun can load (see https://www.rerun.io/docs/reference/data-loaders/overview) +- A path to an image or mesh, or any other file that Rerun can load (see https://www.rerun.io/docs/concepts/logging-and-ingestion/importers/overview?speculative-link) If no arguments are given, a server will be hosted which a Rerun SDK can connect to.")] pub url_or_paths: Vec, @@ -394,7 +463,9 @@ impl Args { return; } - let any_subcommands = cmd.get_subcommands().any(|cmd| cmd.get_name() != "help"); + let any_subcommands = cmd + .get_subcommands() + .any(|cmd| cmd.get_name() != "help" && !cmd.is_hide_set()); let any_positional_args = cmd.get_arguments().any(|arg| arg.is_positional()); let any_floating_args = cmd.get_arguments().any(|arg| { !arg.is_positional() && !arg.is_hide_set() && arg.get_long() != Some("help") @@ -426,9 +497,9 @@ impl Args { let mut rendered = String::new(); if let Some(about) = cmd.get_long_about() { - rendered += &format!("{about}\n\n"); + write!(rendered, "{about}\n\n").ok(); } else if let Some(about) = cmd.get_about() { - rendered += &format!("{about}.\n\n"); + write!(rendered, "{about}.\n\n").ok(); } rendered += format!("**Usage**: `{} {usage}`", full_name.join(" ")).trim(); @@ -446,7 +517,7 @@ impl Args { let commands = any_subcommands.then(|| { let commands = cmd .get_subcommands_mut() - .filter(|cmd| cmd.get_name() != "help") + .filter(|cmd| cmd.get_name() != "help" && !cmd.is_hide_set()) .map(|cmd| { let name = cmd.get_name().to_owned(); let help = cmd.render_help().to_string(); @@ -470,7 +541,7 @@ impl Args { // > - A path to a Rerun .rrd recording // > - A path to a Rerun .rbl blueprint // > - An HTTP(S) URL to an .rrd or .rbl file to load - // > - A path to an image or mesh, or any 
other file that Rerun can load (see https://www.rerun.io/docs/reference/data-loaders/overview) + // > - A path to an image or mesh, or any other file that Rerun can load (see https://www.rerun.io/docs/concepts/logging-and-ingestion/importers/overview?speculative-link) // > // > If no arguments are given, a server will be hosted which a Rerun SDK can connect to. // """ @@ -516,6 +587,9 @@ impl Args { *out += "\n\n"; for cmd in cmd.get_subcommands_mut() { + if cmd.is_hide_set() { + continue; + } generate_markdown_manual(full_name.clone(), out, cmd); } } @@ -539,13 +613,18 @@ enum Command { #[command(subcommand)] Auth(AuthCommands), + /// Download recordings and save them as .rrd files. + /// + /// Supports downloading from Rerun Cloud as well as any other supported URI. + Download(DownloadCommand), + /// Generates the Rerun CLI manual (markdown). /// /// Example: `rerun man > docs/content/reference/cli.md` #[command(name = "man")] Manual, - #[cfg(feature = "data_loaders")] + #[cfg(feature = "importers")] #[command(subcommand)] Mcap(McapCommands), @@ -616,7 +695,6 @@ where use clap::Parser as _; let mut args = Args::parse_from(args); - #[cfg(feature = "analytics")] record_cli_command_analytics(&args); @@ -652,6 +730,8 @@ where #[cfg(feature = "analytics")] Command::Analytics(analytics) => analytics.run().map_err(Into::into), + Command::Download(cmd) => cmd.run(tokio_runtime.handle()), + Command::Manual => { let man = Args::generate_markdown_manual(); let web_header = unindent::unindent( @@ -666,7 +746,7 @@ where Ok(()) } - #[cfg(feature = "data_loaders")] + #[cfg(feature = "importers")] Command::Mcap(mcap) => mcap.run(), #[cfg(feature = "native_viewer")] @@ -723,7 +803,7 @@ where match res { // Clean success - Ok(_) => Ok(0), + Ok(()) => Ok(0), // Clean failure -- known error AddrInUse Err(err) @@ -751,7 +831,21 @@ pub fn run_impl( //TODO(#10068): populate token passed with `--token` let connection_registry = 
re_redap_client::ConnectionRegistry::new_with_stored_credentials(); - let server_addr = std::net::SocketAddr::new(args.bind, args.port); + let wants_new = args.new || args.port.is_auto(); + let port = args.port.port(); + + let server_addr = if wants_new + && is_another_server_already_running(std::net::SocketAddr::new(args.bind, port)) + { + let default_port = port; + let free_port = find_free_port(args.bind)?; + re_log::info!( + "Default port {default_port} is already in use, using port {free_port} instead." + ); + std::net::SocketAddr::new(args.bind, free_port) + } else { + std::net::SocketAddr::new(args.bind, port) + }; #[cfg(feature = "server")] let server_options = re_sdk::ServerOptions { @@ -764,6 +858,8 @@ pub fn run_impl( re_memory::MemoryLimit::parse(limit) .map_err(|err| anyhow::format_err!("Bad --server-memory-limit: {err}"))? }, + + cors_allowed_origins: args.cors_allow_origin.clone(), }; // All URLs that we want to process. @@ -853,7 +949,8 @@ pub fn run_impl( ) } } - } else if args.connect.is_none() && is_another_server_already_running(server_addr) { + } else if !wants_new && args.connect.is_none() && is_another_server_already_running(server_addr) + { let receivers = ReceiversFromUrlParams::new( url_or_paths, &UrlParamProcessingConfig::convert_everything_to_data_sources(), @@ -912,6 +1009,7 @@ pub fn start_native_viewer( let connect = args.connect.is_some(); let follow = args.follow; let renderer = args.renderer.as_deref(); + let memory_limit = args.memory_limit.clone(); let (command_tx, command_rx) = re_viewer_context::command_channel(); @@ -958,6 +1056,13 @@ pub fn start_native_viewer( (command_tx, command_rx), ); + if let Some(memory_limit) = memory_limit { + re_log::debug!("Parsing --memory-limit (for Viewer)"); + let memory_limit = re_memory::MemoryLimit::parse(&memory_limit) + .map_err(|err| anyhow::format_err!("Bad --memory-limit: {err}"))?; + app.app_options_mut().memory_limit = memory_limit; + } + #[allow(clippy::allow_attributes, unused_mut)] 
let ReceiversFromUrlParams { mut log_receivers, @@ -1015,11 +1120,6 @@ pub fn native_startup_options_from_args(args: &Args) -> anyhow::Result anyhow::Result { + let listener = std::net::TcpListener::bind(std::net::SocketAddr::new(bind, 0))?; + Ok(listener.local_addr()?.port()) +} + fn is_another_server_already_running(server_addr: std::net::SocketAddr) -> bool { // Check if there is already a viewer running and if so, send the data to it. use std::net::TcpStream; @@ -1248,7 +1353,7 @@ fn assert_receive_into_entity_db(rx: &LogReceiverSet) -> anyhow::Result { match msg { - DataSourceMessage::RrdManifest(store_id, rrd_manifest) => { + DataSourceMessage::RrdManifest(store_id, manifest) => { let mut_db = match store_id.kind() { re_log_types::StoreKind::Recording => { rec.get_or_insert_with(|| { @@ -1260,7 +1365,22 @@ fn assert_receive_into_entity_db(rx: &LogReceiverSet) -> anyhow::Result { + let mut_db = match store_id.kind() { + re_log_types::StoreKind::Recording => { + rec.get_or_insert_with(|| { + re_entity_db::EntityDb::new(store_id.clone()) + }) + } + re_log_types::StoreKind::Blueprint => bp.get_or_insert_with(|| { + re_entity_db::EntityDb::new(store_id.clone()) + }), + }; + + mut_db.mark_rrd_manifest_complete(); } DataSourceMessage::LogMsg(msg) => { @@ -1348,9 +1468,10 @@ fn initialize_thread_pool(threads_args: i32) { } } else if threads_args == 1 { // 1 means "single-threaded". - // Put all jobs on the caller thread, just to simplify - // the flamegraph and make it more similar to a browser. - builder = builder.num_threads(1).use_current_thread(); + // NOTE: we intentionally do NOT use `.use_current_thread()` here, + // because that causes deadlocks when code does `rayon::spawn()` + // followed by blocking on the result (e.g. in `load_file.rs`). 
+ builder = builder.num_threads(1); re_log::info!("Running in single-threaded mode."); } else { // 0 means "use all cores", and rayon understands that @@ -1528,9 +1649,14 @@ impl ReceiversFromUrlParams { let mut urls_to_pass_on_to_viewer = Vec::new(); for url in input_urls { - if let Some(data_source) = - LogDataSource::from_uri(re_log_types::FileSource::Cli, &url, follow) - { + if let Some(data_source) = LogDataSource::from_uri( + re_log_types::FileSource::Cli, + &url, + &re_data_source::FromUriOptions { + follow, + accept_extensionless_http: true, + }, + ) { match &data_source { LogDataSource::HttpUrl { .. } => { if config.data_sources_from_http_urls { @@ -1631,7 +1757,9 @@ fn record_cli_command_analytics(args: &Args) { bind: _, memory_limit: _, server_memory_limit: _, + cors_allow_origin: _, port: _, + new: _, } = args; let (command, subcommand) = match command { @@ -1661,31 +1789,20 @@ fn record_cli_command_analytics(args: &Args) { Some(Command::Manual) => ("man", None), - #[cfg(feature = "data_loaders")] - Some(Command::Mcap(cmd)) => { - let subcommand = match cmd { - McapCommands::Convert(_) => "convert", - }; - ("mcap", Some(subcommand)) + #[cfg(feature = "importers")] + Some(Command::Mcap(_cmd)) => { + // TODO(RR-4073): Re-enable analytics for MCAP commands. + return; } + Some(Command::Download(_)) => ("download", None), + #[cfg(feature = "native_viewer")] Some(Command::Reset) => ("reset", None), - Some(Command::Rrd(cmd)) => { - let subcommand = match cmd { - RrdCommands::Compact(_) => "compact", - RrdCommands::Compare(_) => "compare", - RrdCommands::Filter(_) => "filter", - RrdCommands::Split(_) => "split", - RrdCommands::Merge(_) => "merge", - RrdCommands::Migrate(_) => "migrate", - RrdCommands::Print(_) => "print", - RrdCommands::Route(_) => "route", - RrdCommands::Stats(_) => "stats", - RrdCommands::Verify(_) => "verify", - }; - ("rrd", Some(subcommand)) + Some(Command::Rrd(_cmd)) => { + // TODO(RR-4073): Re-enable analytics for RRD commands. 
+ return; } #[cfg(feature = "oss_server")] @@ -1796,6 +1913,8 @@ where #[cfg(feature = "analytics")] Command::Analytics(analytics) => analytics.run().map_err(Into::into), + Command::Download(cmd) => cmd.run(tokio_runtime.handle()), + Command::Manual => { let man = Args::generate_markdown_manual(); let web_header = unindent::unindent( @@ -1810,7 +1929,7 @@ where Ok(()) } - #[cfg(feature = "data_loaders")] + #[cfg(feature = "importers")] Command::Mcap(mcap) => mcap.run(), #[cfg(feature = "native_viewer")] @@ -1862,7 +1981,7 @@ fn run_impl_with_wrapper( ) -> anyhow::Result<()> { let connection_registry = re_redap_client::ConnectionRegistry::new_with_stored_credentials(); - let server_addr = std::net::SocketAddr::new(args.bind, args.port); + let server_addr = std::net::SocketAddr::new(args.bind, args.port.port()); #[cfg(feature = "server")] let server_options = re_sdk::ServerOptions { @@ -1871,6 +1990,7 @@ fn run_impl_with_wrapper( re_memory::MemoryLimit::parse(args.server_memory_limit.as_str()) .map_err(|err| anyhow::format_err!("Bad --server-memory-limit: {err}"))? 
}, + cors_allowed_origins: args.cors_allow_origin.clone(), }; #[allow(clippy::allow_attributes, unused_mut)] @@ -2010,6 +2130,7 @@ fn start_native_viewer_with_wrapper( startup_patch: Option, ) -> anyhow::Result<()> { use re_viewer::external::re_viewer_context; + use crate::external::re_ui::{UICommand, UICommandSender as _}; let mut startup_options = native_startup_options_from_args(args)?; @@ -2022,10 +2143,18 @@ fn start_native_viewer_with_wrapper( let connect = args.connect.is_some(); let follow = args.follow; let renderer = args.renderer.as_deref(); + let memory_limit = args.memory_limit.clone(); let (command_tx, command_rx) = re_viewer_context::command_channel(); + let auth_error_handler = re_viewer::App::auth_error_handler(command_tx.clone()); + let tokio_runtime_handle = tokio_runtime_handle.clone(); + + // Start catching `re_log::info/warn/error` messages + // so we can show them in the notification panel. + // In particular: create this before calling `run_native_app` + // so we catch any warnings produced during startup. let text_log_rx = re_viewer::register_text_log_receiver(); re_viewer::run_native_app( @@ -2035,9 +2164,11 @@ fn start_native_viewer_with_wrapper( let tx = command_tx.clone(); let egui_ctx = cc.egui_ctx.clone(); tokio::spawn(async move { + // We catch ctrl-c commands so we can properly quit. + // Without this, recent state changes might not be persisted. 
match tokio::signal::ctrl_c().await { Ok(()) => { - re_log::info!("Caught Ctrl-C, quitting…"); + re_log::info!("Caught Ctrl-C, quitting Rerun Viewer…"); tx.send_ui(UICommand::Quit); egui_ctx.request_repaint(); } @@ -2059,6 +2190,13 @@ fn start_native_viewer_with_wrapper( (command_tx, command_rx), ); + if let Some(memory_limit) = memory_limit { + re_log::debug!("Parsing --memory-limit (for Viewer)"); + let memory_limit = re_memory::MemoryLimit::parse(&memory_limit) + .map_err(|err| anyhow::format_err!("Bad --memory-limit: {err}"))?; + app.app_options_mut().memory_limit = memory_limit; + } + #[allow(clippy::allow_attributes, unused_mut)] let ReceiversFromUrlParams { mut log_receivers, @@ -2071,6 +2209,7 @@ fn start_native_viewer_with_wrapper( follow, )?; + // If we're **not** connecting to an existing server, we spawn a new one and add it to the list of receivers. #[cfg(feature = "server")] if !connect { let log_receiver = re_grpc_server::spawn_with_recv( @@ -2078,6 +2217,7 @@ fn start_native_viewer_with_wrapper( server_options, re_grpc_server::shutdown::never(), ); + log_receivers.push(log_receiver); } @@ -2092,7 +2232,7 @@ fn start_native_viewer_with_wrapper( app.set_examples_manifest_url(url); } - // Apply the DimOS wrapper if provided, otherwise return stock App + // Apply the DimOS wrapper if provided, otherwise return stock App. 
if let Some(wrapper) = app_wrapper { wrapper(app) } else { diff --git a/crates/top/rerun/src/commands/mcap/mod.rs b/crates/top/rerun/src/commands/mcap/mod.rs index e79eb38b0dd4..fe78d750c68d 100644 --- a/crates/top/rerun/src/commands/mcap/mod.rs +++ b/crates/top/rerun/src/commands/mcap/mod.rs @@ -3,11 +3,34 @@ use std::fs::File; use std::io::BufWriter; use clap::Subcommand; +use clap::builder::TypedValueParser as _; use re_log_encoding::Encoder; -use re_log_types::{LogMsg, RecordingId}; -use re_mcap::{LayerIdentifier, SelectedLayers}; -use re_sdk::external::re_data_loader::McapLoader; -use re_sdk::{ApplicationId, DataLoader, DataLoaderSettings, LoadedData}; +use re_log_types::{LogMsg, RecordingId, TimeType}; +use re_mcap::{DecoderIdentifier, SelectedDecoders, TopicFilter}; +use re_sdk::external::re_importer::{McapImporter, supported_mcap_decoder_identifiers}; +use re_sdk::{ApplicationId, ImportedData, Importer, ImporterSettings}; + +fn possible_timeline_types() -> impl clap::builder::TypedValueParser { + clap::builder::PossibleValuesParser::new(["timestamp", "duration"]).map(|value: String| { + match value.as_str() { + "timestamp" => TimeType::TimestampNs, + "duration" => TimeType::DurationNs, + _ => unreachable!("PossibleValuesParser already validated the input"), + } + }) +} + +fn possible_decoders() -> clap::builder::PossibleValuesParser { + static DECODER_IDS: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + supported_mcap_decoder_identifiers(true) + .into_iter() + .map(|identifier| identifier.to_string()) + .collect() + }); + clap::builder::PossibleValuesParser::new( + DECODER_IDS.iter().map(String::as_str).collect::>(), + ) +} #[derive(Debug, Clone, clap::Parser)] pub struct ConvertCommand { @@ -22,13 +45,13 @@ pub struct ConvertCommand { #[clap(long = "application-id")] application_id: Option, - /// Specifies which layers to apply during conversion. 
- #[clap(short = 'l', long = "layer")] - selected_layers: Vec, + /// Specifies which decoders to apply during conversion. + #[clap(short = 'd', long = "decoder", value_parser = possible_decoders())] + selected_decoders: Vec, - /// Disable using the raw layer as a fallback for unsupported channels. - /// By default, channels that cannot be handled by semantic layers (protobuf, ROS2) - /// will be processed by the raw layer. + /// Disable using the raw decoder as a fallback for unsupported channels. + /// By default, channels that cannot be handled by semantic decoders (protobuf, ROS2) + /// will be processed by the raw decoder. #[clap(long = "disable-raw-fallback")] disable_raw_fallback: bool, @@ -39,6 +62,56 @@ pub struct ConvertCommand { /// output. #[clap(long = "recording-id")] recording_id: Option, + + /// If set, an offset in nanoseconds to add to all timestamp timelines. + /// + /// This can be used to shift all timestamps of the MCAP file if they are not yet + /// relative to the UNIX epoch. + /// + /// Duration and sequence timelines are not affected by this offset. + #[clap(long = "timestamp-offset-ns")] + timestamp_offset_ns: Option, + + /// The timeline type to use for timestamp timelines. + /// + /// "timestamp" (default) creates `TimestampNs` timelines (nanoseconds since Unix epoch). + /// "duration" creates `DurationNs` timelines (nanosecond durations). + #[clap(long = "timeline-type", value_parser = possible_timeline_types(), default_value = "timestamp")] + timeline_type: TimeType, + + /// Include only topics matching this regex (RE2 syntax). Repeatable. + /// + /// If omitted, all topics are included. Patterns are not implicitly anchored; + /// use `^` / `$` if you need anchoring. + /// + /// Example: `-y "^/tf.*" -n ".*depth.*" -y "^/camera/(compressed|camera_info)$"` + #[clap(short = 'y', long = "include-topic-regex")] + include_topic_regex: Vec, + + /// Exclude topics matching this regex (RE2 syntax). Repeatable. 
+ /// + /// Applied after includes: a topic is kept only if it matches an include + /// (or no includes are set) AND matches no exclude. + #[clap(short = 'n', long = "exclude-topic-regex")] + exclude_topic_regex: Vec, +} + +fn compile_topic_filter(include: &[String], exclude: &[String]) -> anyhow::Result { + for pattern in include { + TopicFilter::default() + .with_include_patterns(std::slice::from_ref(pattern)) + .map_err(|err| anyhow::anyhow!("Invalid include topic regex {pattern:?}: {err}"))?; + } + for pattern in exclude { + TopicFilter::default() + .with_exclude_patterns(std::slice::from_ref(pattern)) + .map_err(|err| anyhow::anyhow!("Invalid exclude topic regex {pattern:?}: {err}"))?; + } + + TopicFilter::default() + .with_include_patterns(include) + .and_then(|filter| filter.with_exclude_patterns(exclude)) + .map_err(|err| anyhow::anyhow!("Invalid topic regex in include/exclude filters: {err}")) } impl ConvertCommand { @@ -48,10 +121,16 @@ impl ConvertCommand { path_to_output_rrd, application_id, recording_id, - selected_layers, + selected_decoders, disable_raw_fallback, + timestamp_offset_ns, + timeline_type, + include_topic_regex, + exclude_topic_regex, } = self; + let topic_filter = compile_topic_filter(include_topic_regex, exclude_topic_regex)?; + let start_time = std::time::Instant::now(); let application_id = application_id @@ -64,27 +143,30 @@ impl ConvertCommand { .map(RecordingId::from) .unwrap_or_else(RecordingId::random); - let selected_layers = if selected_layers.is_empty() { - SelectedLayers::All + let selected_decoders = if selected_decoders.is_empty() { + SelectedDecoders::All } else { - SelectedLayers::Subset( - selected_layers + SelectedDecoders::Subset( + selected_decoders .iter() .cloned() - .map(LayerIdentifier::from) + .map(DecoderIdentifier::from) .collect(), ) }; - let loader: &dyn DataLoader = - &McapLoader::new(selected_layers).with_raw_fallback(!*disable_raw_fallback); + let importer: &dyn Importer = 
&McapImporter::new(&selected_decoders) + .with_raw_fallback(!*disable_raw_fallback) + .with_topic_filter(topic_filter); // TODO(#10862): This currently loads the entire file into memory. - let (tx, rx) = crossbeam::channel::bounded::(1024); - loader.load_from_path( - &DataLoaderSettings { + let (tx, rx) = crossbeam::channel::bounded::(1024); + importer.import_from_path( + &ImporterSettings { application_id: Some(application_id), - ..DataLoaderSettings::recommended(recording_id) + timestamp_offset_ns: *timestamp_offset_ns, + timeline_type: *timeline_type, + ..ImporterSettings::recommended(recording_id) }, path_to_input_mcap.into(), tx, @@ -123,7 +205,7 @@ impl McapCommands { fn process_mcap( writer: W, - receiver: &crossbeam::channel::Receiver, + receiver: &crossbeam::channel::Receiver, ) -> anyhow::Result<()> { let mut num_total_msgs = 0; let mut topics = BTreeSet::new(); @@ -135,13 +217,13 @@ fn process_mcap( num_total_msgs += 1; let log_msg = match res { - LoadedData::LogMsg(_, log_msg) => log_msg, - LoadedData::Chunk(_, store_id, chunk) => { + ImportedData::LogMsg(_, log_msg) => log_msg, + ImportedData::Chunk(_, store_id, chunk) => { topics.insert(chunk.entity_path().clone()); let arrow_msg = chunk.to_arrow_msg()?; LogMsg::ArrowMsg(store_id, arrow_msg) } - LoadedData::ArrowMsg(_, store_id, arrow_msg) => LogMsg::ArrowMsg(store_id, arrow_msg), + ImportedData::ArrowMsg(_, store_id, arrow_msg) => LogMsg::ArrowMsg(store_id, arrow_msg), }; encoder.append(&log_msg)?; } diff --git a/crates/top/rerun/src/commands/mod.rs b/crates/top/rerun/src/commands/mod.rs index bab310f1e0ab..de14da278940 100644 --- a/crates/top/rerun/src/commands/mod.rs +++ b/crates/top/rerun/src/commands/mod.rs @@ -23,9 +23,10 @@ impl CallSource { #[cfg(feature = "auth")] mod auth; +mod download; mod entrypoint; -#[cfg(feature = "data_loaders")] +#[cfg(feature = "importers")] mod mcap; mod rrd; mod stdio; @@ -35,8 +36,12 @@ mod analytics; #[cfg(feature = "analytics")] pub(crate) use 
self::analytics::AnalyticsCommands; -pub use self::entrypoint::{run, run_with_app_wrapper, AppWrapper, StartupOptionsPatch, Args as RerunArgs, native_startup_options_from_args}; -#[cfg(feature = "data_loaders")] +pub use self::download::DownloadCommand; +pub use self::entrypoint::{ + run, run_with_app_wrapper, AppWrapper, StartupOptionsPatch, Args as RerunArgs, + native_startup_options_from_args, +}; +#[cfg(feature = "importers")] pub use self::mcap::McapCommands; pub use self::rrd::RrdCommands; pub use self::stdio::{ diff --git a/crates/top/rerun/src/commands/rrd/compare.rs b/crates/top/rerun/src/commands/rrd/compare.rs index 06abfaba9e82..9c2445db9eb4 100644 --- a/crates/top/rerun/src/commands/rrd/compare.rs +++ b/crates/top/rerun/src/commands/rrd/compare.rs @@ -1,3 +1,4 @@ +use std::fmt::Write as _; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -71,13 +72,6 @@ impl CompareCommand { "Application IDs do not match: '{app_id1}' vs. '{app_id2}'" ); - anyhow::ensure!( - chunks1.len() == chunks2.len(), - "Number of Chunks does not match: '{}' vs. 
'{}'", - re_format::format_uint(chunks1.len()), - re_format::format_uint(chunks2.len()), - ); - fn format_chunk(chunk: &Chunk) -> String { re_arrow_util::format_record_batch_opts( &chunk.to_record_batch().expect("Cannot fail in practice"), @@ -112,28 +106,39 @@ impl CompareCommand { let mut error_msg = String::from("Unordered comparison failed:\n"); if !unmatched_chunks1.is_empty() { - error_msg.push_str(&format!( - "\n{} chunk(s) from {path_to_rrd1:?} could not be matched:\n", + writeln!( + error_msg, + "\n{} chunk(s) from {path_to_rrd1:?} could not be matched:", unmatched_chunks1.len() - )); + ) + .ok(); for chunk in &unmatched_chunks1 { - error_msg.push_str(&format!("{}\n", format_chunk(chunk))); + writeln!(error_msg, "{}", format_chunk(chunk)).ok(); } } if !chunks2_remaining.is_empty() { - error_msg.push_str(&format!( - "\n{} chunk(s) from {path_to_rrd2:?} could not be matched:\n", + writeln!( + error_msg, + "\n{} chunk(s) from {path_to_rrd2:?} could not be matched:", chunks2_remaining.len() - )); + ) + .ok(); for chunk in &chunks2_remaining { - error_msg.push_str(&format!("{}\n", format_chunk(chunk))); + writeln!(error_msg, "{}", format_chunk(chunk)).ok(); } } anyhow::bail!(error_msg); } } else { + anyhow::ensure!( + chunks1.len() == chunks2.len(), + "Number of Chunks does not match: '{}' vs. 
'{}'", + re_format::format_uint(chunks1.len()), + re_format::format_uint(chunks2.len()), + ); + for (chunk1, chunk2) in izip!(chunks1, chunks2) { re_chunk::Chunk::ensure_similar(&chunk1, &chunk2).with_context(|| { format!( diff --git a/crates/top/rerun/src/commands/rrd/merge_compact.rs b/crates/top/rerun/src/commands/rrd/merge_optimize.rs similarity index 66% rename from crates/top/rerun/src/commands/rrd/merge_compact.rs rename to crates/top/rerun/src/commands/rrd/merge_optimize.rs index 86dbd16b9b9e..534135c2a98e 100644 --- a/crates/top/rerun/src/commands/rrd/merge_compact.rs +++ b/crates/top/rerun/src/commands/rrd/merge_optimize.rs @@ -2,7 +2,8 @@ use std::io::{IsTerminal as _, Write as _}; use anyhow::Context as _; use itertools::Either; -use re_chunk_store::{ChunkStore, ChunkStoreConfig, ChunkStoreError}; +use re_byte_size::SizeBytes as _; +use re_chunk_store::{ChunkStoreConfig, CompactionOptions, IsStartOfGop}; use re_entity_db::EntityDb; use re_log_types::StoreId; use re_sdk::StoreKind; @@ -46,11 +47,10 @@ impl MergeCommand { // (e.g. by recompacting it differently), so make sure to disable all these features. let store_config = ChunkStoreConfig::ALL_DISABLED; - let num_passes = 0; merge_and_compact( - num_passes, *continue_on_error, &store_config, + None, // no compaction for merge path_to_input_rrds, path_to_output_rrd.as_ref(), ) @@ -60,7 +60,7 @@ impl MergeCommand { // --- #[derive(Debug, Clone, clap::Parser)] -pub struct CompactCommand { +pub struct OptimizeCommand { /// Paths to read from. Reads from standard input if none are specified. path_to_input_rrds: Vec, @@ -109,9 +109,34 @@ pub struct CompactCommand { /// If set, will try to proceed even in the face of IO and/or decoding errors in the input data. #[clap(long = "continue-on-error", default_value_t = false)] continue_on_error: bool, + + /// Disable rebatching of video stream chunks to GoP (Group of Pictures) boundaries. 
+ /// + /// By default, after compaction, video stream chunks are rebatched on GoP + /// boundaries so that each chunk contains one or more complete GoPs. + /// This flag disables that behavior. + /// + /// Note: GoP rebatching never splits a GoP across chunks, so streams with + /// long keyframe intervals (e.g. 10+ seconds between I-frames) can produce + /// chunks much larger than `--max-bytes`. + #[clap(long = "no-rebatch-videos", default_value_t = false)] + no_rebatch_videos: bool, + + /// If set, split chunks so no two archetype groups sharing a chunk differ in + /// byte size by more than this factor. Values should be `>= 1`; at `1.0`, + /// every archetype is forced into its own chunk. + /// + /// This keeps "thick" columns (images, videos, blobs) out of the same chunk as + /// "thin" columns (scalars, transforms, text), so the viewer can fetch just the + /// thin data without dragging along the thick payload. Components belonging to + /// the same archetype are always kept together. + /// + /// A good starting value is 10.0. If unset, no thick/thin split is performed. 
+ #[arg(long = "split-size-ratio")] + split_size_ratio: Option, } -impl CompactCommand { +impl OptimizeCommand { pub fn run(&self) -> anyhow::Result<()> { let Self { path_to_input_rrds, @@ -121,6 +146,8 @@ impl CompactCommand { max_rows_if_unsorted, num_extra_passes, continue_on_error, + no_rebatch_videos, + split_size_ratio, } = self; if path_to_output_rrd.is_none() { @@ -145,20 +172,57 @@ impl CompactCommand { store_config.chunk_max_rows_if_unsorted = *max_rows_if_unsorted; } + let is_start_of_gop: IsStartOfGop = std::sync::Arc::new(|data, codec| { + re_video::is_start_of_gop(data, codec.into()).map_err(|err| anyhow::anyhow!(err)) + }); + + let compaction_options = CompactionOptions { + config: store_config.clone(), + num_extra_passes: Some(*num_extra_passes as usize), + is_start_of_gop: if *no_rebatch_videos { + None + } else { + Some(is_start_of_gop) + }, + split_size_ratio: *split_size_ratio, + }; + merge_and_compact( - *num_extra_passes, *continue_on_error, &store_config, + Some(&compaction_options), path_to_input_rrds, path_to_output_rrd.as_ref(), ) } } +// --- + +/// Stub for the old `rerun rrd compact` name. Accepts any arguments and errors out with a +/// message pointing at the new name, so users who've scripted the old name get a clear hint. +#[derive(Debug, Clone, clap::Parser)] +pub struct CompactCommand { + #[arg(trailing_var_arg = true, allow_hyphen_values = true, num_args = 0..)] + _ignored: Vec, +} + +impl CompactCommand { + #[expect(clippy::unused_self)] + pub fn run(&self) -> anyhow::Result<()> { + anyhow::bail!( + "`rerun rrd compact` has been renamed to `rerun rrd optimize`. \ + Please run `rerun rrd optimize --help` for usage." 
+ ) + } +} + +// --- + fn merge_and_compact( - num_passes: u32, continue_on_error: bool, store_config: &ChunkStoreConfig, + compaction_options: Option<&CompactionOptions>, path_to_input_rrds: &[String], path_to_output_rrd: Option<&String>, ) -> anyhow::Result<()> { @@ -191,18 +255,15 @@ fn merge_and_compact( match res { Ok(msg) => { num_chunks_before += matches!(msg, re_log_types::LogMsg::ArrowMsg(_, _)) as u64; - if let Err(err) = entity_dbs - .entry(msg.store_id().clone()) - .or_insert_with(|| { - let enable_viewer_indexes = false; // that would just slow us down for no reason - re_entity_db::EntityDb::with_store_config( - msg.store_id().clone(), - enable_viewer_indexes, - store_config.clone(), - ) - }) - .add_log_msg(&msg) - { + let db = entity_dbs.entry(msg.store_id().clone()).or_insert_with(|| { + let enable_viewer_indexes = false; // that would just slow us down for no reason + re_entity_db::EntityDb::with_store_config( + msg.store_id().clone(), + enable_viewer_indexes, + store_config.clone(), + ) + }); + if let Err(err) = db.add_log_msg(&msg) { re_log::error!(%err, "couldn't index corrupt chunk"); is_success = false; } @@ -232,34 +293,27 @@ fn merge_and_compact( } } - for pass in 0..num_passes { - re_log::info!(pass, "running extra compaction pass…"); - + if let Some(compaction_options) = compaction_options { let now = std::time::Instant::now(); let num_chunks_before = entity_dbs .values() .map(|db| db.storage_engine().store().num_physical_chunks() as u64) .sum::(); - let mut num_chunks_after = 0; - entity_dbs = entity_dbs - .into_iter() - .map(|(store_id, db)| { - // Safety: we are the only owners of that data, it's fine. 
- #[expect(unsafe_code)] - let engine = unsafe { db.storage_engine_raw() }; - - let mut store = ChunkStore::new(store_id.clone(), store_config.clone()); - for chunk in engine.read().store().iter_physical_chunks() { - store.insert_chunk(chunk)?; - } - num_chunks_after += store.num_physical_chunks() as u64; - *engine.write().store() = store; + for db in entity_dbs.values() { + // Safety: we are the only owners of that data, it's fine. + #[expect(unsafe_code)] + let engine = unsafe { db.storage_engine_raw() }; - Ok::<_, ChunkStoreError>((store_id, db)) - }) - .collect::>()?; + let compacted = engine.read().store().compacted(compaction_options)?; + *engine.write().store() = compacted; + } + + let num_chunks_after = entity_dbs + .values() + .map(|db| db.storage_engine().store().num_physical_chunks() as u64) + .sum::(); let num_chunks_reduction = format!( "-{:3.3}%", @@ -267,16 +321,13 @@ fn merge_and_compact( ); re_log::info!( - pass, num_chunks_before, num_chunks_after, num_chunks_reduction, time=?now.elapsed(), - "extra compaction pass completed", + num_chunks_before, num_chunks_after, num_chunks_reduction, time=?now.elapsed(), + "compaction completed", ); - - if num_chunks_before == num_chunks_after { - re_log::info!(pass, time=?now.elapsed(), "cannot possibly improve further, stopping early"); - break; - } } + log_chunk_size_stats(&entity_dbs, "post-compaction"); + let mut rrd_out = if let Some(path) = path_to_output_rrd { Either::Left(std::io::BufWriter::new( std::fs::File::create(path).with_context(|| format!("{path:?}"))?, @@ -339,14 +390,44 @@ fn merge_and_compact( re_log::info!( srcs = ?path_to_input_rrds, time = ?now.elapsed(), - num_chunks_before, - num_chunks_after, - num_chunks_reduction, - srcs_size_bytes = %file_size_to_string(rrds_in_size), - dst_size_bytes = %file_size_to_string(Some(rrd_out_size)), - size_reduction, - "merge/compaction finished" + "merge/compaction finished. 
Chunk count {} -> {} ({num_chunks_reduction}), size {} -> {} ({size_reduction})", + re_format::format_uint(num_chunks_before), + re_format::format_uint(num_chunks_after), + file_size_to_string(rrds_in_size), + file_size_to_string(Some(rrd_out_size)), ); Ok(()) } + +fn log_chunk_size_stats(entity_dbs: &std::collections::HashMap, label: &str) { + let mut min_bytes = u64::MAX; + let mut max_bytes = 0u64; + let mut total_bytes = 0u64; + let mut num_chunks = 0u64; + + for db in entity_dbs.values() { + for chunk in db.storage_engine().store().iter_physical_chunks() { + let size = chunk.heap_size_bytes(); + min_bytes = min_bytes.min(size); + max_bytes = max_bytes.max(size); + total_bytes += size; + num_chunks += 1; + } + } + + if num_chunks == 0 { + return; + } + + let avg_bytes = total_bytes / num_chunks; + + re_log::info!( + num_chunks, + min = %re_format::format_bytes(min_bytes as _), + max = %re_format::format_bytes(max_bytes as _), + avg = %re_format::format_bytes(avg_bytes as _), + total = %re_format::format_bytes(total_bytes as _), + "{label} chunk size stats", + ); +} diff --git a/crates/top/rerun/src/commands/rrd/mod.rs b/crates/top/rerun/src/commands/rrd/mod.rs index 6c1e47dd9180..bb6ee703d391 100644 --- a/crates/top/rerun/src/commands/rrd/mod.rs +++ b/crates/top/rerun/src/commands/rrd/mod.rs @@ -1,6 +1,6 @@ mod compare; mod filter; -mod merge_compact; +mod merge_optimize; mod migrate; mod print; mod route; @@ -14,7 +14,7 @@ use clap::Subcommand; use self::compare::CompareCommand; use self::filter::FilterCommand; -use self::merge_compact::{CompactCommand, MergeCommand}; +use self::merge_optimize::{CompactCommand, MergeCommand, OptimizeCommand}; use self::migrate::MigrateCommand; use self::print::PrintCommand; use self::route::RouteCommand; @@ -25,26 +25,6 @@ use self::verify::VerifyCommand; /// Manipulate the contents of .rrd and .rbl files. 
#[derive(Debug, Clone, Subcommand)] pub enum RrdCommands { - /// Compacts the contents of one or more .rrd/.rbl files/streams and writes the result standard output. - /// - /// Reads from standard input if no paths are specified. - /// - /// Uses the usual environment variables to control the compaction thresholds: - /// `RERUN_CHUNK_MAX_ROWS`, - /// `RERUN_CHUNK_MAX_ROWS_IF_UNSORTED`, - /// `RERUN_CHUNK_MAX_BYTES`. - /// - /// Unless explicit flags are passed, in which case they will override environment values. - /// - /// ⚠️ This will automatically migrate the data to the latest version of the RRD protocol, if needed. ⚠️ - /// - /// Examples: - /// - /// * `RERUN_CHUNK_MAX_ROWS=4096 RERUN_CHUNK_MAX_BYTES=1048576 rerun rrd compact /my/recordings/*.rrd -o output.rrd` - /// - /// * `rerun rrd compact --max-rows 4096 --max-bytes=1048576 /my/recordings/*.rrd > output.rrd` - Compact(CompactCommand), - /// Compares the data between 2 .rrd files, returning a successful shell exit code if they /// match. /// @@ -60,13 +40,6 @@ pub enum RrdCommands { /// Example: `rerun rrd filter --drop-timeline log_tick /my/recordings/*.rrd > output.rrd` Filter(FilterCommand), - /// Optimally splits a recording on a specified timeline. - /// - /// The sum of the generated splits will always exactly match the original recording. - /// - /// Example: `rerun rrd split --output-dir ./splits --timeline log_tick --time 33 --time 66 ./my_video.rrd` - Split(SplitCommand), - /// Merges the contents of multiple .rrd/.rbl files/streams, and writes the result to standard output. /// /// Reads from standard input if no paths are specified. @@ -82,6 +55,33 @@ pub enum RrdCommands { /// Results in a `foo.backup.rrd` (copy of the old file) and a new `foo.rrd` (migrated). Migrate(MigrateCommand), + /// Optimizes the contents of one or more .rrd/.rbl files/streams by compacting chunks, and writes the result to standard output. + /// + /// Reads from standard input if no paths are specified. 
+ /// + /// Uses the usual environment variables to control the compaction thresholds: + /// `RERUN_CHUNK_MAX_ROWS`, + /// `RERUN_CHUNK_MAX_ROWS_IF_UNSORTED`, + /// `RERUN_CHUNK_MAX_BYTES`. + /// + /// Unless explicit flags are passed, in which case they will override environment values. + /// + /// Video stream chunks are also rebatched on GoP (keyframe) boundaries so that each + /// chunk holds one or more complete GoPs. Pass `--no-rebatch-videos` to disable that. + /// + /// ⚠️ This will automatically migrate the data to the latest version of the RRD protocol, if needed. ⚠️ + /// + /// Examples: + /// + /// * `RERUN_CHUNK_MAX_ROWS=4096 RERUN_CHUNK_MAX_BYTES=1048576 rerun rrd optimize /my/recordings/*.rrd -o output.rrd` + /// + /// * `rerun rrd optimize --max-rows 4096 --max-bytes=1048576 /my/recordings/*.rrd > output.rrd` + Optimize(OptimizeCommand), + + /// Deprecated: renamed to `optimize`. + #[command(hide = true)] + Compact(CompactCommand), + /// Print the contents of one or more .rrd/.rbl files/streams. /// /// Reads from standard input if no paths are specified. @@ -97,6 +97,13 @@ pub enum RrdCommands { /// Note: Because the payload of the messages is never decoded, no migration or verification will performed. Route(RouteCommand), + /// Optimally splits a recording on a specified timeline. + /// + /// The sum of the generated splits will always exactly match the original recording. + /// + /// Example: `rerun rrd split --output-dir ./splits --timeline log_tick --time 33 --time 66 ./my_video.rrd` + Split(SplitCommand), + /// Compute important statistics for one or more .rrd/.rbl files/streams. /// /// Reads from standard input if no paths are specified. @@ -118,6 +125,7 @@ impl RrdCommands { // Print current directory, this can be useful for debugging issues with relative paths. 
.with_context(|| format!("current directory {:?}", std::env::current_dir())) } + Self::Optimize(cmd) => cmd.run(), Self::Compact(cmd) => cmd.run(), Self::Filter(cmd) => cmd.run(), Self::Split(cmd) => cmd.run(), diff --git a/crates/top/rerun/src/commands/rrd/route.rs b/crates/top/rerun/src/commands/rrd/route.rs index ff0c4e3a45f1..33d74120a4f5 100644 --- a/crates/top/rerun/src/commands/rrd/route.rs +++ b/crates/top/rerun/src/commands/rrd/route.rs @@ -191,7 +191,7 @@ fn process_messages( #[expect(unsafe_code)] let (byte_span_excluding_header, byte_size_uncompressed) = unsafe { // Reminder: this will implicitly discard RRD footers. - encoder.append_transport(&msg)? + encoder.append_transport_without_footer(&msg)? }; if let re_protos::log_msg::v1alpha1::log_msg::Msg::ArrowMsg(arrow_msg) = msg { diff --git a/crates/top/rerun/src/commands/rrd/split.rs b/crates/top/rerun/src/commands/rrd/split.rs index 8ed172ae031c..0c2205211659 100644 --- a/crates/top/rerun/src/commands/rrd/split.rs +++ b/crates/top/rerun/src/commands/rrd/split.rs @@ -437,7 +437,7 @@ impl SplitCommand { // // Note that this is across *all recordings* in the file/stream. 
let mut known_timelines: BTreeMap = Default::default(); - for (name, timeline) in stores.values().flat_map(|store| store.timelines()) { + for (name, timeline) in stores.values().flat_map(|store| store.schema().timelines()) { if let Some(existing) = known_timelines.insert(name, timeline) { anyhow::ensure!( existing == timeline, @@ -570,6 +570,7 @@ impl SplitCommand { .into_iter() .filter_map(|entity| { store + .schema() .all_components_for_entity(&entity) .map(|components| (entity, components)) }) @@ -832,8 +833,8 @@ fn extract_keyframes( }; let sample = sample.0.inner().as_slice(); - match re_video::detect_gop_start(sample, codec.into()) { - Ok(re_video::GopStartDetection::StartOfGop(_)) => { + match re_video::is_start_of_gop(sample, codec.into()) { + Ok(true) => { re_log::debug!( entity = %entity_path, time = %time_to_human_string(cutoff_timeline, time), @@ -843,7 +844,7 @@ fn extract_keyframes( keyframes.push(time); } - Ok(re_video::GopStartDetection::NotStartOfGop) => {} + Ok(false) => {} Err(err) => { re_log::warn!(entity = %entity_path, chunk = %chunk.id(), %err, "keyframe detection failed"); @@ -897,8 +898,7 @@ fn extract_chunks_for_single_split( ) .chunks .into_iter() - .map(|chunk| chunk.latest_at(&query_bootstrap, *component)) - .filter(|chunk| !chunk.is_empty()); + .filter_map(|chunk| chunk.latest_at(&query_bootstrap, *component)); // Due to the overlap heuristics, the bootstrap query might return an arbitrary amount of // chunks: we need to find the most relevant in those, which in this case is whichever has @@ -1013,16 +1013,7 @@ fn extract_chunks_for_single_split( chunk.id(), chunk // Reminder: always perform deep copies if the intent is to write back to disk. - .row_sliced_deep(start_idx, slice_len) - // We must generate a new chunk ID due to the persistent slicing. - // The row IDs are safe from duplicates, since we slice the same way for all components. - // The special cases have non-overlapping time spans, and thus are safe too. 
- // - // This might lead to duplicated data if all the splits are loaded into the same viewer, - // but that's certainly better than missing data. - // TODO(cmc): shared recording IDs have been forbidden for now because they caused too many - // problems with the video decoder, so that last statement doesn't apply anymore, for now. - .with_id(ChunkId::new()), + .row_sliced_deep(start_idx, slice_len), ) }; diff --git a/crates/top/rerun/src/commands/rrd/stats.rs b/crates/top/rerun/src/commands/rrd/stats.rs index 71d0dd3c18fe..1dbae6618412 100644 --- a/crates/top/rerun/src/commands/rrd/stats.rs +++ b/crates/top/rerun/src/commands/rrd/stats.rs @@ -48,13 +48,16 @@ impl StatsCommand { let (rx_raw, _) = read_raw_rrd_streams_from_file_or_stdin(path_to_input_rrds); - let (tx_uncompressed, rx_uncompressed) = crossbeam::channel::bounded(100); + // Each message is accompanied by the original compressed payload size (in bytes). + // For uncompressed messages, this equals the payload size. + let (tx_uncompressed, rx_uncompressed) = + crossbeam::channel::bounded::<(u64, anyhow::Result)>(100); let decompress_thread_handle = std::thread::Builder::new() .name("decompress".to_owned()) .spawn(move || { for (_source, res) in rx_raw { let Ok(Msg::ArrowMsg(mut msg)) = res else { - send_crossbeam(&tx_uncompressed, res)?; + send_crossbeam(&tx_uncompressed, (0, res))?; continue; }; @@ -64,6 +67,8 @@ impl StatsCommand { re_protos::common::v1alpha1::Compression::None as _; const COMPRESSION_LZ4: i32 = re_protos::common::v1alpha1::Compression::Lz4 as _; + let compressed_size = msg.payload.len() as u64; + match msg.compression { COMPRESSION_NONE => {} @@ -82,7 +87,10 @@ impl StatsCommand { send_crossbeam( &tx_uncompressed, - Ok(re_protos::log_msg::v1alpha1::log_msg::Msg::ArrowMsg(msg)), + ( + compressed_size, + Ok(re_protos::log_msg::v1alpha1::log_msg::Msg::ArrowMsg(msg)), + ), )?; } @@ -92,13 +100,13 @@ impl StatsCommand { re_log::info!("processing input…"); let mut num_msgs = 0; let mut 
last_checkpoint = std::time::Instant::now(); - for res in rx_uncompressed { + for (compressed_size, res) in rx_uncompressed { let mut is_success = true; match res { Ok(msg) => { num_msgs += 1; - match compute_stats(!*no_decode, &msg) { + match compute_stats(!*no_decode, compressed_size, &msg) { Ok(Some(stats)) => { num_chunks += 1; @@ -348,13 +356,13 @@ struct ChunkStatsApplication { num_components: u64, } -fn compute_stats(app: bool, msg: &Msg) -> anyhow::Result> { +fn compute_stats(app: bool, compressed_size: u64, msg: &Msg) -> anyhow::Result> { if let Msg::ArrowMsg(arrow_msg) = msg { let re_protos::log_msg::v1alpha1::ArrowMsg { store_id: _, chunk_id: _, compression: _, - uncompressed_size, + uncompressed_size: _, encoding: _, payload, is_static: _, @@ -458,14 +466,16 @@ fn compute_stats(app: bool, msg: &Msg) -> anyhow::Result> { None }; + let ipc_size_bytes_uncompressed = payload.len() as u64; return Ok(Some(ChunkStats { app, transport: ChunkStatsTransport { - ipc_size_bytes_compressed: payload.len() as _, - ipc_size_bytes_uncompressed: *uncompressed_size, + ipc_size_bytes_compressed: compressed_size, + ipc_size_bytes_uncompressed, ipc_schema_size_bytes, - ipc_data_size_bytes: *uncompressed_size - ipc_schema_size_bytes, + ipc_data_size_bytes: ipc_size_bytes_uncompressed + .saturating_sub(ipc_schema_size_bytes), }, })); } diff --git a/crates/top/rerun/src/lib.rs b/crates/top/rerun/src/lib.rs index 681faae996a7..272133e1ef25 100644 --- a/crates/top/rerun/src/lib.rs +++ b/crates/top/rerun/src/lib.rs @@ -14,9 +14,9 @@ //! //! ## Links //! - [Examples](https://github.com/rerun-io/rerun/tree/latest/examples/rust) -//! - [High-level docs](http://rerun.io/docs) +//! - [High-level docs](https://rerun.io/docs) //! - [Rust API docs](https://docs.rs/rerun/) -//! - [Troubleshooting](https://www.rerun.io/docs/getting-started/troubleshooting) +//! - [Troubleshooting](https://www.rerun.io/docs/overview/installing-rerun/troubleshooting) //! //! 
There are many different ways of sending data to the Rerun Viewer depending on what you're //! trying to achieve and whether the viewer is running in the same process as your code, in @@ -148,16 +148,28 @@ pub use re_entity_db::external::re_chunk_store::{ }; pub use re_log_types::StoreKind; -/// To register a new external data loader, simply add an executable in your $PATH whose name +/// To register a new external importer, simply add an executable in your $PATH whose name /// starts with this prefix. // NOTE: this constant is duplicated in `re_data_source` to avoid an extra dependency here. -pub const EXTERNAL_DATA_LOADER_PREFIX: &str = "rerun-loader-"; +pub const EXTERNAL_IMPORTER_PREFIX: &str = "rerun-importer-"; -/// When an external `DataLoader` is asked to load some data that it doesn't know +/// When an external importer is asked to load some data that it doesn't know /// how to load, it should exit with this exit code. // NOTE: Always keep in sync with other languages. // NOTE: this constant is duplicated in `re_data_source` to avoid an extra dependency here. -pub const EXTERNAL_DATA_LOADER_INCOMPATIBLE_EXIT_CODE: i32 = 66; +pub const EXTERNAL_IMPORTER_INCOMPATIBLE_EXIT_CODE: i32 = 66; + +/// Deprecated alias for [`EXTERNAL_IMPORTER_PREFIX`]. +#[deprecated(since = "0.32.0", note = "Use `EXTERNAL_IMPORTER_PREFIX` instead.")] +pub const EXTERNAL_DATA_LOADER_PREFIX: &str = EXTERNAL_IMPORTER_PREFIX; + +/// Deprecated alias for [`EXTERNAL_IMPORTER_INCOMPATIBLE_EXIT_CODE`]. +#[deprecated( + since = "0.32.0", + note = "Use `EXTERNAL_IMPORTER_INCOMPATIBLE_EXIT_CODE` instead." +)] +pub const EXTERNAL_DATA_LOADER_INCOMPATIBLE_EXIT_CODE: i32 = + EXTERNAL_IMPORTER_INCOMPATIBLE_EXIT_CODE; /// Re-exports of other crates. 
pub mod external { diff --git a/crates/top/rerun_c/Cargo.toml b/crates/top/rerun_c/Cargo.toml index 6111cdb84f96..25f220cb3ec7 100644 --- a/crates/top/rerun_c/Cargo.toml +++ b/crates/top/rerun_c/Cargo.toml @@ -39,7 +39,7 @@ re_arrow_util.workspace = true re_build_info.workspace = true re_crash_handler.workspace = true re_log = { workspace = true, features = ["setup"] } -re_sdk = { workspace = true, features = ["data_loaders", "server"] } +re_sdk = { workspace = true, features = ["importers", "server"] } re_video.workspace = true ahash.workspace = true diff --git a/crates/top/rerun_c/src/lib.rs b/crates/top/rerun_c/src/lib.rs index 914f0eb4217a..af529a349f5d 100644 --- a/crates/top/rerun_c/src/lib.rs +++ b/crates/top/rerun_c/src/lib.rs @@ -456,7 +456,10 @@ fn rr_spawn_impl(spawn_opts: *const CSpawnOptions) -> Result<(), CError> { spawn_opts.as_rust()? }; + // Port is unused here — this function only spawns the viewer process. + // The C SDK connects separately via `rr_recording_stream_spawn`. re_sdk::spawn(&spawn_opts) + .map(drop) .map_err(|err| CError::new(CErrorCode::RecordingStreamSpawnFailure, &err.to_string()))?; Ok(()) @@ -813,10 +816,15 @@ fn rr_recording_stream_serve_grpc_impl( port: u16, server_memory_limit: CStringView, newest_first: bool, + cors_allow_origins: &[CStringView], ) -> Result<(), CError> { let stream = recording_stream(stream)?; let bind_ip = bind_ip.as_nonempty_str("bind_ip")?; + let cors_allowed_origins: Vec = cors_allow_origins + .iter() + .map(|s| Ok(s.as_nonempty_str("cors_allow_origin")?.to_owned())) + .collect::, CError>>()?; let server_options = re_sdk::ServerOptions { playback_behavior: re_sdk::PlaybackBehavior::from_newest_first(newest_first), @@ -824,6 +832,8 @@ fn rr_recording_stream_serve_grpc_impl( .as_maybe_empty_str("server_memory_limit")? 
.parse::() .map_err(|err| CError::new(CErrorCode::InvalidMemoryLimit, &err))?, + + cors_allowed_origins, }; stream @@ -840,17 +850,31 @@ fn rr_recording_stream_serve_grpc_impl( #[expect(unsafe_code)] #[unsafe(no_mangle)] -pub extern "C" fn rr_recording_stream_serve_grpc( +pub unsafe extern "C" fn rr_recording_stream_serve_grpc( id: CRecordingStream, bind_ip: CStringView, port: u16, server_memory_limit: CStringView, newest_first: bool, + cors_allow_origins: *const CStringView, + num_cors_allow_origins: u32, error: *mut CError, ) { - if let Err(err) = - rr_recording_stream_serve_grpc_impl(id, bind_ip, port, server_memory_limit, newest_first) - { + // SAFETY: the caller must ensure `cors_allow_origins` points to at least + // `num_cors_allow_origins` valid `CStringView` elements (or is null / count is 0). + let cors_allow_origins = if cors_allow_origins.is_null() || num_cors_allow_origins == 0 { + &[] + } else { + unsafe { std::slice::from_raw_parts(cors_allow_origins, num_cors_allow_origins as usize) } + }; + if let Err(err) = rr_recording_stream_serve_grpc_impl( + id, + bind_ip, + port, + server_memory_limit, + newest_first, + cors_allow_origins, + ) { err.write_error(error); } } diff --git a/crates/top/rerun_c/src/ptr.rs b/crates/top/rerun_c/src/ptr.rs index 52e2e607b617..e1c29078bc7e 100644 --- a/crates/top/rerun_c/src/ptr.rs +++ b/crates/top/rerun_c/src/ptr.rs @@ -54,6 +54,6 @@ pub fn try_char_ptr_as_str( // The byte slice is match std::str::from_utf8(byte_slice) { Ok(str) => Ok(str), - Err(utf8_error) => Err(CError::invalid_str_argument(argument_name, utf8_error)), + Err(utf8_err) => Err(CError::invalid_str_argument(argument_name, utf8_err)), } } diff --git a/crates/utils/re_analytics/src/lib.rs b/crates/utils/re_analytics/src/lib.rs index a698716bac02..98a933fc3970 100644 --- a/crates/utils/re_analytics/src/lib.rs +++ b/crates/utils/re_analytics/src/lib.rs @@ -39,7 +39,7 @@ use std::borrow::Cow; use std::collections::HashMap; use std::io::Error as IoError; use 
std::sync::OnceLock; -use std::sync::atomic::{AtomicI64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicI64, Ordering}; use std::time::Duration; use jiff::Timestamp; @@ -237,6 +237,9 @@ pub struct Analytics { default_append_props: HashMap, Property>, event_id: AtomicI64, + + /// Whether the user is currently logged in. + user_logged_in: AtomicBool, } #[cfg(not(target_arch = "wasm32"))] // NOTE: can't block on web @@ -250,7 +253,7 @@ impl Drop for Analytics { } } -fn load_config() -> Result { +fn load_config(_user_logged_in: bool) -> Result { let config = match Config::load() { Ok(config) => config, @@ -277,7 +280,10 @@ fn load_config() -> Result { #[cfg(not(target_arch = "wasm32"))] if config.is_first_run() { - eprintln!("{DISCLAIMER}"); + if !_user_logged_in { + // Only print the disclaimer if the user is not logged in. + eprintln!("{DISCLAIMER}"); + } config.save()?; re_log::trace!(?config, "saved analytics config"); @@ -301,8 +307,16 @@ impl Analytics { /// /// Return `None` if analytics is disabled or some error occurred. pub fn global_or_init() -> Option<&'static Self> { + Self::global_or_init_with_login_state(false) + } + + /// Get the global analytics instance, initializing it if it's not already initialized. + /// Also sets the login state of the user. + /// + /// Return `None` if analytics is disabled or some error occurred. + pub fn global_or_init_with_login_state(user_logged_in: bool) -> Option<&'static Self> { GLOBAL_ANALYTICS - .get_or_init(|| match Self::new(Duration::from_secs(2)) { + .get_or_init(|| match Self::new(Duration::from_secs(2), user_logged_in) { Ok(analytics) => Some(analytics), Err(err) => { re_log::error!("Failed to initialize analytics: {err}"); @@ -326,8 +340,8 @@ impl Analytics { /// Usually it is better to use [`Self::global_or_init`] instead of calling this directly, /// but there are cases where you might want to create a separate instance, /// e.g. for testing purposes, or when you want to use a different tick duration. 
- fn new(tick: Duration) -> Result { - let config = load_config()?; + fn new(tick: Duration, user_logged_in: bool) -> Result { + let config = load_config(user_logged_in)?; let pipeline = Pipeline::new(&config, tick)?; re_log::trace!("initialized analytics pipeline"); @@ -336,6 +350,7 @@ impl Analytics { default_append_props: Default::default(), pipeline, event_id: AtomicI64::new(1), // we skip 0 just to be explicit (zeroes can often be implicit) + user_logged_in: AtomicBool::new(user_logged_in), }) } @@ -366,6 +381,12 @@ impl Analytics { } } + /// Update whether the user is currently logged in. + pub fn set_logged_in(&self, logged_in: bool) { + self.user_logged_in + .store(logged_in, std::sync::atomic::Ordering::Relaxed); + } + /// Record an event. /// /// It will be extended with an `event_id`. @@ -393,6 +414,13 @@ pub fn record(cb: impl FnOnce() -> E) { } } +/// Update whether the user is currently logged in. +pub fn set_logged_in(logged_in: bool) { + if let Some(analytics) = Analytics::global_or_init_with_login_state(logged_in) { + analytics.set_logged_in(logged_in); + } +} + #[cfg(not(target_arch = "wasm32"))] pub fn record_and_flush_blocking(cb: impl FnOnce() -> E) { if let Some(analytics) = Analytics::global_or_init() { diff --git a/crates/utils/re_analytics/src/native/pipeline.rs b/crates/utils/re_analytics/src/native/pipeline.rs index 3ab074f36f84..6df0633ec013 100644 --- a/crates/utils/re_analytics/src/native/pipeline.rs +++ b/crates/utils/re_analytics/src/native/pipeline.rs @@ -164,7 +164,7 @@ impl Pipeline { fn try_send_event(event_tx: &channel::Sender, event: PipelineEvent) { match event_tx.try_send(event) { - Ok(_) => {} + Ok(()) => {} Err(channel::TrySendError::Full(_)) => { re_log::trace!("dropped event, analytics channel is full"); } @@ -222,10 +222,10 @@ fn flush_pending_events( sink, abort_signal, ) { - Ok(_) => { + Ok(()) => { re_log::trace!(%analytics_id, %session_id, ?path, "flushed pending events"); match std::fs::remove_file(&path) { - Ok(_) 
=> { + Ok(()) => { re_log::trace!(%analytics_id, %session_id, ?path, "removed session file"); } Err(err) => { diff --git a/crates/utils/re_arrow_util/src/arrays.rs b/crates/utils/re_arrow_util/src/arrays.rs index 900c54392332..15e1831e46ab 100644 --- a/crates/utils/re_arrow_util/src/arrays.rs +++ b/crates/utils/re_arrow_util/src/arrays.rs @@ -558,6 +558,7 @@ mod tests { use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{Field, UnionFields}; use arrow::ipc::writer::StreamWriter; + use std::fmt::Write as _; use std::sync::Arc; use super::*; @@ -658,7 +659,8 @@ mod tests { Arc::new(Field::new("f32", f32s.data_type().clone(), true)), Arc::new(Field::new("i64", i64s.data_type().clone(), true)), ]; - let union_fields = UnionFields::new(type_ids, fields); + let union_fields = + UnionFields::try_new(type_ids, fields).expect("UnionFields should be infallible"); let type_id_buffer = ScalarBuffer::from( (0..NUM_TOTAL as i32) @@ -700,7 +702,8 @@ mod tests { Arc::new(Field::new("f32", f32s.data_type().clone(), true)), Arc::new(Field::new("i64", i64s.data_type().clone(), true)), ]; - let union_fields = UnionFields::new(type_ids, fields); + let union_fields = + UnionFields::try_new(type_ids, fields).expect("UnionFields should be infallible"); let type_id_buffer = ScalarBuffer::from( (0..NUM_TOTAL as i32) @@ -780,7 +783,8 @@ mod tests { Arc::new(Field::new("f32_list", list_f32s.data_type().clone(), true)), Arc::new(Field::new("i64_list", list_i64s.data_type().clone(), true)), ]; - let union_fields = UnionFields::new(type_ids, fields); + let union_fields = + UnionFields::try_new(type_ids, fields).expect("UnionFields should be infallible"); let type_id_buffer = ScalarBuffer::from( (0..(NUM_TOTAL / NUM_PER_BATCH) as i32) @@ -869,7 +873,8 @@ mod tests { Arc::new(Field::new("f32_list", list_f32s.data_type().clone(), true)), Arc::new(Field::new("i64_list", list_i64s.data_type().clone(), true)), ]; - let union_fields = UnionFields::new(type_ids, fields); + let 
union_fields = UnionFields::try_new(type_ids, fields) + .expect("UnionFields::try_new should be infallible"); let type_id_buffer = ScalarBuffer::from( (0..(NUM_TOTAL / NUM_PER_BATCH) as i32) @@ -941,22 +946,28 @@ mod tests { let deep_sliced = deep_slice_array_erased(&array, offset, len); assert_eq!(&deep_sliced, &sliced); - output += &format!("{descr}:\n"); - output += &format!( - "array[0..]: {} / IPC={:6}\n", + writeln!(output, "{descr}:").ok(); + writeln!( + output, + "array[0..]: {} / IPC={:6}", dump_array_stats(&array), dump_array_to_ipc(array.clone()), - ); - output += &format!( - "slice[{from:5}..{to:5}]: {} / IPC={:6}\n", + ) + .ok(); + writeln!( + output, + "slice[{from:5}..{to:5}]: {} / IPC={:6}", dump_array_stats(&sliced), dump_array_to_ipc(sliced.clone()) - ); - output += &format!( - " deep[{from:5}..{to:5}]: {} / IPC={:6}\n", + ) + .ok(); + writeln!( + output, + " deep[{from:5}..{to:5}]: {} / IPC={:6}", dump_array_stats(&deep_sliced), dump_array_to_ipc(deep_sliced.clone()) - ); + ) + .ok(); output += "\n"; output diff --git a/crates/utils/re_arrow_util/src/batches.rs b/crates/utils/re_arrow_util/src/batches.rs index 556648581ea8..acdefaa133ae 100644 --- a/crates/utils/re_arrow_util/src/batches.rs +++ b/crates/utils/re_arrow_util/src/batches.rs @@ -498,7 +498,7 @@ mod tests { ); batch_concat.schema_metadata_mut().clear(); - insta::assert_debug_snapshot!(batch_concat, @r###" + insta::assert_debug_snapshot!(batch_concat, @r#" RecordBatch { schema: Schema { fields: [ @@ -506,33 +506,21 @@ mod tests { name: "col1", data_type: Int32, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, Field { name: "col2", data_type: Utf8, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, Field { name: "col3", data_type: Boolean, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, Field { name: "col4", data_type: UInt64, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, ], 
metadata: {}, @@ -565,7 +553,7 @@ mod tests { ], row_count: 3, } - "###); + "#); } #[test] diff --git a/crates/utils/re_arrow_util/src/compare.rs b/crates/utils/re_arrow_util/src/compare.rs index b106a4041117..41d54ebde3b5 100644 --- a/crates/utils/re_arrow_util/src/compare.rs +++ b/crates/utils/re_arrow_util/src/compare.rs @@ -2,8 +2,6 @@ use anyhow::{Context as _, bail, ensure}; use half::f16; use itertools::izip; -use crate::format_data_type; - /// Are two arrays equal, ignoring small numeric differences? /// /// Returns `Ok` if similar. @@ -38,7 +36,7 @@ pub fn ensure_similar( for (i, (left_buff, right_buff)) in izip!(left_buffers, right_buffers).enumerate() { ensure_buffers_equal(left_buff, right_buff, data_type) - .with_context(|| format!("Datatype {}", format_data_type(data_type))) + .with_context(|| format!("Datatype {data_type}")) .with_context(|| format!("Buffer {i}"))?; } } @@ -52,7 +50,7 @@ pub fn ensure_similar( for (i, (left_child, right_child)) in izip!(left_children, right_children).enumerate() { ensure_similar(left_child, right_child) - .with_context(|| format!("Datatype {}", format_data_type(data_type))) + .with_context(|| format!("Datatype {data_type}")) .with_context(|| format!("Child {i}"))?; } } diff --git a/crates/utils/re_arrow_util/src/format.rs b/crates/utils/re_arrow_util/src/format.rs index a781724bb79f..3e23047dd645 100644 --- a/crates/utils/re_arrow_util/src/format.rs +++ b/crates/utils/re_arrow_util/src/format.rs @@ -9,7 +9,21 @@ use comfy_table::{Cell, Row, Table, presets}; use itertools::{Either, Itertools as _}; use re_tuid::Tuid; -use crate::{ArrowArrayDowncastRef as _, format_field_datatype}; +use crate::ArrowArrayDowncastRef as _; + +// --- + +/// Format the datatype of a field (column) with optional nullability +pub fn format_field_datatype(field: &Field) -> String { + if field.is_nullable() { + field.data_type().to_string() + } else { + // This follows the notation set by arrow-rs. 
+ // If we change this, we should probably change + // arrow-rs and datafusion to match. + format!("non-null {}", field.data_type()) + } +} // --- diff --git a/crates/utils/re_arrow_util/src/format_data_type.rs b/crates/utils/re_arrow_util/src/format_data_type.rs deleted file mode 100644 index 67edd554b347..000000000000 --- a/crates/utils/re_arrow_util/src/format_data_type.rs +++ /dev/null @@ -1,173 +0,0 @@ -//! `arrow` has `ToString` implemented, but it is way too verbose. -//! -//! TODO(emilk): all this can go away once we update to Arrow 57. - -use std::fmt::Formatter; - -use arrow::datatypes::{DataType, Field, IntervalUnit, TimeUnit}; - -/// A wrapper around `DataType` that implements `Display` with a nice format. -/// -/// For use in error messages etc -#[derive(Clone)] -pub struct DisplayDataType(pub DataType); - -impl std::fmt::Display for DisplayDataType { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - DisplayDataTypeRef(&self.0).fmt(f) - } -} - -impl std::fmt::Debug for DisplayDataType { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - std::fmt::Display::fmt(self, f) - } -} - -impl From for DisplayDataType { - fn from(data_type: DataType) -> Self { - Self(data_type) - } -} - -/// Compact format of an arrow data type. 
-pub fn format_data_type(data_type: &DataType) -> String { - DisplayDataTypeRef(data_type).to_string() -} - -/// Format the datatype of a field (column) with optional nullability -pub fn format_field_datatype(field: &Field) -> String { - if field.is_nullable() { - format!("nullable {}", format_data_type(field.data_type())) - } else { - format_data_type(field.data_type()) - } -} - -#[repr(transparent)] -struct DisplayTimeUnit(TimeUnit); - -impl std::fmt::Display for DisplayTimeUnit { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let s = match self.0 { - TimeUnit::Second => "s", - TimeUnit::Millisecond => "ms", - TimeUnit::Microsecond => "us", - TimeUnit::Nanosecond => "ns", - }; - f.write_str(s) - } -} - -// arrow has `ToString` implemented, but it is way too verbose. -#[repr(transparent)] -struct DisplayIntervalUnit(IntervalUnit); - -impl std::fmt::Display for DisplayIntervalUnit { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let s = match self.0 { - IntervalUnit::YearMonth => "year/month", - IntervalUnit::DayTime => "day/time", - IntervalUnit::MonthDayNano => "month/day/nano", - }; - f.write_str(s) - } -} - -// arrow has `ToString` implemented, but it is way too verbose. 
-#[repr(transparent)] -struct DisplayDataTypeRef<'a>(&'a DataType); - -impl std::fmt::Display for DisplayDataTypeRef<'_> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let s = match &self.0 { - DataType::Null => "null", - DataType::Boolean => "bool", - DataType::Int8 => "i8", - DataType::Int16 => "i16", - DataType::Int32 => "i32", - DataType::Int64 => "i64", - DataType::UInt8 => "u8", - DataType::UInt16 => "u16", - DataType::UInt32 => "u32", - DataType::UInt64 => "u64", - DataType::Float16 => "f16", - DataType::Float32 => "f32", - DataType::Float64 => "f64", - DataType::Timestamp(unit, timezone) => { - let s = if let Some(tz) = timezone { - format!("Timestamp({}, {tz})", DisplayTimeUnit(*unit)) - } else { - format!("Timestamp({})", DisplayTimeUnit(*unit)) - }; - return f.write_str(&s); - } - DataType::Date32 => "Date32", - DataType::Date64 => "Date64", - DataType::Time32(unit) => { - let s = format!("Time32({})", DisplayTimeUnit(*unit)); - return f.write_str(&s); - } - DataType::Time64(unit) => { - let s = format!("Time64({})", DisplayTimeUnit(*unit)); - return f.write_str(&s); - } - DataType::Duration(unit) => { - let s = format!("Duration({})", DisplayTimeUnit(*unit)); - return f.write_str(&s); - } - DataType::Interval(unit) => { - let s = format!("Interval({})", DisplayIntervalUnit(*unit)); - return f.write_str(&s); - } - DataType::Binary => "Binary", - DataType::FixedSizeBinary(size) => return write!(f, "FixedSizeBinary[{size}]"), - DataType::LargeBinary => "LargeBinary", - DataType::Utf8 => "Utf8", - DataType::LargeUtf8 => "LargeUtf8", - DataType::List(field) => { - let s = format!("List[{}]", format_inner_field(field)); - return f.write_str(&s); - } - DataType::FixedSizeList(field, len) => { - let s = format!("FixedSizeList[{}; {len}]", format_inner_field(field)); - return f.write_str(&s); - } - DataType::LargeList(field) => { - let s = format!("LargeList[{}]", format_inner_field(field)); - return f.write_str(&s); - } - 
DataType::Struct(fields) => return write!(f, "Struct[{}]", fields.len()), - DataType::Union(fields, _) => return write!(f, "Union[{}]", fields.len()), - DataType::Map(field, _) => return write!(f, "Map[{}]", format_inner_field(field)), - DataType::Dictionary(key, value) => { - return write!(f, "Dictionary{{{}: {}}}", Self(key), Self(value)); - } - DataType::Decimal32(_, _) => "Decimal32", - DataType::Decimal64(_, _) => "Decimal64", - DataType::Decimal128(_, _) => "Decimal128", - DataType::Decimal256(_, _) => "Decimal256", - DataType::BinaryView => "BinaryView", - DataType::Utf8View => "Utf8View", - DataType::ListView(field) => { - return write!(f, "ListView[{}]", format_inner_field(field)); - } - DataType::LargeListView(field) => { - return write!(f, "LargeListView[{}]", format_inner_field(field)); - } - DataType::RunEndEncoded(_run_ends, values) => { - return write!(f, "RunEndEncoded[{}]", format_inner_field(values)); - } - }; - f.write_str(s) - } -} - -fn format_inner_field(field: &Field) -> String { - let datatype_display = DisplayDataTypeRef(field.data_type()); - if field.is_nullable() { - format!("nullable {datatype_display}") - } else { - datatype_display.to_string() - } -} diff --git a/crates/utils/re_arrow_util/src/lib.rs b/crates/utils/re_arrow_util/src/lib.rs index 0eb01d76c05c..c66347f11f48 100644 --- a/crates/utils/re_arrow_util/src/lib.rs +++ b/crates/utils/re_arrow_util/src/lib.rs @@ -4,7 +4,6 @@ mod arrays; mod batches; mod compare; mod format; -mod format_data_type; mod string_view; #[cfg(feature = "test")] mod test_extensions; @@ -19,10 +18,9 @@ pub use self::arrays::*; pub use self::batches::*; pub use self::compare::*; pub use self::format::{ - RecordBatchFormatOpts, format_record_batch, format_record_batch_opts, + RecordBatchFormatOpts, format_field_datatype, format_record_batch, format_record_batch_opts, format_record_batch_with_width, }; -pub use self::format_data_type::*; pub use self::string_view::*; #[cfg(feature = "test")] pub use 
self::test_extensions::*; @@ -93,6 +91,45 @@ mod tests { // ---------------------------------------------------------------- +/// Safety gate: reject [`DataType::Union`] in the checked type, and recursively within +/// nested [`DataType::Struct`], [`DataType::List`], [`DataType::LargeList`], and +/// [`DataType::FixedSizeList`] children. +/// +/// This guards merges that would let `Field::try_merge` produce a shape the read-side +/// aligner ([`align_record_batch_to_schema`](../../re_dataframe/utils/fn.align_record_batch_to_schema.html)) +/// cannot adapt. In particular, `try_merge` has a recursive Union arm that can widen +/// children, but the aligner has no Union branch. +/// +/// Known over-rejection: this check inspects only a single datatype tree, not both the +/// current and incoming shapes, so it cannot tell "Union about to widen" (unsafe) from +/// "Union identical across partitions, only a sibling field widens" (would be safe — the +/// aligner's fast-path handles identical Unions). A two-tree check could close this gap; see +/// the `union_over_rejected_when_only_a_sibling_widens` test for a pinned example. In +/// practice this over-rejection only surfaces when a field that *contains* a Union also +/// changes in some unrelated way across partitions. +/// +/// Callers use this as a pre-merge guard on the datatypes they are about to merge. 
+pub fn reject_unsupported_widenings(dt: &DataType) -> Result<(), arrow::error::ArrowError> { + match dt { + DataType::Union(_, _) => Err(arrow::error::ArrowError::SchemaError( + "union-typed fields in the checked datatype are not supported for schema merging" + .to_owned(), + )), + DataType::Struct(fields) => { + for f in fields { + reject_unsupported_widenings(f.data_type())?; + } + Ok(()) + } + DataType::List(f) | DataType::LargeList(f) | DataType::FixedSizeList(f, _) => { + reject_unsupported_widenings(f.data_type()) + } + _ => Ok(()), + } +} + +// ---------------------------------------------------------------- + /// Error used when a column is missing from a record batch #[derive(Debug, Clone, thiserror::Error)] pub struct MissingColumnError { @@ -148,3 +185,100 @@ impl std::fmt::Display for WrongDatatypeError { } } } + +#[cfg(test)] +mod reject_unsupported_widenings_tests { + use super::*; + use arrow::datatypes::{DataType, Field, Fields}; + + fn small_union_type() -> DataType { + use arrow::datatypes::UnionFields; + let fields = UnionFields::try_new(vec![0], vec![Field::new("a", DataType::Int32, true)]) + .expect("valid union fields"); + DataType::Union(fields, arrow::datatypes::UnionMode::Sparse) + } + + #[test] + fn top_level_union_rejected() { + let err = reject_unsupported_widenings(&small_union_type()).unwrap_err(); + assert!(err.to_string().contains("union-typed"), "msg: {err}"); + } + + #[test] + fn union_nested_inside_struct_rejected() { + let struct_type = DataType::Struct(Fields::from(vec![ + Field::new("a", DataType::Int32, true), + Field::new("u", small_union_type(), true), + ])); + let err = reject_unsupported_widenings(&struct_type).unwrap_err(); + assert!(err.to_string().contains("union-typed"), "msg: {err}"); + } + + #[test] + fn union_nested_inside_list_rejected() { + let list_of_union = DataType::List(Arc::new(Field::new("item", small_union_type(), true))); + let err = reject_unsupported_widenings(&list_of_union).unwrap_err(); + 
assert!(err.to_string().contains("union-typed"), "msg: {err}"); + } + + /// Documents a known over-rejection that surfaces in `re_server`'s `add_layer` flow + /// (`crates/store/re_server/src/store/dataset.rs`): when a new field differs from the + /// current one by a non-Union sibling, `Schema::try_merge` would accept the pair cleanly + /// and preserve any identical Union subtree untouched, but `reject_unsupported_widenings` + /// walks only the new field and cannot distinguish "safe identical Union" from "unsafe + /// widening Union" — so it rejects unconditionally. + /// + /// Closing this gap would require the gate to see both the current and new schemas and + /// only reject at positions where the Union actually differs. If this test ever flips + /// (i.e., the gate accepts), the aligner's Union handling must be re-verified end-to-end, + /// including `arrow::array::new_null_array(DataType::Union(...), n)` behavior for the + /// partition-missing-column null-pad path. + #[test] + fn union_over_rejected_when_only_a_sibling_widens() { + use std::collections::HashMap; + + use arrow::datatypes::Schema; + + // Two structs with an identical Union child and a sibling whose nullability widens. + let narrow_struct = DataType::Struct(Fields::from(vec![ + Field::new("a", DataType::Int32, false), + Field::new("u", small_union_type(), true), + ])); + let wide_struct = DataType::Struct(Fields::from(vec![ + Field::new("a", DataType::Int32, true), + Field::new("u", small_union_type(), true), + ])); + + // `try_merge` is happy: `a` widens; the Union passes through unchanged. + let lhs = + Schema::new_with_metadata(vec![Field::new("s", narrow_struct, true)], HashMap::new()); + let rhs = Schema::new_with_metadata( + vec![Field::new("s", wide_struct.clone(), true)], + HashMap::new(), + ); + Schema::try_merge([lhs, rhs]) + .expect("try_merge accepts: Union identical, only sibling widens"); + + // The Rerun gate rejects, even though `try_merge` would not widen the Union. 
+ let err = reject_unsupported_widenings(&wide_struct).unwrap_err(); + assert!(err.to_string().contains("union-typed"), "msg: {err}"); + } + + #[test] + fn plain_schema_accepted() { + let schema = DataType::Struct(Fields::from(vec![ + Field::new("a", DataType::Int32, true), + Field::new( + "b", + DataType::List(Arc::new(Field::new("item", DataType::Utf8, false))), + true, + ), + Field::new( + "c", + DataType::Struct(Fields::from(vec![Field::new("d", DataType::Int64, false)])), + true, + ), + ])); + assert!(reject_unsupported_widenings(&schema).is_ok()); + } +} diff --git a/crates/utils/re_arrow_util/src/test_extensions.rs b/crates/utils/re_arrow_util/src/test_extensions.rs index a7cd2081b661..a4c5d31470a0 100644 --- a/crates/utils/re_arrow_util/src/test_extensions.rs +++ b/crates/utils/re_arrow_util/src/test_extensions.rs @@ -390,15 +390,15 @@ impl SchemaTestExt for arrow::datatypes::Schema { format!( "{}: {}{}", field.name(), - if field.is_nullable() { "nullable " } else { "" }, - crate::format_data_type(field.data_type()) + if field.is_nullable() { "" } else { "non-null " }, + field.data_type() ) } else { format!( "{}: {}{} [\n {}\n]", field.name(), - if field.is_nullable() { "nullable " } else { "" }, - crate::format_data_type(field.data_type()), + if field.is_nullable() { "" } else { "non-null " }, + field.data_type(), field .metadata() .iter() diff --git a/crates/utils/re_auth/Cargo.toml b/crates/utils/re_auth/Cargo.toml index d9c2b5267e79..7c519ac4f0f4 100644 --- a/crates/utils/re_auth/Cargo.toml +++ b/crates/utils/re_auth/Cargo.toml @@ -24,7 +24,13 @@ workspace = true [features] cli = ["dep:indicatif", "dep:webbrowser", "oauth"] -oauth = ["dep:directories", "dep:ehttp", "dep:getrandom", "dep:sha2", "dep:tiny_http", "dep:uuid"] +oauth = ["dep:directories", "dep:ehttp", "dep:getrandom", "dep:ring", "dep:tiny_http", "dep:uuid"] + +[package.metadata.cargo-shear] +ignored = [ + "getrandom02", # transitive dependency +] + [dependencies] re_analytics.workspace = 
true @@ -34,8 +40,12 @@ async-trait.workspace = true base64.workspace = true http.workspace = true jiff = { workspace = true, features = ["serde"] } +hmac.workspace = true jsonwebtoken.workspace = true +ring = { workspace = true, optional = true } parking_lot.workspace = true +sha2.workspace = true +signature.workspace = true saturating_cast.workspace = true serde.workspace = true serde_json.workspace = true @@ -48,11 +58,10 @@ url.workspace = true ## Optional dependencies getrandom = { workspace = true, optional = true } indicatif = { workspace = true, optional = true } -sha2 = { workspace = true, optional = true } uuid = { workspace = true, optional = true, features = ["v4"] } webbrowser = { workspace = true, optional = true } - +# Native: [target.'cfg(not(target_arch = "wasm32"))'.dependencies] directories = { workspace = true, optional = true } ehttp = { workspace = true, optional = true, features = ["json", "native-async"] } @@ -60,11 +69,13 @@ rand = { workspace = true, features = ["std", "std_rng", "os_rng"] } serde.workspace = true tiny_http = { workspace = true, optional = true } +# Web: [target.'cfg(target_arch = "wasm32")'.dependencies] ehttp = { workspace = true, optional = true, features = ["json"] } js-sys.workspace = true wasm-bindgen.workspace = true web-sys = { workspace = true, features = ["Location", "Storage", "Window"] } +getrandom02 = { workspace = true, features = ["js"] } [dev-dependencies] rand = { workspace = true, features = ["std", "std_rng"] } diff --git a/crates/utils/re_auth/src/claims.rs b/crates/utils/re_auth/src/claims.rs new file mode 100644 index 000000000000..d2164791b2d2 --- /dev/null +++ b/crates/utils/re_auth/src/claims.rs @@ -0,0 +1,237 @@ +use crate::Permission; + +#[derive(Debug, serde::Serialize, serde::Deserialize)] +pub struct RedapClaims { + /// The issuer of the token. + /// + /// Could be an identity provider or the storage node directly. + pub iss: String, + + /// The subject (user) of the token. 
+ pub sub: String, + + /// The `aud` claim, identifying the intended consumer of the token. + /// + /// Typically set to `"redap"` for Rerun storage-node tokens. + /// Per RFC 7519, this can be either a single string or an array of strings. + #[serde( + deserialize_with = "deser_string_or_vec", + serialize_with = "ser_string_or_vec" + )] + pub aud: Vec, + + /// Expiry time of the token. + pub exp: u64, + + /// Issued at time of the token. + pub iat: u64, + + #[serde(default)] + pub permissions: Vec, + + /// Host patterns this token is allowed to be sent to. + /// + /// Uses the same domain-matching semantics as [`crate::host_matches_pattern`]. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub allowed_hosts: Vec, +} + +#[derive(Debug, serde::Serialize, serde::Deserialize)] +#[serde(untagged)] +pub enum Claims { + #[cfg(feature = "oauth")] + RerunCloud(crate::oauth::RerunCloudClaims), + + Redap(RedapClaims), +} + +impl Claims { + /// Subject. An email if available, otherwise it's usually the user ID. + pub fn sub(&self) -> &str { + match self { + #[cfg(feature = "oauth")] + Self::RerunCloud(claims) => claims.email.as_deref().unwrap_or(claims.sub.as_str()), + Self::Redap(claims) => claims.sub.as_str(), + } + } + + /// Issuer + pub fn iss(&self) -> &str { + match self { + #[cfg(feature = "oauth")] + Self::RerunCloud(claims) => claims.iss.as_str(), + Self::Redap(claims) => claims.iss.as_str(), + } + } + + pub fn permissions(&self) -> &[Permission] { + match self { + #[cfg(feature = "oauth")] + Self::RerunCloud(claims) => &claims.permissions[..], + Self::Redap(claims) => &claims.permissions[..], + } + } + + pub fn has_read_permission(&self) -> bool { + self.permissions().iter().any(|p| p == &Permission::Read) + } + + pub fn has_write_permission(&self) -> bool { + self.permissions() + .iter() + .any(|p| p == &Permission::ReadWrite) + } +} + +/// Deserializes either a string of an array of strings into an array of strings. 
+fn deser_string_or_vec<'de, D>(deserializer: D) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + #[derive(serde::Deserialize)] + #[serde(untagged)] + enum StringOrVec { + One(String), + Many(Vec), + } + + use serde::Deserialize as _; + match StringOrVec::deserialize(deserializer)? { + StringOrVec::One(s) => Ok(vec![s]), + StringOrVec::Many(v) => Ok(v), + } +} + +/// Serializes an array of strings into either a single string if unary, or into an array of strings otherwise. +fn ser_string_or_vec(value: &Vec, serializer: S) -> Result +where + S: serde::Serializer, +{ + use serde::Serialize as _; + if value.len() == 1 { + serializer.serialize_str(&value[0]) + } else { + value.serialize(serializer) + } +} + +// --- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_aud_deserialize_single_string() { + let json = r#"{ + "iss": "test", + "sub": "user123", + "aud": "redap", + "exp": 1234567890, + "iat": 1234567890 + }"#; + + let claims: RedapClaims = serde_json::from_str(json).unwrap(); + assert_eq!(claims.aud, vec!["redap"]); + assert!(claims.allowed_hosts.is_empty()); + } + + #[test] + fn test_aud_deserialize_array() { + let json = r#"{ + "iss": "test", + "sub": "user123", + "aud": ["redap", "other-service"], + "exp": 1234567890, + "iat": 1234567890 + }"#; + + let claims: RedapClaims = serde_json::from_str(json).unwrap(); + assert_eq!(claims.aud, vec!["redap", "other-service"]); + } + + #[test] + fn test_aud_deserialize_empty_array() { + let json = r#"{ + "iss": "test", + "sub": "user123", + "aud": [], + "exp": 1234567890, + "iat": 1234567890 + }"#; + + let claims: RedapClaims = serde_json::from_str(json).unwrap(); + assert_eq!(claims.aud, Vec::::new()); + } + + #[test] + fn test_allowed_hosts_deserialize() { + let json = r#"{ + "iss": "test", + "sub": "user123", + "aud": "redap", + "exp": 1234567890, + "iat": 1234567890, + "allowed_hosts": ["api.acme.cloud.rerun.io"] + }"#; + + let claims: RedapClaims = serde_json::from_str(json).unwrap(); 
+ assert_eq!(claims.aud, vec!["redap"]); + assert_eq!(claims.allowed_hosts, vec!["api.acme.cloud.rerun.io"]); + } + + #[test] + fn test_aud_serialize_single() { + let claims = RedapClaims { + iss: "test".to_owned(), + sub: "user123".to_owned(), + aud: vec!["redap".to_owned()], + exp: 1234567890, + iat: 1234567890, + permissions: vec![], + allowed_hosts: vec![], + }; + + let json = serde_json::to_value(&claims).unwrap(); + // When there's exactly one aud value, it should serialize as a string + assert_eq!(json["aud"], serde_json::json!("redap")); + // Empty allowed_hosts should not appear in JSON + assert!(json.get("allowed_hosts").is_none()); + } + + #[test] + fn test_aud_serialize_multiple() { + let claims = RedapClaims { + iss: "test".to_owned(), + sub: "user123".to_owned(), + aud: vec!["redap".to_owned(), "other".to_owned()], + exp: 1234567890, + iat: 1234567890, + permissions: vec![], + allowed_hosts: vec![], + }; + + let json = serde_json::to_value(&claims).unwrap(); + // When there are multiple aud values, it should serialize as an array + assert_eq!(json["aud"], serde_json::json!(["redap", "other"])); + } + + #[test] + fn test_allowed_hosts_serialize() { + let claims = RedapClaims { + iss: "test".to_owned(), + sub: "user123".to_owned(), + aud: vec!["redap".to_owned()], + exp: 1234567890, + iat: 1234567890, + permissions: vec![], + allowed_hosts: vec!["api.acme.cloud.rerun.io".to_owned()], + }; + + let json = serde_json::to_value(&claims).unwrap(); + assert_eq!( + json["allowed_hosts"], + serde_json::json!(["api.acme.cloud.rerun.io"]) + ); + } +} diff --git a/crates/utils/re_auth/src/cli.rs b/crates/utils/re_auth/src/cli.rs index 744a049889b8..854674b75ebc 100644 --- a/crates/utils/re_auth/src/cli.rs +++ b/crates/utils/re_auth/src/cli.rs @@ -60,7 +60,7 @@ pub async fn login(options: LoginOptions) -> Result<(), Error> { println!("Note: Run `rerun auth login --force` to login again."); return Ok(()); } - credentials + *credentials } 
OauthLoginFlowState::LoginFlowStarted(login_flow) => { let progress_bar = ProgressBar::new_spinner(); @@ -113,7 +113,7 @@ pub async fn login(options: LoginOptions) -> Result<(), Error> { /// Log out of Rerun by clearing stored credentials. pub fn logout(options: &LogoutOptions) -> Result<(), Error> { - match crate::oauth::clear_credentials() { + match crate::oauth::clear_credentials(None) { Ok(Some(outcome)) => { if options.open_browser { println!("Opening browser to end your session…"); diff --git a/crates/utils/re_auth/src/credentials.rs b/crates/utils/re_auth/src/credentials.rs index 5fce84a2c301..f0d3ec5130b9 100644 --- a/crates/utils/re_auth/src/credentials.rs +++ b/crates/utils/re_auth/src/credentials.rs @@ -73,6 +73,11 @@ pub(crate) mod oauth { subscribers.push(Box::new(callback)); } + /// Clear the credentials cache + pub(crate) fn clear_cache() { + *CACHE.blocking_write() = None; + } + /// Provider which uses `OAuth` credentials stored on the user's machine. #[derive(Debug, Default)] pub struct CliCredentialsProvider { diff --git a/crates/utils/re_auth/src/crypto_provider.rs b/crates/utils/re_auth/src/crypto_provider.rs new file mode 100644 index 000000000000..6fbe86bda250 --- /dev/null +++ b/crates/utils/re_auth/src/crypto_provider.rs @@ -0,0 +1,139 @@ +//! Minimal [`CryptoProvider`] for `jsonwebtoken` that supports HS256 and RS256. + +use hmac::{Hmac, Mac as _}; +use jsonwebtoken::crypto::{CryptoProvider, JwkUtils, JwtSigner, JwtVerifier}; +use jsonwebtoken::errors::{Error, ErrorKind}; +use jsonwebtoken::{Algorithm, DecodingKey, EncodingKey}; +use sha2::Sha256; +use signature::{Signer, Verifier}; + +type HmacSha256 = Hmac; + +// --- HS256 --- + +struct Hs256Signer(HmacSha256); + +impl Hs256Signer { + fn new(key: &EncodingKey) -> Result { + let inner = HmacSha256::new_from_slice(key.try_get_hmac_secret()?) 
+ .map_err(|_ignored| ErrorKind::InvalidKeyFormat)?; + Ok(Self(inner)) + } +} + +impl Signer> for Hs256Signer { + fn try_sign(&self, msg: &[u8]) -> Result, signature::Error> { + let mut mac = self.0.clone(); + mac.update(msg); + Ok(mac.finalize().into_bytes().to_vec()) + } +} + +impl JwtSigner for Hs256Signer { + fn algorithm(&self) -> Algorithm { + Algorithm::HS256 + } +} + +struct Hs256Verifier(HmacSha256); + +impl Hs256Verifier { + fn new(key: &DecodingKey) -> Result { + let inner = HmacSha256::new_from_slice(key.try_get_hmac_secret()?) + .map_err(|_ignored| ErrorKind::InvalidKeyFormat)?; + Ok(Self(inner)) + } +} + +impl Verifier> for Hs256Verifier { + fn verify(&self, msg: &[u8], signature: &Vec) -> Result<(), signature::Error> { + let mut mac = self.0.clone(); + mac.update(msg); + mac.verify_slice(signature) + .map_err(signature::Error::from_source) + } +} + +impl JwtVerifier for Hs256Verifier { + fn algorithm(&self) -> Algorithm { + Algorithm::HS256 + } +} + +// --- RS256 --- + +#[cfg(feature = "oauth")] +struct Rs256Verifier(DecodingKey); + +#[cfg(feature = "oauth")] +impl Rs256Verifier { + fn new(key: &DecodingKey) -> Self { + Self(key.clone()) + } +} + +#[cfg(feature = "oauth")] +impl Verifier> for Rs256Verifier { + fn verify(&self, msg: &[u8], sig: &Vec) -> Result<(), signature::Error> { + use jsonwebtoken::DecodingKeyKind; + use ring::signature as ring_sig; + + match self.0.kind() { + DecodingKeyKind::SecretOrDer(bytes) => { + let public_key = + ring_sig::UnparsedPublicKey::new(&ring_sig::RSA_PKCS1_2048_8192_SHA256, bytes); + public_key.verify(msg, sig) + } + DecodingKeyKind::RsaModulusExponent { n, e } => { + let components = ring_sig::RsaPublicKeyComponents { n, e }; + components.verify(&ring_sig::RSA_PKCS1_2048_8192_SHA256, msg, sig) + } + } + .map_err(|_err| signature::Error::from_source("RSA signature verification failed")) + } +} + +#[cfg(feature = "oauth")] +impl JwtVerifier for Rs256Verifier { + fn algorithm(&self) -> Algorithm { + 
Algorithm::RS256 + } +} + +// --- + +fn unsupported_algorithm(algo: &Algorithm) -> Error { + re_log::debug_panic!("DEBUG PANIC: unsupported algorithm: {algo:?}"); + + ErrorKind::InvalidAlgorithm.into() +} + +fn signer_factory(algorithm: &Algorithm, key: &EncodingKey) -> Result, Error> { + match algorithm { + Algorithm::HS256 => Ok(Box::new(Hs256Signer::new(key)?)), + other => Err(unsupported_algorithm(other)), + } +} + +fn verifier_factory( + algorithm: &Algorithm, + key: &DecodingKey, +) -> Result, Error> { + match algorithm { + Algorithm::HS256 => Ok(Box::new(Hs256Verifier::new(key)?)), + #[cfg(feature = "oauth")] + Algorithm::RS256 => Ok(Box::new(Rs256Verifier::new(key))), + other => Err(unsupported_algorithm(other)), + } +} + +pub static PROVIDER: CryptoProvider = CryptoProvider { + signer_factory, + verifier_factory, + jwk_utils: JwkUtils::new_unimplemented(), +}; + +/// Install our minimal [`CryptoProvider`]. Safe to call multiple times. +pub fn install() { + PROVIDER.install_default().ok(); +} diff --git a/crates/utils/re_auth/src/lib.rs b/crates/utils/re_auth/src/lib.rs index b58428a13244..60f6eaa7523b 100644 --- a/crates/utils/re_auth/src/lib.rs +++ b/crates/utils/re_auth/src/lib.rs @@ -7,12 +7,16 @@ //! **Warning!** This approach should only be seen as a stop-gap until we have //! integration of _real_ identity-providers, most likely based on `OpenID` Connect. 
+#[cfg(not(target_arch = "wasm32"))] +mod crypto_provider; + #[cfg(not(target_arch = "wasm32"))] mod error; #[cfg(not(target_arch = "wasm32"))] mod provider; +mod claims; mod service; mod token; @@ -58,12 +62,13 @@ pub mod oauth; #[cfg(all(feature = "oauth", not(target_arch = "wasm32")))] pub mod callback_server; +pub use claims::{Claims, RedapClaims}; #[cfg(not(target_arch = "wasm32"))] pub use error::Error; #[cfg(all(feature = "oauth", not(target_arch = "wasm32")))] pub use oauth::login_flow::{DeviceCodeFlow, OauthLoginFlow}; #[cfg(not(target_arch = "wasm32"))] -pub use provider::{Claims, RedapProvider, SecretKey, VerificationOptions}; +pub use provider::{RedapProvider, SecretKey, VerificationOptions}; pub use service::client; #[cfg(not(target_arch = "wasm32"))] pub use service::server; @@ -82,9 +87,4 @@ pub const ERROR_MESSAGE_MALFORMED_CREDENTIALS: &str = "malformed auth token"; /// The associated status code will always be `Unauthenticated`. pub const ERROR_MESSAGE_MISSING_CREDENTIALS: &str = "missing credentials"; -/// The error message in Tonic's gRPC status when a _valid token_ did not have the required permissions. -/// -/// The associated status code will always be `Unauthenticated`. -pub const ERROR_MESSAGE_INVALID_CREDENTIALS: &str = "invalid credentials"; - mod wasm_compat; diff --git a/crates/utils/re_auth/src/oauth.rs b/crates/utils/re_auth/src/oauth.rs index 75e661788e3f..a3423c37f90a 100644 --- a/crates/utils/re_auth/src/oauth.rs +++ b/crates/utils/re_auth/src/oauth.rs @@ -39,10 +39,10 @@ pub struct CredentialsLoadError(#[from] storage::LoadError); /// Load credentials from storage. pub fn load_credentials() -> Result, CredentialsLoadError> { if let Some(credentials) = storage::load()? 
{ - re_log::debug!("found credentials"); + re_log::debug_once!("Found credentials for {}", credentials.user.email); Ok(Some(credentials)) } else { - re_log::debug!("no credentials stored locally"); + re_log::debug_once!("No credentials stored locally"); Ok(None) } } @@ -69,10 +69,18 @@ pub struct LogoutOutcome { /// On native, this also starts a local callback server so the browser has /// somewhere to redirect after the `WorkOS` session is cleared. /// +/// On web, `signed_out_url` is used as the post-logout redirect. If `None`, +/// no `return_to` is included in the logout URL. +/// /// The logout URL should be opened in the user's browser to also end the /// `WorkOS` session. If no credentials were stored (or the session ID could /// not be determined), `Ok(None)` is returned. -pub fn clear_credentials() -> Result, CredentialsClearError> { +/// `signed_out_url` is only used on web — on native, it is ignored. +pub fn clear_credentials( + signed_out_url: Option<&str>, +) -> Result, CredentialsClearError> { + let _ = &signed_out_url; // only used on web + // Load credentials before clearing so we can extract the session ID. let outcome = storage::load().ok().flatten().map(|creds| { #[cfg(not(target_arch = "wasm32"))] @@ -96,19 +104,16 @@ pub fn clear_credentials() -> Result, CredentialsClearErro #[cfg(target_arch = "wasm32")] { - // On web, redirect to /signed-out on the current origin after logout. - let return_to = web_sys::window() - .and_then(|w| w.location().origin().ok()) - .map(|origin| format!("{origin}/signed-out")); LogoutOutcome { - logout_url: api::logout_url(&creds.claims.sid, return_to.as_deref()), + logout_url: api::logout_url(&creds.claims.sid, signed_out_url), } } }); - storage::clear()?; - + crate::credentials::oauth::clear_cache(); crate::credentials::oauth::auth_update(None); + storage::clear()?; + re_analytics::set_logged_in(false); Ok(outcome) } @@ -231,6 +236,10 @@ pub struct RerunCloudClaims { /// Subject's email address. 
#[serde(default, skip_serializing_if = "Option::is_none")] pub email: Option, + + /// Organization name. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub org_name: Option, } impl RerunCloudClaims { @@ -283,6 +292,9 @@ pub struct CredentialsStoreError(#[from] storage::StoreError); impl InMemoryCredentials { /// Ensure credentials are persisted to disk before using them. pub fn ensure_stored(self) -> Result { + // Link the analytics ID to the authenticated user. + self.0.link_analytics_id_to_user(); + storage::store(&self.0)?; // Normally if re_analytics discovers this is a brand-new configuration, @@ -300,12 +312,6 @@ impl InMemoryCredentials { config.save().ok(); } - // Link the analytics ID to the authenticated user - re_analytics::record(|| re_analytics::event::SetPersonProperty { - email: self.0.user.email.clone(), - organization_id: self.0.claims.org_id.clone(), - }); - crate::credentials::oauth::auth_update(Some(&self.0.user)); Ok(self.0) @@ -325,8 +331,10 @@ impl Credentials { let jwt = Jwt(res.access_token); let claims = RerunCloudClaims::try_from_unverified_jwt(&jwt)?; let access_token = AccessToken::try_from_unverified_jwt(jwt)?; + let mut user: User = res.user; + user.org_name = claims.org_name.clone(); Ok(InMemoryCredentials(Self { - user: res.user, + user, refresh_token: Some(RefreshToken(res.refresh_token)), access_token, claims, @@ -346,6 +354,7 @@ impl Credentials { let user = User { id: claims.sub.clone(), email, + org_name: claims.org_name.clone(), }; let access_token = AccessToken { token: access_token, @@ -369,12 +378,27 @@ impl Credentials { pub fn user(&self) -> &User { &self.user } + + /// Link the current analytics ID to this user credentials. 
+ pub fn link_analytics_id_to_user(&self) { + re_log::debug!("Linking analytics ID to user: '{}'", self.user.email); + re_analytics::set_logged_in(true); + re_analytics::record(|| re_analytics::event::SetPersonProperty { + email: self.user.email.clone(), + organization_id: self.claims.org_id.clone(), + }); + } } #[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] pub struct User { + /// Opaque user identifier from the auth provider (e.g. `"user_01JZ…"`). + /// + /// This is NOT a human-readable name; use [`Self::email`] for display purposes. pub id: String, + pub email: String, + pub org_name: Option, } /// An access token which was valid at some point in the past. diff --git a/crates/utils/re_auth/src/oauth/api.rs b/crates/utils/re_auth/src/oauth/api.rs index 9bc9ba4e896a..fb4077e8d894 100644 --- a/crates/utils/re_auth/src/oauth/api.rs +++ b/crates/utils/re_auth/src/oauth/api.rs @@ -134,7 +134,7 @@ impl IntoRequest for AuthenticateWithRefresh<'_> { type Res = RefreshResponse; fn into_request(self) -> Result { - ehttp::Request::json( + ehttp::Request::post_json( format_args!("{base}/user_management/authenticate", base = *WORKOS_API), &self, ) @@ -294,7 +294,7 @@ impl IntoRequest for AuthenticateWithCode<'_> { type Res = AuthenticateWithCodeResponse; fn into_request(self) -> Result { - ehttp::Request::json( + ehttp::Request::post_json( format_args!("{base}/user_management/authenticate", base = *WORKOS_API), &self, ) @@ -341,6 +341,7 @@ impl From for crate::oauth::User { Self { id: value.id, email: value.email, + org_name: None, } } } @@ -365,7 +366,7 @@ impl IntoRequest for GetDeviceAuthUrl<'_> { type Res = GetDeviceAuthUrlResponse; fn into_request(self) -> Result { - ehttp::Request::json( + ehttp::Request::post_json( format_args!( "{base}/user_management/authorize/device", base = *WORKOS_API, @@ -427,7 +428,7 @@ impl IntoRequest for AuthenticateWithDeviceCode<'_> { const ALLOW_4XX: bool = true; fn into_request(self) -> Result { - ehttp::Request::json( + 
ehttp::Request::post_json( format_args!("{base}/user_management/authenticate", base = *WORKOS_API,), &self, ) @@ -457,7 +458,7 @@ impl IntoRequest for GenerateToken<'_> { permission: Permission, } - let mut req = ehttp::Request::json( + let mut req = ehttp::Request::post_json( format_args!( "{origin}/generate-token", origin = self.server.ascii_serialization() diff --git a/crates/utils/re_auth/src/oauth/login_flow.rs b/crates/utils/re_auth/src/oauth/login_flow.rs index 10c553b7fb78..d299998a2527 100644 --- a/crates/utils/re_auth/src/oauth/login_flow.rs +++ b/crates/utils/re_auth/src/oauth/login_flow.rs @@ -12,8 +12,8 @@ use super::api::{ }; pub enum OauthLoginFlowState { - AlreadyLoggedIn(Credentials), - LoginFlowStarted(OauthLoginFlow), + AlreadyLoggedIn(Box), + LoginFlowStarted(Box), } pub struct OauthLoginFlow { @@ -37,7 +37,8 @@ impl OauthLoginFlow { login_hint = Some(credentials.user().email.clone()); match oauth::refresh_credentials(credentials).await { Ok(credentials) => { - return Ok(OauthLoginFlowState::AlreadyLoggedIn(credentials)); + credentials.link_analytics_id_to_user(); + return Ok(OauthLoginFlowState::AlreadyLoggedIn(Box::new(credentials))); } Err(err) => { // Credentials are bad, login again. 
@@ -62,11 +63,11 @@ impl OauthLoginFlow { let pkce = Pkce::new(); let server = OauthCallbackServer::new(&pkce)?; - Ok(OauthLoginFlowState::LoginFlowStarted(Self { + Ok(OauthLoginFlowState::LoginFlowStarted(Box::new(Self { server, login_hint, pkce, - })) + }))) } pub fn get_login_url(&self) -> &str { @@ -96,10 +97,9 @@ impl OauthLoginFlow { } } -#[expect(clippy::large_enum_variant)] pub enum DeviceCodeFlowState { - AlreadyLoggedIn(Credentials), - LoginFlowStarted(DeviceCodeFlow), + AlreadyLoggedIn(Box), + LoginFlowStarted(Box), } pub struct DeviceCodeFlow { @@ -118,7 +118,8 @@ impl DeviceCodeFlow { Ok(Some(credentials)) => { match oauth::refresh_credentials(credentials).await { Ok(credentials) => { - return Ok(DeviceCodeFlowState::AlreadyLoggedIn(credentials)); + credentials.link_analytics_id_to_user(); + return Ok(DeviceCodeFlowState::AlreadyLoggedIn(Box::new(credentials))); } Err(err) => { // Credentials are bad, login again. @@ -147,19 +148,21 @@ impl DeviceCodeFlow { let interval = Duration::from_secs(res.interval_seconds as u64); - Ok(DeviceCodeFlowState::LoginFlowStarted(Self { + Ok(DeviceCodeFlowState::LoginFlowStarted(Box::new(Self { device_code: res.device_code, user_code: res.user_code, verification_uri: res.verification_uri_complete, interval, - })) + }))) } - pub fn get_login_url(&self) -> &str { + /// The URL the user should open in their browser to authenticate. + pub fn login_url(&self) -> &str { &self.verification_uri } - pub fn get_user_code(&self) -> &str { + /// The code the user should see in their browser to verify they are authenticating the correct session. 
+ pub fn user_code(&self) -> &str { &self.user_code } diff --git a/crates/utils/re_auth/src/provider.rs b/crates/utils/re_auth/src/provider.rs index d8e8dbc128a7..f6cf68272094 100644 --- a/crates/utils/re_auth/src/provider.rs +++ b/crates/utils/re_auth/src/provider.rs @@ -4,7 +4,7 @@ use base64::Engine as _; use base64::engine::general_purpose; use jsonwebtoken::{Algorithm, DecodingKey, EncodingKey, Header, Validation, decode, encode}; -use crate::{Error, Jwt, Permission}; +use crate::{Claims, Error, Jwt, Permission, RedapClaims}; /// Identifies who should be the consumer of a token. In our case, this is the Rerun storage node. const AUDIENCE: &str = "redap"; @@ -74,89 +74,6 @@ impl std::fmt::Debug for SecretKey { } } -#[derive(Debug, serde::Serialize, serde::Deserialize)] -pub struct RedapClaims { - /// The issuer of the token. - /// - /// Could be an identity provider or the storage node directly. - pub iss: String, - - /// The subject (user) of the token. - pub sub: String, - - /// The `aud` claim, identifying the intended consumer of the token. - /// - /// Typically set to `"redap"` for Rerun storage-node tokens. - /// Per RFC 7519, this can be either a single string or an array of strings. - #[serde( - deserialize_with = "deser_string_or_vec", - serialize_with = "ser_string_or_vec" - )] - pub aud: Vec, - - /// Expiry time of the token. - pub exp: u64, - - /// Issued at time of the token. - pub iat: u64, - - #[serde(default)] - pub permissions: Vec, - - /// Host patterns this token is allowed to be sent to. - /// - /// Uses the same domain-matching semantics as [`crate::host_matches_pattern`]. - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub allowed_hosts: Vec, -} - -#[derive(Debug, serde::Serialize, serde::Deserialize)] -#[serde(untagged)] -pub enum Claims { - #[cfg(feature = "oauth")] - RerunCloud(crate::oauth::RerunCloudClaims), - - Redap(RedapClaims), -} - -impl Claims { - /// Subject. 
An email if available, otherwise it's usually the user ID. - pub fn sub(&self) -> &str { - match self { - #[cfg(feature = "oauth")] - Self::RerunCloud(claims) => claims.email.as_deref().unwrap_or(claims.sub.as_str()), - Self::Redap(claims) => claims.sub.as_str(), - } - } - - /// Issuer - pub fn iss(&self) -> &str { - match self { - #[cfg(feature = "oauth")] - Self::RerunCloud(claims) => claims.iss.as_str(), - Self::Redap(claims) => claims.iss.as_str(), - } - } - - pub fn permissions(&self) -> &[Permission] { - match self { - #[cfg(feature = "oauth")] - Self::RerunCloud(claims) => &claims.permissions[..], - Self::Redap(claims) => &claims.permissions[..], - } - } - - pub fn has_read_permission(&self) -> bool { - self.permissions().iter().any(|p| p == &Permission::Read) - } - - pub fn has_write_permission(&self) -> bool { - self.permissions() - .iter() - .any(|p| p == &Permission::ReadWrite) - } -} - #[derive(Debug, Clone)] pub struct VerificationOptions { leeway: Option, @@ -228,6 +145,7 @@ fn generate_secret_key(mut rng: impl rand::Rng, length: usize) -> Vec { impl RedapProvider { /// Create an authentication provider from a secret key. pub fn from_secret_key(secret_key: SecretKey) -> Self { + crate::crypto_provider::install(); Self { secret_key, #[cfg(feature = "oauth")] @@ -237,6 +155,7 @@ impl RedapProvider { /// Create an authentication provider from a secret key encoded as base64. pub fn from_secret_key_base64(secret_key: &str) -> Result { + crate::crypto_provider::install(); Ok(Self { secret_key: SecretKey::from_base64(secret_key)?, #[cfg(feature = "oauth")] @@ -362,158 +281,3 @@ impl RedapProvider { Ok(token_data.claims) } } - -// --- - -/// Deserializes either a string of an array of strings into an array of strings. 
-fn deser_string_or_vec<'de, D>(deserializer: D) -> Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - #[derive(serde::Deserialize)] - #[serde(untagged)] - enum StringOrVec { - One(String), - Many(Vec), - } - - use serde::Deserialize as _; - match StringOrVec::deserialize(deserializer)? { - StringOrVec::One(s) => Ok(vec![s]), - StringOrVec::Many(v) => Ok(v), - } -} - -/// Serializes an array of strings into either a single string if unary, or into an array of strings otherwise. -fn ser_string_or_vec(value: &Vec, serializer: S) -> Result -where - S: serde::Serializer, -{ - use serde::Serialize as _; - if value.len() == 1 { - serializer.serialize_str(&value[0]) - } else { - value.serialize(serializer) - } -} - -// --- - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_aud_deserialize_single_string() { - let json = r#"{ - "iss": "test", - "sub": "user123", - "aud": "redap", - "exp": 1234567890, - "iat": 1234567890 - }"#; - - let claims: RedapClaims = serde_json::from_str(json).unwrap(); - assert_eq!(claims.aud, vec!["redap"]); - assert!(claims.allowed_hosts.is_empty()); - } - - #[test] - fn test_aud_deserialize_array() { - let json = r#"{ - "iss": "test", - "sub": "user123", - "aud": ["redap", "other-service"], - "exp": 1234567890, - "iat": 1234567890 - }"#; - - let claims: RedapClaims = serde_json::from_str(json).unwrap(); - assert_eq!(claims.aud, vec!["redap", "other-service"]); - } - - #[test] - fn test_aud_deserialize_empty_array() { - let json = r#"{ - "iss": "test", - "sub": "user123", - "aud": [], - "exp": 1234567890, - "iat": 1234567890 - }"#; - - let claims: RedapClaims = serde_json::from_str(json).unwrap(); - assert_eq!(claims.aud, Vec::::new()); - } - - #[test] - fn test_allowed_hosts_deserialize() { - let json = r#"{ - "iss": "test", - "sub": "user123", - "aud": "redap", - "exp": 1234567890, - "iat": 1234567890, - "allowed_hosts": ["api.acme.cloud.rerun.io"] - }"#; - - let claims: RedapClaims = serde_json::from_str(json).unwrap(); 
- assert_eq!(claims.aud, vec!["redap"]); - assert_eq!(claims.allowed_hosts, vec!["api.acme.cloud.rerun.io"]); - } - - #[test] - fn test_aud_serialize_single() { - let claims = RedapClaims { - iss: "test".to_owned(), - sub: "user123".to_owned(), - aud: vec!["redap".to_owned()], - exp: 1234567890, - iat: 1234567890, - permissions: vec![], - allowed_hosts: vec![], - }; - - let json = serde_json::to_value(&claims).unwrap(); - // When there's exactly one aud value, it should serialize as a string - assert_eq!(json["aud"], serde_json::json!("redap")); - // Empty allowed_hosts should not appear in JSON - assert!(json.get("allowed_hosts").is_none()); - } - - #[test] - fn test_aud_serialize_multiple() { - let claims = RedapClaims { - iss: "test".to_owned(), - sub: "user123".to_owned(), - aud: vec!["redap".to_owned(), "other".to_owned()], - exp: 1234567890, - iat: 1234567890, - permissions: vec![], - allowed_hosts: vec![], - }; - - let json = serde_json::to_value(&claims).unwrap(); - // When there are multiple aud values, it should serialize as an array - assert_eq!(json["aud"], serde_json::json!(["redap", "other"])); - } - - #[test] - fn test_allowed_hosts_serialize() { - let claims = RedapClaims { - iss: "test".to_owned(), - sub: "user123".to_owned(), - aud: vec!["redap".to_owned()], - exp: 1234567890, - iat: 1234567890, - permissions: vec![], - allowed_hosts: vec!["api.acme.cloud.rerun.io".to_owned()], - }; - - let json = serde_json::to_value(&claims).unwrap(); - assert_eq!( - json["allowed_hosts"], - serde_json::json!(["api.acme.cloud.rerun.io"]) - ); - } -} diff --git a/crates/utils/re_auth/src/service/server.rs b/crates/utils/re_auth/src/service/server.rs index c7cbf6e5bda1..4a20f26ac0bc 100644 --- a/crates/utils/re_auth/src/service/server.rs +++ b/crates/utils/re_auth/src/service/server.rs @@ -1,3 +1,6 @@ +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::SystemTime; + use tonic::metadata::{Ascii, MetadataValue}; use tonic::service::Interceptor; use 
tonic::{Request, Status}; @@ -53,7 +56,7 @@ impl Authenticator { } impl Interceptor for Authenticator { - fn call(&mut self, req: Request<()>) -> Result, Status> { + fn call(&mut self, req: Request<()>) -> tonic::Result> { let mut req = req; if let Some(token_metadata) = req.metadata().get(AUTHORIZATION_KEY) { @@ -64,8 +67,40 @@ impl Interceptor for Authenticator { let claims = self .provider .verify(&token, VerificationOptions::default()) - .map_err(|_err| { - Status::unauthenticated(crate::ERROR_MESSAGE_INVALID_CREDENTIALS) + .map_err(|err| { + // Log the full error server-side, best-effort + // rate-limited to at most once per second to avoid + // log storms. + static LAST_LOG_MS: AtomicU64 = AtomicU64::new(0); + const ONE_SECOND_MS: u64 = 1_000; + let now_ms = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .map_or(0, |d| d.as_millis() as u64); + if now_ms.saturating_sub(LAST_LOG_MS.load(Ordering::Relaxed)) > ONE_SECOND_MS { + LAST_LOG_MS.store(now_ms, Ordering::Relaxed); + re_log::warn!("Token verification failed: {err:#}"); + } + + // Explicitly provide more detail in the error message, but do not rely + // on the error's `Display` implementation, as it may contain sensitive + // information. 
+ let detail = match err { + Error::Jwt(ref jwt_err) => match jwt_err.kind() { + jsonwebtoken::errors::ErrorKind::ExpiredSignature => { + "token has expired" + } + jsonwebtoken::errors::ErrorKind::InvalidSignature => { + "invalid token signature" + } + jsonwebtoken::errors::ErrorKind::InvalidAlgorithm => { + "unsupported signature algorithm" + } + _ => "invalid token", + }, + Error::MalformedToken => "malformed token", + _ => "invalid credentials", + }; + Status::unauthenticated(detail) })?; req.extensions_mut().insert(UserContext { diff --git a/crates/utils/re_auth/src/user.rs b/crates/utils/re_auth/src/user.rs deleted file mode 100644 index 0401faa417ff..000000000000 --- a/crates/utils/re_auth/src/user.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub struct UserContext { - pub user_id: String, -} diff --git a/crates/utils/re_backoff/Cargo.toml b/crates/utils/re_backoff/Cargo.toml index 72be12aedf4b..0a785032a4ee 100644 --- a/crates/utils/re_backoff/Cargo.toml +++ b/crates/utils/re_backoff/Cargo.toml @@ -37,5 +37,3 @@ getrandom = { workspace = true, features = ["wasm_js"] } js-sys.workspace = true wasm-bindgen-futures.workspace = true web-sys = { workspace = true, features = ["Window"] } - -[dev-dependencies] diff --git a/crates/utils/re_backoff/README.md b/crates/utils/re_backoff/README.md index 54370869c5f3..c85310a2ae5e 100644 --- a/crates/utils/re_backoff/README.md +++ b/crates/utils/re_backoff/README.md @@ -2,8 +2,8 @@ Part of the [`rerun`](https://github.com/rerun-io/rerun) family of crates. 
-[![Latest version](https://img.shields.io/crates/v/re_arrow_combinators.svg)](https://crates.io/crates/re_arrow_combinators) -[![Documentation](https://docs.rs/re_arrow_combinators/badge.svg)](https://docs.rs/re_arrow_combinators) +[![Latest version](https://img.shields.io/crates/v/re_backoff.svg)](https://crates.io/crates/re_backoff) +[![Documentation](https://docs.rs/re_backoff/badge.svg)](https://docs.rs/re_backoff) ![MIT](https://img.shields.io/badge/license-MIT-blue.svg) ![Apache](https://img.shields.io/badge/license-Apache-blue.svg) diff --git a/crates/utils/re_byte_size/Cargo.toml b/crates/utils/re_byte_size/Cargo.toml index 962c518a0fd1..8b4d4ce1bb7e 100644 --- a/crates/utils/re_byte_size/Cargo.toml +++ b/crates/utils/re_byte_size/Cargo.toml @@ -20,6 +20,7 @@ all-features = true [features] +ecolor = ["dep:ecolor"] glam = ["dep:glam"] @@ -31,6 +32,7 @@ smallvec.workspace = true vec1.workspace = true # Small enough to always depend on it. # Optional dependencies: +ecolor = { workspace = true, optional = true } glam = { workspace = true, optional = true } diff --git a/crates/utils/re_byte_size/src/primitive_sizes.rs b/crates/utils/re_byte_size/src/primitive_sizes.rs index 78a89d1333fa..050d5f9b8ec2 100644 --- a/crates/utils/re_byte_size/src/primitive_sizes.rs +++ b/crates/utils/re_byte_size/src/primitive_sizes.rs @@ -29,5 +29,8 @@ impl_size_bytes_pod!( ); impl_size_bytes_pod!(half::f16); +#[cfg(feature = "ecolor")] +impl_size_bytes_pod!(ecolor::Color32); + #[cfg(feature = "glam")] -impl_size_bytes_pod!(glam::DAffine3); +impl_size_bytes_pod!(glam::Vec3, glam::DAffine3); diff --git a/crates/utils/re_byte_size/src/std_sizes.rs b/crates/utils/re_byte_size/src/std_sizes.rs index 09b698ccaad0..5bdc09f4c5a5 100644 --- a/crates/utils/re_byte_size/src/std_sizes.rs +++ b/crates/utils/re_byte_size/src/std_sizes.rs @@ -205,6 +205,16 @@ impl SizeBytes for Vec { } } +impl SizeBytes for std::borrow::Cow<'_, [T]> { + #[inline] + fn heap_size_bytes(&self) -> u64 { + match 
self { + std::borrow::Cow::Borrowed(_) => 0, + std::borrow::Cow::Owned(v) => v.heap_size_bytes(), + } + } +} + impl SizeBytes for VecDeque { #[inline] fn heap_size_bytes(&self) -> u64 { diff --git a/crates/utils/re_case/src/lib.rs b/crates/utils/re_case/src/lib.rs index afe98d5b8151..cf3c5895db66 100644 --- a/crates/utils/re_case/src/lib.rs +++ b/crates/utils/re_case/src/lib.rs @@ -15,7 +15,7 @@ pub fn to_snake_case(s: &str) -> String { Boundary::LowerUpper, ]) .set_pattern(Pattern::Lowercase) - .set_delim("_"); + .set_delimiter("_"); let mut parts: Vec<_> = s.split('.').map(ToOwned::to_owned).collect(); if let Some(last) = parts.last_mut() { @@ -185,7 +185,7 @@ pub fn to_human_case(s: &str) -> String { Boundary::LowerUpper, ]) .set_pattern(Pattern::Sentence) - .set_delim(" "); + .set_delimiter(" "); let mut parts: Vec<_> = s.split('.').map(ToOwned::to_owned).collect(); if let Some(last) = parts.last_mut() { diff --git a/crates/utils/re_crash_handler/src/lib.rs b/crates/utils/re_crash_handler/src/lib.rs index c117bcbf003d..ef3027501cfb 100644 --- a/crates/utils/re_crash_handler/src/lib.rs +++ b/crates/utils/re_crash_handler/src/lib.rs @@ -64,7 +64,7 @@ fn install_panic_hook(_build_info: BuildInfo) { eprintln!( "\n\ - Troubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting \n\ + Troubleshooting Rerun: https://www.rerun.io/docs/overview/installing-rerun/troubleshooting \n\ Report bugs: https://github.com/rerun-io/rerun/issues" ); @@ -101,7 +101,7 @@ fn panic_info_message(panic_info: &std::panic::PanicHookInfo<'_>) -> Option(error: &'a (dyn std::error::Error + 'static)) -> Option<&'a T> +where + T: std::error::Error + 'static, +{ + const MAX_HOPS: usize = 16; + + let mut source: Option<&(dyn std::error::Error + 'static)> = Some(error); + for _ in 0..MAX_HOPS { + let Some(e) = source else { + break; + }; + if let Some(t) = e.downcast_ref::() { + return Some(t); + } + source = e.source(); + } + None +} + +/// The separator used to split error 
messages into a summary and details. +/// +/// If an error message contains this separator, the notification system +/// will display the part before it as the main message and the part after +/// inside a collapsible "Details" section. +/// +/// Use [`format_with_details`] to format errors using this convention. +pub const DETAILS_SEPARATOR: &str = "\nDetails:"; + /// Format an error, including its chain of sources. /// /// Always use this when displaying an error, especially `anyhow::Error`. @@ -21,6 +55,34 @@ pub fn format_ref(error: &dyn std::error::Error) -> String { string } +/// Format an error with details coming after [`DETAILS_SEPARATOR`]. +pub fn format_with_details(error: impl Into, details: impl Into) -> String { + let error = error.into(); + let details = details.into(); + if details.is_empty() { + error + } else { + format!("{error}{DETAILS_SEPARATOR} {details}") + } +} + +/// Split a message that may contain a [`DETAILS_SEPARATOR`] into summary and optional details. +/// +/// Returns `(summary, Some(details))` if the separator is present, +/// or `(message, None)` if not. 
+pub fn split_details(message: &str) -> (&str, Option<&str>) { + if let Some((summary, details)) = message.split_once(DETAILS_SEPARATOR) { + let details = details.trim(); + if details.is_empty() { + (summary, None) + } else { + (summary, Some(details)) + } + } else { + (message, None) + } +} + #[test] fn test_format() { let err = anyhow::format_err!("root_cause") @@ -32,3 +94,70 @@ fn test_format() { // Now we do: assert_eq!(format(&err), "outer_context: inner_context: root_cause"); } + +#[test] +fn test_format_with_details() { + assert_eq!( + format_with_details("Error", "The fine print"), + "Error\nDetails: The fine print" + ); +} + +#[test] +fn test_downcast_source() { + #[derive(Debug)] + struct Leaf(&'static str); + + impl std::fmt::Display for Leaf { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.0) + } + } + + impl std::error::Error for Leaf {} + + #[derive(Debug)] + struct Wrap(Box); + + impl std::fmt::Display for Wrap { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "wrap: {}", self.0) + } + } + + impl std::error::Error for Wrap { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + Some(self.0.as_ref()) + } + } + + // Positive: target sits behind a wrapper — walk finds it via `.source()`. + let wrapped = Wrap(Box::new(Leaf("boom"))); + let found = downcast_source::(&wrapped).expect("Leaf should be recoverable"); + assert_eq!(found.0, "boom"); + + // Positive: target IS the top-level error — walk finds it on the first hop. + let direct = Leaf("direct"); + assert!(downcast_source::(&direct).is_some()); + + // Negative: no error in the chain matches `T` — walk terminates with None. 
+ let only_wrap = Wrap(Box::new(Leaf("inner"))); + assert!(downcast_source::(&only_wrap).is_none()); +} + +#[test] +fn test_split_details() { + for (in_summary, in_details) in [("just a message", ""), ("message", "the fine print")] { + let combined = format_with_details(in_summary, in_details); + let (out_summary, out_details) = split_details(&combined); + assert_eq!(out_summary, in_summary); + assert_eq!( + out_details, + if in_details.is_empty() { + None + } else { + Some(in_details) + } + ); + } +} diff --git a/crates/utils/re_format/src/plural.rs b/crates/utils/re_format/src/plural.rs index 9eaae3eb2eac..17b67f0f9ce1 100644 --- a/crates/utils/re_format/src/plural.rs +++ b/crates/utils/re_format/src/plural.rs @@ -1,9 +1,13 @@ use std::fmt::Display; +use re_log::debug_assert; + use crate::{UnsignedAbs, format_int, format_uint}; /// Returns either "1 $NOUN" (if `count` is one), otherwise returns `$N $NOUNs`. pub fn format_plural_s(count: impl num_traits::Unsigned + Display, noun: &'static str) -> String { + debug_assert!(!noun.ends_with('s'), "Expected singular, got {noun:?}"); + if count.is_one() { format!("1 {noun}") } else { @@ -17,6 +21,8 @@ where Int: num_traits::Signed + Display + PartialOrd + num_traits::Zero + UnsignedAbs, Int::Unsigned: Display + num_traits::Unsigned, { + debug_assert!(!noun.ends_with('s'), "Expected singular, got {noun:?}"); + if count.abs().is_one() { format!("{} {noun}", format_int(count)) } else { diff --git a/crates/utils/re_int_histogram/Cargo.toml b/crates/utils/re_int_histogram/Cargo.toml deleted file mode 100644 index 073202abfe34..000000000000 --- a/crates/utils/re_int_histogram/Cargo.toml +++ /dev/null @@ -1,40 +0,0 @@ -[package] -name = "re_int_histogram" -authors.workspace = true -description = "A histogram with `i64` keys and `u32` counts, supporting both sparse and dense uses." 
-edition.workspace = true -homepage.workspace = true -include.workspace = true -license.workspace = true -publish = true -readme = "README.md" -repository.workspace = true -rust-version.workspace = true -version.workspace = true - -[lints] -workspace = true - -[package.metadata.docs.rs] -all-features = true - - -[dependencies] -re_byte_size.workspace = true -re_log.workspace = true -smallvec.workspace = true -static_assertions.workspace = true - - -[dev-dependencies] -criterion.workspace = true -insta.workspace = true -mimalloc.workspace = true - - -[lib] -bench = false - -[[bench]] -name = "int_histogram_benchmark" -harness = false diff --git a/crates/utils/re_int_histogram/README.md b/crates/utils/re_int_histogram/README.md deleted file mode 100644 index 5d70bd180952..000000000000 --- a/crates/utils/re_int_histogram/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# re_int_histogram -Part of the [`rerun`](https://github.com/rerun-io/rerun) family of crates. - - -[![Latest version](https://img.shields.io/crates/v/re_int_histogram.svg)](https://crates.io/crates/re_int_histogram) -[![Documentation](https://docs.rs/re_int_histogram/badge.svg)](https://docs.rs/re_int_histogram) -![MIT](https://img.shields.io/badge/license-MIT-blue.svg) -![Apache](https://img.shields.io/badge/license-Apache-blue.svg) - -A histogram with `i64` keys and `u32` counts, supporting both sparse and dense uses. - -It supports high-level summaries of the histogram, so that you can quickly get a birds-eye view of the data without having to visit every point in the histogram. - -You can also think of the histogram as a multi-set, where you can insert the same key multiple times and then query how many times you've inserted it. - -Used for noting at which times we have events, so that we can visualize it in the time panel. 
diff --git a/crates/utils/re_int_histogram/benches/int_histogram_benchmark.rs b/crates/utils/re_int_histogram/benches/int_histogram_benchmark.rs deleted file mode 100644 index e6124b82530f..000000000000 --- a/crates/utils/re_int_histogram/benches/int_histogram_benchmark.rs +++ /dev/null @@ -1,113 +0,0 @@ -#![expect(clippy::cast_possible_wrap)] // u64 -> i64 is fine - -#[global_allocator] -static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; - -use criterion::{Criterion, criterion_group, criterion_main}; - -// ---------------- - -#[cfg(not(debug_assertions))] -const COUNT: u64 = 100_000; - -// `cargo test` also runs the benchmark setup code, so make sure they run quickly: -#[cfg(debug_assertions)] -const COUNT: u64 = 1; - -const SPACING: i64 = 1_000_000; - -// ---------------- - -criterion_group!(benches, btree, int_histogram,); -criterion_main!(benches); - -// ---------------------------------------------------------------------------- - -/// Baseline for performance and memory benchmarks -#[derive(Default)] -pub struct BTreeInt64Histogram { - map: std::collections::BTreeMap, -} - -impl BTreeInt64Histogram { - pub fn increment(&mut self, key: i64, inc: u32) { - *self.map.entry(key).or_default() += inc; - } - - pub fn range( - &self, - range: impl std::ops::RangeBounds, - _cutoff_size: u64, - ) -> impl Iterator { - self.map.range(range) - } -} - -/// Baselines -fn btree(c: &mut Criterion) { - fn create(num_elements: i64, sparseness: i64) -> BTreeInt64Histogram { - let mut histogram = BTreeInt64Histogram::default(); - for i in 0..num_elements { - histogram.increment(i * sparseness, 1); - } - histogram - } - - { - let mut group = c.benchmark_group("btree"); - group.throughput(criterion::Throughput::Elements(COUNT)); - group.bench_function("dense_insert", |b| { - b.iter(|| create(COUNT as _, 1)); - }); - group.bench_function("sparse_insert", |b| { - b.iter(|| create(COUNT as _, SPACING)); - }); - let dense = create(COUNT as _, 1); - 
group.bench_function("iter_all_dense", |b| { - b.iter(|| dense.range(.., 1).count()); - }); - let sparse = create(COUNT as _, SPACING); - group.bench_function("iter_all_sparse", |b| { - b.iter(|| sparse.range(.., 1).count()); - }); - } -} - -fn int_histogram(c: &mut Criterion) { - use re_int_histogram::Int64Histogram; - - fn create(num_elements: i64, sparseness: i64) -> Int64Histogram { - let mut histogram = Int64Histogram::default(); - for i in 0..num_elements { - histogram.increment(i * sparseness, 1); - } - histogram - } - - { - let mut group = c.benchmark_group("int_histogram"); - group.throughput(criterion::Throughput::Elements(COUNT)); - group.bench_function("dense_insert", |b| { - b.iter(|| create(COUNT as _, 1)); - }); - group.bench_function("sparse_insert", |b| { - b.iter(|| create(COUNT as _, SPACING)); - }); - let dense = create(COUNT as _, 1); - group.bench_function("iter_all_dense", |b| { - b.iter(|| dense.range(.., 1).count()); - }); - let sparse = create(COUNT as _, SPACING); - group.bench_function("iter_all_sparse", |b| { - b.iter(|| sparse.range(.., 1).count()); - }); - let dense = create(COUNT as _, 1); - group.bench_function("iter_some_dense", |b| { - b.iter(|| dense.range(.., 1_000).count()); - }); - let sparse = create(COUNT as _, SPACING); - group.bench_function("iter_some_sparse", |b| { - b.iter(|| sparse.range(.., 1_000 * SPACING as u64).count()); - }); - } -} diff --git a/crates/utils/re_int_histogram/src/lib.rs b/crates/utils/re_int_histogram/src/lib.rs deleted file mode 100644 index 1bbb0db8baee..000000000000 --- a/crates/utils/re_int_histogram/src/lib.rs +++ /dev/null @@ -1,129 +0,0 @@ -//! A histogram with `i64` keys and `u32` counts, supporting both sparse and dense uses. -//! -//! It supports high-level summaries of the histogram, so that you can quickly -//! get a birds-eye view of the data without having to visit every point in the histogram. -//! -//! You can also think of the histogram as a multi-set, -//! 
where you can insert the same key multiple times and then query -//! how many times you've inserted it. - -mod tree; - -pub use tree::{Int64Histogram, Iter}; - -// ----------------------------------------------------------------------------------- - -/// We use `u64` keys in the internal structures, -/// because it is so much easier to work with -pub(crate) fn u64_key_from_i64_key(key: i64) -> u64 { - // key.wrapping_add_unsigned(i64::MIN.unsigned_abs()) // unstable - (key as i128 + i64::MAX as i128 + 1) as _ -} - -pub(crate) fn i64_key_from_u64_key(key: u64) -> i64 { - (key as i128 + i64::MIN as i128) as _ -} - -#[test] -fn test_u64_i64_key_conversions() { - assert_eq!(u64_key_from_i64_key(i64::MIN), u64::MIN); - assert_eq!(u64_key_from_i64_key(i64::MIN + 1), 1); - assert_eq!(u64_key_from_i64_key(i64::MIN + 2), 2); - assert_eq!(u64_key_from_i64_key(i64::MAX - 2), u64::MAX - 2); - assert_eq!(u64_key_from_i64_key(i64::MAX - 1), u64::MAX - 1); - assert_eq!(u64_key_from_i64_key(i64::MAX), u64::MAX); - - assert_eq!(i64_key_from_u64_key(u64::MIN), i64::MIN); - assert_eq!(i64_key_from_u64_key(1), i64::MIN + 1); - assert_eq!(i64_key_from_u64_key(2), i64::MIN + 2); - assert_eq!(i64_key_from_u64_key(u64::MAX - 2), i64::MAX - 2); - assert_eq!(i64_key_from_u64_key(u64::MAX - 1), i64::MAX - 1); - assert_eq!(i64_key_from_u64_key(u64::MAX), i64::MAX); -} - -// ----------------------------------------------------------------------------------- - -/// An inclusive range. 
-#[derive(Clone, Copy, PartialEq, Eq)] -pub(crate) struct RangeU64 { - /// inclusive - pub min: u64, - - /// inclusive - pub max: u64, -} - -impl RangeU64 { - pub fn new(min: u64, max: u64) -> Self { - Self { min, max } - } - - pub fn single(value: u64) -> Self { - Self { - min: value, - max: value, - } - } - - #[inline] - pub fn contains(&self, value: u64) -> bool { - self.min <= value && value <= self.max - } - - #[inline] - pub fn contains_all_of(&self, other: Self) -> bool { - self.contains(other.min) && self.contains(other.max) - } - - #[inline] - pub fn intersects(&self, other: Self) -> bool { - self.min <= other.max && other.min <= self.max - } -} - -impl std::fmt::Debug for RangeU64 { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "RangeU64[{}, {}]", self.min, self.max) - } -} - -// ----------------------------------------------------------------------------------- - -/// An inclusive range. -#[derive(Clone, Copy, PartialEq, Eq)] -pub struct RangeI64 { - /// inclusive - pub min: i64, - - /// inclusive - pub max: i64, -} - -impl RangeI64 { - pub fn new(min: i64, max: i64) -> Self { - Self { min, max } - } - - pub fn single(value: i64) -> Self { - Self { - min: value, - max: value, - } - } - - #[inline] - pub fn contains(&self, value: i64) -> bool { - self.min <= value && value <= self.max - } - - #[inline] - pub fn length(&self) -> u64 { - (self.max - self.min + 1) as u64 - } -} - -impl std::fmt::Debug for RangeI64 { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "RangeI64[{}, {}]", self.min, self.max) - } -} diff --git a/crates/utils/re_int_histogram/src/tree.rs b/crates/utils/re_int_histogram/src/tree.rs deleted file mode 100644 index 6ed28cc88ad5..000000000000 --- a/crates/utils/re_int_histogram/src/tree.rs +++ /dev/null @@ -1,1262 +0,0 @@ -//! The histogram is implemented as a trie. -//! -//! 
Each node in the trie stores a count of a key/address sharing a prefix up to `depth * LEVEL_STEP` bits. -//! The key/address is always 64 bits. -//! -//! There are branch nodes, and two types of leaf nodes: dense, and sparse. -//! Dense leaves are only found at the very bottom of the trie. - -use re_log::{debug_assert, debug_assert_eq}; -use smallvec::{SmallVec, smallvec}; - -use crate::{RangeI64, RangeU64, i64_key_from_u64_key, u64_key_from_i64_key}; - -// ---------------------------------------------------------------------------- - -/// How high up in the tree we are (where root is highest). -/// `1 << level` is the size of the range the next child. -/// So `1 << ROOT_LEVEL` is the size of the range of each children of the root. -type Level = u64; - -// ---------------------------------------------------------------------------- - -#[expect(dead_code)] -mod small_and_slow { - #[allow(clippy::allow_attributes, clippy::wildcard_imports)] // for the sake of doclinks - use super::*; - - // Uses 20x nodes with 8-way (3 bit) branching factor down to a final 16-way (4 bit) dense leaf. - // 20x 3-bit + 4-bit = 64 bit. - // level 1, 4, 7, …, 58, 61 - // This uses about half the memory of 16-way branching, but is also half as fast. - // 5.7 B/dense entry - // 25-35 B/sparse entry - - /// How many bits we progress in each [`BranchNode`] - pub const LEVEL_STEP: u64 = 3; - - /// The level used for [`DenseLeaf`]. - pub const BOTTOM_LEVEL: Level = 1; - - /// Number of children in [`DenseLeaf`]. - pub const NUM_CHILDREN_IN_DENSE: u64 = 16; -} - -// ---------------------------------------------------------------------------- - -mod large_and_fast { - #[allow(clippy::allow_attributes, clippy::wildcard_imports)] // for the sake of doclinks - use super::*; - - // High memory use, faster - // I believe we could trim this path to use much less memory - // by using dynamically sized nodes (no enum, no Vec/SmallVec), - // but that's left as an exercise for later. 
- // 9.6 B/dense entry - // 26-73 B/sparse entry - - /// How many bits we progress in each [`BranchNode`] - pub const LEVEL_STEP: u64 = 4; - - /// The level used for [`DenseLeaf`]. - pub const BOTTOM_LEVEL: Level = 0; - - /// Number of children in [`DenseLeaf`]. - pub const NUM_CHILDREN_IN_DENSE: u64 = 16; -} - -use large_and_fast::{BOTTOM_LEVEL, LEVEL_STEP, NUM_CHILDREN_IN_DENSE}; - -// ---------------------------------------------------------------------------- - -const ROOT_LEVEL: Level = 64 - LEVEL_STEP; -static_assertions::const_assert_eq!(ROOT_LEVEL + LEVEL_STEP, 64); -static_assertions::const_assert_eq!((ROOT_LEVEL - BOTTOM_LEVEL) % LEVEL_STEP, 0); -const NUM_NODE_STEPS: u64 = (ROOT_LEVEL - BOTTOM_LEVEL) / LEVEL_STEP; -const NUM_STEPS_IN_DENSE_LEAF: u64 = 64 - NUM_NODE_STEPS * LEVEL_STEP; -static_assertions::const_assert_eq!(1 << NUM_STEPS_IN_DENSE_LEAF, NUM_CHILDREN_IN_DENSE); - -const ADDR_MASK: u64 = (1 << LEVEL_STEP) - 1; -const NUM_CHILDREN_IN_NODE: u64 = 1 << LEVEL_STEP; - -/// When a [`SparseLeaf`] goes over this, it becomes a [`BranchNode`]. 
-const MAX_SPARSE_LEAF_LEN: usize = 32; - -fn child_level_and_size(level: Level) -> (Level, u64) { - let child_level = level - LEVEL_STEP; - let child_size = if child_level == 0 { - NUM_CHILDREN_IN_DENSE - } else { - 1 << level - }; - (child_level, child_size) -} - -fn range_u64_from_range_bounds(range: impl std::ops::RangeBounds) -> RangeU64 { - let min = match range.start_bound() { - std::ops::Bound::Included(min) => *min, - std::ops::Bound::Excluded(min) => min.saturating_add(1), - std::ops::Bound::Unbounded => i64::MIN, - }; - let max = match range.end_bound() { - std::ops::Bound::Included(min) => *min, - std::ops::Bound::Excluded(min) => min.saturating_sub(1), - std::ops::Bound::Unbounded => i64::MAX, - }; - RangeU64 { - min: u64_key_from_i64_key(min), - max: u64_key_from_i64_key(max), - } -} - -// ---------------------------------------------------------------------------- -// High-level API - -/// A histogram, mapping [`i64`] key to a [`u64`] count -/// optimizing for very fast range-queries. -#[derive(Clone, Debug)] -pub struct Int64Histogram { - root: Node, -} - -impl Default for Int64Histogram { - fn default() -> Self { - Self { - root: Node::SparseLeaf(SparseLeaf::default()), - } - } -} - -impl Int64Histogram { - /// Increment the count for the given key. - /// - /// Incrementing with one is similar to inserting the key in a multi-set. - pub fn increment(&mut self, key: i64, inc: u32) { - if inc != 0 { - self.root - .increment(ROOT_LEVEL, u64_key_from_i64_key(key), inc); - } - } - - /// Decrement the count for the given key. - /// - /// The decrement is saturating. - /// - /// Returns how much was actually decremented (found). - /// If the returned value is less than the given value, - /// it means that the key was either no found, or had a lower count. 
- pub fn decrement(&mut self, key: i64, dec: u32) -> u32 { - if dec == 0 { - 0 - } else { - self.root - .decrement(ROOT_LEVEL, u64_key_from_i64_key(key), dec) - } - } - - /// Remove all data in the given range. - /// - /// Returns how much count was removed. - /// - /// Currently the implementation is optimized for the case of removing - /// large continuous ranges. - /// Removing many small, scattered ranges (e.g. individual elements) - /// may cause performance problems! - /// This can be remedied with some more code. - pub fn remove(&mut self, range: impl std::ops::RangeBounds) -> u64 { - let range = range_u64_from_range_bounds(range); - self.root.remove(0, ROOT_LEVEL, range) - } - - /// Is the total count zero? - /// - /// Note that incrementing a key with zero is a no-op and - /// will leave an empty histogram still empty. - pub fn is_empty(&self) -> bool { - self.total_count() == 0 - } - - /// Total count of all the buckets. - /// - /// NOTE: this is NOT the number of unique keys. - pub fn total_count(&self) -> u64 { - self.root.total_count() - } - - /// Lowest key with a non-zero count. - pub fn min_key(&self) -> Option { - self.root.min_key(0, ROOT_LEVEL).map(i64_key_from_u64_key) - } - - /// Highest key with a non-zero count. - pub fn max_key(&self) -> Option { - self.root.max_key(0, ROOT_LEVEL).map(i64_key_from_u64_key) - } - - /// What is the count of all the buckets in the given range? - pub fn range_count(&self, range: impl std::ops::RangeBounds) -> u64 { - let range = range_u64_from_range_bounds(range); - if range.min <= range.max { - self.root.range_count(0, ROOT_LEVEL, range) - } else { - 0 - } - } - - /// Iterate over a certain range, returning ranges that are at most `cutoff_size` long. - /// - /// To get all individual entries, use `cutoff_size<=1`. - /// - /// When `cutoff_size > 1` you MAY get ranges which include keys that has no count. - /// However, the ends (min/max) of all returned ranges will be keys with a non-zero count. 
- /// - /// In other words, gaps in the key-space smaller than `cutoff_size` MAY be ignored by this iterator. - /// - /// For example, inserting two elements at `10` and `15` and setting a `cutoff_size=10` - /// you may get a single range `[10, 15]` with the total count. - /// You may also get two ranges of `[10, 10]` and `[15, 15]`. - /// - /// A larger `cutoff_size` will generally yield fewer ranges, and will be faster. - pub fn range(&self, range: impl std::ops::RangeBounds, cutoff_size: u64) -> Iter<'_> { - let range = range_u64_from_range_bounds(range); - Iter { - iter: TreeIterator { - range, - cutoff_size, - stack: smallvec![NodeIterator { - level: ROOT_LEVEL, - abs_addr: 0, - node: &self.root, - index: 0, - }], - }, - } - } - - /// Find the next key greater than the given time. - /// - /// If found, returns that key. Otherwise wraps around and returns the minimum key. - /// Returns `None` only if the histogram is empty. - pub fn next_key_after(&self, time: i64) -> Option { - // Use cutoff_size=1 to get individual keys - if let Some((range, _)) = self - .range( - (std::ops::Bound::Excluded(time), std::ops::Bound::Unbounded), - 1, - ) - .next() - { - Some(range.min) - } else { - // Wrap around to the minimum key - self.min_key() - } - } - - /// Find the previous key less than the given time. - /// - /// If found, returns that key. Otherwise wraps around and returns the maximum key. - /// Returns `None` only if the histogram is empty. - pub fn prev_key_before(&self, time: i64) -> Option { - // Fast path: if the maximum key is less than time, we can return it directly - // This is O(log n) and avoids iterating through ranges - if let Some(max) = self.max_key() { - if max < time { - return Some(max); - } - } else { - // Empty histogram - return None; - } - - // Optimization: Use a larger cutoff_size to reduce the number of ranges we iterate through. 
- // With cutoff_size=1024, we get ranges up to 1024 keys long, which dramatically reduces - // the number of iterations for sparse histograms. - // According to the documentation, the ends (min/max) of returned ranges are guaranteed - // to be keys with non-zero count, so the max of the last range is the correct answer. - let mut last_range_max = None; - for (range, _) in self.range( - (std::ops::Bound::Unbounded, std::ops::Bound::Excluded(time)), - 1024, - ) { - last_range_max = Some(range.max); - } - - last_range_max.or_else(|| { - // No keys before time, wrap around to max - self.max_key() - }) - } -} - -/// An iterator over an [`Int64Histogram`]. -/// -/// Created with [`Int64Histogram::range`]. -pub struct Iter<'a> { - iter: TreeIterator<'a>, -} - -impl Iterator for Iter<'_> { - type Item = (RangeI64, u64); - - #[inline] - fn next(&mut self) -> Option { - self.iter.next().map(|(range, count)| { - ( - RangeI64 { - min: i64_key_from_u64_key(range.min), - max: i64_key_from_u64_key(range.max), - }, - count, - ) - }) - } -} - -// ---------------------------------------------------------------------------- -// Low-level data structure. - -#[derive(Clone, Debug)] -enum Node { - /// An inner node, addressed by the next few bits of the key/address. - /// - /// Never at the [`BOTTOM_LEVEL`] level. - BranchNode(BranchNode), - - /// A list of `(key, count)` pairs. - /// - /// When this becomes too long, it will be converted into a [`BranchNode`]. - /// - /// Never at the [`BOTTOM_LEVEL`] level. - SparseLeaf(SparseLeaf), - - /// Optimization for dense histograms (entries at `N, N+1, N+2, …`). - /// - /// Always at the [`BOTTOM_LEVEL`] level. 
- DenseLeaf(DenseLeaf), -} - -#[derive(Clone, Debug, Default)] -struct BranchNode { - /// Very important optimization - total_count: u64, - - /// The index is the next few bits of the key - children: [Option>; NUM_CHILDREN_IN_NODE as usize], -} - -#[derive(Clone, Debug, Default)] -struct SparseLeaf { - /// Two vectors of equal lengths, - /// making up (addr, count) pairs, - /// sorted by `addr`. - addrs: SmallVec<[u64; 3]>, - - /// The count may never be zero. - counts: SmallVec<[u32; 3]>, -} - -#[derive(Clone, Copy, Debug, Default)] -struct DenseLeaf { - /// The last bits of the address, mapped to their counts - counts: [u32; NUM_CHILDREN_IN_DENSE as usize], -} - -// ---------------------------------------------------------------------------- -// Insert - -impl Node { - /// The default node for a certain level. - fn for_level(level: Level) -> Self { - if level == BOTTOM_LEVEL { - Self::DenseLeaf(DenseLeaf::default()) - } else { - Self::SparseLeaf(SparseLeaf::default()) - } - } - - fn increment(&mut self, level: Level, addr: u64, inc: u32) { - match self { - Self::BranchNode(node) => { - node.increment(level, addr, inc); - } - Self::SparseLeaf(sparse) => { - *self = std::mem::take(sparse).increment(level, addr, inc); - } - Self::DenseLeaf(dense) => { - dense.increment(addr, inc); - } - } - } - - /// Returns how much the total count decreased by. - #[must_use] - fn decrement(&mut self, level: Level, addr: u64, dec: u32) -> u32 { - match self { - Self::BranchNode(node) => { - let count_loss = node.decrement(level, addr, dec); - if node.is_empty() { - *self = Self::SparseLeaf(SparseLeaf::default()); - } - // TODO(emilk): if we only have leaf children (sparse or dense) - // and the number of keys in all of them is less then `MAX_SPARSE_LEAF_LEN`, - // then we should convert this BranchNode into a SparseLeaf. 
- count_loss - } - Self::SparseLeaf(sparse) => sparse.decrement(addr, dec), - Self::DenseLeaf(dense) => dense.decrement(addr, dec), - } - } - - /// Returns how much the total count decreased by. - fn remove(&mut self, my_addr: u64, my_level: Level, range: RangeU64) -> u64 { - match self { - Self::BranchNode(node) => { - let count_loss = node.remove(my_addr, my_level, range); - if node.is_empty() { - *self = Self::SparseLeaf(SparseLeaf::default()); - } - // TODO(emilk): if we only have leaf children (sparse or dense) - // and the number of keys in all of them is less then `MAX_SPARSE_LEAF_LEN`, - // then we should convert this BranchNode into a SparseLeaf. - count_loss - } - Self::SparseLeaf(sparse) => sparse.remove(range), - Self::DenseLeaf(dense) => dense.remove(my_addr, range), - } - } - - fn is_empty(&self) -> bool { - match self { - Self::BranchNode(node) => node.is_empty(), - Self::SparseLeaf(sparse) => sparse.is_empty(), - Self::DenseLeaf(dense) => dense.is_empty(), - } - } - - fn total_count(&self) -> u64 { - match self { - Self::BranchNode(node) => node.total_count(), - Self::SparseLeaf(sparse) => sparse.total_count(), - Self::DenseLeaf(dense) => dense.total_count(), - } - } - - fn min_key(&self, my_addr: u64, my_level: Level) -> Option { - match self { - Self::BranchNode(node) => node.min_key(my_addr, my_level), - Self::SparseLeaf(sparse) => sparse.min_key(), - Self::DenseLeaf(dense) => dense.min_key(my_addr), - } - } - - fn max_key(&self, my_addr: u64, my_level: Level) -> Option { - match self { - Self::BranchNode(node) => node.max_key(my_addr, my_level), - Self::SparseLeaf(sparse) => sparse.max_key(), - Self::DenseLeaf(dense) => dense.max_key(my_addr), - } - } - - fn range_count(&self, my_addr: u64, my_level: Level, range: RangeU64) -> u64 { - match self { - Self::BranchNode(node) => node.range_count(my_addr, my_level, range), - Self::SparseLeaf(sparse) => sparse.range_count(range), - Self::DenseLeaf(dense) => dense.range_count(my_addr, range), - } - } 
-} - -impl BranchNode { - fn increment(&mut self, level: Level, addr: u64, inc: u32) { - debug_assert!(level != BOTTOM_LEVEL); - let child_level = level - LEVEL_STEP; - let top_addr = (addr >> level) & ADDR_MASK; - self.children[top_addr as usize] - .get_or_insert_with(|| Box::new(Node::for_level(child_level))) - .increment(child_level, addr, inc); - self.total_count += inc as u64; - } - - /// Returns how much the total count decreased by. - #[must_use] - fn decrement(&mut self, level: Level, addr: u64, dec: u32) -> u32 { - debug_assert!(level != BOTTOM_LEVEL); - let child_level = level - LEVEL_STEP; - let top_addr = (addr >> level) & ADDR_MASK; - if let Some(child) = &mut self.children[top_addr as usize] { - let count_loss = child.decrement(child_level, addr, dec); - if child.is_empty() { - self.children[top_addr as usize] = None; - } - self.total_count -= count_loss as u64; - count_loss - } else { - 0 - } - } - - /// Returns how much the total count decreased by. - #[must_use] - fn remove(&mut self, my_addr: u64, my_level: Level, range: RangeU64) -> u64 { - debug_assert!(range.min <= range.max); - debug_assert!(my_level != BOTTOM_LEVEL); - - let mut count_loss = 0; - let (child_level, child_size) = child_level_and_size(my_level); - - for ci in 0..NUM_CHILDREN_IN_NODE { - let child_addr = my_addr + ci * child_size; - let child_range = RangeU64::new(child_addr, child_addr + (child_size - 1)); - if range.intersects(child_range) - && let Some(child) = &mut self.children[ci as usize] - { - if range.contains_all_of(child_range) { - count_loss += child.total_count(); - self.children[ci as usize] = None; - } else { - count_loss += child.remove(child_addr, child_level, range); - if child.is_empty() { - self.children[ci as usize] = None; - } - } - } - } - - self.total_count -= count_loss; - - count_loss - } - - fn is_empty(&self) -> bool { - self.total_count == 0 - } - - fn total_count(&self) -> u64 { - self.total_count - } - - fn min_key(&self, my_addr: u64, my_level: 
Level) -> Option { - debug_assert!(my_level != BOTTOM_LEVEL); - - let (child_level, child_size) = child_level_and_size(my_level); - - for ci in 0..NUM_CHILDREN_IN_NODE { - let child_addr = my_addr + ci * child_size; - if let Some(child) = &self.children[ci as usize] - && let Some(min_key) = child.min_key(child_addr, child_level) - { - return Some(min_key); - } - } - None - } - - fn max_key(&self, my_addr: u64, my_level: Level) -> Option { - debug_assert!(my_level != BOTTOM_LEVEL); - - let (child_level, child_size) = child_level_and_size(my_level); - - for ci in (0..NUM_CHILDREN_IN_NODE).rev() { - let child_addr = my_addr + ci * child_size; - if let Some(child) = &self.children[ci as usize] - && let Some(max_key) = child.max_key(child_addr, child_level) - { - return Some(max_key); - } - } - None - } - - fn range_count(&self, my_addr: u64, my_level: Level, range: RangeU64) -> u64 { - debug_assert!(range.min <= range.max); - debug_assert!(my_level != BOTTOM_LEVEL); - - let (child_level, child_size) = child_level_and_size(my_level); - - let mut total_count = 0; - - for ci in 0..NUM_CHILDREN_IN_NODE { - let child_addr = my_addr + ci * child_size; - let child_range = RangeU64::new(child_addr, child_addr + (child_size - 1)); - if range.intersects(child_range) - && let Some(child) = &self.children[ci as usize] - { - if range.contains_all_of(child_range) { - total_count += child.total_count(); - } else { - total_count += child.range_count(child_addr, child_level, range); - } - } - } - - total_count - } -} - -impl SparseLeaf { - #[must_use] - fn increment(mut self, level: Level, abs_addr: u64, inc: u32) -> Node { - let index = self.addrs.partition_point(|&addr| addr < abs_addr); - - if let (Some(addr), Some(count)) = (self.addrs.get_mut(index), self.counts.get_mut(index)) - && *addr == abs_addr - { - *count += inc; - return Node::SparseLeaf(self); - } - - if self.addrs.len() < MAX_SPARSE_LEAF_LEN { - self.addrs.insert(index, abs_addr); - self.counts.insert(index, inc); - 
Node::SparseLeaf(self) - } else { - // Overflow: - let mut node = self.into_branch_node(level); - node.increment(level, abs_addr, inc); - Node::BranchNode(node) - } - } - - /// Called on overflow - #[must_use] - fn into_branch_node(self, level: Level) -> BranchNode { - debug_assert!(level != BOTTOM_LEVEL); - - let mut node = BranchNode::default(); - for (key, count) in self.addrs.iter().zip(&self.counts) { - node.increment(level, *key, *count); - } - node - } - - /// Returns how much the total count decreased by. - #[must_use] - fn decrement(&mut self, abs_addr: u64, dec: u32) -> u32 { - debug_assert_eq!(self.addrs.len(), self.counts.len()); - - let index = self.addrs.partition_point(|&addr| addr < abs_addr); - - if let (Some(addr), Some(count)) = (self.addrs.get_mut(index), self.counts.get_mut(index)) - && *addr == abs_addr - { - return if dec < *count { - *count -= dec; - dec - } else { - let count_loss = *count; - - // The bucket is now empty - remove it: - self.addrs.remove(index); - self.counts.remove(index); - debug_assert_eq!(self.addrs.len(), self.counts.len()); - - count_loss - }; - } - - 0 // not found - } - - /// Returns how much the total count decreased by. 
- #[must_use] - fn remove(&mut self, range: RangeU64) -> u64 { - debug_assert_eq!(self.addrs.len(), self.counts.len()); - - let mut count_loss = 0; - for (key, count) in self.addrs.iter().zip(&mut self.counts) { - if range.contains(*key) { - count_loss += *count as u64; - *count = 0; - } - } - - self.addrs.retain(|addr| !range.contains(*addr)); - self.counts.retain(|count| *count > 0); - debug_assert_eq!(self.addrs.len(), self.counts.len()); - count_loss - } - - fn is_empty(&self) -> bool { - self.addrs.is_empty() // we don't allow zero-sized buckets - } - - fn total_count(&self) -> u64 { - self.counts.iter().map(|&c| c as u64).sum() - } - - fn min_key(&self) -> Option { - self.addrs.first().copied() - } - - fn max_key(&self) -> Option { - self.addrs.last().copied() - } - - fn range_count(&self, range: RangeU64) -> u64 { - let mut total = 0; - for (key, count) in self.addrs.iter().zip(&self.counts) { - if range.contains(*key) { - total += *count as u64; - } - } - total - } -} - -impl DenseLeaf { - fn increment(&mut self, abs_addr: u64, inc: u32) { - self.counts[(abs_addr & (NUM_CHILDREN_IN_DENSE - 1)) as usize] += inc; - } - - /// Returns how much the total count decreased by. - #[must_use] - fn decrement(&mut self, abs_addr: u64, dec: u32) -> u32 { - let bucket_index = (abs_addr & (NUM_CHILDREN_IN_DENSE - 1)) as usize; - let bucket = &mut self.counts[bucket_index]; - if dec < *bucket { - *bucket -= dec; - dec - } else { - let count_loss = *bucket; - *bucket = 0; - count_loss - } - } - - /// Returns how much the total count decreased by. 
- #[must_use] - fn remove(&mut self, my_addr: u64, range: RangeU64) -> u64 { - debug_assert!(range.min <= range.max); - let mut count_loss = 0; - for (i, count) in self.counts.iter_mut().enumerate() { - if range.contains(my_addr + i as u64) { - count_loss += *count as u64; - *count = 0; - } - } - count_loss - } - - fn is_empty(&self) -> bool { - self.total_count() == 0 - } - - fn total_count(&self) -> u64 { - self.counts.iter().map(|&c| c as u64).sum() - } - - fn min_key(&self, my_addr: u64) -> Option { - for (i, count) in self.counts.iter().enumerate() { - if *count > 0 { - return Some(my_addr + i as u64); - } - } - None - } - - fn max_key(&self, my_addr: u64) -> Option { - for (i, count) in self.counts.iter().enumerate().rev() { - if *count > 0 { - return Some(my_addr + i as u64); - } - } - None - } - - fn range_count(&self, my_addr: u64, range: RangeU64) -> u64 { - debug_assert!(range.min <= range.max); - let mut total_count = 0; - for (i, count) in self.counts.iter().enumerate() { - if range.contains(my_addr + i as u64) { - total_count += *count as u64; - } - } - total_count - } -} - -// ---------------------------------------------------------------------------- - -struct TreeIterator<'a> { - /// Only returns things in this range - range: RangeU64, - - /// You can stop recursing when you've reached this size - cutoff_size: u64, - - stack: SmallVec<[NodeIterator<'a>; (NUM_NODE_STEPS + 1) as usize]>, -} - -struct NodeIterator<'a> { - level: Level, - abs_addr: u64, - node: &'a Node, - index: usize, -} - -impl Iterator for TreeIterator<'_> { - /// Am inclusive range, and the total count in that range. 
- type Item = (RangeU64, u64); - - fn next(&mut self) -> Option { - 'outer: while let Some(it) = self.stack.last_mut() { - match it.node { - Node::BranchNode(node) => { - let (child_level, child_size) = child_level_and_size(it.level); - - while it.index < NUM_CHILDREN_IN_NODE as _ { - let child_addr = it.abs_addr + child_size * it.index as u64; - let child_range = RangeU64 { - min: child_addr, - max: child_addr + (child_size - 1), - }; - if self.range.intersects(child_range) - && let Some(Some(child)) = node.children.get(it.index) - { - it.index += 1; - - if child_size <= self.cutoff_size - && self.range.contains_all_of(child_range) - { - // We can return the whole child, but first find a tight range of it: - if let (Some(min_key), Some(max_key)) = ( - child.min_key(child_addr, child_level), - child.max_key(child_addr, child_level), - ) { - return Some(( - RangeU64::new(min_key, max_key), - child.total_count(), - )); - } else { - unreachable!("A `BranchNode` can only have non-empty children"); - } - } - - self.stack.push(NodeIterator { - level: child_level, - abs_addr: child_addr, - node: child, - index: 0, - }); - continue 'outer; - } - it.index += 1; - } - } - Node::SparseLeaf(sparse) => { - while let (Some(abs_addr), Some(count)) = - (sparse.addrs.get(it.index), sparse.counts.get(it.index)) - { - it.index += 1; - if self.range.contains(*abs_addr) { - return Some((RangeU64::single(*abs_addr), *count as u64)); - } - } - } - Node::DenseLeaf(dense) => { - while let Some(count) = dense.counts.get(it.index) { - let abs_addr = it.abs_addr + it.index as u64; - it.index += 1; - if 0 < *count && self.range.contains(abs_addr) { - return Some((RangeU64::single(abs_addr), *count as u64)); - } - } - } - } - self.stack.pop(); - } - None - } -} - -// ---------------------------------------------------------------------------- -// SizeBytes implementation - -impl re_byte_size::SizeBytes for Int64Histogram { - fn heap_size_bytes(&self) -> u64 { - self.root.heap_size_bytes() - } 
-} - -impl re_byte_size::SizeBytes for Node { - fn heap_size_bytes(&self) -> u64 { - match self { - Self::BranchNode(node) => node.heap_size_bytes(), - Self::SparseLeaf(sparse) => sparse.heap_size_bytes(), - Self::DenseLeaf(dense) => dense.heap_size_bytes(), - } - } -} - -impl re_byte_size::SizeBytes for BranchNode { - fn heap_size_bytes(&self) -> u64 { - let Self { - total_count: _, - children, - } = self; - - children - .iter() - .flatten() - .map(|c| c.total_size_bytes()) - .sum() - } -} - -impl re_byte_size::SizeBytes for SparseLeaf { - fn heap_size_bytes(&self) -> u64 { - let Self { addrs, counts } = self; - - // SmallVec has heap data when it exceeds inline capacity - addrs.heap_size_bytes() + counts.heap_size_bytes() - } -} - -impl re_byte_size::SizeBytes for DenseLeaf { - fn heap_size_bytes(&self) -> u64 { - 0 // DenseLeaf is a fixed-size array on the stack - } -} - -// ---------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - #![expect(clippy::cast_possible_wrap)] // ok in tests - - use re_log::debug_assert_eq; - - use super::*; - - #[test] - fn test_dense() { - let mut set = Int64Histogram::default(); - debug_assert_eq!(set.min_key(), None); - debug_assert_eq!(set.max_key(), None); - let mut expected_ranges = vec![]; - for i in 0..100 { - debug_assert_eq!(set.total_count(), i); - debug_assert_eq!(set.range_count(-10000..10000), i); - let key = i as i64; - set.increment(key, 1); - - expected_ranges.push((RangeI64::single(key), 1)); - - debug_assert_eq!(set.min_key(), Some(0)); - debug_assert_eq!(set.max_key(), Some(key)); - } - - assert_eq!(set.range(.., 1).collect::>(), expected_ranges); - assert_eq!(set.range(..10, 1).count(), 10); - - assert_eq!(set.decrement(5, 1), 1); - assert_eq!(set.range(..10, 1).count(), 9); - assert_eq!(set.decrement(5, 1), 0); - assert_eq!(set.range(..10, 1).count(), 9); - } - - #[test] - fn test_sparse() { - let inc = 2; - let spacing = 1_000_000; - let mut set = 
Int64Histogram::default(); - let mut expected_ranges = vec![]; - for i in 0..100 { - debug_assert_eq!(set.total_count(), inc * i); - debug_assert_eq!(set.range_count(-10000..10000 * spacing), inc * i); - let key = i as i64 * spacing; - set.increment(key, inc as u32); - expected_ranges.push((RangeI64::single(key), inc)); - - debug_assert_eq!(set.min_key(), Some(0)); - debug_assert_eq!(set.max_key(), Some(key)); - } - - assert_eq!(set.range(.., 1).collect::>(), expected_ranges); - assert_eq!(set.range(..10 * spacing, 1).count(), 10); - } - - #[test] - fn test_two_dense_ranges() { - let mut set = Int64Histogram::default(); - for i in 0..100 { - set.increment(i, 1); - set.increment(10_000 + i, 1); - set.increment(20_000 + i, 1); - - debug_assert_eq!(set.min_key(), Some(0)); - debug_assert_eq!(set.max_key(), Some(20_000 + i)); - } - - assert_eq!(set.range(..15_000, 1000).count(), 2); - - assert_eq!(set.total_count(), 300); - assert_eq!(set.remove(..10_020), 120); - assert_eq!(set.total_count(), 180); - } - - #[test] - fn test_two_sparse_ranges() { - let mut set = Int64Histogram::default(); - let mut should_contain = vec![]; - let mut should_not_contain = vec![]; - for i in 0..100 { - let a = -1_000_000_000 + i * 1_000; - let b = (i - 50) * 1_000; - let c = 1_000_000_000 + i * 1_000; - set.increment(a, 1); - set.increment(b, 1); - set.increment(c, 1); - - should_contain.push(a); - should_contain.push(b); - should_not_contain.push(c); - } - - let ranges = set.range(..1_000_000_000, 1_000_000).collect::>(); - - assert!(ranges.len() < 10, "We shouldn't get too many ranges"); - - let ranges_contains = |value| ranges.iter().any(|(range, _count)| range.contains(value)); - - for value in should_contain { - assert!(ranges_contains(value)); - } - for value in should_not_contain { - assert!(!ranges_contains(value)); - } - - assert_eq!(set.total_count(), 300); - assert_eq!(set.remove(..0), 150); - assert_eq!(set.total_count(), 150); - } - - /// adjacent ranges closer than the given 
cutoff are treated as one - fn glue_adjacent_ranges(ranges: &[(RangeI64, u64)], cutoff_size: u64) -> Vec<(RangeI64, u64)> { - if ranges.is_empty() { - return vec![]; - } - - let mut it = ranges.iter(); - let mut result = vec![*it.next().unwrap()]; - for &(new_range, new_count) in it { - let (last_range, last_count) = result.last_mut().unwrap(); - if new_range.min.abs_diff(last_range.max) < cutoff_size { - *last_count += new_count; - last_range.max = new_range.max; - } else { - result.push((new_range, new_count)); - } - } - result - } - - #[test] - fn test_ranges_are_tight() { - let mut set = Int64Histogram::default(); - for i in 1..=99 { - set.increment(10_000_000 + i * 1000, 1); - set.increment(500_000_000 + i * 1000, 1); - set.increment(9_000_000_000 + i * 1000, 1); - } - - let cutoff_size = 100_000; - let ranges = set.range(.., cutoff_size).collect::>(); - assert!(ranges.len() <= 10, "We shouldn't get too many ranges"); - - // The Int64Histogram is allowed to split tight ranges - // if they hit a binary split-line, so we do a pass where we glue - // adjacent ranges together. 
- let ranges = glue_adjacent_ranges(&ranges, cutoff_size); - - assert_eq!( - ranges, - vec![ - (RangeI64::new(10_001_000, 10_099_000), 99), - (RangeI64::new(500_001_000, 500_099_000), 99), - (RangeI64::new(9_000_001_000, 9_000_099_000), 99), - ] - ); - } - - #[test] - fn test_removal() { - let mut set = Int64Histogram::default(); - set.increment(i64::MAX, 1); - set.increment(i64::MAX - 1, 2); - set.increment(i64::MAX - 2, 3); - set.increment(i64::MIN + 2, 3); - set.increment(i64::MIN + 1, 2); - set.increment(i64::MIN, 1); - - debug_assert_eq!(set.min_key(), Some(i64::MIN)); - debug_assert_eq!(set.max_key(), Some(i64::MAX)); - - debug_assert_eq!(set.range_count((i64::MAX - 1)..=i64::MAX), 3); - debug_assert_eq!( - set.range(0.., 1).collect::>(), - vec![ - (RangeI64::single(i64::MAX - 2), 3), - (RangeI64::single(i64::MAX - 1), 2), - (RangeI64::single(i64::MAX), 1), - ] - ); - - set.remove(i64::MAX..=i64::MAX); - - debug_assert_eq!(set.min_key(), Some(i64::MIN)); - debug_assert_eq!(set.max_key(), Some(i64::MAX - 1)); - - debug_assert_eq!( - set.range(.., 1).collect::>(), - vec![ - (RangeI64::single(i64::MIN), 1), - (RangeI64::single(i64::MIN + 1), 2), - (RangeI64::single(i64::MIN + 2), 3), - (RangeI64::single(i64::MAX - 2), 3), - (RangeI64::single(i64::MAX - 1), 2), - ] - ); - - set.remove(i64::MIN..=(i64::MAX - 2)); - - debug_assert_eq!(set.min_key(), Some(i64::MAX - 1)); - debug_assert_eq!(set.max_key(), Some(i64::MAX - 1)); - - debug_assert_eq!( - set.range(.., 1).collect::>(), - vec![(RangeI64::single(i64::MAX - 1), 2),] - ); - } - - #[test] - fn test_decrement() { - let mut set = Int64Histogram::default(); - - for i in 0..100 { - set.increment(i, 2); - } - - assert_eq!((set.min_key(), set.max_key()), (Some(0), Some(99))); - assert_eq!(set.range(.., 1).count(), 100); - - for i in 0..100 { - assert_eq!(set.decrement(i, 1), 1); - } - - assert_eq!((set.min_key(), set.max_key()), (Some(0), Some(99))); - assert_eq!(set.range(.., 1).count(), 100); - - for i in 0..50 { - 
assert_eq!(set.decrement(i, 1), 1); - } - - assert_eq!((set.min_key(), set.max_key()), (Some(50), Some(99))); - assert_eq!(set.range(.., 1).count(), 50); - - for i in 0..50 { - assert_eq!( - set.decrement(i, 1), - 0, - "Should already have been decremented" - ); - } - - assert_eq!((set.min_key(), set.max_key()), (Some(50), Some(99))); - assert_eq!(set.range(.., 1).count(), 50); - - for i in 50..99 { - assert_eq!(set.decrement(i, 1), 1); - } - - assert_eq!((set.min_key(), set.max_key()), (Some(99), Some(99))); - assert_eq!(set.range(.., 1).count(), 1); - - assert_eq!(set.decrement(99, 1), 1); - - assert_eq!((set.min_key(), set.max_key()), (None, None)); - assert_eq!(set.range(.., 1).count(), 0); - } - - #[test] - fn test_next_key_after() { - let mut hist = Int64Histogram::default(); - - // Empty histogram - assert_eq!(hist.next_key_after(0), None); - - // Single key - hist.increment(10, 1); - assert_eq!(hist.next_key_after(5), Some(10)); - assert_eq!(hist.next_key_after(10), Some(10)); // wraps around - assert_eq!(hist.next_key_after(15), Some(10)); // wraps around - - // Multiple keys - hist.increment(20, 1); - hist.increment(30, 1); - assert_eq!(hist.next_key_after(5), Some(10)); - assert_eq!(hist.next_key_after(10), Some(20)); - assert_eq!(hist.next_key_after(15), Some(20)); - assert_eq!(hist.next_key_after(25), Some(30)); - assert_eq!(hist.next_key_after(30), Some(10)); // wraps around - assert_eq!(hist.next_key_after(35), Some(10)); // wraps around - - // Sparse keys - hist = Int64Histogram::default(); - hist.increment(1000, 1); - hist.increment(2000, 1); - hist.increment(3000, 1); - assert_eq!(hist.next_key_after(500), Some(1000)); - assert_eq!(hist.next_key_after(1500), Some(2000)); - assert_eq!(hist.next_key_after(2500), Some(3000)); - assert_eq!(hist.next_key_after(3500), Some(1000)); // wraps around - } - - #[test] - fn test_prev_key_before() { - let mut hist = Int64Histogram::default(); - - // Empty histogram - assert_eq!(hist.prev_key_before(0), None); - 
- // Single key - hist.increment(10, 1); - assert_eq!(hist.prev_key_before(15), Some(10)); - assert_eq!(hist.prev_key_before(10), Some(10)); // wraps around - assert_eq!(hist.prev_key_before(5), Some(10)); // wraps around - - // Multiple keys - hist.increment(20, 1); - hist.increment(30, 1); - assert_eq!(hist.prev_key_before(35), Some(30)); - assert_eq!(hist.prev_key_before(30), Some(20)); - assert_eq!(hist.prev_key_before(25), Some(20)); - assert_eq!(hist.prev_key_before(15), Some(10)); - assert_eq!(hist.prev_key_before(10), Some(30)); // wraps around - assert_eq!(hist.prev_key_before(5), Some(30)); // wraps around - - // Sparse keys - hist = Int64Histogram::default(); - hist.increment(1000, 1); - hist.increment(2000, 1); - hist.increment(3000, 1); - assert_eq!(hist.prev_key_before(3500), Some(3000)); - assert_eq!(hist.prev_key_before(2500), Some(2000)); - assert_eq!(hist.prev_key_before(1500), Some(1000)); - assert_eq!(hist.prev_key_before(500), Some(3000)); // wraps around - - // Fast path: max_key < time - assert_eq!(hist.max_key(), Some(3000)); - assert_eq!(hist.prev_key_before(5000), Some(3000)); - - // Dense histogram with many keys (tests optimization) - hist = Int64Histogram::default(); - for i in 0..1000 { - hist.increment(i, 1); - } - assert_eq!(hist.prev_key_before(500), Some(499)); - assert_eq!(hist.prev_key_before(1000), Some(999)); - assert_eq!(hist.prev_key_before(0), Some(999)); // wraps around - } -} diff --git a/crates/utils/re_int_histogram/tests/memory_test.rs b/crates/utils/re_int_histogram/tests/memory_test.rs deleted file mode 100644 index b2c11bb7cdab..000000000000 --- a/crates/utils/re_int_histogram/tests/memory_test.rs +++ /dev/null @@ -1,96 +0,0 @@ -use insta::assert_debug_snapshot; -use re_byte_size::testing::TrackingAllocator; - -#[global_allocator] -pub static GLOBAL_ALLOCATOR: TrackingAllocator = TrackingAllocator::system(); - -fn memory_use(run: impl Fn() -> R) -> usize { - TrackingAllocator::memory_use(run).1 -} - -// 
---------------------------------------------------------------------------- - -/// Baseline for performance and memory benchmarks -#[derive(Default)] -pub struct BTreeInt64Histogram { - map: std::collections::BTreeMap, -} - -impl BTreeInt64Histogram { - pub fn increment(&mut self, key: i64, inc: u32) { - *self.map.entry(key).or_default() += inc; - } -} - -// ---------------------------------------------------------------------------- - -/// Number of elements -const N: i64 = 1_000_000; - -#[test] -fn test_memory_use_btree() { - use BTreeInt64Histogram; - - fn create(num_elements: i64, spacing: i64) -> BTreeInt64Histogram { - let mut histogram = BTreeInt64Histogram::default(); - for i in 0..num_elements { - histogram.increment(i * spacing, 1); - } - histogram - } - - fn bytes_per_entry(num_elements: i64, spacing: i64) -> f64 { - let num_bytes = memory_use(|| create(num_elements, spacing)); - num_bytes as f64 / num_elements as f64 - } - - assert_debug_snapshot!( - "btree", - [ - format!("{:.1} B/entry, dense", bytes_per_entry(N, 1)), - format!("{:.1} B/entry, spacing: 1M", bytes_per_entry(N, 1_000_000)), - format!("{:.1} B/entry, spacing: 2M", bytes_per_entry(N, 2_000_000)), - format!("{:.1} B/entry, spacing: 3M", bytes_per_entry(N, 3_000_000)), - format!("{:.1} B/entry, spacing: 5M", bytes_per_entry(N, 5_000_000)), - format!("{:.1} B/entry, spacing: 8M", bytes_per_entry(N, 8_000_000)), - format!( - "{:.1} B/entry, spacing: 13M", - bytes_per_entry(N, 13_000_000) - ), - ] - ); -} - -#[test] -fn test_memory_use_tree() { - use re_int_histogram::Int64Histogram; - - fn create(num_elements: i64, spacing: i64) -> Int64Histogram { - let mut histogram = Int64Histogram::default(); - for i in 0..num_elements { - histogram.increment(i * spacing, 1); - } - histogram - } - - fn bytes_per_entry(num_elements: i64, spacing: i64) -> f64 { - let num_bytes = memory_use(|| create(num_elements, spacing)); - num_bytes as f64 / num_elements as f64 - } - - assert_debug_snapshot!( - 
"Int64Histogram", - [ - format!("{:.1} B/entry, dense", bytes_per_entry(N, 1)), - format!("{:.1} B/entry, spacing: 1M", bytes_per_entry(N, 1_000_000)), - format!("{:.1} B/entry, spacing: 2M", bytes_per_entry(N, 2_000_000)), - format!("{:.1} B/entry, spacing: 3M", bytes_per_entry(N, 3_000_000)), - format!("{:.1} B/entry, spacing: 5M", bytes_per_entry(N, 5_000_000)), - format!("{:.1} B/entry, spacing: 8M", bytes_per_entry(N, 8_000_000)), - format!( - "{:.1} B/entry, spacing: 13M", - bytes_per_entry(N, 13_000_000) - ), - ] - ); -} diff --git a/crates/utils/re_int_histogram/tests/snapshots/memory_test__Int64Histogram.snap b/crates/utils/re_int_histogram/tests/snapshots/memory_test__Int64Histogram.snap deleted file mode 100644 index fe0d6cb148f3..000000000000 --- a/crates/utils/re_int_histogram/tests/snapshots/memory_test__Int64Histogram.snap +++ /dev/null @@ -1,13 +0,0 @@ ---- -source: crates/utils/re_int_histogram/tests/memory_test.rs -expression: "[format!(\"{:.1} B/entry, dense\", bytes_per_entry(N, 1)),\nformat!(\"{:.1} B/entry, spacing: 1M\", bytes_per_entry(N, 1_000_000)),\nformat!(\"{:.1} B/entry, spacing: 2M\", bytes_per_entry(N, 2_000_000)),\nformat!(\"{:.1} B/entry, spacing: 3M\", bytes_per_entry(N, 3_000_000)),\nformat!(\"{:.1} B/entry, spacing: 5M\", bytes_per_entry(N, 5_000_000)),\nformat!(\"{:.1} B/entry, spacing: 8M\", bytes_per_entry(N, 8_000_000)),\nformat!(\"{:.1} B/entry, spacing: 13M\", bytes_per_entry(N, 13_000_000)),]" ---- -[ - "9.6 B/entry, dense", - "29.5 B/entry, spacing: 1M", - "34.2 B/entry, spacing: 2M", - "44.6 B/entry, spacing: 3M", - "50.9 B/entry, spacing: 5M", - "73.2 B/entry, spacing: 8M", - "26.0 B/entry, spacing: 13M", -] diff --git a/crates/utils/re_int_histogram/tests/snapshots/memory_test__btree.snap b/crates/utils/re_int_histogram/tests/snapshots/memory_test__btree.snap deleted file mode 100644 index 03fd33e790f6..000000000000 --- a/crates/utils/re_int_histogram/tests/snapshots/memory_test__btree.snap +++ /dev/null @@ -1,13 +0,0 
@@ ---- -source: crates/utils/re_int_histogram/tests/memory_test.rs -expression: "[format!(\"{:.1} B/entry, dense\", bytes_per_entry(N, 1)),\nformat!(\"{:.1} B/entry, spacing: 1M\", bytes_per_entry(N, 1_000_000)),\nformat!(\"{:.1} B/entry, spacing: 2M\", bytes_per_entry(N, 2_000_000)),\nformat!(\"{:.1} B/entry, spacing: 3M\", bytes_per_entry(N, 3_000_000)),\nformat!(\"{:.1} B/entry, spacing: 5M\", bytes_per_entry(N, 5_000_000)),\nformat!(\"{:.1} B/entry, spacing: 8M\", bytes_per_entry(N, 8_000_000)),\nformat!(\"{:.1} B/entry, spacing: 13M\", bytes_per_entry(N, 13_000_000)),]" ---- -[ - "26.3 B/entry, dense", - "26.3 B/entry, spacing: 1M", - "26.3 B/entry, spacing: 2M", - "26.3 B/entry, spacing: 3M", - "26.3 B/entry, spacing: 5M", - "26.3 B/entry, spacing: 8M", - "26.3 B/entry, spacing: 13M", -] diff --git a/crates/utils/re_log/src/lib.rs b/crates/utils/re_log/src/lib.rs index aaa6a9b55a4d..ba0430ed3683 100644 --- a/crates/utils/re_log/src/lib.rs +++ b/crates/utils/re_log/src/lib.rs @@ -135,6 +135,7 @@ const CRATES_AT_INFO_LEVEL: &[&str] = &[ "datafusion", "h2", "hyper", + "opentelemetry", // Spams about NoopMeterProvider "prost_build", "reqwest", // Spams "starting new connection: …" "sqlparser", @@ -194,6 +195,8 @@ pub fn log_filter_from_env_or_default(default_base_log_filter: &str) -> String { /// Adds builtin log level filters for crates that are too verbose. 
#[cfg(not(target_arch = "wasm32"))] fn add_builtin_log_filter(base_log_filter: &str) -> String { + use std::fmt::Write as _; + let mut rust_log = base_log_filter.to_lowercase(); if base_log_filter != "off" { @@ -201,7 +204,7 @@ fn add_builtin_log_filter(base_log_filter: &str) -> String { for crate_name in crate::CRATES_AT_ERROR_LEVEL { if !rust_log.contains(&format!("{crate_name}=")) { - rust_log += &format!(",{crate_name}=error"); + write!(rust_log, ",{crate_name}=error").ok(); } } @@ -210,7 +213,7 @@ fn add_builtin_log_filter(base_log_filter: &str) -> String { for crate_name in crate::CRATES_AT_WARN_LEVEL { if !rust_log.contains(&format!("{crate_name}=")) { - rust_log += &format!(",{crate_name}=warn"); + write!(rust_log, ",{crate_name}=warn").ok(); } } @@ -219,7 +222,7 @@ fn add_builtin_log_filter(base_log_filter: &str) -> String { for crate_name in crate::CRATES_AT_INFO_LEVEL { if !rust_log.contains(&format!("{crate_name}=")) { - rust_log += &format!(",{crate_name}=info"); + write!(rust_log, ",{crate_name}=info").ok(); } } } diff --git a/crates/utils/re_memory/Cargo.toml b/crates/utils/re_memory/Cargo.toml index a5834370bce5..77cc2f16a1a8 100644 --- a/crates/utils/re_memory/Cargo.toml +++ b/crates/utils/re_memory/Cargo.toml @@ -31,6 +31,7 @@ itertools.workspace = true nohash-hasher.workspace = true parking_lot.workspace = true saturating_cast.workspace = true +serde.workspace = true smallvec.workspace = true web-time.workspace = true @@ -38,7 +39,7 @@ web-time.workspace = true [target.'cfg(not(target_arch = "wasm32"))'.dependencies] backtrace.workspace = true memory-stats = { workspace = true, features = ["always_use_statm"] } -sysinfo = { workspace = true, default-features = false } +sysinfo = { workspace = true, features = ["system"] } # web dependencies: [target.'cfg(target_arch = "wasm32")'.dependencies] diff --git a/crates/utils/re_memory/src/accounting_allocator.rs b/crates/utils/re_memory/src/accounting_allocator.rs index 6f591a80ff04..ab45dd0d27e6 100644 
--- a/crates/utils/re_memory/src/accounting_allocator.rs +++ b/crates/utils/re_memory/src/accounting_allocator.rs @@ -352,7 +352,10 @@ fn note_alloc(ptr: *mut u8, size: usize) { // track the allocations made by the allocation tracker: IS_THREAD_IN_ALLOCATION_TRACKER.with(|is_thread_in_allocation_tracker| { - if !is_thread_in_allocation_tracker.get() { + if is_thread_in_allocation_tracker.get() { + // This is the ALLOCATION_TRACKER allocating memory. + GLOBAL.overhead.add(size); + } else { is_thread_in_allocation_tracker.set(true); let ptr_hash = PtrHash::new(ptr); @@ -365,9 +368,6 @@ fn note_alloc(ptr: *mut u8, size: usize) { } is_thread_in_allocation_tracker.set(false); - } else { - // This is the ALLOCATION_TRACKER allocating memory. - GLOBAL.overhead.add(size); } }); } @@ -386,7 +386,10 @@ fn note_dealloc(ptr: *mut u8, size: usize) { // Big enough to track - but make sure we don't create a deadlock by trying to // track the allocations made by the allocation tracker: IS_THREAD_IN_ALLOCATION_TRACKER.with(|is_thread_in_allocation_tracker| { - if !is_thread_in_allocation_tracker.get() { + if is_thread_in_allocation_tracker.get() { + // This is the ALLOCATION_TRACKER freeing memory. + GLOBAL.overhead.sub(size); + } else { is_thread_in_allocation_tracker.set(true); let ptr_hash = PtrHash::new(ptr); @@ -399,9 +402,6 @@ fn note_dealloc(ptr: *mut u8, size: usize) { } is_thread_in_allocation_tracker.set(false); - } else { - // This is the ALLOCATION_TRACKER freeing memory. - GLOBAL.overhead.sub(size); } }); } diff --git a/crates/utils/re_memory/src/memory_limit.rs b/crates/utils/re_memory/src/memory_limit.rs index b16f689ac1b6..8c9e9a45bba7 100644 --- a/crates/utils/re_memory/src/memory_limit.rs +++ b/crates/utils/re_memory/src/memory_limit.rs @@ -7,7 +7,8 @@ use saturating_cast::SaturatingCast as _; /// /// It is recommended that they log using [`re_log::info_once`] when they /// drop data because a memory limit is reached. 
-#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Deserialize, serde::Serialize)] +#[serde(transparent)] pub struct MemoryLimit { /// Limit in bytes. /// diff --git a/crates/utils/re_memory/src/ram_warner.rs b/crates/utils/re_memory/src/ram_warner.rs index 42ebfeeceb85..7132499d0155 100644 --- a/crates/utils/re_memory/src/ram_warner.rs +++ b/crates/utils/re_memory/src/ram_warner.rs @@ -4,7 +4,8 @@ pub fn total_ram_in_bytes() -> Option { re_tracing::profile_function!(); let mut sys = sysinfo::System::new_with_specifics( - sysinfo::RefreshKind::new().with_memory(sysinfo::MemoryRefreshKind::new().with_ram()), + sysinfo::RefreshKind::nothing() + .with_memory(sysinfo::MemoryRefreshKind::nothing().with_ram()), ); { diff --git a/crates/utils/re_mutex/README.md b/crates/utils/re_mutex/README.md index cd87559e78f7..6e326948e066 100644 --- a/crates/utils/re_mutex/README.md +++ b/crates/utils/re_mutex/README.md @@ -2,8 +2,8 @@ Part of the [`rerun`](https://github.com/rerun-io/rerun) family of crates. 
-[![Latest version](https://img.shields.io/crates/v/re_mutex.svg)](https://crates.io/crates/re_mutex?speculative-link) -[![Documentation](https://docs.rs/re_mutex/badge.svg)](https://docs.rs/re_mutex?speculative-link) +[![Latest version](https://img.shields.io/crates/v/re_mutex.svg)](https://crates.io/crates/re_mutex) +[![Documentation](https://docs.rs/re_mutex/badge.svg)](https://docs.rs/re_mutex) ![MIT](https://img.shields.io/badge/license-MIT-blue.svg) ![Apache](https://img.shields.io/badge/license-Apache-blue.svg) diff --git a/crates/utils/re_perf_telemetry/Cargo.toml b/crates/utils/re_perf_telemetry/Cargo.toml index b519720a57d7..e0d49fff0bb4 100644 --- a/crates/utils/re_perf_telemetry/Cargo.toml +++ b/crates/utils/re_perf_telemetry/Cargo.toml @@ -65,6 +65,7 @@ opentelemetry-otlp = { workspace = true, features = ["grpc-tonic"] } opentelemetry_sdk = { workspace = true, features = [ "rt-tokio", "experimental_metrics_custom_reader", + "spec_unstable_metrics_views", ] } parking_lot.workspace = true serde.workspace = true @@ -83,5 +84,8 @@ tokio.workspace = true tracing-tracy = { workspace = true, optional = true } pyo3 = { workspace = true, optional = true } +[dev-dependencies] +opentelemetry_sdk = { workspace = true, features = ["testing"] } + [lints] workspace = true diff --git a/crates/utils/re_perf_telemetry/README.md b/crates/utils/re_perf_telemetry/README.md index db1799d13ecb..21e91503c384 100644 --- a/crates/utils/re_perf_telemetry/README.md +++ b/crates/utils/re_perf_telemetry/README.md @@ -54,10 +54,14 @@ df = dataset.dataframe_query_view( print(df.count()) ``` -* Example of out-of-process profiling using Jaeger (run `pixi run compose-dev` in the Redap repository to start a Jaeger instance): +* Example of out-of-process profiling using Jaeger (run `pixi run compose-dev` in the Redap repository to start a Jaeger instance, or `docker run --rm -p 16686:16686 -p 4317:4317 jaegertracing/jaeger:2.11.0` for a standalone instance): ```sh - # Build the SDK with 
performance telemetry enabled, in the 'examples' environment: - $ py-build-perf-examples + # `perf_telemetry` is a default feature of `rerun_py`, so a plain build is enough for OTel/Jaeger: + $ pixi run py-build + + # If you additionally want Python-side spans (e.g. via the `@with_tracing` decorator in `rerun._tracing`), + # install the tracing extra so the OpenTelemetry Python packages are available: + $ pixi run uv pip install 'rerun-sdk[tracing]' # Run your script with both telemetry and the OpenTelemetry integration enabled: $ TELEMETRY_ENABLED=true OTEL_SDK_ENABLED=true @@ -74,8 +78,8 @@ print(df.count()) * Example of in-process profiling using Tracy: ```sh - # Build the SDK with performance telemetry enabled, in the 'examples' environment: - $ py-build-perf-examples + # Build the SDK with Tracy support (the `tracy` Cargo feature on `re_perf_telemetry`): + $ pixi run py-build-perf-debug # or `py-build-perf-release` # Run your script with both telemetry and the Tracy integration enabled: $ TELEMETRY_ENABLED=true TRACY_ENABLED=true diff --git a/crates/utils/re_perf_telemetry/src/grpc.rs b/crates/utils/re_perf_telemetry/src/grpc.rs index 3ab7ae960c3d..d0ec51a56f51 100644 --- a/crates/utils/re_perf_telemetry/src/grpc.rs +++ b/crates/utils/re_perf_telemetry/src/grpc.rs @@ -66,6 +66,11 @@ impl tower_http::trace::MakeSpan for GrpcMakeSpan { let _guard = parent_ctx.attach(); let endpoint = request.uri().path().to_owned(); + // Split "/package.Service/Method" into rpc.service and rpc.method per OTel conventions. + let (rpc_service, rpc_method) = endpoint + .strip_prefix('/') + .and_then(|s| s.split_once('/')) + .unwrap_or(("", "")); let url = request .uri() .to_string() @@ -111,26 +116,34 @@ impl tower_http::trace::MakeSpan for GrpcMakeSpan { // no sampling at the `tracing` level, only at the `opentelemetry` level. 
// We use that fact to our advantage in order to carry a bunch of state around across all // the stages of the request (first response, first chunk, end-of-stream, etc). - let mut safe_headers = request.headers().clone(); - _ = safe_headers.remove("authorization"); let span = tracing::span!( tracing::Level::INFO, "", otel.name = %endpoint, url, method = %request.method(), - // Record trace_id and benchmark_id as top level span fields. + + // OTel semantic conventions for gRPC (https://opentelemetry.io/docs/specs/semconv/rpc/grpc/): + rpc.system = "grpc", + rpc.service = %rpc_service, + rpc.method = %rpc_method, + + // Record benchmark_id as a top level span field. // - // At this stage we may not know yet the actual trace_id (depending on whether + // At this stage we may not know yet the actual value (depending on whether // we're generating a new trace or continuing an existing one). However, - // we need to pre-declare these fields if we want to record values for them later. + // we need to pre-declare the field if we want to record a value for it later. + // + // The field will be filled in by a separate [`tracing_subscriber::Layer`] (see + // [`BenchmarkIdLayer`]). // - // The fields will be filled in by a separate [`tracing_subscriber::Layer`] (see - // [`TraceIdLayer`]). - trace_id = tracing::field::Empty, // This will only be filled if we have a benchmark_id in the tracestate. // That's OK, it won't be printed if empty. benchmark_id = tracing::field::Empty, + + // The gRPC status code (e.g. "Ok", "AlreadyExists", "DeadlineExceeded"). + // Filled in later by `GrpcOnResponse` or `GrpcOnEos`, depending on the endpoint type (unary vs streaming). + grpc_status = tracing::field::Empty, ); let size = SpanMetadata::insert_opt( @@ -277,6 +290,16 @@ impl SpanMetadata { fn remove_opt(span_id: Option<&tracing::span::Id>) -> Option { span_id.and_then(Self::remove) } + + /// Silently removes metadata for a span, returning `Some` if it existed. 
+ /// + /// Unlike [`Self::remove`], this does NOT warn when the metadata is missing. + /// Used by [`SpanMetadataCleanupLayer::on_close`] where the metadata has typically + /// already been removed by [`GrpcOnEos`] or [`GrpcOnResponse`]. + fn remove_silent(span_id: &tracing::span::Id) -> Option { + let spans = SPAN_METADATA.get()?; + spans.write().remove(span_id) + } } // --- @@ -343,9 +366,6 @@ impl GrpcOnResponse { let histogram = meter .f64_histogram("grpc_on_response_ms") .with_description("Latency percentiles for all gRPC endpoints (\"time to response\")") - .with_boundaries(vec![ - 10.0, 25.0, 50.0, 75.0, 100.0, 200.0, 350.0, 500.0, 750.0, 1000.0, 2500.0, 5000.0, - ]) .build(); let eos_counter = meter .u64_counter("grpc_on_eos") @@ -430,8 +450,16 @@ impl tower_http::trace::OnResponse for GrpcOnResponse { grpc_eos_classifier: _, } = span_metadata.clone(); - let record = |grpc_code: tonic::Code| { - let grpc_status = format!("{grpc_code:?}"); // NOTE: The debug repr is the enum variant name (e.g. DeadlineExceeded). + // Record response metrics and optionally set the final `grpc_status` span attribute. + // Pass `Some(code)` when the final gRPC status is known, `None` when it will be + // determined later by `GrpcOnEos` (streaming responses). + let record = |span: &tracing::Span, grpc_code: Option| { + let grpc_status = if let Some(grpc_code) = grpc_code { + format!("{grpc_code:?}") + } else { + "".to_owned() + }; + span.record("grpc_status", grpc_status.as_str()); let http_status = response.status().as_str().to_owned(); let client_version = client_version.as_deref().unwrap_or("undefined"); @@ -441,10 +469,17 @@ impl tower_http::trace::OnResponse for GrpcOnResponse { // NOTE: repeat all these attributes so services such as CloudWatch, which don't really // support OTLP, can actually see them. 
- if grpc_status == "Ok" { - tracing::info!(%endpoint, %grpc_status, %http_status, %client_version, %server_version, %email, %dataset_id, ?latency, "grpc_on_response"); - } else { - tracing::error!(%endpoint, %grpc_status, %http_status, %client_version, %server_version, %email, %dataset_id, ?latency, "grpc_on_response"); + match grpc_status.as_str() { + "Ok" => { + tracing::info!(%endpoint, %grpc_status, %http_status, %client_version, %server_version, %email, %dataset_id, ?latency, "grpc_on_response"); + } + "" => { + // Streaming response — final gRPC status will be logged by `GrpcOnEos`. + tracing::debug!(%endpoint, %grpc_status, %http_status, %client_version, %server_version, %email, %dataset_id, ?latency, "grpc_on_response"); + } + _ => { + tracing::error!(%endpoint, %grpc_status, %http_status, %client_version, %server_version, %email, %dataset_id, ?latency, "grpc_on_response"); + } } self.histogram.record( @@ -473,7 +508,7 @@ impl tower_http::trace::OnResponse for GrpcOnResponse { tonic::Status::from_error(err.into()).code() } }; - record(grpc_code); + record(span, Some(grpc_code)); // For immediate errors, emit grpc_on_eos counter here since on_eos won't be called let grpc_status = format!("{grpc_code:?}"); // NOTE: The debug repr is the enum variant name (e.g. DeadlineExceeded). @@ -494,12 +529,13 @@ impl tower_http::trace::OnResponse for GrpcOnResponse { SpanMetadata::remove_opt(span.id().as_ref()); } - tower_http::classify::ClassifiedResponse::Ready(Ok(_)) => { - record(tonic::Code::Ok); + tower_http::classify::ClassifiedResponse::Ready(Ok(())) => { + record(span, Some(tonic::Code::Ok)); } tower_http::classify::ClassifiedResponse::RequiresEos(eos) => { - record(tonic::Code::Ok); + // Final gRPC status is unknown until end-of-stream; `GrpcOnEos` will set it. 
+ record(span, None); SpanMetadata::insert_opt( span.id(), SpanMetadata { @@ -528,9 +564,6 @@ impl GrpcOnFirstBodyChunk { .with_description( "Latency percentiles for all gRPC endpoints (\"time to first chunk\")", ) - .with_boundaries(vec![ - 10.0, 25.0, 50.0, 75.0, 100.0, 200.0, 350.0, 500.0, 750.0, 1000.0, 2500.0, 5000.0, - ]) .build(); Self { histogram } } @@ -652,6 +685,7 @@ impl tower_http::trace::OnEos for GrpcOnEos { tonic::Code::Unknown }; let grpc_status = format!("{grpc_code:?}"); // NOTE: The debug repr is the enum variant name (e.g. DeadlineExceeded). + span.record("grpc_status", &grpc_status); let client_version = client_version.as_deref().unwrap_or("undefined"); let server_version = server_version.as_deref().unwrap_or("undefined"); @@ -802,25 +836,25 @@ use tracing_opentelemetry::OpenTelemetrySpanExt as _; use tracing_subscriber::Layer; use tracing_subscriber::layer::Context; -/// A `tracing_subscriber::Layer` that injects the opentelemetry `trace_id` as a `benchmark_id` field -/// top level field on every span. +/// A `tracing_subscriber::Layer` that injects the opentelemetry `benchmark_id` as a +/// top level field on every span that pre-declares it. /// -/// This allows us to use the upstream tooling to filter logs within a span by `trace_id` +/// The `benchmark_id` is extracted from the W3C `tracestate` header. #[derive(Default)] -pub struct TraceIdLayer { +pub struct BenchmarkIdLayer { _private: (), } // Just a marker to avoid injecting multiple times per span. 
-struct TraceIdInjected; +struct BenchmarkIdInjected; -impl Layer for TraceIdLayer +impl Layer for BenchmarkIdLayer where S: Subscriber + for<'a> tracing_subscriber::registry::LookupSpan<'a>, { fn on_enter(&self, id: &Id, ctx: Context<'_, S>) { if let Some(span_ref) = ctx.span(id) { - if span_ref.extensions().get::().is_some() { + if span_ref.extensions().get::().is_some() { return; } @@ -830,14 +864,40 @@ where let span_cx = otel_span.span_context(); if span_cx.is_valid() { - let trace_id = span_cx.trace_id(); let trace_state = span_cx.trace_state(); - current_span.record("trace_id", trace_id.to_string()); if let Some(benchmark_id) = trace_state.get("benchmark_id") { current_span.record("benchmark_id", benchmark_id.to_owned()); } - span_ref.extensions_mut().insert(TraceIdInjected); + span_ref.extensions_mut().insert(BenchmarkIdInjected); } } } } + +// --- + +/// A [`tracing_subscriber::Layer`] that cleans up `SpanMetadata` entries when spans close. +/// +/// In the normal flow, metadata is removed by [`GrpcOnEos`] `on_eos` (streaming responses) +/// or [`GrpcOnResponse`] `on_response` (immediate errors). However, if the client disconnects +/// or a transport error occurs, `on_eos` may never be called, leaving stale entries behind. +/// +/// Because the `tracing` crate recycles span IDs after a span closes, stale entries cause +/// "overwritten span metadata" warnings when the recycled ID is reused by a new request. +/// +/// `on_close` is called when all clones of a span are dropped, and critically, **before** the +/// span ID is recycled. This guarantees any leaked metadata is cleaned up before the ID can +/// be reused. 
+#[derive(Default)] +pub struct SpanMetadataCleanupLayer { + _private: (), +} + +impl Layer for SpanMetadataCleanupLayer +where + S: Subscriber + for<'a> tracing_subscriber::registry::LookupSpan<'a>, +{ + fn on_close(&self, id: Id, _ctx: Context<'_, S>) { + SpanMetadata::remove_silent(&id); + } +} diff --git a/crates/utils/re_perf_telemetry/src/lib.rs b/crates/utils/re_perf_telemetry/src/lib.rs index 90c9ae083c81..9dd93bab1af8 100644 --- a/crates/utils/re_perf_telemetry/src/lib.rs +++ b/crates/utils/re_perf_telemetry/src/lib.rs @@ -49,8 +49,11 @@ mod grpc; mod memory_telemetry; mod metrics_server; mod prometheus; +#[cfg(feature = "pyo3")] +mod python_bridge; mod shared_reader; mod telemetry; +mod trace_id_format; mod tracestate; mod utils; @@ -60,14 +63,19 @@ use opentelemetry_sdk::propagation::TraceContextPropagator; pub use self::args::{LogFormat, TelemetryArgs}; pub use self::grpc::{ - ClientTelemetryLayer, GrpcMakeSpan, GrpcOnEos, GrpcOnFirstBodyChunk, GrpcOnRequest, - GrpcOnResponse, GrpcOnResponseOptions, ServerTelemetryLayer, TelemetryLayerOptions, - TraceIdLayer, TracingInjectorInterceptor, new_client_telemetry_layer, - new_server_telemetry_layer, + BenchmarkIdLayer, ClientTelemetryLayer, GrpcMakeSpan, GrpcOnEos, GrpcOnFirstBodyChunk, + GrpcOnRequest, GrpcOnResponse, GrpcOnResponseOptions, ServerTelemetryLayer, + SpanMetadataCleanupLayer, TelemetryLayerOptions, TracingInjectorInterceptor, + new_client_telemetry_layer, new_server_telemetry_layer, }; pub use self::telemetry::{Telemetry, TelemetryDropBehavior}; pub use self::utils::to_short_str; +#[cfg(feature = "pyo3")] +pub use self::python_bridge::{ + TRACE_CONTEXT_VAR_NAME, extract_trace_context_from_contextvar, get_trace_context_var, +}; + pub mod external { #[cfg(feature = "tracy")] pub use tracing_tracy; @@ -131,7 +139,7 @@ impl TraceHeaders { pub const TRACEPARENT_KEY: &'static str = "traceparent"; pub const TRACESTATE_KEY: &'static str = "tracestate"; - fn empty() -> Self { + pub(crate) fn empty() -> 
Self { Self { traceparent: String::new(), tracestate: None, @@ -146,14 +154,32 @@ impl TraceHeaders { } } +impl TraceHeaders { + /// Attach these trace headers as the current `OpenTelemetry` context. + /// + /// Returns a guard that must be kept alive for the duration of the traced scope. + /// Any [`tracing::Span`] created while the guard is alive will be parented under + /// the trace described by these headers. + /// + /// Returns `None` if the headers are empty (no `traceparent`). + #[must_use] + pub fn attach(&self) -> Option { + if self.traceparent.is_empty() { + None + } else { + let parent_ctx = + opentelemetry::global::get_text_map_propagator(|prop| prop.extract(self)); + Some(parent_ctx.attach()) + } + } +} + impl opentelemetry::propagation::Injector for TraceHeaders { fn set(&mut self, key: &str, value: String) { match key { Self::TRACEPARENT_KEY => self.traceparent = value, - Self::TRACESTATE_KEY => { - if !value.is_empty() { - self.tracestate = Some(value); - } + Self::TRACESTATE_KEY if !value.is_empty() => { + self.tracestate = Some(value); } _ => {} } @@ -184,80 +210,6 @@ impl From<&TraceHeaders> for opentelemetry::Context { // --- -/// The name of the `ContextVar` used for trace context propagation -pub const TRACE_CONTEXT_VAR_NAME: &str = "TRACE_CONTEXT"; - -#[cfg(feature = "pyo3")] -/// Get the trace context `ContextVar` object. -/// -/// This returns the same Python `ContextVar` instance every time, ensuring that -/// values set on it can be read back later. It is up to the caller to ensure trace context -/// is reset and cleared as needed. 
-pub fn get_trace_context_var(py: pyo3::Python<'_>) -> pyo3::PyResult> { - use pyo3::prelude::*; - - static CONTEXT_VAR: parking_lot::Mutex>> = - parking_lot::Mutex::new(None); - - let mut guard = CONTEXT_VAR.lock(); - - if let Some(var) = guard.as_ref() { - return Ok(var.bind(py).clone()); - } - - // Create the trace context ContextVar - let module = py.import("contextvars")?; - let contextvar_class = module.getattr("ContextVar")?; - let trace_ctx_var = contextvar_class.call1((TRACE_CONTEXT_VAR_NAME,))?; - let trace_ctx_unbound = trace_ctx_var.clone().unbind(); - - *guard = Some(trace_ctx_unbound); - - Ok(trace_ctx_var) -} - -#[cfg(feature = "pyo3")] -/// Extract trace context from Python `ContextVar` for cross-boundary propagation. -pub fn extract_trace_context_from_contextvar(py: pyo3::Python<'_>) -> TraceHeaders { - use pyo3::prelude::*; - use pyo3::types::PyDict; - - fn try_extract(py: pyo3::Python<'_>) -> PyResult { - let context_var = get_trace_context_var(py)?; - - match context_var.call_method0("get") { - Ok(trace_data) => { - if let Ok(dict) = trace_data.downcast::() { - let traceparent = dict - .get_item(TraceHeaders::TRACEPARENT_KEY)? - .and_then(|v| v.extract::().ok()) - .unwrap_or_default(); - - let tracestate = dict - .get_item(TraceHeaders::TRACESTATE_KEY)? 
- .and_then(|v| v.extract::().ok()); - - let headers = TraceHeaders { - traceparent, - tracestate, - }; - - tracing::debug!("Trace headers: {:?}", headers); - Ok(headers) - } else { - Ok(TraceHeaders::empty()) - } - } - Err(_) => Ok(TraceHeaders::empty()), - } - } - - try_extract(py).unwrap_or_else(|err| { - tracing::debug!("Failed to extract trace context: {err}"); - TraceHeaders::empty() - }) -} - // --- // Extension to [`tracing_subscriber:EnvFilter`] that allows to @@ -281,11 +233,11 @@ impl EnvFilterExt for tracing_subscriber::EnvFilter { target: &str, default: &str, ) -> anyhow::Result { - if !base.contains(&format!("{target}=")) { + if base.contains(&format!("{target}=")) { + Ok(self) + } else { let filter = self.add_directive(format!("{target}={default}").parse()?); Ok(filter) - } else { - Ok(self) } } } diff --git a/crates/utils/re_perf_telemetry/src/metrics_server.rs b/crates/utils/re_perf_telemetry/src/metrics_server.rs index 395abc749901..c24cf861484b 100644 --- a/crates/utils/re_perf_telemetry/src/metrics_server.rs +++ b/crates/utils/re_perf_telemetry/src/metrics_server.rs @@ -70,7 +70,7 @@ async fn manual_metrics_handler(State(reader): State>) -> impl // Collect metrics from ManualReader match reader.collect(&mut resource_metrics) { - Ok(_) => { + Ok(()) => { let metrics = Arc::new(Mutex::new(MetricContainer::new())); // Convert ResourceMetrics to Prometheus metrics and get the registry diff --git a/crates/utils/re_perf_telemetry/src/prometheus.rs b/crates/utils/re_perf_telemetry/src/prometheus.rs index 51c010045959..2e0f9510805a 100644 --- a/crates/utils/re_perf_telemetry/src/prometheus.rs +++ b/crates/utils/re_perf_telemetry/src/prometheus.rs @@ -8,12 +8,12 @@ use std::sync::Arc; use opentelemetry::KeyValue; use opentelemetry_sdk::metrics::data::ResourceMetrics; -use parking_lot::Mutex; -use prometheus_client::encoding::EncodeLabelSet; +use parking_lot::{Mutex, RwLock}; +use prometheus_client::encoding::{EncodeLabelSet, EncodeMetric, MetricEncoder, 
NoLabelSet}; use prometheus_client::metrics::counter::Counter; -use prometheus_client::metrics::family::Family; +use prometheus_client::metrics::family::{Family, MetricConstructor}; use prometheus_client::metrics::gauge::Gauge; -use prometheus_client::metrics::histogram::{Histogram, exponential_buckets}; +use prometheus_client::metrics::{MetricType, TypedMetric}; use prometheus_client::registry::Registry; /// Dynamic labels for metrics that support arbitrary key-value pairs @@ -42,7 +42,10 @@ impl EncodeLabelSet for DynamicLabels { pub struct MetricContainer { pub counters: HashMap>, pub gauges: HashMap>>, - pub histograms: HashMap>, + pub histograms: HashMap< + String, + Family, + >, } impl MetricContainer { @@ -161,8 +164,8 @@ pub fn convert_to_prometheus( ); } } - AggregatedMetrics::F64(MetricData::Histogram(histogram)) => { - register_histogram_f64( + AggregatedMetrics::F64(MetricData::ExponentialHistogram(histogram)) => { + register_exponential_histogram_f64( &mut registry, &metric_name, metric.description(), @@ -170,8 +173,8 @@ pub fn convert_to_prometheus( metrics, ); } - AggregatedMetrics::I64(MetricData::Histogram(histogram)) => { - register_histogram_i64( + AggregatedMetrics::I64(MetricData::ExponentialHistogram(histogram)) => { + register_exponential_histogram_i64( &mut registry, &metric_name, metric.description(), @@ -179,8 +182,8 @@ pub fn convert_to_prometheus( metrics, ); } - AggregatedMetrics::U64(MetricData::Histogram(histogram)) => { - register_histogram_u64( + AggregatedMetrics::U64(MetricData::ExponentialHistogram(histogram)) => { + register_exponential_histogram_u64( &mut registry, &metric_name, metric.description(), @@ -189,10 +192,9 @@ pub fn convert_to_prometheus( ); } _ => { - // ExponentialHistogram or other types not supported + // Other metric types not supported } } - // Note: ExponentialHistogram is not directly supported in Prometheus } } @@ -462,11 +464,150 @@ fn register_gauge_from_sum_u64( registry.register(name, description, 
gauge_family); } -fn register_histogram_f64( +/// A histogram that holds pre-aggregated data (sum, count, buckets) directly, +/// avoiding the lossy `observe()` approximation. Implements `EncodeMetric` so +/// Prometheus text encoding emits exact values from the `OTel` source. +#[derive(Debug, Clone)] +pub struct PreAggregatedHistogram { + inner: Arc>, +} + +#[derive(Debug)] +struct PreAggregatedHistogramInner { + sum: f64, + count: u64, + + /// `(upper_bound, count)` pairs — non-cumulative per-bucket counts. + /// The prometheus-client encoder converts these to cumulative during + /// text encoding. + buckets: Vec<(f64, u64)>, +} + +impl PreAggregatedHistogram { + /// Populate from an `OTel` exponential histogram data point. + /// + /// Only positive and zero observations are mapped to Prometheus buckets. + /// Negative observations cannot be faithfully represented in Prometheus's + /// `le`-based histogram model, so we log a warning if any are present. + /// In practice all our histograms track durations and sizes which are + /// always non-negative. + fn set_from_exponential( + &self, + scale: i8, + positive_bucket: &opentelemetry_sdk::metrics::data::ExponentialBucket, + negative_bucket: &opentelemetry_sdk::metrics::data::ExponentialBucket, + zero_count: u64, + sum: f64, + count: u64, + ) { + let negative_count: u64 = negative_bucket.counts().sum(); + if negative_count > 0 { + tracing::warn!( + negative_count, + "Histogram has negative observations which \ + cannot be represented in Prometheus and will be dropped" + ); + } + + let positive_counts: Vec = positive_bucket.counts().collect(); + self.set_from_raw_buckets( + scale, + positive_bucket.offset(), + &positive_counts, + zero_count, + sum, + count, + ); + } + + /// Populate from raw exponential histogram bucket data. + /// + /// `scale` controls bucket resolution: base = 2^(2^(-scale)). + /// `offset` is the bucket index of the first entry in `positive_counts`. 
+ /// `positive_counts[i]` is the count for values in (base^(offset+i), base^(offset+i+1)]. + fn set_from_raw_buckets( + &self, + scale: i8, + offset: i32, + positive_counts: &[u64], + zero_count: u64, + sum: f64, + count: u64, + ) { + let base = (2.0_f64).powf((2.0_f64).powi(-(scale as i32))); + + let mut buckets: Vec<(f64, u64)> = positive_counts + .iter() + .enumerate() + .map(|(i, &c)| { + let upper = base.powi(offset + i as i32 + 1); + (upper, c) + }) + .collect(); + + // Place zero-count observations into the first bucket (or create one + // before +Inf if there are no positive buckets). + if zero_count > 0 { + if let Some(first) = buckets.first_mut() { + first.1 += zero_count; + } else { + buckets.push((0.0, zero_count)); + } + } + + // Always add +Inf bucket — required by Prometheus exposition format. + // Count of 0 is correct here because the encoder accumulates cumulatively. + buckets.push((f64::MAX, 0)); + + let mut inner = self.inner.write(); + inner.sum = sum; + inner.count = count; + inner.buckets = buckets; + } +} + +impl Default for PreAggregatedHistogram { + fn default() -> Self { + Self { + inner: Arc::new(RwLock::new(PreAggregatedHistogramInner { + sum: 0.0, + count: 0, + buckets: Vec::new(), + })), + } + } +} + +impl TypedMetric for PreAggregatedHistogram { + const TYPE: MetricType = MetricType::Histogram; +} + +impl EncodeMetric for PreAggregatedHistogram { + fn encode(&self, mut encoder: MetricEncoder<'_>) -> Result<(), std::fmt::Error> { + let inner = self.inner.read(); + encoder.encode_histogram::(inner.sum, inner.count, &inner.buckets, None) + } + + fn metric_type(&self) -> MetricType { + Self::TYPE + } +} + +/// Default constructor for `Family<_, PreAggregatedHistogram, _>`. 
+#[derive(Clone, Default)] +pub struct PreAggregatedHistogramConstructor; + +impl MetricConstructor for PreAggregatedHistogramConstructor { + fn new_metric(&self) -> PreAggregatedHistogram { + PreAggregatedHistogram::default() + } +} + +fn register_exponential_histogram_f64( registry: &mut Registry, name: &str, description: &str, - histogram: &opentelemetry_sdk::metrics::data::Histogram, + histogram: &opentelemetry_sdk::metrics::data::ExponentialHistogram, metrics: &Arc>, ) { let points: Vec<_> = histogram.data_points().collect(); @@ -474,29 +615,22 @@ fn register_histogram_f64( return; } - // Create histogram with default exponential buckets - // TODO(thz): Consider preserving original bucket boundaries if needed - let histogram_family = Family::::new_with_constructor(|| { - Histogram::new(exponential_buckets(0.005, 2.0, 10)) - }); + let histogram_family = Family::::new_with_constructor( + PreAggregatedHistogramConstructor, + ); - // Note: We can't directly set histogram values in prometheus-client, - // we can only observe individual samples. Since we have pre-aggregated data, - // we'll need to approximate by observing samples. 
for point in &points { let attrs: Vec<_> = point.attributes().cloned().collect(); let labels = create_dynamic_labels(&attrs); let hist = histogram_family.get_or_create(&labels); - - // Approximate by observing the mean value multiple times - if point.count() > 0 { - let mean = point.sum() / point.count() as f64; - // Observe the mean value to approximate the distribution - // This preserves sum but not the exact distribution - for _ in 0..point.count() { - hist.observe(mean); - } - } + hist.set_from_exponential( + point.scale(), + point.positive_bucket(), + point.negative_bucket(), + point.zero_count(), + point.sum(), + point.count() as u64, + ); } let mut container = metrics.lock(); @@ -506,11 +640,11 @@ fn register_histogram_f64( registry.register(name, description, histogram_family); } -fn register_histogram_i64( +fn register_exponential_histogram_i64( registry: &mut Registry, name: &str, description: &str, - histogram: &opentelemetry_sdk::metrics::data::Histogram, + histogram: &opentelemetry_sdk::metrics::data::ExponentialHistogram, metrics: &Arc>, ) { let points: Vec<_> = histogram.data_points().collect(); @@ -518,21 +652,22 @@ fn register_histogram_i64( return; } - let histogram_family = Family::::new_with_constructor(|| { - Histogram::new(exponential_buckets(0.005, 2.0, 10)) - }); + let histogram_family = Family::::new_with_constructor( + PreAggregatedHistogramConstructor, + ); for point in &points { let attrs: Vec<_> = point.attributes().cloned().collect(); let labels = create_dynamic_labels(&attrs); let hist = histogram_family.get_or_create(&labels); - - if point.count() > 0 { - let mean = point.sum() as f64 / point.count() as f64; - for _ in 0..point.count() { - hist.observe(mean); - } - } + hist.set_from_exponential( + point.scale(), + point.positive_bucket(), + point.negative_bucket(), + point.zero_count(), + point.sum() as f64, + point.count() as u64, + ); } let mut container = metrics.lock(); @@ -542,11 +677,11 @@ fn register_histogram_i64( 
registry.register(name, description, histogram_family); } -fn register_histogram_u64( +fn register_exponential_histogram_u64( registry: &mut Registry, name: &str, description: &str, - histogram: &opentelemetry_sdk::metrics::data::Histogram, + histogram: &opentelemetry_sdk::metrics::data::ExponentialHistogram, metrics: &Arc>, ) { let points: Vec<_> = histogram.data_points().collect(); @@ -554,21 +689,22 @@ fn register_histogram_u64( return; } - let histogram_family = Family::::new_with_constructor(|| { - Histogram::new(exponential_buckets(0.005, 2.0, 10)) - }); + let histogram_family = Family::::new_with_constructor( + PreAggregatedHistogramConstructor, + ); for point in &points { let attrs: Vec<_> = point.attributes().cloned().collect(); let labels = create_dynamic_labels(&attrs); let hist = histogram_family.get_or_create(&labels); - - if point.count() > 0 { - let mean = point.sum() as f64 / point.count() as f64; - for _ in 0..point.count() { - hist.observe(mean); - } - } + hist.set_from_exponential( + point.scale(), + point.positive_bucket(), + point.negative_bucket(), + point.zero_count(), + point.sum() as f64, + point.count() as u64, + ); } let mut container = metrics.lock(); @@ -600,3 +736,132 @@ fn create_dynamic_labels(attributes: &[KeyValue]) -> DynamicLabels { labels.sort_by(|a, b| a.0.cmp(&b.0)); // Ensure consistent ordering DynamicLabels(labels) } + +#[cfg(test)] +mod tests { + use super::*; + use prometheus_client::encoding::text::encode; + + /// Helper: encode a single `PreAggregatedHistogram` registered as "test" + /// and return the Prometheus text exposition string. 
+ fn encode_histogram(hist: &PreAggregatedHistogram) -> String { + let mut registry = Registry::default(); + registry.register("test", "help", hist.clone()); + let mut buf = String::new(); + encode(&mut buf, ®istry).unwrap(); + buf + } + + #[test] + fn pre_aggregated_histogram_basic_encoding() { + let hist = PreAggregatedHistogram::default(); + + // Scale 0 → base = 2^(2^0) = 2.0 + // offset = 0, counts = [3, 5, 2] + // Bucket boundaries: (0, 2^1=2], (2, 2^2=4], (4, 2^3=8] + hist.set_from_raw_buckets(0, 0, &[3, 5, 2], 0, 42.0, 10); + + let output = encode_histogram(&hist); + assert!(output.contains("test_sum 42.0"), "output: {output}"); + assert!(output.contains("test_count 10"), "output: {output}"); + // Cumulative: le=2 → 3, le=4 → 8, le=8 → 10, le=+Inf → 10 + assert!( + output.contains(r#"test_bucket{le="2.0"} 3"#), + "output: {output}" + ); + assert!( + output.contains(r#"test_bucket{le="4.0"} 8"#), + "output: {output}" + ); + assert!( + output.contains(r#"test_bucket{le="8.0"} 10"#), + "output: {output}" + ); + assert!( + output.contains(r#"test_bucket{le="+Inf"} 10"#), + "output: {output}" + ); + } + + #[test] + fn pre_aggregated_histogram_with_zero_count() { + let hist = PreAggregatedHistogram::default(); + + // Scale 0, offset 0, one positive bucket with count 2, plus 3 zeros + hist.set_from_raw_buckets(0, 0, &[2], 3, 5.0, 5); + + let output = encode_histogram(&hist); + assert!(output.contains("test_count 5"), "output: {output}"); + assert!(output.contains("test_sum 5.0"), "output: {output}"); + // Zeros are added to the first bucket: 2 + 3 = 5 + // Cumulative: le=2 → 5, le=+Inf → 5 + assert!( + output.contains(r#"test_bucket{le="2.0"} 5"#), + "output: {output}" + ); + assert!( + output.contains(r#"test_bucket{le="+Inf"} 5"#), + "output: {output}" + ); + } + + #[test] + fn pre_aggregated_histogram_zero_only() { + let hist = PreAggregatedHistogram::default(); + + // No positive buckets, only zeros + hist.set_from_raw_buckets(0, 0, &[], 7, 0.0, 7); + + let 
output = encode_histogram(&hist); + assert!(output.contains("test_count 7"), "output: {output}"); + assert!(output.contains("test_sum 0.0"), "output: {output}"); + // Zero-only: creates a (0.0, 7) bucket, then +Inf + assert!( + output.contains(r#"test_bucket{le="0.0"} 7"#), + "output: {output}" + ); + assert!( + output.contains(r#"test_bucket{le="+Inf"} 7"#), + "output: {output}" + ); + } + + #[test] + fn pre_aggregated_histogram_with_offset() { + let hist = PreAggregatedHistogram::default(); + + // Scale 0, offset 2, counts = [4] + // Bucket boundary: base^(2+0+1) = 2^3 = 8 + hist.set_from_raw_buckets(0, 2, &[4], 0, 28.0, 4); + + let output = encode_histogram(&hist); + assert!(output.contains("test_count 4"), "output: {output}"); + assert!( + output.contains(r#"test_bucket{le="8.0"} 4"#), + "output: {output}" + ); + assert!( + output.contains(r#"test_bucket{le="+Inf"} 4"#), + "output: {output}" + ); + } + + #[test] + fn pre_aggregated_histogram_finer_scale() { + let hist = PreAggregatedHistogram::default(); + + // Scale 1 → base = 2^(2^(-1)) = 2^0.5 ≈ 1.4142 + // offset 0, counts = [10] + // Bucket boundary: base^(0+0+1) = √2 ≈ 1.4142 + hist.set_from_raw_buckets(1, 0, &[10], 0, 12.0, 10); + + let output = encode_histogram(&hist); + assert!(output.contains("test_count 10"), "output: {output}"); + // Check that the bucket upper bound is approximately √2 + // The exact value depends on floating point, so just check the prefix + assert!( + output.contains("test_bucket{le=\"1.41421"), + "expected bucket ≈ √2, output: {output}" + ); + } +} diff --git a/crates/utils/re_perf_telemetry/src/python_bridge.rs b/crates/utils/re_perf_telemetry/src/python_bridge.rs new file mode 100644 index 000000000000..cd08667dd5d8 --- /dev/null +++ b/crates/utils/re_perf_telemetry/src/python_bridge.rs @@ -0,0 +1,82 @@ +//! Python↔Rust trace context bridge via a shared [`ContextVar`]. +//! +//! See `rerun_py/src/catalog/trace_context.rs` for the full bridge documentation +//! 
and the Rust-side entry points that call into these helpers. +//! +//! [`ContextVar`]: https://docs.python.org/3/library/contextvars.html#contextvars.ContextVar + +use crate::TraceHeaders; + +/// The name of the Python `ContextVar` used for trace context propagation. +pub const TRACE_CONTEXT_VAR_NAME: &str = "TRACE_CONTEXT"; + +/// Get the trace context `ContextVar` object. +/// +/// This returns the same Python `ContextVar` instance every time, ensuring that +/// values set on it can be read back later. It is up to the caller to ensure trace context +/// is reset and cleared as needed. +pub fn get_trace_context_var(py: pyo3::Python<'_>) -> pyo3::PyResult> { + use pyo3::prelude::*; + + static CONTEXT_VAR: parking_lot::Mutex>> = + parking_lot::Mutex::new(None); + + let mut guard = CONTEXT_VAR.lock(); + + if let Some(var) = guard.as_ref() { + return Ok(var.bind(py).clone()); + } + + // Create the trace context ContextVar + let module = py.import("contextvars")?; + let contextvar_class = module.getattr("ContextVar")?; + let trace_ctx_var = contextvar_class.call1((TRACE_CONTEXT_VAR_NAME,))?; + let trace_ctx_unbound = trace_ctx_var.clone().unbind(); + + *guard = Some(trace_ctx_unbound); + + Ok(trace_ctx_var) +} + +/// Extract trace context from the Python `ContextVar` for cross-boundary propagation. +/// +/// Returns empty [`TraceHeaders`] if the `ContextVar` is unset or extraction fails. +pub fn extract_trace_context_from_contextvar(py: pyo3::Python<'_>) -> TraceHeaders { + use pyo3::prelude::*; + use pyo3::types::PyDict; + + fn try_extract(py: pyo3::Python<'_>) -> PyResult { + let context_var = get_trace_context_var(py)?; + + match context_var.call_method0("get") { + Ok(trace_data) => { + if let Ok(dict) = trace_data.downcast::() { + let traceparent = dict + .get_item(TraceHeaders::TRACEPARENT_KEY)? + .and_then(|v| v.extract::().ok()) + .unwrap_or_default(); + + let tracestate = dict + .get_item(TraceHeaders::TRACESTATE_KEY)? 
+ .and_then(|v| v.extract::().ok()); + + let headers = TraceHeaders { + traceparent, + tracestate, + }; + + tracing::debug!("Trace headers: {:?}", headers); + Ok(headers) + } else { + Ok(TraceHeaders::empty()) + } + } + Err(_) => Ok(TraceHeaders::empty()), + } + } + + try_extract(py).unwrap_or_else(|err| { + tracing::debug!("Failed to extract trace context: {err}"); + TraceHeaders::empty() + }) +} diff --git a/crates/utils/re_perf_telemetry/src/telemetry.rs b/crates/utils/re_perf_telemetry/src/telemetry.rs index ba2d28c53596..efcaf00bb24d 100644 --- a/crates/utils/re_perf_telemetry/src/telemetry.rs +++ b/crates/utils/re_perf_telemetry/src/telemetry.rs @@ -3,14 +3,15 @@ use std::sync::Arc; use opentelemetry::trace::TracerProvider as _; use opentelemetry_otlp::WithTonicConfig as _; use opentelemetry_sdk::logs::SdkLoggerProvider; -use opentelemetry_sdk::metrics::SdkMeterProvider; +use opentelemetry_sdk::metrics::{Aggregation, SdkMeterProvider}; use opentelemetry_sdk::trace::{BatchConfigBuilder, BatchSpanProcessor, SdkTracerProvider}; use tracing_subscriber::layer::SubscriberExt as _; use tracing_subscriber::util::SubscriberInitExt as _; use tracing_subscriber::{EnvFilter, Layer as _}; use crate::shared_reader::SharedManualReader; -use crate::{LogFormat, TelemetryArgs, TraceIdLayer}; +use crate::trace_id_format::TraceIdFormat; +use crate::{BenchmarkIdLayer, LogFormat, SpanMetadataCleanupLayer, TelemetryArgs}; // --- @@ -230,6 +231,7 @@ impl Telemetry { // .add_directive_if_absent(base, "lance::index", "off")? .add_directive_if_absent(base, "lance::io::exec", "off")? + .add_directive_if_absent(base, "lance::execution", "warn")? .add_directive_if_absent(base, "lance::dataset::scanner", "off")? .add_directive_if_absent(base, "lance_index", "off")? .add_directive_if_absent(base, "lance::dataset::builder", "off")? @@ -268,8 +270,10 @@ impl Telemetry { // Everything is generically typed, which is why this is such a nightmare to do. macro_rules! 
handle_format { - ($format:ident) => {{ - let layer = layer.$format(); + ($format:ident, $is_json:expr) => {{ + let layer = layer + .$format() + .map_event_format(|f| TraceIdFormat::new(f, $is_json)); if log_test_output { layer.with_test_writer().boxed() } else { @@ -278,9 +282,9 @@ impl Telemetry { }}; } let layer = match log_format { - LogFormat::Pretty => handle_format!(pretty), - LogFormat::Compact => handle_format!(compact), - LogFormat::Json => handle_format!(json), + LogFormat::Pretty => handle_format!(pretty, false), + LogFormat::Compact => handle_format!(compact, false), + LogFormat::Json => handle_format!(json, true), }; layer.with_filter(create_filter(&log_filter, "warn")?) @@ -392,6 +396,32 @@ impl Telemetry { let (metric_provider, metrics_reader) = if otel_enabled { let mut builder = SdkMeterProvider::builder(); + // Use base-2 exponential histograms (OTel equivalent of Prometheus native + // histograms) instead of explicit bucket histograms. This avoids hardcoding + // bucket boundaries and lets the SDK auto-scale resolution. + builder = builder.with_view(|instrument: &opentelemetry_sdk::metrics::Instrument| { + if instrument.kind() == opentelemetry_sdk::metrics::InstrumentKind::Histogram { + opentelemetry_sdk::metrics::Stream::builder() + .with_aggregation(Aggregation::Base2ExponentialHistogram { + // Max buckets per positive/negative range. Negative buckets + // stay empty for duration/size metrics. Comparable to the + // ~10 explicit buckets we had before, but with auto-scaling + // boundaries. + max_size: 20, + // Starting resolution scale. The base of each bucket is + // 2^(2^(-scale)). At scale 20 (the maximum), buckets are + // extremely fine-grained; the SDK automatically downscales + // when observations exceed max_size buckets. 
+ max_scale: 20, + record_min_max: true, + }) + .build() + .ok() + } else { + None + } + }); + // OTLP exporter for push-based metrics let otlp_exporter = opentelemetry_otlp::MetricExporter::builder() .with_temporality(opentelemetry_sdk::metrics::Temporality::Cumulative) @@ -432,7 +462,8 @@ impl Telemetry { .with(layer_logs_otlp) .with(layer_logs_and_traces_stdio) .with(layer_traces_otlp) - .with(TraceIdLayer::default()) + .with(BenchmarkIdLayer::default()) + .with(SpanMetadataCleanupLayer::default()) .with(self::tracy::tracy_layer()) .try_init()?; } @@ -446,7 +477,8 @@ impl Telemetry { .with(layer_logs_otlp) .with(layer_logs_and_traces_stdio) .with(layer_traces_otlp) - .with(TraceIdLayer::default()) + .with(BenchmarkIdLayer::default()) + .with(SpanMetadataCleanupLayer::default()) .try_init()?; } diff --git a/crates/utils/re_perf_telemetry/src/trace_id_format.rs b/crates/utils/re_perf_telemetry/src/trace_id_format.rs new file mode 100644 index 000000000000..a4497058fff6 --- /dev/null +++ b/crates/utils/re_perf_telemetry/src/trace_id_format.rs @@ -0,0 +1,401 @@ +use std::fmt; + +use tracing_subscriber::fmt::format::Writer; +use tracing_subscriber::fmt::{FmtContext, FormatEvent, FormatFields}; +use tracing_subscriber::registry::LookupSpan; + +/// Number of hex characters to show in the text-format log prefix. +/// 8 hex chars = 4 bytes = ~4 billion unique values — plenty for local dev. +const SHORT_TRACE_ID_LEN: usize = 8; + +/// A [`FormatEvent`] wrapper that injects the current `OpenTelemetry` `trace_id` +/// into every log line. +/// +/// For JSON output (`is_json = true`), the full `trace_id` is injected as a +/// top-level JSON field. For text output (`is_json = false`), a short 8-char +/// prefix is prepended in brackets to keep timestamps aligned. +/// +/// The `trace_id` is included regardless of whether the trace is sampled, +/// as long as the span context is valid. 
+pub struct TraceIdFormat { + inner: F, + is_json: bool, +} + +impl TraceIdFormat { + pub fn new(inner: F, is_json: bool) -> Self { + Self { inner, is_json } + } +} + +fn current_trace_id() -> Option { + use opentelemetry::trace::TraceContextExt as _; + + // Read directly from the OTel thread-local context, which is set by the + // `tracing-opentelemetry` layer's `on_enter` (context activation). + // This is more robust than going through `tracing::Span::current().context()` + // because it doesn't depend on the tracing→otel span lookup. + let cx = opentelemetry::Context::current(); + let span = cx.span(); + let span_cx = span.span_context(); + + span_cx.is_valid().then(|| span_cx.trace_id().to_string()) +} + +impl FormatEvent for TraceIdFormat +where + S: tracing::Subscriber + for<'a> LookupSpan<'a>, + N: for<'a> FormatFields<'a> + 'static, + F: FormatEvent, +{ + fn format_event( + &self, + ctx: &FmtContext<'_, S, N>, + mut writer: Writer<'_>, + event: &tracing::Event<'_>, + ) -> fmt::Result { + let trace_id = current_trace_id(); + + if self.is_json { + // JSON: buffer the full output (ANSI is never used in JSON) and + // inject `"trace_id":"…"` as the first field after the opening brace. + let mut buf = String::with_capacity(512); + let buf_writer = Writer::new(&mut buf); + self.inner.format_event(ctx, buf_writer, event)?; + + if let Some(ref trace_id) = trace_id { + if let Some(after_brace) = buf.strip_prefix('{') { + writer.write_str("{\"trace_id\":\"")?; + writer.write_str(trace_id)?; + writer.write_str("\",")?; + writer.write_str(after_brace)?; + } else { + writer.write_str(&buf)?; + } + } else { + writer.write_str(&buf)?; + } + } else { + // Text (pretty/compact): prepend a short trace_id tag (first 8 hex + // chars) so timestamps stay aligned. Write directly to the real + // writer to preserve ANSI escape sequences. 
+ // + // With trace context: `[a1b2c3d4] 2026-03-06 INFO …` + // Without: `[--------] 2026-03-06 INFO …` + match trace_id { + Some(ref id) => { + // Use `get` to avoid panicking if the id is unexpectedly short + // or non-ASCII. The format spec pads with `-` to keep alignment. + let prefix = id.get(..SHORT_TRACE_ID_LEN).unwrap_or(id.as_str()); + write!(writer, "[{prefix:- writer.write_str("[--------] ")?, + } + self.inner.format_event(ctx, writer, event)?; + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use parking_lot::Mutex; + + use opentelemetry::trace::TracerProvider as _; + use tracing_subscriber::Layer as _; + use tracing_subscriber::layer::SubscriberExt as _; + + use super::*; + + /// A writer that captures output into a shared buffer. + #[derive(Clone)] + struct CaptureWriter { + buf: Arc>>, + } + + impl CaptureWriter { + fn new() -> Self { + Self { + buf: Arc::new(Mutex::new(Vec::new())), + } + } + + fn output(&self) -> String { + String::from_utf8(self.buf.lock().clone()).unwrap() + } + } + + impl std::io::Write for CaptureWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.buf.lock().extend_from_slice(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } + } + + impl<'a> tracing_subscriber::fmt::MakeWriter<'a> for CaptureWriter { + type Writer = Self; + + fn make_writer(&'a self) -> Self::Writer { + self.clone() + } + } + + /// Creates a real SDK tracer that generates valid span contexts but exports nothing. 
+ fn test_tracer_provider() -> opentelemetry_sdk::trace::SdkTracerProvider { + opentelemetry_sdk::trace::SdkTracerProvider::builder().build() + } + + fn make_subscriber_json( + writer: CaptureWriter, + provider: &opentelemetry_sdk::trace::SdkTracerProvider, + ) -> impl tracing::Subscriber + Send + Sync + 'static { + let otel_layer = tracing_opentelemetry::layer().with_tracer(provider.tracer("test")); + + let fmt_layer = tracing_subscriber::fmt::layer() + .with_writer(writer) + .with_target(false) + .with_file(false) + .with_line_number(false) + .json() + .map_event_format(|f| TraceIdFormat::new(f, true)) + .with_filter(tracing_subscriber::filter::LevelFilter::INFO); + + tracing_subscriber::registry() + .with(otel_layer) + .with(fmt_layer) + } + + fn make_subscriber_compact( + writer: CaptureWriter, + provider: &opentelemetry_sdk::trace::SdkTracerProvider, + ) -> impl tracing::Subscriber + Send + Sync + 'static { + let otel_layer = tracing_opentelemetry::layer().with_tracer(provider.tracer("test")); + + let fmt_layer = tracing_subscriber::fmt::layer() + .with_writer(writer) + .with_target(false) + .with_file(false) + .with_line_number(false) + .compact() + .map_event_format(|f| TraceIdFormat::new(f, false)) + .with_filter(tracing_subscriber::filter::LevelFilter::INFO); + + tracing_subscriber::registry() + .with(otel_layer) + .with(fmt_layer) + } + + #[test] + fn json_format_includes_trace_id() { + let provider = test_tracer_provider(); + let writer = CaptureWriter::new(); + let subscriber = make_subscriber_json(writer.clone(), &provider); + + tracing::subscriber::with_default(subscriber, || { + let span = tracing::info_span!("test_span"); + let _enter = span.enter(); + tracing::info!("hello"); + }); + + let output = writer.output(); + let parsed: serde_json::Value = serde_json::from_str(output.trim()).unwrap(); + let trace_id = parsed["trace_id"].as_str(); + assert!( + trace_id.is_some(), + "trace_id should be a top-level JSON field: {output}" + ); + assert_ne!( + 
trace_id.unwrap(), + "00000000000000000000000000000000", + "trace_id should not be all zeros: {output}" + ); + } + + #[test] + fn json_format_no_trace_id_without_span() { + let provider = test_tracer_provider(); + let writer = CaptureWriter::new(); + let subscriber = make_subscriber_json(writer.clone(), &provider); + + tracing::subscriber::with_default(subscriber, || { + tracing::info!("hello"); + }); + + let output = writer.output(); + let parsed: serde_json::Value = serde_json::from_str(output.trim()).unwrap(); + assert!( + parsed.get("trace_id").is_none(), + "trace_id should be absent without an active span: {output}" + ); + } + + #[test] + fn text_format_includes_short_trace_id_prefix() { + let provider = test_tracer_provider(); + let writer = CaptureWriter::new(); + let subscriber = make_subscriber_compact(writer.clone(), &provider); + + tracing::subscriber::with_default(subscriber, || { + let span = tracing::info_span!("test_span"); + let _enter = span.enter(); + tracing::info!("hello"); + }); + + let output = writer.output(); + // Should start with `[<8 hex chars>] ` + let trimmed = output.trim_start(); + assert!( + trimmed.starts_with('['), + "text output should start with a bracketed trace_id prefix: {output}" + ); + let bracket_end = trimmed.find(']').expect("missing closing bracket"); + let prefix = &trimmed[1..bracket_end]; + assert_eq!( + prefix.len(), + SHORT_TRACE_ID_LEN, + "trace_id prefix should be {SHORT_TRACE_ID_LEN} chars: got {prefix:?}" + ); + assert!( + prefix != "--------", + "trace_id prefix should not be the placeholder: {output}" + ); + } + + #[test] + fn text_format_placeholder_without_span() { + let provider = test_tracer_provider(); + let writer = CaptureWriter::new(); + let subscriber = make_subscriber_compact(writer.clone(), &provider); + + tracing::subscriber::with_default(subscriber, || { + tracing::info!("hello"); + }); + + let output = writer.output(); + assert!( + output.contains("[--------]"), + "text output should contain 
placeholder when no trace context: {output}" + ); + } + + /// Verifies the `OTel` mechanism used by the Data Platform async tasks to suppress + /// span export while keeping `trace_id` in logs: setting an unsampled parent + /// context on a child span causes the `parentbased_traceidratio` sampler to mark + /// the child (and its subtree) as not-sampled, so spans are not exported, but the + /// `OTel` context is still valid and `trace_id` still appears in log lines. + #[test] + fn unsampled_child_has_trace_id_but_no_exported_spans() { + use opentelemetry::trace::{ + SpanContext, TraceContextExt as _, TraceFlags, TracerProvider as _, + }; + use opentelemetry_sdk::trace::{InMemorySpanExporter, SimpleSpanProcessor}; + use tracing_opentelemetry::OpenTelemetrySpanExt as _; + + let exporter = InMemorySpanExporter::default(); + let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder() + .with_span_processor(SimpleSpanProcessor::new(exporter.clone())) + .build(); + + let writer = CaptureWriter::new(); + + let otel_layer = tracing_opentelemetry::layer().with_tracer(provider.tracer("test")); + + let fmt_layer = tracing_subscriber::fmt::layer() + .with_writer(writer.clone()) + .with_target(false) + .with_file(false) + .with_line_number(false) + .json() + .map_event_format(|f| TraceIdFormat::new(f, true)) + .with_filter(tracing_subscriber::filter::LevelFilter::INFO); + + let subscriber = tracing_subscriber::registry() + .with(otel_layer) + .with(fmt_layer); + + tracing::subscriber::with_default(subscriber, || { + // 1. Create a sampled parent span (this one SHOULD be exported) + let parent = tracing::info_span!("parent_span"); + let _parent_enter = parent.enter(); + + // 2. 
Create a child span and set its parent to an unsampled context + let child = tracing::info_span!("child_span"); + { + let cx = opentelemetry::Context::current(); + let parent_otel_span = cx.span(); + let parent_sc = parent_otel_span.span_context(); + assert!( + parent_sc.is_valid(), + "parent should have a valid span context" + ); + + let unsampled_sc = SpanContext::new( + parent_sc.trace_id(), + parent_sc.span_id(), + TraceFlags::default(), // not sampled + true, // remote + parent_sc.trace_state().clone(), + ); + let unsampled_cx = cx.with_remote_span_context(unsampled_sc); + child.set_parent(unsampled_cx).ok(); + } + + // 3. Enter the child and emit a log — should still have trace_id + let _child_enter = child.enter(); + tracing::info!("inside unsampled child"); + }); + + // Check that trace_id appeared in the log output + let output = writer.output(); + let parsed: serde_json::Value = serde_json::from_str(output.trim()).unwrap(); + let trace_id = parsed["trace_id"].as_str(); + assert!( + trace_id.is_some(), + "trace_id should be present in logs even for unsampled child: {output}" + ); + assert_ne!( + trace_id.unwrap(), + "00000000000000000000000000000000", + "trace_id should be a real (non-zero) value: {output}" + ); + + // Check that only the parent span was exported, not the child + provider.force_flush().ok(); + let spans = exporter.get_finished_spans().unwrap(); + let span_names: Vec<&str> = spans.iter().map(|s| s.name.as_ref()).collect(); + assert!( + span_names.contains(&"parent_span"), + "parent_span should be exported: {span_names:?}" + ); + assert!( + !span_names.contains(&"child_span"), + "child_span should NOT be exported (unsampled parent): {span_names:?}" + ); + } + + #[test] + fn json_output_is_valid_json_with_fields() { + let provider = test_tracer_provider(); + let writer = CaptureWriter::new(); + let subscriber = make_subscriber_json(writer.clone(), &provider); + + tracing::subscriber::with_default(subscriber, || { + let span = 
tracing::info_span!("test_span"); + let _enter = span.enter(); + tracing::info!(key = "value", "test message"); + }); + + let output = writer.output(); + let parsed: serde_json::Value = serde_json::from_str(output.trim()) + .unwrap_or_else(|e| panic!("output should be valid JSON: {e}\noutput: {output}")); + assert!(parsed["trace_id"].as_str().is_some()); + assert!(parsed["fields"]["message"].as_str().is_some()); + } +} diff --git a/crates/utils/re_quota_channel/src/async_broadcast_channel.rs b/crates/utils/re_quota_channel/src/async_broadcast_channel.rs index 03e27a299a52..7de68f2c177e 100644 --- a/crates/utils/re_quota_channel/src/async_broadcast_channel.rs +++ b/crates/utils/re_quota_channel/src/async_broadcast_channel.rs @@ -239,8 +239,8 @@ impl Sender { } else { // Wait with timeout so we can check elapsed time tokio::select! { - _ = self.state.state_changed.notified() => {} - _ = tokio::time::sleep(BLOCKED_WARNING_THRESHOLD) => {} + () = self.state.state_changed.notified() => {} + () = tokio::time::sleep(BLOCKED_WARNING_THRESHOLD) => {} } } } diff --git a/crates/utils/re_ros_msg/src/deserialize/mod.rs b/crates/utils/re_ros_msg/src/deserialize/mod.rs index 40c41c89c420..d8c90f84dac3 100644 --- a/crates/utils/re_ros_msg/src/deserialize/mod.rs +++ b/crates/utils/re_ros_msg/src/deserialize/mod.rs @@ -3,7 +3,7 @@ use std::collections::{BTreeMap, HashMap}; use serde::de::{self, DeserializeSeed}; use crate::deserialize::primitive_array::PrimitiveArraySeed; -use crate::message_spec::{ComplexType, MessageSpecification, Type}; +use crate::message_spec::{BuiltInType, ComplexType, MessageSpecification, Type}; pub mod primitive; pub mod primitive_array; @@ -199,51 +199,10 @@ impl<'de, R: TypeResolver> DeserializeSeed<'de> for SchemaSeed<'_, R> { D: de::Deserializer<'de>, { use crate::message_spec::ArraySize::{Bounded, Fixed, Unbounded}; - use crate::message_spec::BuiltInType::{ - Bool, Byte, Char, Float32, Float64, Int8, Int16, Int32, Int64, String, UInt8, UInt16, - 
UInt32, UInt64, WString, - }; use crate::message_spec::Type; match self.ty { - Type::BuiltIn(primitive_type) => match primitive_type { - Bool => de - .deserialize_bool(PrimitiveVisitor::::new()) - .map(Value::Bool), - Byte | UInt8 => de - .deserialize_u8(PrimitiveVisitor::::new()) - .map(Value::U8), // ROS2: octet - Char | Int8 => de - .deserialize_i8(PrimitiveVisitor::::new()) - .map(Value::I8), // ROS2: char (int8) - Float32 => de - .deserialize_f32(PrimitiveVisitor::::new()) - .map(Value::F32), - Float64 => de - .deserialize_f64(PrimitiveVisitor::::new()) - .map(Value::F64), - Int16 => de - .deserialize_i16(PrimitiveVisitor::::new()) - .map(Value::I16), - Int32 => de - .deserialize_i32(PrimitiveVisitor::::new()) - .map(Value::I32), - Int64 => de - .deserialize_i64(PrimitiveVisitor::::new()) - .map(Value::I64), - UInt16 => de - .deserialize_u16(PrimitiveVisitor::::new()) - .map(Value::U16), - UInt32 => de - .deserialize_u32(PrimitiveVisitor::::new()) - .map(Value::U32), - UInt64 => de - .deserialize_u64(PrimitiveVisitor::::new()) - .map(Value::U64), - String(_bound) | WString(_bound) => { - de.deserialize_string(StringVisitor).map(Value::String) - } - }, + Type::BuiltIn(primitive_type) => deserialize_builtin_type(primitive_type, de), Type::Array { ty, size } => match size { Fixed(len) => { // Check if this is a primitive array and use optimized path @@ -282,12 +241,68 @@ impl<'de, R: TypeResolver> DeserializeSeed<'de> for SchemaSeed<'_, R> { de::Error::custom(format!("unknown ComplexType: {complex_ty:?}")) })?; + // Some ROS2 schemas model enums as separate messages containing only constants. + // On the wire, fields of those types are encoded as a single primitive value. + if let Some(primitive_type) = msg + .underlying_type_if_enum_like() + .map_err(de::Error::custom)? 
+ { + return deserialize_builtin_type(primitive_type, de); + } + MessageSeed::new(msg, self.resolver).deserialize(de) } } } } +fn deserialize_builtin_type<'de, D>(primitive_type: &BuiltInType, de: D) -> Result +where + D: de::Deserializer<'de>, +{ + use crate::message_spec::BuiltInType::{ + Bool, Byte, Char, Float32, Float64, Int8, Int16, Int32, Int64, String, UInt8, UInt16, + UInt32, UInt64, WString, + }; + + match primitive_type { + Bool => de + .deserialize_bool(PrimitiveVisitor::::new()) + .map(Value::Bool), + Byte | UInt8 => de + .deserialize_u8(PrimitiveVisitor::::new()) + .map(Value::U8), // ROS2: octet + Char | Int8 => de + .deserialize_i8(PrimitiveVisitor::::new()) + .map(Value::I8), // ROS2: char (int8) + Float32 => de + .deserialize_f32(PrimitiveVisitor::::new()) + .map(Value::F32), + Float64 => de + .deserialize_f64(PrimitiveVisitor::::new()) + .map(Value::F64), + Int16 => de + .deserialize_i16(PrimitiveVisitor::::new()) + .map(Value::I16), + Int32 => de + .deserialize_i32(PrimitiveVisitor::::new()) + .map(Value::I32), + Int64 => de + .deserialize_i64(PrimitiveVisitor::::new()) + .map(Value::I64), + UInt16 => de + .deserialize_u16(PrimitiveVisitor::::new()) + .map(Value::U16), + UInt32 => de + .deserialize_u32(PrimitiveVisitor::::new()) + .map(Value::U32), + UInt64 => de + .deserialize_u64(PrimitiveVisitor::::new()) + .map(Value::U64), + String(_bound) | WString(_bound) => de.deserialize_string(StringVisitor).map(Value::String), + } +} + // Sequence/array of elements. 
pub(super) struct SequenceSeed<'a, R: TypeResolver> { elem: &'a Type, diff --git a/crates/utils/re_ros_msg/src/message_spec.rs b/crates/utils/re_ros_msg/src/message_spec.rs index 7811e76d30cc..b3aa3b22c01a 100644 --- a/crates/utils/re_ros_msg/src/message_spec.rs +++ b/crates/utils/re_ros_msg/src/message_spec.rs @@ -80,11 +80,55 @@ impl MessageSpecification { } } - Ok(Self { + let spec = Self { name: name.to_owned(), fields, constants, - }) + }; + + // Sanity check: if this is an enum-like message that only contains constants, + // try if we can determine the underlying type or fail early here. + spec.underlying_type_if_enum_like()?; + + Ok(spec) + } + + /// Returns the primitive type of a constants-only enum-like specification. + /// + /// A spec can be assumed enum-like when it has only constants definitions, no data fields, + /// and all constants share the same built-in type. + /// + /// For example, this has `int8` as its underlying type: + /// ```text + /// int8 FOO=0 + /// int8 BAR=1 + /// ``` + pub fn underlying_type_if_enum_like(&self) -> Result, ParseError> { + if !self.fields.is_empty() || self.constants.is_empty() { + return Ok(None); + } + + let Some(Type::BuiltIn(first_type)) = self.constants.first().map(|constant| &constant.ty) + else { + // This is unreachable for real ROS message definitions. + // Parsed constants are always built-in; this only guards manually constructed specs. + return Err(ParseError::Validate(format!( + "Encountered constant with spec `{}` with non-built-in types. This must be a bug.", + self.name + ))); + }; + + for constant in &self.constants[1..] { + if constant.ty != Type::BuiltIn(first_type.clone()) { + // Ambiguous typing can't be handled. 
+ return Err(ParseError::Validate(format!( + "constants-only spec `{}` uses mixed constant types", + self.name + ))); + } + } + + Ok(Some(first_type)) } } @@ -695,6 +739,39 @@ mod tests { assert!(Field::parse(Type::BuiltIn(BuiltInType::Bool), "enabled", "maybe").is_err()); // invalid bool literal } + /// Tests that the underlying type of a constants-only message definition + /// can be retrieved, if the constants' types are all the same. + #[test] + fn constants_only_spec_has_enum_underlying_type() { + let spec = MessageSpecification::parse( + "test/DummyEnum", + r#" +int8 FOO=0 +int8 BAR=1 +"#, + ) + .unwrap(); + + assert_eq!( + spec.underlying_type_if_enum_like().unwrap(), + Some(&BuiltInType::Int8) + ); + } + + /// Tests that constants-only enum-like specs reject mixed primitive constant types. + #[test] + fn constants_only_spec_rejects_mixed_enum_types() { + let result = MessageSpecification::parse( + "test/DummyEnum", + r#" +int8 FOO=0 +uint8 BAR=1 +"#, + ); + + assert!(result.is_err()); + } + #[test] fn strip_comment_works() { assert_eq!(strip_comment("int32 field # comment"), "int32 field "); diff --git a/crates/utils/re_tracing/Cargo.toml b/crates/utils/re_tracing/Cargo.toml index 869c4b43d935..ed88b2fc3c94 100644 --- a/crates/utils/re_tracing/Cargo.toml +++ b/crates/utils/re_tracing/Cargo.toml @@ -32,6 +32,7 @@ server = ["dep:puffin_http", "dep:re_log", "dep:rfd", "dep:wayland-sys"] [target.'cfg(not(target_arch = "wasm32"))'.dependencies] +parking_lot.workspace = true puffin.workspace = true # Optional dependencies: diff --git a/crates/utils/re_tracing/src/lib.rs b/crates/utils/re_tracing/src/lib.rs index fe1528238415..f30e5a025b8e 100644 --- a/crates/utils/re_tracing/src/lib.rs +++ b/crates/utils/re_tracing/src/lib.rs @@ -1,9 +1,15 @@ //! Helpers for tracing/spans/flamegraphs and such. 
+#[cfg(not(target_arch = "wasm32"))] +mod profile_capture; + #[cfg(not(target_arch = "wasm32"))] #[cfg(feature = "server")] mod server; +#[cfg(not(target_arch = "wasm32"))] +pub use profile_capture::ProfileCapture; + #[cfg(not(target_arch = "wasm32"))] #[cfg(feature = "server")] pub use server::Profiler; diff --git a/crates/utils/re_tracing/src/profile_capture.rs b/crates/utils/re_tracing/src/profile_capture.rs new file mode 100644 index 000000000000..18019586c3d6 --- /dev/null +++ b/crates/utils/re_tracing/src/profile_capture.rs @@ -0,0 +1,59 @@ +use std::sync::Arc; + +use parking_lot::Mutex; + +/// Captures puffin profile data in memory for a fixed number of frames. +/// +/// Register with [`Self::start`], poll [`Self::is_done`] once per frame, then +/// call [`Self::finish`] to recover the collected frames as a [`puffin::FrameView`]. +pub struct ProfileCapture { + frames: Arc>>>, + sink_id: puffin::FrameSinkId, + target_frames: usize, +} + +impl ProfileCapture { + /// Start capturing puffin profile data. + /// + /// Registers an in-memory sink with the global profiler that accumulates frames, + /// until [`Self::is_done`] returns `true`. + pub fn start(target_frames: usize) -> Self { + puffin::set_scopes_on(true); + + let frames: Arc>>> = + Arc::new(Mutex::new(Vec::with_capacity(target_frames))); + + let sink_frames = frames.clone(); + let sink_id = puffin::GlobalProfiler::lock().add_sink(Box::new(move |frame| { + sink_frames.lock().push(frame); + })); + + Self { + frames, + sink_id, + target_frames, + } + } + + /// Whether enough frames have been collected. + pub fn is_done(&self) -> bool { + self.frames.lock().len() >= self.target_frames + } + + /// Remove the sink and return the collected frames as a [`puffin::FrameView`]. 
+ pub fn finish(self) -> puffin::FrameView { + let Self { + frames, + sink_id, + target_frames: _, + } = self; + + puffin::GlobalProfiler::lock().remove_sink(sink_id); + + let mut view = puffin::FrameView::default(); + for frame in frames.lock().drain(..) { + view.add_frame(frame); + } + view + } +} diff --git a/crates/utils/re_video/Cargo.toml b/crates/utils/re_video/Cargo.toml index 1a59ca8faf0e..78da8f32a32d 100644 --- a/crates/utils/re_video/Cargo.toml +++ b/crates/utils/re_video/Cargo.toml @@ -80,6 +80,10 @@ dav1d = { workspace = true, optional = true, default-features = false, features "bitdepth_8", ] } +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +image = { workspace = true, default-features = false, features = ["png", "jpeg"] } +bytemuck.workspace = true + # web [target.'cfg(target_arch = "wasm32")'.dependencies] @@ -87,16 +91,20 @@ js-sys.workspace = true wasm-bindgen.workspace = true wasm-bindgen-futures.workspace = true web-sys = { workspace = true, features = [ + "Blob", + "BlobPropertyBag", "DomException", "EncodedVideoChunk", "EncodedVideoChunkInit", "EncodedVideoChunkType", "HardwareAcceleration", + "ImageBitmap", "Navigator", "VideoDecoder", "VideoDecoderConfig", "VideoDecoderInit", "VideoFrame", + "VideoFrameInit", "Window", ] } diff --git a/crates/utils/re_video/examples/frames.rs b/crates/utils/re_video/examples/frames.rs index 61002e537d50..ffc39c656a4f 100644 --- a/crates/utils/re_video/examples/frames.rs +++ b/crates/utils/re_video/examples/frames.rs @@ -124,6 +124,9 @@ fn main() { re_video::PixelFormat::Yuv { .. 
} => { re_log::error_once!("YUV frame writing is not supported"); } + re_video::PixelFormat::L8 | re_video::PixelFormat::L16 => { + re_log::error_once!("L8 & L16 frame writing is not supported"); + } } } } diff --git a/crates/utils/re_video/src/av1.rs b/crates/utils/re_video/src/av1.rs index 93f49e24e405..2d0e906d2e56 100644 --- a/crates/utils/re_video/src/av1.rs +++ b/crates/utils/re_video/src/av1.rs @@ -46,20 +46,32 @@ pub fn detect_av1_keyframe_start(data: &[u8]) -> Result { - if is_keyframe(&mut cursor).map_err(DetectGopStartError::Av1ParserError)? { - keyframe_found = true; - } + ObuType::Frame | ObuType::FrameHeader + if is_keyframe(&mut cursor).map_err(DetectGopStartError::Av1ParserError)? => + { + keyframe_found = true; } _ => { // Skip other OBUs @@ -157,7 +169,7 @@ mod test { match result { Ok(GopStartDetection::StartOfGop(details)) => { // Verify we got expected details from the AV1 stream - assert_eq!(details.codec_string, "av01"); + assert_eq!(details.codec_string, "av01.0.00M.08"); assert_eq!(details.coded_dimensions, [64, 64]); assert_eq!(details.bit_depth, Some(8)); diff --git a/crates/utils/re_video/src/decode/async_decoder_wrapper.rs b/crates/utils/re_video/src/decode/async_decoder_wrapper.rs index ebdabf479d09..c0e836a0bc59 100644 --- a/crates/utils/re_video/src/decode/async_decoder_wrapper.rs +++ b/crates/utils/re_video/src/decode/async_decoder_wrapper.rs @@ -3,7 +3,6 @@ use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use super::{AsyncDecoder, Chunk, Result}; -#[cfg(with_dav1d)] use crate::{VideoDataDescription, decode::FrameResult}; use crate::{Receiver, Sender}; @@ -47,7 +46,6 @@ impl Default for Comms { } /// Blocking decoder of video chunks. -#[cfg(with_dav1d)] pub trait SyncDecoder { /// Submit some work and read the results. 
/// @@ -159,7 +157,6 @@ fn decoder_thread( ) { while let Ok(command) = command_rx.recv() { if comms.should_stop.load(Ordering::Acquire) { - re_log::debug!("Should stop"); return; } @@ -177,7 +174,6 @@ fn decoder_thread( comms.num_outstanding_resets.fetch_sub(1, Ordering::Release); } Command::Stop => { - re_log::debug!("Stop"); return; } } diff --git a/crates/utils/re_video/src/decode/av1.rs b/crates/utils/re_video/src/decode/av1.rs index 9b33af399639..9ac4a6ed75d3 100644 --- a/crates/utils/re_video/src/decode/av1.rs +++ b/crates/utils/re_video/src/decode/av1.rs @@ -283,7 +283,7 @@ fn create_frame(debug_name: &str, picture: &dav1d::Picture) -> FrameResult { } fn yuv_matrix_coefficients(debug_name: &str, picture: &dav1d::Picture) -> YuvMatrixCoefficients { - // Quotes are from https://wiki.x266.mov/docs/colorimetry/matrix (if not noted otherwise) + // Quotes are from https://web.archive.org/web/20260318141742/https://wiki.x266.mov/docs/colorimetry/matrix (if not noted otherwise) match picture.matrix_coefficients() { dav1d::pixel::MatrixCoefficients::Identity => YuvMatrixCoefficients::Identity, @@ -309,8 +309,8 @@ fn yuv_matrix_coefficients(debug_name: &str, picture: &dav1d::Picture) -> YuvMat // } // // This is also what the mpv player does (and probably others): - // https://wiki.x266.mov/docs/colorimetry/matrix#2-unspecified - // (and similar for primaries! https://wiki.x266.mov/docs/colorimetry/primaries#2-unspecified) + // https://web.archive.org/web/20260318141742/https://wiki.x266.mov/docs/colorimetry/matrix#2-unspecified + // (and similar for primaries! https://web.archive.org/web/20260318141807/https://wiki.x266.mov/docs/colorimetry/primaries#2-unspecified) // // …then again, eyeballing VLC it looks like it just always assumes BT.709. 
// The handwavy test case employed here was the same video in low & high resolution diff --git a/crates/utils/re_video/src/decode/ffmpeg_cli/ffmpeg.rs b/crates/utils/re_video/src/decode/ffmpeg_cli/ffmpeg.rs index f0e3924676d2..1e66f4b2a1fa 100644 --- a/crates/utils/re_video/src/decode/ffmpeg_cli/ffmpeg.rs +++ b/crates/utils/re_video/src/decode/ffmpeg_cli/ffmpeg.rs @@ -204,10 +204,10 @@ fn send_output( output_sender: &OutputSender, result: FrameResult, ) -> Result<(), SendError> { - if !output_sender.stop_signal.load(Ordering::Acquire) { - output_sender.sender.send(result) - } else { + if output_sender.stop_signal.load(Ordering::Acquire) { Err(SendError(result)) + } else { + output_sender.sender.send(result) } } @@ -242,7 +242,7 @@ impl FFmpegProcessAndListener { fn new( debug_name: &str, output_sender: Sender, - encoding_details: &Option, + encoding_details: Option<&VideoEncodingDetails>, ffmpeg_path: Option<&std::path::Path>, codec: &crate::VideoCodec, ) -> Result { @@ -251,7 +251,7 @@ impl FFmpegProcessAndListener { // TODO(andreas): should get SPS also without AVCC from ongoing stream. let (pixel_format, ffmpeg_pix_fmt) = if let Some(chroma_subsampling) = - encoding_details.as_ref().and_then(|e| e.chroma_subsampling) + encoding_details.and_then(|e| e.chroma_subsampling) { // We always get planar layouts back from ffmpeg. 
let (layout, ffmpeg_pix_fmt) = match chroma_subsampling { @@ -373,7 +373,6 @@ impl FFmpegProcessAndListener { .expect("Failed to spawn ffmpeg listener thread"); let codec_meta = encoding_details - .as_ref() .and_then(|e| e.stsd.as_ref()) .and_then(CodecMeta::from_stsd) .unwrap_or(CodecMeta::RawBytestream); @@ -905,7 +904,7 @@ pub struct FFmpegCliDecoder { impl FFmpegCliDecoder { pub fn new( debug_name: String, - encoding_details: &Option, + encoding_details: Option<&VideoEncodingDetails>, output_sender: Sender, ffmpeg_path: Option, codec: &crate::VideoCodec, @@ -934,7 +933,7 @@ impl FFmpegCliDecoder { ffmpeg, output_sender, ffmpeg_path, - codec: *codec, + codec: codec.clone(), }) } } @@ -995,7 +994,7 @@ impl AsyncDecoder for FFmpegCliDecoder { self.ffmpeg = FFmpegProcessAndListener::new( &self.debug_name, self.output_sender.clone(), - &video_descr.encoding_details, + video_descr.encoding_details.as_ref(), self.ffmpeg_path.as_deref(), &self.codec, )?; diff --git a/crates/utils/re_video/src/decode/image_decoder.rs b/crates/utils/re_video/src/decode/image_decoder.rs new file mode 100644 index 000000000000..59becf424711 --- /dev/null +++ b/crates/utils/re_video/src/decode/image_decoder.rs @@ -0,0 +1,132 @@ +use crate::{PixelFormat, decode::async_decoder_wrapper::SyncDecoder}; + +pub struct SyncImageDecoder { + image_format: image::ImageFormat, +} + +impl SyncImageDecoder { + pub fn try_new(descr: &crate::VideoDataDescription) -> Option { + Some(Self { + image_format: image::ImageFormat::from_mime_type(descr.image_codec_mime_type()?)?, + }) + } + + pub fn mime_type(&self) -> &'static str { + self.image_format.to_mime_type() + } +} + +impl SyncDecoder for SyncImageDecoder { + // TODO(isse): We could potentially cache decoded blobs, but that's missing some things: + // - A way to purge the cache, i.e have a purge function on video decoders that gets called from `VideoStreamCache`? + // - Some unique hash to identify blobs. 
We don't want to hash the whole blob + // to get that. For `StoredBlobCacheKey` we use row id + component identifier, + // but we don't have an obvious way to pass that here. Could potentially + // use the samples `source_id` + `byte_span`. + fn submit_chunk( + &mut self, + should_stop: &std::sync::atomic::AtomicBool, + chunk: super::Chunk, + output_sender: &re_quota_channel::Sender, + ) { + if should_stop.load(std::sync::atomic::Ordering::Relaxed) { + return; + } + + let mut reader = image::ImageReader::new(std::io::Cursor::new(chunk.data)); + + reader.set_format(self.image_format); + + let content = match decode_to_frame_content(reader) { + Ok(content) => content, + Err(err) => { + let _send_error = output_sender.send(crate::FrameResult::Err(err)); + return; + } + }; + + let _send_error = output_sender.send(crate::FrameResult::Ok(crate::Frame { + content, + info: crate::FrameInfo { + is_sync: Some(true), + sample_idx: Some(chunk.sample_idx), + frame_nr: Some(chunk.frame_nr), + presentation_timestamp: chunk.presentation_timestamp, + duration: chunk.duration, + latest_decode_timestamp: Some(chunk.decode_timestamp), + }, + })); + } + + fn reset(&mut self, descr: &crate::VideoDataDescription) { + if let Some(new) = Self::try_new(descr) { + *self = new; + } + } +} + +fn decode_to_frame_content( + reader: image::ImageReader>>, +) -> Result { + let dynamic_image = reader + .decode() + .map_err(|err| crate::DecodeError::ImageDecoder(err.to_string()))?; + + // RGB -> RGBA padding happens at a later stage. 
+ let converted_rgb; + let converted_rgba; + let (data, (width, height), format): (&[u8], (u32, u32), PixelFormat) = match &dynamic_image { + image::DynamicImage::ImageLuma8(image) => { + (image.as_raw(), image.dimensions(), PixelFormat::L8) + } + image::DynamicImage::ImageLumaA8(image) => { + (image.as_raw(), image.dimensions(), PixelFormat::L8) + } + image::DynamicImage::ImageRgb8(image) => { + (image.as_raw(), image.dimensions(), PixelFormat::Rgb8Unorm) + } + image::DynamicImage::ImageRgba8(image) => { + (image.as_raw(), image.dimensions(), PixelFormat::Rgba8Unorm) + } + image::DynamicImage::ImageLuma16(image) => ( + bytemuck::cast_slice(image.as_raw()), + image.dimensions(), + PixelFormat::L16, + ), + image::DynamicImage::ImageLumaA16(image) => ( + bytemuck::cast_slice(image.as_raw()), + image.dimensions(), + PixelFormat::L16, + ), + image::DynamicImage::ImageRgb16(_) | image::DynamicImage::ImageRgb32F(_) => { + converted_rgb = dynamic_image.to_rgb8(); + + ( + converted_rgb.as_raw(), + converted_rgb.dimensions(), + PixelFormat::Rgb8Unorm, + ) + } + image::DynamicImage::ImageRgba16(_) | image::DynamicImage::ImageRgba32F(_) => { + converted_rgba = dynamic_image.to_rgba8(); + + ( + converted_rgba.as_raw(), + converted_rgba.dimensions(), + PixelFormat::Rgba8Unorm, + ) + } + _ => { + return Err(crate::DecodeError::ImageDecoder( + "Unsupported image layout".to_owned(), + )); + } + }; + + Ok(crate::FrameContent { + data: data.to_owned(), + width, + height, + format, + }) +} diff --git a/crates/utils/re_video/src/decode/mod.rs b/crates/utils/re_video/src/decode/mod.rs index 4b85c04f9dde..7b110d04f60f 100644 --- a/crates/utils/re_video/src/decode/mod.rs +++ b/crates/utils/re_video/src/decode/mod.rs @@ -4,7 +4,7 @@ //! Whirlwind tour of how to interpret picture data (from a Video perspective) //! --------------------------------------------------------------------------------- //! -//! 
Extracted from the [av1 codec wiki](https://wiki.x266.mov/docs/colorimetry/intro) and other sources. +//! Extracted from the [av1 codec wiki](https://web.archive.org/web/20260318141718/https://wiki.x266.mov/docs/colorimetry/intro) and other sources. //! Follows the trail of information we get from our AV1 decoder. //! //! ### How to get from YUV to RGB? @@ -29,7 +29,7 @@ //! //! ### Given a normalized YUV triplet, how do we get color? //! -//! * `picture.matrix_coefficients()` (see ) +//! * `picture.matrix_coefficients()` (see ) //! * this tells us what to multiply the incoming YUV data with to get SOME RGB data //! * there's various standards of how to do this, but the most common is BT.709 //! * here's a fun special one: `identity` means it's not actually YUV, but GBR! @@ -77,8 +77,11 @@ //! supporting HDR content at which point more properties will be important! //! -#[cfg(with_dav1d)] +#[cfg(not(target_arch = "wasm32"))] mod async_decoder_wrapper; +#[cfg(not(target_arch = "wasm32"))] +mod image_decoder; + #[cfg(with_dav1d)] mod av1; @@ -92,6 +95,8 @@ pub use ffmpeg_cli::{ Error as FFmpegError, FFmpegVersion, FFmpegVersionParseError, ffmpeg_download_url, }; +#[cfg(target_arch = "wasm32")] +mod web_image_decoder; #[cfg(target_arch = "wasm32")] mod webcodecs; @@ -99,6 +104,9 @@ use crate::{SampleIndex, Time, VideoDataDescription, player::VideoPlaybackIssueS #[derive(thiserror::Error, Debug, Clone)] pub enum DecodeError { + #[error("Waiting for encoding details")] + WaitingForCodecDetails, + #[error("Unsupported codec: {0}")] UnsupportedCodec(String), @@ -114,6 +122,10 @@ pub enum DecodeError { )] NoDav1dOnLinuxArm64, + #[cfg(not(target_arch = "wasm32"))] + #[error("Image decode error: {0}")] + ImageDecoder(String), + #[cfg(target_arch = "wasm32")] #[error(transparent)] WebDecoder(#[from] webcodecs::WebError), @@ -138,12 +150,18 @@ impl DecodeError { // Gotta keep trying! 
match self { // Unsupported codec / decoder not available: - Self::UnsupportedCodec(_) | Self::Dav1dWithoutNasm | Self::NoDav1dOnLinuxArm64 => false, + Self::WaitingForCodecDetails + | Self::UnsupportedCodec(_) + | Self::Dav1dWithoutNasm + | Self::NoDav1dOnLinuxArm64 => false, // Issue with AV1 decoding. #[cfg(with_dav1d)] Self::Dav1d(_) => true, + #[cfg(not(target_arch = "wasm32"))] + Self::ImageDecoder(_) => false, + // Issue with WebCodecs decoding. #[cfg(target_arch = "wasm32")] Self::WebDecoder(_) => true, @@ -159,11 +177,14 @@ impl DecodeError { pub fn severity(&self) -> VideoPlaybackIssueSeverity { match self { + Self::WaitingForCodecDetails => VideoPlaybackIssueSeverity::Loading, #[cfg(with_dav1d)] Self::Dav1d(err) => match err { dav1d::Error::Again => VideoPlaybackIssueSeverity::Loading, _ => VideoPlaybackIssueSeverity::Error, }, + #[cfg(not(target_arch = "wasm32"))] + Self::ImageDecoder(_) => VideoPlaybackIssueSeverity::Error, #[cfg(target_arch = "wasm32")] Self::WebDecoder(err) => err.severity(), #[cfg(with_ffmpeg)] @@ -243,14 +264,27 @@ pub fn new_decoder( ); #[cfg(target_arch = "wasm32")] - return Ok(Box::new(webcodecs::WebVideoDecoder::new( - video, - decode_settings.hw_acceleration, - output_sender, - )?)); + { + return match video.codec { + crate::VideoCodec::ImageSequence(_) => { + if let Some(decoder) = + web_image_decoder::WebImageDecoder::try_new(video, output_sender.clone()) + { + Ok(Box::new(decoder)) + } else { + Err(DecodeError::WaitingForCodecDetails) + } + } + _ => Ok(Box::new(webcodecs::WebVideoDecoder::new( + video, + decode_settings.hw_acceleration, + output_sender, + )?)), + }; + } #[cfg(not(target_arch = "wasm32"))] - match video.codec { + match &video.codec { #[cfg(feature = "av1")] crate::VideoCodec::AV1 => { #[cfg(linux_arm64)] @@ -272,12 +306,24 @@ pub fn new_decoder( #[cfg(with_ffmpeg)] crate::VideoCodec::H264 | crate::VideoCodec::H265 => Ok(Box::new(FFmpegCliDecoder::new( debug_name.to_owned(), - &video.encoding_details, + 
video.encoding_details.as_ref(), output_sender, decode_settings.ffmpeg_path.clone(), &video.codec, )?)), + crate::VideoCodec::ImageSequence(codec) => { + if let Some(decoder) = image_decoder::SyncImageDecoder::try_new(video) { + Ok(Box::new(async_decoder_wrapper::AsyncDecoderWrapper::new( + format!("image decoder ({})", decoder.mime_type()), + Box::new(decoder), + output_sender, + ))) + } else { + Err(DecodeError::WaitingForCodecDetails) + } + } + _ => Err(DecodeError::UnsupportedCodec( video.human_readable_codec_string(), )), @@ -483,6 +529,8 @@ impl re_byte_size::SizeBytes for Frame { /// Pixel format/layout used by [`FrameContent::data`]. #[derive(Debug, Clone)] pub enum PixelFormat { + L8, + L16, Rgb8Unorm, Rgba8Unorm, @@ -499,6 +547,8 @@ pub enum PixelFormat { impl PixelFormat { pub fn bits_per_pixel(&self) -> u32 { match self { + Self::L8 => 8, + Self::L16 => 16, Self::Rgb8Unorm { .. } => 24, Self::Rgba8Unorm { .. } => 32, Self::Yuv { layout, .. } => match layout { diff --git a/crates/utils/re_video/src/decode/web_image_decoder.rs b/crates/utils/re_video/src/decode/web_image_decoder.rs new file mode 100644 index 000000000000..2479bdcb4a21 --- /dev/null +++ b/crates/utils/re_video/src/decode/web_image_decoder.rs @@ -0,0 +1,124 @@ +use js_sys::Uint8Array; +use wasm_bindgen::JsCast as _; + +use super::{AsyncDecoder, Chunk, Frame, FrameInfo, Result, webcodecs::string_from_js_value}; +use crate::{DecodeError, FrameResult, Sender, VideoDataDescription}; + +pub struct WebImageDecoder { + image_mime_type: String, + output_sender: Sender, +} + +impl WebImageDecoder { + pub fn try_new( + video_descr: &VideoDataDescription, + output_sender: Sender, + ) -> Option { + Some(Self { + image_mime_type: video_descr.image_codec_mime_type()?.to_owned(), + output_sender, + }) + } +} + +impl AsyncDecoder for WebImageDecoder { + fn submit_chunk(&mut self, chunk: Chunk) -> Result<()> { + let output_sender = self.output_sender.clone(); + let mime_type = self.image_mime_type.clone(); 
+ + wasm_bindgen_futures::spawn_local(async move { + match decode_image(chunk, &mime_type).await { + Ok(frame) => { + output_sender.send(Ok(frame)).ok(); + } + Err(err) => { + output_sender.send(Err(err)).ok(); + } + } + }); + + Ok(()) + } + + fn reset(&mut self, descr: &VideoDataDescription) -> Result<()> { + if let Some(encoding_details) = &descr.encoding_details { + self.image_mime_type = encoding_details.codec_string.clone(); + } + Ok(()) + } +} + +async fn decode_image(chunk: Chunk, mime_type: &str) -> Result { + // Create a Blob from the image data. + let uint8_array = Uint8Array::from(chunk.data.as_slice()); + let parts = js_sys::Array::new(); + parts.push(&uint8_array); + + let options = web_sys::BlobPropertyBag::new(); + if !mime_type.is_empty() { + options.set_type(mime_type); + } + + let blob = + web_sys::Blob::new_with_u8_array_sequence_and_options(&parts, &options).map_err(|err| { + DecodeError::WebDecoder(super::webcodecs::WebError::Decoding(format!( + "Failed to create Blob: {}", + string_from_js_value(&err) + ))) + })?; + + // Decode the image using createImageBitmap. 
+ let window = web_sys::window().ok_or_else(|| { + DecodeError::WebDecoder(super::webcodecs::WebError::Decoding( + "No global window object".to_owned(), + )) + })?; + + let promise = window.create_image_bitmap_with_blob(&blob).map_err(|err| { + DecodeError::WebDecoder(super::webcodecs::WebError::Decoding(format!( + "createImageBitmap failed: {}", + string_from_js_value(&err) + ))) + })?; + + let bitmap_js = wasm_bindgen_futures::JsFuture::from(promise) + .await + .map_err(|err| { + DecodeError::WebDecoder(super::webcodecs::WebError::Decoding(format!( + "createImageBitmap rejected: {}", + string_from_js_value(&err) + ))) + })?; + + let bitmap: web_sys::ImageBitmap = bitmap_js.dyn_into().map_err(|_js_err| { + DecodeError::WebDecoder(super::webcodecs::WebError::Decoding( + "createImageBitmap did not return an ImageBitmap".to_owned(), + )) + })?; + + // Create a VideoFrame from the ImageBitmap. + // The timestamp is required by the VideoFrame constructor. + let init = web_sys::VideoFrameInit::new(); + init.set_timestamp(0); + let video_frame = web_sys::VideoFrame::new_with_image_bitmap_and_video_frame_init( + &bitmap, &init, + ) + .map_err(|err| { + DecodeError::WebDecoder(super::webcodecs::WebError::Decoding(format!( + "Failed to create VideoFrame from ImageBitmap: {}", + string_from_js_value(&err) + ))) + })?; + + Ok(Frame { + content: super::webcodecs::WebVideoFrame(video_frame), + info: FrameInfo { + is_sync: Some(true), + sample_idx: Some(chunk.sample_idx), + frame_nr: Some(chunk.frame_nr), + presentation_timestamp: chunk.presentation_timestamp, + duration: chunk.duration, + latest_decode_timestamp: Some(chunk.decode_timestamp), + }, + }) +} diff --git a/crates/utils/re_video/src/decode/webcodecs.rs b/crates/utils/re_video/src/decode/webcodecs.rs index a7d1b20f2943..46cd4df8f965 100644 --- a/crates/utils/re_video/src/decode/webcodecs.rs +++ b/crates/utils/re_video/src/decode/webcodecs.rs @@ -20,7 +20,7 @@ use crate::{ #[derive(Clone)] #[repr(transparent)] -pub 
struct WebVideoFrame(web_sys::VideoFrame); +pub struct WebVideoFrame(pub(super) web_sys::VideoFrame); impl re_byte_size::SizeBytes for WebVideoFrame { fn heap_size_bytes(&self) -> u64 { @@ -155,7 +155,7 @@ impl Drop for WebVideoDecoder { re_log::warn!( "Error when closing video decoder: {}", - js_error_to_string(&err) + string_from_js_value(&err) ); } } @@ -182,7 +182,7 @@ impl WebVideoDecoder { .map_or(Time::ZERO, |s| s.presentation_timestamp); Ok(Self { - codec: video_descr.codec, + codec: video_descr.codec.clone(), timescale, first_frame_pts, @@ -253,14 +253,14 @@ impl AsyncDecoder for WebVideoDecoder { // Given that we err on the side of providing too much than too little information. if let Some(duration) = video_chunk.duration { let duration_micros = 1e-3 * duration.duration(self.timescale).as_nanos() as f64; - web_chunk.set_duration(duration_micros); + web_chunk.set_duration_f64(duration_micros); } let web_chunk = EncodedVideoChunk::new(&web_chunk) - .map_err(|err| WebError::CreateChunk(js_error_to_string(&err)))?; + .map_err(|err| WebError::CreateChunk(string_from_js_value(&err)))?; self.decoder .decode(&web_chunk) - .map_err(|err| WebError::DecodeChunk(js_error_to_string(&err)))?; + .map_err(|err| WebError::DecodeChunk(string_from_js_value(&err)))?; Ok(()) } @@ -307,7 +307,7 @@ impl AsyncDecoder for WebVideoDecoder { encoding_details, self.hw_acceleration, )) - .map_err(|err| WebError::ConfigureFailure(js_error_to_string(&err)).into()) + .map_err(|err| WebError::ConfigureFailure(string_from_js_value(&err)).into()) } /// Called after submitting the last chunk. @@ -335,18 +335,15 @@ impl AsyncDecoder for WebVideoDecoder { // If we don't handle potential flush errors, we'll get a lot of spam in the console. 
wasm_bindgen_futures::spawn_local(async move { let flush_result = wasm_bindgen_futures::JsFuture::from(flush_promise).await; - if let Err(flush_error) = flush_result { - if let Some(dom_exception) = flush_error.dyn_ref::() + if let Err(err) = flush_result { + if let Some(dom_exception) = err.dyn_ref::() && dom_exception.code() == web_sys::DomException::ABORT_ERR { // Video decoder got closed, that's fine. return; } - re_log::debug!( - "Failed to flush video: {}", - js_error_to_string(&flush_error) - ); + re_log::debug!("Failed to flush video: {}", string_from_js_value(&err)); } }); @@ -425,14 +422,9 @@ fn init_video_decoder( } } - let Some(web_timestamp_us_raw) = frame.timestamp() else { - // Spec says this should never happen. - re_log::warn_once!("WebCodec decoded video frame without any timestamp data."); - return; - }; // WebCodec timestamps are internally represented as i64 according to the spec. // Any floating point part would be a violation of the spec. - let web_timestamp_us = web_timestamp_us_raw as u64; + let web_timestamp_us = frame.timestamp() as u64; match pending_frame_infos.entry(web_timestamp_us) { Entry::Occupied(mut entry) => { @@ -462,7 +454,7 @@ fn init_video_decoder( Entry::Vacant(_) => { re_log::warn!( - "Decoder produced a frame at timestamp {web_timestamp_us_raw}us for which we don't have a valid frame info." + "Decoder produced a frame at timestamp {web_timestamp_us}us for which we don't have a valid frame info." 
); } } @@ -472,7 +464,7 @@ fn init_video_decoder( let on_error = Closure::wrap(Box::new(move |err: js_sys::Error| { output_sender .send(Err(super::DecodeError::WebDecoder(WebError::Decoding( - js_error_to_string(&err), + string_from_js_value(&err), )))) .ok(); }) as Box); @@ -485,7 +477,7 @@ fn init_video_decoder( }; let decoder = web_sys::VideoDecoder::new(&VideoDecoderInit::new(&on_error, &on_output)) - .map_err(|err| WebError::DecoderSetupFailure(js_error_to_string(&err)))?; + .map_err(|err| WebError::DecoderSetupFailure(string_from_js_value(&err)))?; Ok((decoder, output_callback_tx)) } @@ -546,7 +538,7 @@ fn js_video_decoder_config( js } -fn js_error_to_string(v: &wasm_bindgen::JsValue) -> String { +pub fn string_from_js_value(v: &wasm_bindgen::JsValue) -> String { if let Some(v) = v.as_string() { return v; } diff --git a/crates/utils/re_video/src/demux/mod.rs b/crates/utils/re_video/src/demux/mod.rs index 42148ffb919f..0867ef08823e 100644 --- a/crates/utils/re_video/src/demux/mod.rs +++ b/crates/utils/re_video/src/demux/mod.rs @@ -59,7 +59,7 @@ impl std::fmt::Display for ChromaSubsamplingModes { } /// The basic codec family used to encode the video. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum VideoCodec { /// Advanced Video Coding (AVC/H.264) /// @@ -85,33 +85,13 @@ pub enum VideoCodec { /// /// See VP9, -} -impl VideoCodec { - /// Base part of the web codec string, without additional parameters. + /// Not a video in the traditional sense, but the data is a sequence + /// of images that are decoded by image decoders. /// - /// See - pub fn base_webcodec_string(&self) -> &'static str { - match self { - // https://www.w3.org/TR/webcodecs-av1-codec-registration/#fully-qualified-codec-strings - Self::AV1 => "av01", - - // https://www.w3.org/TR/webcodecs-avc-codec-registration/#fully-qualified-codec-strings - // avc3 is valid as well. 
- Self::H264 => "avc1", - - // https://www.w3.org/TR/webcodecs-hevc-codec-registration/#fully-qualified-codec-strings - // hvc1 is valid as well. - Self::H265 => "hev1", - - // https://www.w3.org/TR/webcodecs-vp8-codec-registration/#fully-qualified-codec-strings - // Special! This *is* the fully qualified codec string. - Self::VP8 => "vp8", - - // https://www.w3.org/TR/webcodecs-vp9-codec-registration/#fully-qualified-codec-strings - Self::VP9 => "vp09", - } - } + /// The stored string is the mime-type of the image format. For example + /// `image/png`. If this is None it means it should be guessed. + ImageSequence(Option), } /// Index used for referencing into [`VideoDataDescription::samples`]. @@ -121,7 +101,7 @@ pub type SampleIndex = usize; pub type KeyframeIndex = usize; /// Distinguishes static videos from potentially ongoing video streams. -#[derive(Clone)] +#[derive(Clone, Debug)] pub enum VideoDeliveryMethod { /// A static video with a fixed, known duration which won't be updated further. Static { duration: Time }, @@ -237,6 +217,20 @@ impl VideoDataDescription { ) } + /// If this video is a [`VideoCodec::ImageSequence`], returns the + /// specified or guessed image mime-type if it exists. + pub fn image_codec_mime_type(&self) -> Option<&str> { + let VideoCodec::ImageSequence(codec) = &self.codec else { + return None; + }; + + if let Some(details) = &self.encoding_details { + Some(&details.codec_string) + } else { + codec.as_deref() + } + } + /// Checks various invariants that the video description should always uphold. 
/// /// Violation of any of these variants is **not** a user(-data) error, but instead an @@ -361,19 +355,21 @@ impl VideoDataDescription { chunk: &crate::Chunk, ) -> Result, SampleConversionError> { match self.codec { - VideoCodec::AV1 => Ok(chunk.data.clone()), + VideoCodec::AV1 | VideoCodec::ImageSequence(_) => Ok(chunk.data.clone()), VideoCodec::H264 => { let stsd = self .encoding_details .as_ref() - .ok_or(SampleConversionError::MissingEncodingDetails(self.codec))? + .ok_or_else(|| { + SampleConversionError::MissingEncodingDetails(self.codec.clone()) + })? .stsd .as_ref() - .ok_or(SampleConversionError::MissingStsd(self.codec))?; + .ok_or_else(|| SampleConversionError::MissingStsd(self.codec.clone()))?; let re_mp4::StsdBoxContent::Avc1(avc1_box) = &stsd.contents else { return Err(SampleConversionError::UnexpectedStsdContent { - codec: self.codec, + codec: self.codec.clone(), found: format!("{:?}", stsd.contents), }); }; @@ -387,17 +383,19 @@ impl VideoDataDescription { let stsd = self .encoding_details .as_ref() - .ok_or(SampleConversionError::MissingEncodingDetails(self.codec))? + .ok_or_else(|| { + SampleConversionError::MissingEncodingDetails(self.codec.clone()) + })? 
.stsd .as_ref() - .ok_or(SampleConversionError::MissingStsd(self.codec))?; + .ok_or_else(|| SampleConversionError::MissingStsd(self.codec.clone()))?; let hvcc_box = match &stsd.contents { re_mp4::StsdBoxContent::Hvc1(hvc1_box) | re_mp4::StsdBoxContent::Hev1(hvc1_box) => hvc1_box, other => { return Err(SampleConversionError::UnexpectedStsdContent { - codec: self.codec, + codec: self.codec.clone(), found: format!("{other:?}"), }); } @@ -410,7 +408,7 @@ impl VideoDataDescription { } VideoCodec::VP8 | VideoCodec::VP9 => { // TODO(#10186): Support VP8/VP9 for the `VideoStream` archetype - Err(SampleConversionError::UnsupportedCodec(self.codec)) + Err(SampleConversionError::UnsupportedCodec(self.codec.clone())) } } } @@ -444,6 +442,9 @@ pub struct VideoEncodingDetails { /// Detailed codec string as specified by the `WebCodecs` codec registry. /// /// See + /// + /// For image based decoders this is the mime-type of the image format. For + /// example `image/png`. pub codec_string: String, /// Encoded width & height. @@ -474,6 +475,19 @@ pub struct VideoEncodingDetails { pub stsd: Option, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct GopSizes { + pub smallest: usize, + pub largest: usize, +} + +impl GopSizes { + const NO_SAMPLES: Self = Self { + smallest: 0, + largest: 0, + }; +} + /// Meta information about the video samples. #[derive(Clone, Debug, PartialEq, Eq)] pub struct SamplesStatistics { @@ -489,6 +503,11 @@ pub struct SamplesStatistics { /// TODO(andreas): We don't have a mechanism for shrinking this bitvec when dropping samples, i.e. it will keep growing. /// ([`StableIndexDeque`] makes sure that indices in the bitvec will still match up with the samples even when samples are dropped from the front.) pub has_sample_highest_pts_so_far: Option, + + /// Stats about the gop sizes that have been observed. + /// + /// This can be 0 if we haven't seen any samples yet. 
+ pub gop_sizes: GopSizes, } impl re_byte_size::SizeBytes for SamplesStatistics { @@ -496,6 +515,7 @@ impl re_byte_size::SizeBytes for SamplesStatistics { let Self { dts_always_equal_pts: _, has_sample_highest_pts_so_far, + gop_sizes: _, } = self; has_sample_highest_pts_so_far .as_ref() @@ -511,6 +531,7 @@ impl SamplesStatistics { pub const NO_BFRAMES: Self = Self { dts_always_equal_pts: true, has_sample_highest_pts_so_far: None, + gop_sizes: GopSizes::NO_SAMPLES, }; pub fn new(samples: &StableIndexDeque) -> Self { @@ -538,9 +559,44 @@ impl SamplesStatistics { .collect() }); + let gop_sizes = samples + .iter() + .fold( + (None::, 0usize), + |(mut sizes, mut count), sample| { + let ends_gop = match sample { + SampleMetadataState::Present(s) => s.is_sync, + SampleMetadataState::Unloaded { .. } => true, + }; + + if ends_gop && count > 0 { + sizes = Some(match sizes { + Some(s) => GopSizes { + smallest: s.smallest.min(count), + largest: s.largest.max(count), + }, + None => GopSizes { + smallest: count, + largest: count, + }, + }); + count = 0; + } + + if sample.is_loaded() { + count += 1; + } + + (sizes, count) + }, + ) + .0 + .unwrap_or(GopSizes::NO_SAMPLES); + Self { dts_always_equal_pts, has_sample_highest_pts_so_far, + gop_sizes, } } } @@ -586,6 +642,13 @@ impl VideoDataDescription { VideoCodec::H265 => "H.265 HEV1", VideoCodec::VP8 => "VP8", VideoCodec::VP9 => "VP9", + VideoCodec::ImageSequence(_) => { + if let Some(codec) = self.image_codec_mime_type() { + return codec.to_owned(); + } else { + return "unknown".to_owned(); + } + } } .to_owned(); @@ -686,79 +749,46 @@ impl VideoDataDescription { /// For a given decode (!) timestamp, returns the index of the first sample whose /// decode timestamp is lesser than or equal to the given timestamp. 
fn latest_sample_index_at_decode_timestamp( - keyframes: &[KeyframeIndex], samples: &StableIndexDeque, decode_time: Time, ) -> Option { - // First find what keyframe this decode timestamp is in, as an optimization since - // we can't efficiently binary search the sample list with possible gaps. - // - // Keyframes will always be [`SampleMetadataState::Present`] and - // have a decode timestamp we can compare against. - let keyframe_idx = keyframes - .partition_point(|p| { - samples - .get(*p) - .map(|s| s.sample()) - .inspect(|_s| { - debug_assert!(_s.is_some(), "Keyframes mentioned in the keyframe lookup list should always be loaded"); - }) - .flatten() - .is_some_and(|s| s.decode_timestamp <= decode_time) - }) - .checked_sub(1)?; - - let start = *keyframes.get(keyframe_idx)?; - let end = keyframes - .get(keyframe_idx + 1) - .copied() - .unwrap_or_else(|| samples.next_index()); - - // Within that keyframe's range, find the most suitable frame for the given decode time. - let range = start..end; - - let mut found_sample_idx = None; - for (idx, sample) in samples.iter_index_range_clamped(&range) { - let Some(s) = sample.sample() else { - continue; - }; - - if s.decode_timestamp <= decode_time { - found_sample_idx = Some(idx); - } else { - break; - } - } - - found_sample_idx + samples + .partition_point(|sample| sample.decode_timestamp() <= decode_time) + .checked_sub(1) } /// See [`Self::latest_sample_index_at_presentation_timestamp`], split out for testing purposes. /// - /// The returned sample index is guaranteed to be [`SampleMetadataState::Present`]. + /// If ok, the returned sample index is guaranteed to be [`SampleMetadataState::Present`]. + /// + /// ### Returns + /// If we find a loaded sample with the best presentation timestamp, returns `Ok(sample_idx)`. + /// If we run into some unloaded sample that should be decoded at this timestamp, returns + /// `Err(Some(sample_idx))`. Otherwise, returns `Err(None)`. 
fn latest_sample_index_at_presentation_timestamp_internal( - keyframes: &[KeyframeIndex], samples: &StableIndexDeque, sample_statistics: &SamplesStatistics, presentation_timestamp: Time, - ) -> Option { + ) -> Result> { // Find the latest sample where `decode_timestamp <= presentation_timestamp`. // Because `decode <= presentation`, we never have to look further backwards in the // video than this. - let decode_sample_idx = Self::latest_sample_index_at_decode_timestamp( - keyframes, - samples, - presentation_timestamp, - ); + let decode_sample_idx = + Self::latest_sample_index_at_decode_timestamp(samples, presentation_timestamp) + .ok_or(None)?; - let decode_sample_idx = decode_sample_idx?; + if let Some(sample) = samples.get(decode_sample_idx) + && sample.is_unloaded() + { + return Err(Some(decode_sample_idx)); + } // It's very common that dts==pts in which case we're done! let Some(has_sample_highest_pts_so_far) = sample_statistics.has_sample_highest_pts_so_far.as_ref() else { debug_assert!(sample_statistics.dts_always_equal_pts); - return Some(decode_sample_idx); + return Ok(decode_sample_idx); }; debug_assert!(has_sample_highest_pts_so_far.len() == samples.next_index()); @@ -772,13 +802,13 @@ impl VideoDataDescription { let mut best_pts = Time::MIN; for sample_idx in (samples.min_index()..=decode_sample_idx).rev() { let Some(sample) = samples[sample_idx].sample() else { - continue; + return Err(Some(sample_idx)); }; if sample.presentation_timestamp == presentation_timestamp { // Clean hit. Take this one, no questions asked :) // (assuming that each PTS is unique!) - return Some(sample_idx); + return Ok(sample_idx); } if sample.presentation_timestamp < presentation_timestamp @@ -790,11 +820,11 @@ impl VideoDataDescription { if best_pts != Time::MIN && has_sample_highest_pts_so_far[sample_idx] { // We won't see any bigger PTS values anymore, meaning we're as close as we can get to the requested PTS! 
- return Some(best_index); + return Ok(best_index); } } - None + Err(None) } /// For a given presentation timestamp, return the index of the first sample @@ -802,12 +832,16 @@ impl VideoDataDescription { /// /// Remember that samples after (i.e. with higher index) may have a *lower* presentation time /// if the stream has sample reordering! + /// + /// ### Returns + /// If we find a loaded sample with the best presentation timestamp, returns `Ok(sample_idx)`. + /// If we run into some unloaded sample that should be decoded at this timestamp, returns + /// `Err(Some(sample_idx))`. Otherwise, returns `Err(None)`. pub fn latest_sample_index_at_presentation_timestamp( &self, presentation_timestamp: Time, - ) -> Option { + ) -> Result> { Self::latest_sample_index_at_presentation_timestamp_internal( - &self.keyframe_indices, &self.samples, &self.samples_statistics, presentation_timestamp, @@ -821,11 +855,11 @@ impl VideoDataDescription { /// Therefore, this may be a jump on sample index. pub fn previous_presented_sample(&self, sample: &SampleMetadata) -> Option<&SampleMetadata> { let idx = Self::latest_sample_index_at_presentation_timestamp_internal( - &self.keyframe_indices, &self.samples, &self.samples_statistics, sample.presentation_timestamp - Time::new(1), - )?; + ) + .ok()?; match self.samples.get(idx) { Some(SampleMetadataState::Present(sample)) => Some(sample), None | Some(_) => unreachable!(), @@ -1025,7 +1059,7 @@ pub struct SampleMetadata { /// May be unknown if this is the last sample in an ongoing video stream. pub duration: Option
You can see that this matches very closely the diagram above: -* A single *control* column, that contains the globally unique row IDs. +* A single *control* column, that contains the unique row IDs. * Multiple *time*/*index* columns (`log_tick`, `log_time`, `stable_time`). * Multiple component columns (`Points3D:colors`, `Points3D:positions`, `Points3D:radii`). @@ -57,7 +57,7 @@ The data in this specific chunk was logged with the following code: snippet: concepts/how_helix_was_logged -You can learn more about chunks and how they came to be in [this blog post](http://rerun.io/blog/column-chunks#storage-is-based-around-chunks-of-component-columns). +You can learn more about chunks and how they came to be in [this blog post](https://rerun.io/blog/column-chunks#storage-is-based-around-chunks-of-component-columns). ## Getting chunks into Rerun diff --git a/docs/content/concepts/logging-and-ingestion/data-loaders.md b/docs/content/concepts/logging-and-ingestion/data-loaders.md deleted file mode 100644 index e6c4ea48f708..000000000000 --- a/docs/content/concepts/logging-and-ingestion/data-loaders.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: Data-loaders -order: 800 ---- - -Extending Rerun's file loading capabilities with custom data-loaders. - - diff --git a/docs/content/concepts/logging-and-ingestion/importers.md b/docs/content/concepts/logging-and-ingestion/importers.md new file mode 100644 index 000000000000..e51c4db0c18a --- /dev/null +++ b/docs/content/concepts/logging-and-ingestion/importers.md @@ -0,0 +1,8 @@ +--- +title: Importers +order: 800 +--- + +Extending Rerun's file loading capabilities with custom importers. 
+ + diff --git a/docs/content/concepts/logging-and-ingestion/data-loaders/cpp.md b/docs/content/concepts/logging-and-ingestion/importers/cpp.md similarity index 81% rename from docs/content/concepts/logging-and-ingestion/data-loaders/cpp.md rename to docs/content/concepts/logging-and-ingestion/importers/cpp.md index b477abb9a480..464e6d615fdf 100644 --- a/docs/content/concepts/logging-and-ingestion/data-loaders/cpp.md +++ b/docs/content/concepts/logging-and-ingestion/importers/cpp.md @@ -1,5 +1,5 @@ --- title: 🌊 C++ example order: 100 -redirect: https://github.com/rerun-io/rerun/tree/main/examples/cpp/external_data_loader +redirect: https://github.com/rerun-io/rerun/tree/main/examples/cpp/external_importer --- diff --git a/docs/content/concepts/logging-and-ingestion/data-loaders/overview.md b/docs/content/concepts/logging-and-ingestion/importers/overview.md similarity index 57% rename from docs/content/concepts/logging-and-ingestion/data-loaders/overview.md rename to docs/content/concepts/logging-and-ingestion/importers/overview.md index 40c4b23fe33e..e62c47639b47 100644 --- a/docs/content/concepts/logging-and-ingestion/data-loaders/overview.md +++ b/docs/content/concepts/logging-and-ingestion/importers/overview.md @@ -3,28 +3,28 @@ title: Overview order: 50 --- -Internally, the [`DataLoader`](https://docs.rs/re_data_loader/latest/re_data_loader/trait.DataLoader.html) trait takes care of loading files into the Viewer and/or SDK. +Internally, the [`Importer`](https://docs.rs/re_importer/latest/re_importer/trait.Importer.html?speculative-link) trait takes care of loading files into the Viewer and/or SDK. -There are 3 broad kinds of `DataLoader`s: _builtin_, _external_ and _custom_. +There are 3 broad kinds of `Importer`s: _builtin_, _external_ and _custom_. _External_ and _custom_ are the two ways of extending the file loading system that we'll describe below. 
-When a user attempts to open a file in the Viewer/SDK, **all** known `DataLoader`s are notified of the path to be opened, unconditionally. -This gives `DataLoader`s maximum flexibility to decide what files they are interested in, as opposed to e.g. only being able to look at a file's extension. +When a user attempts to open a file in the Viewer/SDK, **all** known `Importer`s are notified of the path to be opened, unconditionally. +This gives `Importer`s maximum flexibility to decide what files they are interested in, as opposed to e.g. only being able to look at a file's extension. -Once notified, a `DataLoader` can return a [`DataLoaderError::Incompatible`](https://docs.rs/re_data_loader/latest/re_data_loader/enum.DataLoaderError.html#variant.Incompatible) error to indicate that it doesn't support a given file type. -If, and only if, all loaders known to the Viewer/SDK return an `Incompatible` error code, then an error message is shown to the user indicating that this file type is not (_yet_) supported. +Once notified, an `Importer` can return an [`ImporterError::Incompatible`](https://docs.rs/re_importer/latest/re_importer/enum.ImporterError.html?speculative-link#variant.Incompatible) error to indicate that it doesn't support a given file type. +If, and only if, all importers known to the Viewer/SDK return an `Incompatible` error code, then an error message is shown to the user indicating that this file type is not (_yet_) supported. -In these instances of unsupported files, we expose two ways of implementing and registering your `DataLoader`s, explained below. +In these instances of unsupported files, we expose two ways of implementing and registering your `Importer`s, explained below. -### External data-loaders +### External importers -The easiest way to create your own `DataLoader` is by implementing what we call an "external loader": a stand alone executable written in any language that the Rerun SDK ships for. 
Any executable on your `$PATH` with a name that starts with `rerun-loader-` will be treated as a `DataLoader`. +The easiest way to create your own `Importer` is by implementing what we call an "external importer": a stand alone executable written in any language that the Rerun SDK ships for. Any executable on your `$PATH` with a name that starts with `rerun-importer-` (or `rerun-loader-` for backwards compatibility) will be treated as an `Importer`. This executable takes a file path as a command line argument and outputs Rerun logs on `stdout`. It will be called by the Rerun Viewer/SDK when the user opens a file, and be passed the path to that file. -From there, it can log data as usual, using the [`stdout` logging sink](../../../reference/sdk/operating-modes.md#standard-inputoutput). +From there, it can log data as usual, using the [`stdout` logging sink](../../../reference/sdk/operating-modes.md#standard-inputoutput-stdout). -The Rerun Viewer/SDK will then automatically load the data streamed to the external loader's standard output. +The Rerun Viewer/SDK will then automatically load the data streamed to the external importer's standard output. @@ -34,10 +34,10 @@ The Rerun Viewer/SDK will then automatically load the data streamed to the exter -Like any other `DataLoader`, an external loader will be notified of all file openings, unconditionally. -To indicate that it does not support a given file, the loader has to exit with a [dedicated status code](https://docs.rs/rerun/latest/rerun/constant.EXTERNAL_DATA_LOADER_INCOMPATIBLE_EXIT_CODE.html). +Like any other `Importer`, an external importer will be notified of all file openings, unconditionally. +To indicate that it does not support a given file, the importer has to exit with a [dedicated status code](https://docs.rs/rerun/latest/rerun/constant.EXTERNAL_DATA_LOADER_INCOMPATIBLE_EXIT_CODE.html). 
-When the Viewer and/or SDK executes an external loader, it will pass to it a set of recommended settings in the form of CLI parameters (in addition to the file path to be loaded, which is passed as the one and only positional argument): +When the Viewer and/or SDK executes an external importer, it will pass to it a set of recommended settings in the form of CLI parameters (in addition to the file path to be loaded, which is passed as the one and only positional argument): * `--application-id ` @@ -52,7 +52,7 @@ When the Viewer and/or SDK executes an external loader, it will pass to it a set The recommended `RecordingId` to log the data to. Log data to this recording if you want it to appear in a new recording shared by all - data-loaders for the current loading session. + importers for the current loading session. * `--opened-recording-id ` (optional) @@ -82,12 +82,12 @@ When the Viewer and/or SDK executes an external loader, it will pass to it a set The timestamps are expected to be in nanoseconds since Unix epoch: use `rr.set_time_timestamp_nanos` (Python) / `RecordingStream::set_time_timestamp_nanos` (C++, Rust) appropriately. -Check out our examples for [C++](https://github.com/rerun-io/rerun/tree/main/examples/cpp/external_data_loader), [Python](https://github.com/rerun-io/rerun/tree/main/examples/python/external_data_loader) and [Rust](https://github.com/rerun-io/rerun/tree/main/examples/rust/external_data_loader) that cover every steps in details. +Check out our examples for [C++](https://github.com/rerun-io/rerun/tree/main/examples/cpp/external_importer), [Python](https://github.com/rerun-io/rerun/tree/main/examples/python/external_importer) and [Rust](https://github.com/rerun-io/rerun/tree/main/examples/rust/external_importer) that cover every step in detail. -### Custom Rust data-loaders +### Custom Rust importers -Another Rust-specific approach is to implement the `DataLoader` trait yourself and register it in the Rerun Viewer/SDK. 
+Another Rust-specific approach is to implement the `Importer` trait yourself and register it in the Rerun Viewer/SDK. -To do so, you'll need to import `rerun` as a library, register your `DataLoader` and then start the Viewer/SDK from code. +To do so, you'll need to import `rerun` as a library, register your `Importer` and then start the Viewer/SDK from code. -Check out our [example](https://github.com/rerun-io/rerun/tree/main/examples/rust/custom_data_loader) that cover all these steps in details. +Check out our [example](https://github.com/rerun-io/rerun/tree/main/examples/rust/custom_importer) that covers all these steps in detail. diff --git a/docs/content/concepts/logging-and-ingestion/data-loaders/python.md b/docs/content/concepts/logging-and-ingestion/importers/python.md similarity index 80% rename from docs/content/concepts/logging-and-ingestion/data-loaders/python.md rename to docs/content/concepts/logging-and-ingestion/importers/python.md index a5a0c9408daa..4d67b2ad5fbe 100644 --- a/docs/content/concepts/logging-and-ingestion/data-loaders/python.md +++ b/docs/content/concepts/logging-and-ingestion/importers/python.md @@ -1,5 +1,5 @@ --- title: 🐍 Python example order: 100 -redirect: https://github.com/rerun-io/rerun/tree/main/examples/python/external_data_loader +redirect: https://github.com/rerun-io/rerun/tree/main/examples/python/external_importer --- diff --git a/docs/content/concepts/logging-and-ingestion/data-loaders/rust.md b/docs/content/concepts/logging-and-ingestion/importers/rust.md similarity index 81% rename from docs/content/concepts/logging-and-ingestion/data-loaders/rust.md rename to docs/content/concepts/logging-and-ingestion/importers/rust.md index 310c3cd62ad0..0400c8112d94 100644 --- a/docs/content/concepts/logging-and-ingestion/data-loaders/rust.md +++ b/docs/content/concepts/logging-and-ingestion/importers/rust.md @@ -1,5 +1,5 @@ --- title: 🦀 Rust example order: 120 -redirect: 
https://github.com/rerun-io/rerun/tree/main/examples/rust/external_data_loader +redirect: https://github.com/rerun-io/rerun/tree/main/examples/rust/external_importer --- diff --git a/docs/content/concepts/logging-and-ingestion/mcap.md b/docs/content/concepts/logging-and-ingestion/mcap.md index 867a185ddeec..23449de9e223 100644 --- a/docs/content/concepts/logging-and-ingestion/mcap.md +++ b/docs/content/concepts/logging-and-ingestion/mcap.md @@ -11,5 +11,5 @@ Working with MCAP files in Rerun: * [Supported Message Formats](mcap/message-formats.md) Technical details and advanced usage: -* [MCAP Layers Explained](mcap/layers-explained.md) +* [MCAP Decoders Explained](mcap/decoders-explained.md) * [CLI Reference for MCAP](mcap/cli-reference.md) diff --git a/docs/content/concepts/logging-and-ingestion/mcap/cli-reference.md b/docs/content/concepts/logging-and-ingestion/mcap/cli-reference.md index c04ca42c6678..6973773de9bd 100644 --- a/docs/content/concepts/logging-and-ingestion/mcap/cli-reference.md +++ b/docs/content/concepts/logging-and-ingestion/mcap/cli-reference.md @@ -37,31 +37,36 @@ rerun mcap convert input.mcap -o output.rrd rerun mcap convert data.mcap -o /path/to/output.rrd ``` -## Layer selection +## Decoder selection -### Using specific layers +### Using specific decoders -Control which processing layers are applied during conversion: +Control which processing decoders are applied during conversion: ```bash # Use only protobuf decoding and file statistics -rerun mcap convert input.mcap -l protobuf -l stats -o output.rrd +rerun mcap convert input.mcap -d protobuf -d stats -o output.rrd # Use only ROS2 semantic interpretation for robotics data -rerun mcap convert input.mcap -l ros2msg -o output.rrd +rerun mcap convert input.mcap -d ros2msg -o output.rrd -# Combine multiple layers for comprehensive data access -rerun mcap convert input.mcap -l ros2msg -l raw -l recording_info -o output.rrd +# Add robot geometry from ROS robot_description topics +rerun mcap 
convert input.mcap -d ros2msg -d urdf -o output.rrd + +# Combine multiple decoders for comprehensive data access +rerun mcap convert input.mcap -d ros2msg -d raw -d recording_info -o output.rrd ``` -### Available layer options +### Available decoder options Decoding: - **`raw`**: Preserve original message bytes - **`schema`**: Extract metadata and schema information - **`stats`**: Compute file and channel statistics +- **`metadata`**: Extract metadata records into RRD `__properties`, if present - **`protobuf`**: Decode protobuf messages using into generic Arrow data without Rerun visualization components - **`recording_info`**: Extract recording session metadata +- **`urdf`**: Use Rerun's built-in URDF loader when a ROS 2 `/robot_description` topic is present Semantic: - **`foxglove`**: Semantic interpretation of Foxglove Protobuf messages @@ -69,20 +74,22 @@ Semantic: ### Default behavior -When no `-l` flags are specified, all available layers are used: +When no `-d` flags are specified, all available decoders are used: ```bash -# These commands are equivalent (default uses all layers): +# These commands are equivalent (default uses all decoders): rerun mcap convert input.mcap -o output.rrd rerun mcap convert input.mcap \ - -l raw \ - -l schema \ - -l stats \ - -l protobuf \ - -l ros2msg \ - -l foxglove \ - -l recording_info \ + -d raw \ + -d schema \ + -d stats \ + -d metadata \ + -d protobuf \ + -d recording_info \ + -d urdf \ + -d ros2msg \ + -d foxglove \ -o output.rrd ``` diff --git a/docs/content/concepts/logging-and-ingestion/mcap/decoders-explained.md b/docs/content/concepts/logging-and-ingestion/mcap/decoders-explained.md new file mode 100644 index 000000000000..e2c0759ae222 --- /dev/null +++ b/docs/content/concepts/logging-and-ingestion/mcap/decoders-explained.md @@ -0,0 +1,97 @@ +--- +title: MCAP Decoders Explained +order: 300 +--- + +MCAP processing in Rerun uses a decoder architecture where each decoder represents a different way to interpret and 
extract data from the same MCAP source. +By default, when opening a file Rerun analyzes an MCAP file to determine which decoders are active to provide the most comprehensive view of your data, while avoiding duplication. +You can specify which decoders to use during conversion, allowing you to extract exactly the information you need for your analysis. + +## Understanding decoders with an example + +When multiple decoders are enabled, they each process the same messages independently, creating different component types on identical entity paths. This can result in data duplication—for instance, enabling both `raw` and `protobuf` decoders stores the same message as both structured field data and raw binary blobs. + +Consider an MCAP file from a ROS2 robot containing sensor data on the topic `/robot/camera/image_raw` with ROS2 `sensor_msgs/msg/Image` messages: + +- With only the `ros2msg` decoder: Creates an [Image](../../../reference/types/archetypes/image.md) archetype for direct visualization in Rerun's viewer +- With only the `raw` decoder: Creates an [McapMessage](../../../reference/types/archetypes/mcap_message.md) containing the original CDR-encoded message bytes +- With both decoders enabled: All representations coexist on the same entity path `/robot/camera/image_raw` + +## Schema and statistics decoders + +The `schema` decoder extracts structural information about the MCAP file's organization, creating metadata entities that describe channel definitions, topic names with their message types, and schema definitions. This decoder is particularly useful for understanding unfamiliar MCAP files or getting an overview of available topics and channels before deeper processing. + +The `stats` decoder computes file-level metrics and statistics, creating entities with message counts per channel, temporal ranges, file size information, and data rate analysis. 
This gives you insight into the scale and characteristics of your dataset for quality assessment and planning storage requirements. + +## Message interpretation decoders + +### Semantic interpretation + +The `ros2msg` and `foxglove` decoders provide semantic interpretation and visualization of standard ROS 2 and Foxglove message types, creating meaningful Rerun visualization archetypes from data. Unlike the `protobuf` decoder, these decoders understand the semantics of the messages and create appropriate visualizations: images become [Image](../../../reference/types/archetypes/image.md), point clouds become [Points3D](../../../reference/types/archetypes/points3d.md), IMU messages become [SeriesLines](../../../reference/types/archetypes/series_lines.md) with the data plotted over time, and so on. + +See [Message Formats](message-formats.md) for the complete list of supported message types. + +### Protobuf decoding + +The `protobuf` decoder automatically decodes protobuf-encoded messages using reflection, creating structured component data based on the protobuf schema. Message fields become Rerun components that you can query and analyze. + +However, this decoder provides structured access without semantic visualization meaning. While the data becomes queryable, it won't automatically appear as meaningful visualizations like images or point clouds; it gives you the data structure, not the visual interpretation. + +## The raw decoder + +The `raw` decoder preserves the original message bytes without any interpretation, creating blob entities containing the unprocessed message data. Each message appears as a binary blob that can be accessed programmatically for custom analysis tools. + +## Recording info + +The `recording_info` decoder extracts metadata about the recording session and capture context, creating metadata entities with information about recording timestamps, source system details, and capture software versions. 
+ +## The URDF option + +The `urdf` option uses Rerun's built-in URDF loader if there is a ROS 2 string topic named `/robot_description`, logging the robot model as static 3D geometry. In this MCAP workflow, joint transforms are not loaded from the URDF itself; they are expected to come from TF topics in the MCAP (e.g. `/tf` or `/tf_static`). + +For general information about how to load URDF files, see [here](../../../howto/logging-and-ingestion/urdf.md). + +## Decoder selection and performance + +### Selecting decoders + +By default, Rerun processes MCAP files with all decoders active. You can control which decoders are used when [converting MCAP files via the CLI](cli-reference.md) using the `-d` flag: + +```bash +# Use only specific decoders +rerun mcap convert input.mcap -d protobuf -d stats -o output.rrd + +# Use multiple decoders for different perspectives +rerun mcap convert input.mcap -d ros2msg -d raw -d recording_info -o output.rrd + +# Add robot geometry from ROS robot_description topics +rerun mcap convert input.mcap -d ros2msg -d urdf -o output.rrd +``` + +## Accessing decoder data + +Each decoder creates different types of components on entity paths (derived from MCAP channel topics) that can be accessed through Rerun's SDK: + +- Data from the `ros2msg` decoder and supported Foxglove messages appears as native Rerun visualization archetypes (see [here](message-formats.md#overview) for an overview) +- Other data from the `protobuf` or `ros2_reflection` decoders appears as structured components that can be queried by field name or manually added to certain views ([example](message-formats.md#example-timeseries-plot-for-custom-message-scalars)) +- Data from the `raw` decoder appears as blob components containing the original message bytes +- Data from the `urdf` option appears as static 3D robot geometry loaded from the ROS 2 `/robot_description` topic +- Metadata from `schema`, `stats`, and `recording_info` decoders appears as dedicated metadata 
entities + +For more information on querying data and working with archetypes, see the [Data Queries documentation](../../../howto/query-and-transform/get-data-out.md). + +Each of these decoders contributes their own [chunks](../chunks.md) to the Rerun-native data. +Below is a table showing the mapping between MCAP data and Rerun components: + +| MCAP Data | Rerun component | Description | +| ---------------- | ------------------------------- | ----------------------------------------------------------------------------- | +| Schema name | `mcap.Schema:name` | Message type name from schema definition | +| Schema data | `mcap.Schema:data` | Raw schema definition (protobuf, ROS2 msg, etc.) | +| Schema encoding | `mcap.Schema:encoding` | Schema format type | +| | | | +| Channel topic | `mcap.Channel:topic` | Topic name from MCAP channel | +| Channel ID | `mcap.Channel:id` | Numeric channel identifier | +| Message encoding | `mcap.Channel:message_encoding` | Encoding format (e.g., `protobuf`, `cdr`) | +| | | | +| Statistics | `mcap.Statistics` | File-level metrics like message counts and time ranges | +| Raw message data | `mcap.Message:data` | Unprocessed message bytes stored as binary blobs, handled by the `raw` decoder. | diff --git a/docs/content/concepts/logging-and-ingestion/mcap/layers-explained.md b/docs/content/concepts/logging-and-ingestion/mcap/layers-explained.md deleted file mode 100644 index 424dc268f44f..000000000000 --- a/docs/content/concepts/logging-and-ingestion/mcap/layers-explained.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: MCAP Layers Explained -order: 300 ---- - -MCAP processing in Rerun uses a layered architecture where each layer represents a different way to interpret and extract data from the same MCAP source. -By default, when opening a file Rerun analyzes an MCAP file to determine which layers are active to provide the most comprehensive view of your data, while avoiding duplication. 
-You can specify which layers to use during conversion, allowing you to extract exactly the information you need for your analysis. - -## Understanding layers with an example - -When multiple layers are enabled, they each process the same messages independently, creating different component types on identical entity paths. This can result in data duplication—for instance, enabling both `raw` and `protobuf` layers stores the same message as both structured field data and raw binary blobs. - -Consider an MCAP file from a ROS2 robot containing sensor data on the topic `/robot/camera/image_raw` with ROS2 `sensor_msgs/msg/Image` messages: - -- With only the `ros2msg` layer: Creates an [Image](../../../reference/types/archetypes/image.md) archetype for direct visualization in Rerun's viewer -- With only the `raw` layer: Creates an [McapMessage](../../../reference/types/archetypes/mcap_message.md) containing the original CDR-encoded message bytes -- With both layers enabled: All representations coexist on the same entity path `/robot/camera/image_raw` - -## Schema and statistics layers - -The `schema` layer extracts structural information about the MCAP file's organization, creating metadata entities that describe channel definitions, topic names with their message types, and schema definitions. This layer is particularly useful for understanding unfamiliar MCAP files or getting an overview of available topics and channels before deeper processing. - -The `stats` layer computes file-level metrics and statistics, creating entities with message counts per channel, temporal ranges, file size information, and data rate analysis. This gives you insight into the scale and characteristics of your dataset for quality assessment and planning storage requirements. 
- -## Message interpretation layers - -### Semantic interpretation - -The `ros2msg` and `foxglove` layers provide semantic interpretation and visualization of standard ROS 2 and Foxglove message types, creating meaningful Rerun visualization archetypes from data. Unlike the `protobuf` layer, this layer understands the semantics of the messages and creates appropriate visualizations: images become [Image](../../../reference/types/archetypes/image.md), point clouds become [Points3D](../../../reference/types/archetypes/points3d.md), IMU messages become [SeriesLines](../../../reference/types/archetypes/series_lines.md) with the data plotted over time, and so on. - -See [Message Formats](message-formats.md) for the complete list of supported message types. - -### Protobuf decoding - -The `protobuf` layer automatically decodes protobuf-encoded messages using reflection, creating structured component data based on the protobuf schema. Message fields become Rerun components that you can query and analyze. - -However, this layer provides structured access without semantic visualization meaning. While the data becomes queryable, it won't automatically appear as meaningful visualizations like images or point clouds, it gives you the data structure, not the visual interpretation. - -## The raw layer - -The `raw` layer preserves the original message bytes without any interpretation, creating blob entities containing the unprocessed message data. Each message appears as a binary blob that can be accessed programmatically for custom analysis tools. - -## Recording info - -The `recording_info` layer extracts metadata about the recording session and capture context, creating metadata entities with information about recording timestamps, source system details, and capture software versions. - -## Layer selection and performance - -### Selecting layers - -By default, Rerun processes MCAP files with all layers active. 
You can control which layers are used when [converting MCAP files via the CLI](cli-reference.md) using the `-l` flag: - -```bash -# Use only specific layers -rerun mcap convert input.mcap -l protobuf -l stats -o output.rrd - -# Use multiple layers for different perspectives -rerun mcap convert input.mcap -l ros2msg -l raw -l recording_info -o output.rrd -``` - -## Accessing layer data - -Each layer creates different types of components on entity paths (derived from MCAP channel topics) that can be accessed through Rerun's SDK: - -- Data from the `ros2msg` layer and supported Foxglove messages appears as native Rerun visualization archetypes (see [here](message-formats.md#overview) for an overview) -- Other data from the `protobuf` or `ros2_reflection` layers appears as structured components that can be queried by field name or manually added to certain views ([example](message-formats.md#example-time-series-plot-for-custom-message-scalars)) -- Data from the `raw` layer appears as blob components containing the original message bytes -- Metadata from `schema`, `stats`, and `recording_info` layers appears as dedicated metadata entities - -For more information on querying data and working with archetypes, see the [Data Queries documentation](../../../howto/query-and-transform/get-data-out.md). - -Each of these layers contributes their own [chunks](../chunks.md) to the Rerun-native data. -Below is a table showing the mapping between MCAP data and Rerun components: - -| MCAP Data | Rerun component | Description | -| ---------------- | ------------------------------- | ----------------------------------------------------------------------------- | -| Schema name | `mcap.Schema:name` | Message type name from schema definition | -| Schema data | `mcap.Schema:data` | Raw schema definition (protobuf, ROS2 msg, etc.) 
| -| Schema encoding | `mcap.Schema:encoding` | Schema format type | -| | | | -| Channel topic | `mcap.Channel:topic` | Topic name from MCAP channel | -| Channel ID | `mcap.Channel:id` | Numeric channel identifier | -| Message encoding | `mcap.Channel:message_encoding` | Encoding format (e.g., `protobuf`, `cdr`) | -| | | | -| Statistics | `mcap.Statistics` | File-level metrics like message counts and time ranges | -| Raw message data | `mcap.Message:data` | Unprocessed message bytes stored as binary blobs, handled by the `raw` layer. | diff --git a/docs/content/concepts/logging-and-ingestion/mcap/message-formats.md b/docs/content/concepts/logging-and-ingestion/mcap/message-formats.md index 100b70d8c8d9..c153a28d514f 100644 --- a/docs/content/concepts/logging-and-ingestion/mcap/message-formats.md +++ b/docs/content/concepts/logging-and-ingestion/mcap/message-formats.md @@ -21,7 +21,7 @@ We are continually adding support for more standard message types. | Video | `sensor_msgs/CompressedImage` (h264) | `CompressedVideo` | [VideoStream](../../../reference/types/archetypes/video_stream.md) | | Camera calibration | `sensor_msgs/CameraInfo` | `CameraCalibration` | [Pinhole](../../../reference/types/archetypes/pinhole.md) | | Point cloud | `sensor_msgs/PointCloud2` | `PointCloud` | [Points3D](../../../reference/types/archetypes/points3d.md) | -| Geo points | `sensor_msgs/NavSatFix` | `LocationFix`, `LocationFixes`* | [GeoPoints](../../../reference/types/archetypes/geo_points.md) | +| Geo points | `sensor_msgs/NavSatFix` | `LocationFix`, `LocationFixes` | [GeoPoints](../../../reference/types/archetypes/geo_points.md) | | Transforms | `tf2_msgs/TFMessage` | `FrameTransform`, `FrameTransforms` | [Transform3D](../../../reference/types/archetypes/transform3d.md) | | Poses | `geometry_msgs/PoseStamped` | `PoseInFrame`, `PosesInFrame` | [InstancePoses3D](../../../reference/types/archetypes/instance_poses3d.md) | | Coordinate frame | `.frame_id` field in `std_msgs/Header` | 
`.frame_id` field | [CoordinateFrame](../../../reference/types/archetypes/coordinate_frame.md) @@ -29,12 +29,11 @@ We are continually adding support for more standard message types. | Misc. scalar sensor data | `sensor_msgs/Imu`, `sensor_msgs/JointState`, `sensor_msgs/Temperature`, `sensor_msgs/FluidPressure`, `sensor_msgs/RelativeHumidity`, `sensor_msgs/Illuminance`, `sensor_msgs/Range`, `sensor_msgs/BatteryState`, `sensor_msgs/Joy` | - *(usually covered via custom schemas, see [Schema reflection](#schema-reflection) below on this page)* | [Scalars](../../../reference/types/archetypes/scalars.md) | | Text | `std_msgs/String` | - | [TextDocument](../../../reference/types/archetypes/text_document.md) | | Log messages | `rcl_interfaces/Log` | `Log` | [TextLog](../../../reference/types/archetypes/text_log.md) | - -> *Support for `LocationFix` is coming soon. +| 2D grid map | `nav_msgs/OccupancyGrid` | - | [GridMap](../../../reference/types/archetypes/grid_map.md) | ### Timelines -The MCAP data loader adds [timelines](../../../concepts/logging-and-ingestion/timelines.md) based on the message timestamps. +The MCAP importer adds [timelines](../../../concepts/logging-and-ingestion/timelines.md) based on the message timestamps. In addition to the `message_log_time` and `message_publish_time` timestamps that are part of every MCAP message, we also add timelines with the application-specific timestamps from ROS and Foxglove schemas. @@ -56,7 +55,7 @@ The timestamps of the individual transforms are put onto the respective timeline > You can read more about how Rerun handles transforms and "TF-style" frame names [here](https://rerun.io/docs/concepts/transforms#named-transform-frames). To see the transforms in the viewer, you can select the entity corresponding to the topic and add a visualizer for `TransformAxes3D` as shown in the video here. 
-If you have transforms that correspond to joints in a robot model, you can also read more about how to load `URDF` models into a recording [here](https://rerun.io/docs/howto/urdf#load-urdf-into-an-existing-recording). +If you have transforms that correspond to joints in a robot model, you can also read more about how to load `URDF` models into a recording [here](https://rerun.io/docs/howto/logging-and-ingestion/urdf#load-urdf-into-an-existing-recording).