From 8aa110e06b6cfca696218bdbfa6e2fbc9b2c9968 Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 9 Jan 2026 14:38:05 -0700 Subject: [PATCH 1/5] add image type field to spec --- .pre-commit-config.yaml | 2 +- src/ome_arrow/ingest.py | 26 +++++++++++++++++++++++++- src/ome_arrow/meta.py | 2 ++ tests/conftest.py | 1 + 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6c9cfca..0226bd6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,7 +39,7 @@ repos: - id: yamllint exclude: pre-commit-config.yaml - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.14.10" + rev: "v0.14.11" hooks: - id: ruff-format - id: ruff-check diff --git a/src/ome_arrow/ingest.py b/src/ome_arrow/ingest.py index f925582..86e0276 100644 --- a/src/ome_arrow/ingest.py +++ b/src/ome_arrow/ingest.py @@ -50,8 +50,9 @@ def _ome_arrow_from_table( # 1) Locate the OME-Arrow column def _struct_matches_ome_fields(t: pa.StructType) -> bool: ome_fields = {f.name for f in OME_ARROW_STRUCT} + required_fields = ome_fields - {"image_type"} col_fields = {f.name for f in t} - return ome_fields == col_fields + return required_fields.issubset(col_fields) requested_name = column_name candidate_col = None @@ -105,6 +106,11 @@ def _struct_matches_ome_fields(t: pa.StructType) -> bool: # 2) Extract the row as a Python dict record_dict: Dict[str, Any] = candidate_col.slice(row_index, 1).to_pylist()[0] + # Back-compat: older files won't include image_type; default to None. + if "image_type" not in record_dict: + record_dict["image_type"] = None + # Drop unexpected fields before casting to the canonical schema. + record_dict = {f.name: record_dict.get(f.name) for f in OME_ARROW_STRUCT} # 3) Reconstruct a typed StructScalar using the canonical schema scalar = pa.scalar(record_dict, type=OME_ARROW_STRUCT) @@ -248,6 +254,7 @@ def to_ome_arrow( version: str = OME_ARROW_TAG_VERSION, image_id: str = "unnamed", name: str = "unknown", + image_type: str | None = "image", acquisition_datetime: Optional[datetime] = None, dimension_order: str = "XYZCT", dtype: str = "uint16", @@ -276,6 +283,7 @@ def to_ome_arrow( version: Specification version string. image_id: Unique image identifier. name: Human-friendly name. + image_type: Open-ended image kind (e.g., "image", "label"). acquisition_datetime: Datetime of acquisition (defaults to now). dimension_order: Dimension order ("XYZCT" or "XYCT"). dtype: Pixel data type string (e.g., "uint16"). @@ -299,6 +307,7 @@ def to_ome_arrow( version = str(version) image_id = str(image_id) name = str(name) + image_type = None if image_type is None else str(image_type) dimension_order = str(dimension_order) dtype = str(dtype) physical_size_unit = str(physical_size_unit) @@ -333,6 +342,7 @@ def to_ome_arrow( "version": version, "id": image_id, "name": name, + "image_type": image_type, "acquisition_datetime": acquisition_datetime or datetime.now(timezone.utc), "pixels_meta": { "dimension_order": dimension_order, @@ -363,6 +373,7 @@ def from_numpy( dim_order: str = "TCZYX", image_id: Optional[str] = None, name: Optional[str] = None, + image_type: Optional[str] = None, channel_names: Optional[Sequence[str]] = None, acquisition_datetime: Optional[datetime] = None, clamp_to_uint16: bool = True, @@ -385,6 +396,8 @@ def from_numpy( Supported examples: "YX", "ZYX", "CYX", "CZYX", "TYX", "TCYX", "TCZYX". image_id, name : Optional[str] Identifiers to embed in the record. 
+ image_type : Optional[str] + Open-ended image kind (e.g., "image", "label"). channel_names : Optional[Sequence[str]] Names for channels; defaults to C0..C{n-1}. acquisition_datetime : Optional[datetime] @@ -496,6 +509,7 @@ def from_numpy( return to_ome_arrow( image_id=str(image_id or "unnamed"), name=str(name or "unknown"), + image_type=image_type, acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc), dimension_order=meta_dim_order, dtype=dtype_str, @@ -518,6 +532,7 @@ def from_tiff( tiff_path: str | Path, image_id: Optional[str] = None, name: Optional[str] = None, + image_type: Optional[str] = None, channel_names: Optional[Sequence[str]] = None, acquisition_datetime: Optional[datetime] = None, clamp_to_uint16: bool = True, @@ -532,6 +547,7 @@ def from_tiff( tiff_path: Path to a TIFF readable by bioio. image_id: Optional stable image identifier (defaults to stem). name: Optional human label (defaults to file name). + image_type: Optional image kind (e.g., "image", "label"). channel_names: Optional channel names; defaults to C0..C{n-1}. acquisition_datetime: Optional acquisition time (UTC now if None). clamp_to_uint16: If True, clamp/cast planes to uint16. @@ -601,6 +617,7 @@ def from_tiff( return to_ome_arrow( image_id=img_id, name=display_name, + image_type=image_type, acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc), dimension_order=dim_order, dtype="uint16", @@ -627,6 +644,7 @@ def from_stack_pattern_path( channel_names: Optional[List[str]] = None, image_id: Optional[str] = None, name: Optional[str] = None, + image_type: Optional[str] = None, ) -> pa.StructScalar: """Build an OME-Arrow record from a filename pattern describing a stack. @@ -638,6 +656,7 @@ def from_stack_pattern_path( channel_names: Optional list of channel names to apply. image_id: Optional image identifier override. name: Optional display name override. + image_type: Optional image kind (e.g., "image", "label"). Returns: A validated OME-Arrow StructScalar describing the stack. @@ -907,6 +926,7 @@ def _ensure_u16(arr: np.ndarray) -> np.ndarray: return to_ome_arrow( image_id=str(img_id), name=str(display_name), + image_type=image_type, acquisition_datetime=None, dimension_order=dim_order, dtype="uint16", @@ -929,6 +949,7 @@ def from_ome_zarr( zarr_path: str | Path, image_id: Optional[str] = None, name: Optional[str] = None, + image_type: Optional[str] = None, channel_names: Optional[Sequence[str]] = None, acquisition_datetime: Optional[datetime] = None, clamp_to_uint16: bool = True, @@ -947,6 +968,8 @@ def from_ome_zarr( Optional stable image identifier (defaults to directory stem). name: Optional display name (defaults to directory name). + image_type: + Optional image kind (e.g., "image", "label"). channel_names: Optional list of channel names. Defaults to C0, C1, ... acquisition_datetime: @@ -1028,6 +1051,7 @@ def from_ome_zarr( return to_ome_arrow( image_id=img_id, name=display_name, + image_type=image_type, acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc), dimension_order=dim_order, dtype="uint16", diff --git a/src/ome_arrow/meta.py b/src/ome_arrow/meta.py index 86ea11d..c4b5395 100644 --- a/src/ome_arrow/meta.py +++ b/src/ome_arrow/meta.py @@ -12,6 +12,7 @@ # OME_ARROW_STRUCT: ome-arrow record (describes one image/value). # - type/version: quick identity & evolution. # - id/name/acquisition_datetime: identity & provenance. +# - image_type: open-ended image kind (e.g., "image", "label"). # - pixels_meta: pixels struct (sizes, units, channels). 
# - planes: list of planes struct entries, one per (t,c,z). # - masks: reserved for future labels/ROIs (placeholder). @@ -21,6 +22,7 @@ pa.field("version", pa.string()), # e.g., "1.0.0" pa.field("id", pa.string()), # stable image identifier pa.field("name", pa.string()), # human label + pa.field("image_type", pa.string()), # open-ended (e.g., "image", "label") pa.field("acquisition_datetime", pa.timestamp("us")), # PIXELS: OME-like "Pixels" header summarizing shape & scale. # - dimension_order: hint like "XYZCT" (or "XYCT" when Z==1). diff --git a/tests/conftest.py b/tests/conftest.py index 9943b86..518ca5d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,6 +17,7 @@ def example_correct_data() -> dict: "version": "1.0.0", "id": "img-0001", "name": "Example image", + "image_type": "image", "acquisition_datetime": datetime(2025, 1, 1, 12, 0, 0), "pixels_meta": { "dimension_order": "XYCT", # Z==1, so XYCT is fine From 79729ef3f89ceafd00216cbb31ccf831b099bae2 Mon Sep 17 00:00:00 2001 From: d33bs Date: Sat, 10 Jan 2026 16:52:40 -0700 Subject: [PATCH 2/5] ensure we can override and add test --- src/ome_arrow/core.py | 59 ++++++++++++++++++++++++++++++++++++++++--- tests/test_core.py | 12 +++++++++ 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/src/ome_arrow/core.py b/src/ome_arrow/core.py index d2b1b7f..b473621 100644 --- a/src/ome_arrow/core.py +++ b/src/ome_arrow/core.py @@ -59,6 +59,7 @@ def __init__( tcz: Tuple[int, int, int] = (0, 0, 0), column_name: str = "ome_arrow", row_index: int = 0, + image_type: str | None = None, ) -> None: """ Construct an OMEArrow from: @@ -71,6 +72,7 @@ def __init__( with from_numpy defaults) - a dict already matching the OME-Arrow schema - a pa.StructScalar already typed to OME_ARROW_STRUCT + - optionally override/set image_type metadata on ingest """ # set the tcz for viewing @@ -83,6 +85,7 @@ def __init__( default_dim_for_unspecified="C", map_series_to="T", clamp_to_uint16=True, + image_type=image_type, ) # --- 2) String path/URL: OME-Zarr / OME-Parquet / OME-TIFF --------------- @@ -98,6 +101,14 @@ def __init__( or (path.exists() and path.is_dir() and path.suffix.lower() == ".zarr") ): self.data = from_ome_zarr(s) + if image_type is not None: + self.data = pa.scalar( + { + **self.data.as_py(), + "image_type": str(image_type), + }, + type=OME_ARROW_STRUCT, + ) # OME-Parquet elif s.lower().endswith((".parquet", ".pq")) or path.suffix.lower() in { @@ -107,18 +118,42 @@ def __init__( self.data = from_ome_parquet( s, column_name=column_name, row_index=row_index ) + if image_type is not None: + self.data = pa.scalar( + { + **self.data.as_py(), + "image_type": str(image_type), + }, + type=OME_ARROW_STRUCT, + ) # Vortex elif s.lower().endswith(".vortex") or path.suffix.lower() == ".vortex": self.data = from_ome_vortex( s, column_name=column_name, row_index=row_index ) + if image_type is not None: + self.data = pa.scalar( + { + **self.data.as_py(), + "image_type": str(image_type), + }, + type=OME_ARROW_STRUCT, + ) # TIFF elif path.suffix.lower() in {".tif", ".tiff"} or s.lower().endswith( (".tif", ".tiff") ): self.data = from_tiff(s) + if image_type is not None: + self.data = pa.scalar( + { + **self.data.as_py(), + "image_type": str(image_type), + }, + type=OME_ARROW_STRUCT, + ) elif path.exists() and path.is_dir(): raise ValueError( @@ -140,15 +175,33 @@ def __init__( # Uses from_numpy defaults: dim_order="TCZYX", clamp_to_uint16=True, etc. # If the array is YX/ZYX/CYX/etc., # from_numpy will expand/reorder accordingly. 
- self.data = from_numpy(data) + self.data = from_numpy(data, image_type=image_type) # --- 4) Already-typed Arrow scalar --------------------------------------- elif isinstance(data, pa.StructScalar): - self.data = data + if image_type is None: + self.data = data + else: + self.data = pa.scalar( + { + **data.as_py(), + "image_type": str(image_type), + }, + type=OME_ARROW_STRUCT, + ) # --- 5) Plain dict matching the schema ----------------------------------- elif isinstance(data, dict): - self.data = pa.scalar(data, type=OME_ARROW_STRUCT) + if image_type is None: + self.data = pa.scalar(data, type=OME_ARROW_STRUCT) + else: + self.data = pa.scalar( + { + **data, + "image_type": str(image_type), + }, + type=OME_ARROW_STRUCT, + ) # --- otherwise ------------------------------------------------------------ else: diff --git a/tests/test_core.py b/tests/test_core.py index 33484cc..56906e1 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -369,6 +369,18 @@ def test_vortex_roundtrip(tmp_path: pathlib.Path) -> None: assert reloaded.info() == oa.info() +def test_parquet_roundtrip_preserves_image_type(tmp_path: pathlib.Path) -> None: + """Ensure image_type round-trips through OME-Parquet.""" + arr = np.arange(16, dtype=np.uint16).reshape(1, 1, 1, 4, 4) + oa = OMEArrow(arr, image_type="label") + out = tmp_path / "example.ome.parquet" + + oa.export(how="omeparquet", out=str(out)) + reloaded = OMEArrow(str(out)) + + assert reloaded.data.as_py()["image_type"] == "label" + + def test_vortex_custom_column_name(tmp_path: pathlib.Path) -> None: """Ensure custom Vortex column names are preserved on round-trip.""" pytest.importorskip( From eda1da4c39d3785d6a9853769fdf615a8754ed74 Mon Sep 17 00:00:00 2001 From: d33bs Date: Thu, 15 Jan 2026 08:39:33 -0700 Subject: [PATCH 3/5] updates based on nitpicks --- src/ome_arrow/core.py | 70 +++++++++++++---------------------------- src/ome_arrow/ingest.py | 4 ++- 2 files changed, 25 insertions(+), 49 deletions(-) diff --git a/src/ome_arrow/core.py b/src/ome_arrow/core.py index b473621..402701d 100644 --- a/src/ome_arrow/core.py +++ b/src/ome_arrow/core.py @@ -102,13 +102,7 @@ def __init__( ): self.data = from_ome_zarr(s) if image_type is not None: - self.data = pa.scalar( - { - **self.data.as_py(), - "image_type": str(image_type), - }, - type=OME_ARROW_STRUCT, - ) + self.data = self._wrap_with_image_type(self.data, image_type) # OME-Parquet elif s.lower().endswith((".parquet", ".pq")) or path.suffix.lower() in { @@ -119,13 +113,7 @@ def __init__( s, column_name=column_name, row_index=row_index ) if image_type is not None: - self.data = pa.scalar( - { - **self.data.as_py(), - "image_type": str(image_type), - }, - type=OME_ARROW_STRUCT, - ) + self.data = self._wrap_with_image_type(self.data, image_type) # Vortex elif s.lower().endswith(".vortex") or path.suffix.lower() == ".vortex": @@ -133,13 +121,7 @@ def __init__( s, column_name=column_name, row_index=row_index ) if image_type is not None: - self.data = pa.scalar( - { - **self.data.as_py(), - "image_type": str(image_type), - }, - type=OME_ARROW_STRUCT, - ) + self.data = self._wrap_with_image_type(self.data, image_type) # TIFF elif path.suffix.lower() in {".tif", ".tiff"} or s.lower().endswith( @@ -147,13 +129,7 @@ def __init__( ): self.data = from_tiff(s) if image_type is not None: - self.data = pa.scalar( - { - **self.data.as_py(), - "image_type": str(image_type), - }, - type=OME_ARROW_STRUCT, - ) + self.data = self._wrap_with_image_type(self.data, image_type) elif path.exists() and path.is_dir(): raise 
ValueError( @@ -179,29 +155,15 @@ def __init__( # --- 4) Already-typed Arrow scalar --------------------------------------- elif isinstance(data, pa.StructScalar): - if image_type is None: - self.data = data - else: - self.data = pa.scalar( - { - **data.as_py(), - "image_type": str(image_type), - }, - type=OME_ARROW_STRUCT, - ) + self.data = data + if image_type is not None: + self.data = self._wrap_with_image_type(self.data, image_type) # --- 5) Plain dict matching the schema ----------------------------------- elif isinstance(data, dict): - if image_type is None: - self.data = pa.scalar(data, type=OME_ARROW_STRUCT) - else: - self.data = pa.scalar( - { - **data, - "image_type": str(image_type), - }, - type=OME_ARROW_STRUCT, - ) + self.data = pa.scalar(data, type=OME_ARROW_STRUCT) + if image_type is not None: + self.data = self._wrap_with_image_type(self.data, image_type) # --- otherwise ------------------------------------------------------------ else: @@ -209,6 +171,18 @@ def __init__( "input data must be str, dict, pa.StructScalar, or numpy.ndarray" ) + @staticmethod + def _wrap_with_image_type( + data: pa.StructScalar, image_type: str + ) -> pa.StructScalar: + return pa.scalar( + { + **data.as_py(), + "image_type": str(image_type), + }, + type=OME_ARROW_STRUCT, + ) + def export( # noqa: PLR0911 self, how: str = "numpy", diff --git a/src/ome_arrow/ingest.py b/src/ome_arrow/ingest.py index 86e0276..25ea49b 100644 --- a/src/ome_arrow/ingest.py +++ b/src/ome_arrow/ingest.py @@ -283,7 +283,9 @@ def to_ome_arrow( version: Specification version string. image_id: Unique image identifier. name: Human-friendly name. - image_type: Open-ended image kind (e.g., "image", "label"). + image_type: Open-ended image kind (e.g., "image", "label"). Note that + from_* helpers pass image_type=None by default to preserve + "unspecified" vs explicitly set ("image"). acquisition_datetime: Datetime of acquisition (defaults to now). dimension_order: Dimension order ("XYZCT" or "XYCT"). dtype: Pixel data type string (e.g., "uint16"). From ae7bc256139270325cac914266021a1a3247a572 Mon Sep 17 00:00:00 2001 From: d33bs Date: Thu, 15 Jan 2026 08:39:45 -0700 Subject: [PATCH 4/5] Update .pre-commit-config.yaml --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0226bd6..8304be9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: additional_dependencies: - mdformat-gfm - repo: https://github.com/adrienverge/yamllint - rev: v1.37.1 + rev: v1.38.0 hooks: - id: yamllint exclude: pre-commit-config.yaml From 6c8b9e713c970cd014d1a9091d08ea12e0f1778b Mon Sep 17 00:00:00 2001 From: d33bs Date: Thu, 15 Jan 2026 17:23:33 -0700 Subject: [PATCH 5/5] Update .pre-commit-config.yaml --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8304be9..d7bf0c0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,7 +39,7 @@ repos: - id: yamllint exclude: pre-commit-config.yaml - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.14.11" + rev: "v0.14.13" hooks: - id: ruff-format - id: ruff-check
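
A minimal usage sketch of the new image_type field for reviewers, mirroring
test_parquet_roundtrip_preserves_image_type from patch 2; the import path and
the output filename below are assumptions for illustration, not part of this
patch series.

    import numpy as np
    from ome_arrow.core import OMEArrow

    # Tiny TCZYX stack tagged as a "label" image via the new constructor argument.
    arr = np.arange(16, dtype=np.uint16).reshape(1, 1, 1, 4, 4)
    oa = OMEArrow(arr, image_type="label")
    print(oa.data.as_py()["image_type"])  # -> "label"

    # Round-trip through OME-Parquet; files written before this change load with
    # image_type defaulting to None via the back-compat shim in ingest.py.
    oa.export(how="omeparquet", out="example.ome.parquet")
    reloaded = OMEArrow("example.ome.parquet")
    print(reloaded.data.as_py()["image_type"])  # -> "label"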