Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ repos:
additional_dependencies:
- mdformat-gfm
- repo: https://github.com/adrienverge/yamllint
rev: v1.37.1
rev: v1.38.0
hooks:
- id: yamllint
exclude: pre-commit-config.yaml
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.14.10"
rev: "v0.14.13"
hooks:
- id: ruff-format
- id: ruff-check
Expand Down
29 changes: 28 additions & 1 deletion src/ome_arrow/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def __init__(
tcz: Tuple[int, int, int] = (0, 0, 0),
column_name: str = "ome_arrow",
row_index: int = 0,
image_type: str | None = None,
) -> None:
"""
Construct an OMEArrow from:
Expand All @@ -71,6 +72,7 @@ def __init__(
with from_numpy defaults)
- a dict already matching the OME-Arrow schema
- a pa.StructScalar already typed to OME_ARROW_STRUCT
- optionally override/set image_type metadata on ingest
"""

# set the tcz for viewing
Expand All @@ -83,6 +85,7 @@ def __init__(
default_dim_for_unspecified="C",
map_series_to="T",
clamp_to_uint16=True,
image_type=image_type,
)

# --- 2) String path/URL: OME-Zarr / OME-Parquet / OME-TIFF ---------------
Expand All @@ -98,6 +101,8 @@ def __init__(
or (path.exists() and path.is_dir() and path.suffix.lower() == ".zarr")
):
self.data = from_ome_zarr(s)
if image_type is not None:
self.data = self._wrap_with_image_type(self.data, image_type)

# OME-Parquet
elif s.lower().endswith((".parquet", ".pq")) or path.suffix.lower() in {
Expand All @@ -107,18 +112,24 @@ def __init__(
self.data = from_ome_parquet(
s, column_name=column_name, row_index=row_index
)
if image_type is not None:
self.data = self._wrap_with_image_type(self.data, image_type)

# Vortex
elif s.lower().endswith(".vortex") or path.suffix.lower() == ".vortex":
self.data = from_ome_vortex(
s, column_name=column_name, row_index=row_index
)
if image_type is not None:
self.data = self._wrap_with_image_type(self.data, image_type)

# TIFF
elif path.suffix.lower() in {".tif", ".tiff"} or s.lower().endswith(
(".tif", ".tiff")
):
self.data = from_tiff(s)
if image_type is not None:
self.data = self._wrap_with_image_type(self.data, image_type)

elif path.exists() and path.is_dir():
raise ValueError(
Expand All @@ -140,22 +151,38 @@ def __init__(
# Uses from_numpy defaults: dim_order="TCZYX", clamp_to_uint16=True, etc.
# If the array is YX/ZYX/CYX/etc.,
# from_numpy will expand/reorder accordingly.
self.data = from_numpy(data)
self.data = from_numpy(data, image_type=image_type)

# --- 4) Already-typed Arrow scalar ---------------------------------------
elif isinstance(data, pa.StructScalar):
self.data = data
if image_type is not None:
self.data = self._wrap_with_image_type(self.data, image_type)

# --- 5) Plain dict matching the schema -----------------------------------
elif isinstance(data, dict):
self.data = pa.scalar(data, type=OME_ARROW_STRUCT)
if image_type is not None:
self.data = self._wrap_with_image_type(self.data, image_type)

# --- otherwise ------------------------------------------------------------
else:
raise TypeError(
"input data must be str, dict, pa.StructScalar, or numpy.ndarray"
)

@staticmethod
def _wrap_with_image_type(
    data: pa.StructScalar, image_type: str
) -> pa.StructScalar:
    """Return a copy of *data* with its ``image_type`` field overridden.

    The scalar is round-tripped through a Python dict so the result is
    re-typed against the canonical OME_ARROW_STRUCT schema.
    """
    record = data.as_py()
    record["image_type"] = str(image_type)
    return pa.scalar(record, type=OME_ARROW_STRUCT)

def export( # noqa: PLR0911
self,
how: str = "numpy",
Expand Down
28 changes: 27 additions & 1 deletion src/ome_arrow/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,9 @@ def _ome_arrow_from_table(
# 1) Locate the OME-Arrow column
def _struct_matches_ome_fields(t: pa.StructType) -> bool:
    """True if struct type *t* carries every required OME-Arrow field.

    ``image_type`` is optional for back-compat with older files, and
    extra (unknown) fields in *t* are tolerated.
    """
    col_fields = {f.name for f in t}
    required = {f.name for f in OME_ARROW_STRUCT} - {"image_type"}
    return required <= col_fields

requested_name = column_name
candidate_col = None
Expand Down Expand Up @@ -105,6 +106,11 @@ def _struct_matches_ome_fields(t: pa.StructType) -> bool:

# 2) Extract the row as a Python dict
record_dict: Dict[str, Any] = candidate_col.slice(row_index, 1).to_pylist()[0]
# Back-compat: older files won't include image_type; default to None.
if "image_type" not in record_dict:
record_dict["image_type"] = None
# Drop unexpected fields before casting to the canonical schema.
record_dict = {f.name: record_dict.get(f.name) for f in OME_ARROW_STRUCT}

# 3) Reconstruct a typed StructScalar using the canonical schema
scalar = pa.scalar(record_dict, type=OME_ARROW_STRUCT)
Expand Down Expand Up @@ -248,6 +254,7 @@ def to_ome_arrow(
version: str = OME_ARROW_TAG_VERSION,
image_id: str = "unnamed",
name: str = "unknown",
image_type: str | None = "image",
acquisition_datetime: Optional[datetime] = None,
dimension_order: str = "XYZCT",
dtype: str = "uint16",
Expand Down Expand Up @@ -276,6 +283,9 @@ def to_ome_arrow(
version: Specification version string.
image_id: Unique image identifier.
name: Human-friendly name.
image_type: Open-ended image kind (e.g., "image", "label"). Note that
from_* helpers pass image_type=None by default to preserve
"unspecified" vs explicitly set ("image").
acquisition_datetime: Datetime of acquisition (defaults to now).
dimension_order: Dimension order ("XYZCT" or "XYCT").
dtype: Pixel data type string (e.g., "uint16").
Expand All @@ -299,6 +309,7 @@ def to_ome_arrow(
version = str(version)
image_id = str(image_id)
name = str(name)
image_type = None if image_type is None else str(image_type)
dimension_order = str(dimension_order)
dtype = str(dtype)
physical_size_unit = str(physical_size_unit)
Expand Down Expand Up @@ -333,6 +344,7 @@ def to_ome_arrow(
"version": version,
"id": image_id,
"name": name,
"image_type": image_type,
"acquisition_datetime": acquisition_datetime or datetime.now(timezone.utc),
"pixels_meta": {
"dimension_order": dimension_order,
Expand Down Expand Up @@ -363,6 +375,7 @@ def from_numpy(
dim_order: str = "TCZYX",
image_id: Optional[str] = None,
name: Optional[str] = None,
image_type: Optional[str] = None,
channel_names: Optional[Sequence[str]] = None,
acquisition_datetime: Optional[datetime] = None,
clamp_to_uint16: bool = True,
Expand All @@ -385,6 +398,8 @@ def from_numpy(
Supported examples: "YX", "ZYX", "CYX", "CZYX", "TYX", "TCYX", "TCZYX".
image_id, name : Optional[str]
Identifiers to embed in the record.
image_type : Optional[str]
Open-ended image kind (e.g., "image", "label").
channel_names : Optional[Sequence[str]]
Names for channels; defaults to C0..C{n-1}.
acquisition_datetime : Optional[datetime]
Expand Down Expand Up @@ -496,6 +511,7 @@ def from_numpy(
return to_ome_arrow(
image_id=str(image_id or "unnamed"),
name=str(name or "unknown"),
image_type=image_type,
acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc),
dimension_order=meta_dim_order,
dtype=dtype_str,
Expand All @@ -518,6 +534,7 @@ def from_tiff(
tiff_path: str | Path,
image_id: Optional[str] = None,
name: Optional[str] = None,
image_type: Optional[str] = None,
channel_names: Optional[Sequence[str]] = None,
acquisition_datetime: Optional[datetime] = None,
clamp_to_uint16: bool = True,
Expand All @@ -532,6 +549,7 @@ def from_tiff(
tiff_path: Path to a TIFF readable by bioio.
image_id: Optional stable image identifier (defaults to stem).
name: Optional human label (defaults to file name).
image_type: Optional image kind (e.g., "image", "label").
channel_names: Optional channel names; defaults to C0..C{n-1}.
acquisition_datetime: Optional acquisition time (UTC now if None).
clamp_to_uint16: If True, clamp/cast planes to uint16.
Expand Down Expand Up @@ -601,6 +619,7 @@ def from_tiff(
return to_ome_arrow(
image_id=img_id,
name=display_name,
image_type=image_type,
acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc),
dimension_order=dim_order,
dtype="uint16",
Expand All @@ -627,6 +646,7 @@ def from_stack_pattern_path(
channel_names: Optional[List[str]] = None,
image_id: Optional[str] = None,
name: Optional[str] = None,
image_type: Optional[str] = None,
) -> pa.StructScalar:
"""Build an OME-Arrow record from a filename pattern describing a stack.

Expand All @@ -638,6 +658,7 @@ def from_stack_pattern_path(
channel_names: Optional list of channel names to apply.
image_id: Optional image identifier override.
name: Optional display name override.
image_type: Optional image kind (e.g., "image", "label").

Returns:
A validated OME-Arrow StructScalar describing the stack.
Expand Down Expand Up @@ -907,6 +928,7 @@ def _ensure_u16(arr: np.ndarray) -> np.ndarray:
return to_ome_arrow(
image_id=str(img_id),
name=str(display_name),
image_type=image_type,
acquisition_datetime=None,
dimension_order=dim_order,
dtype="uint16",
Expand All @@ -929,6 +951,7 @@ def from_ome_zarr(
zarr_path: str | Path,
image_id: Optional[str] = None,
name: Optional[str] = None,
image_type: Optional[str] = None,
channel_names: Optional[Sequence[str]] = None,
acquisition_datetime: Optional[datetime] = None,
clamp_to_uint16: bool = True,
Expand All @@ -947,6 +970,8 @@ def from_ome_zarr(
Optional stable image identifier (defaults to directory stem).
name:
Optional display name (defaults to directory name).
image_type:
Optional image kind (e.g., "image", "label").
channel_names:
Optional list of channel names. Defaults to C0, C1, ...
acquisition_datetime:
Expand Down Expand Up @@ -1028,6 +1053,7 @@ def from_ome_zarr(
return to_ome_arrow(
image_id=img_id,
name=display_name,
image_type=image_type,
acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc),
dimension_order=dim_order,
dtype="uint16",
Expand Down
2 changes: 2 additions & 0 deletions src/ome_arrow/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# OME_ARROW_STRUCT: ome-arrow record (describes one image/value).
# - type/version: quick identity & evolution.
# - id/name/acquisition_datetime: identity & provenance.
# - image_type: open-ended image kind (e.g., "image", "label").
# - pixels_meta: pixels struct (sizes, units, channels).
# - planes: list of planes struct entries, one per (t,c,z).
# - masks: reserved for future labels/ROIs (placeholder).
Expand All @@ -21,6 +22,7 @@
pa.field("version", pa.string()), # e.g., "1.0.0"
pa.field("id", pa.string()), # stable image identifier
pa.field("name", pa.string()), # human label
pa.field("image_type", pa.string()), # open-ended (e.g., "image", "label")
pa.field("acquisition_datetime", pa.timestamp("us")),
# PIXELS: OME-like "Pixels" header summarizing shape & scale.
# - dimension_order: hint like "XYZCT" (or "XYCT" when Z==1).
Expand Down
1 change: 1 addition & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def example_correct_data() -> dict:
"version": "1.0.0",
"id": "img-0001",
"name": "Example image",
"image_type": "image",
"acquisition_datetime": datetime(2025, 1, 1, 12, 0, 0),
"pixels_meta": {
"dimension_order": "XYCT", # Z==1, so XYCT is fine
Expand Down
12 changes: 12 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,18 @@ def test_vortex_roundtrip(tmp_path: pathlib.Path) -> None:
assert reloaded.info() == oa.info()


def test_parquet_roundtrip_preserves_image_type(tmp_path: pathlib.Path) -> None:
    """Ensure image_type round-trips through OME-Parquet."""
    # 1x1x1x4x4 TCZYX stack of sequential uint16 values.
    pixels = np.arange(16, dtype=np.uint16).reshape(1, 1, 1, 4, 4)
    original = OMEArrow(pixels, image_type="label")
    target = tmp_path / "example.ome.parquet"

    original.export(how="omeparquet", out=str(target))
    restored = OMEArrow(str(target))

    # The explicitly-set kind must survive the write/read cycle.
    assert restored.data.as_py()["image_type"] == "label"


def test_vortex_custom_column_name(tmp_path: pathlib.Path) -> None:
"""Ensure custom Vortex column names are preserved on round-trip."""
pytest.importorskip(
Expand Down
Loading