diff --git a/formats/image-dimension-stacking-tool/.bumpversion.cfg b/formats/image-dimension-stacking-tool/.bumpversion.cfg index 7b757ea47..b858082da 100644 --- a/formats/image-dimension-stacking-tool/.bumpversion.cfg +++ b/formats/image-dimension-stacking-tool/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.2 +current_version = 0.2.0-dev0 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? diff --git a/formats/image-dimension-stacking-tool/README.md b/formats/image-dimension-stacking-tool/README.md index c4d9aa7e3..fd5ddc7ef 100644 --- a/formats/image-dimension-stacking-tool/README.md +++ b/formats/image-dimension-stacking-tool/README.md @@ -1,19 +1,17 @@ -# Image dimension stacking(0.1.2) +# Image dimension stacking(0.2.0-dev0) This plugin leverages the [filepattern](https://filepattern2.readthedocs.io/en/latest/Home.html) library and employs the filepattern `groupBy` functionality to enable the matching of image filenames, facilitating their stacking into multi-dimensional images. The filepattern must include the variables `c`, `t`, and `z`. If all these variables are present in the pattern, the plugin will group images according to the order `z, c, t`. If only one variable is present in the file pattern, the plugin will group images according to that variable. - Currently, the plugin supports the following dimensions and user can choose the relevant variable for the `groupBy` input argument. -1. `tubhiswt_z{z:d+}_c{c:d+}_t{t:d+}.ome.tif`\ - Images are grouped based on `z` variable -2. `tubhiswt_.*_.*_t{t:d+}.ome.tif`\ - Images are grouped based on `t` variable -3. `00001_01_{c:d+}.ome.tif`\ - Images are grouped based on `c` variable - -#### Note: + +1. `tubhiswt_z{z:d+}_c{c:d+}_t{t:d+}.ome.tif`: Images are grouped based on `z` variable +2. `tubhiswt_.*_.*_t{t:d+}.ome.tif`: Images are grouped based on `t` variable +3. 
`00001_01_{c:d+}.ome.tif`: Images are grouped based on `c` variable + +## Note: + Filename patterns may consist of any other filepattern variables, combined with other valid regular expression arguments, excluding the `groupBy` variable. For more information on WIPP, visit the @@ -33,18 +31,20 @@ contents of `plugin.json` into the pop-up window and submit. This plugin takes three input argument and one output argument: -| Name | Description | I/O | Type | -|---------------|-------------------------|--------|--------| -| `--inpDir` | Input image collection | Input | Collection | -| `--filePattern` | Pattern to parse image files | Input | String | -| `--outDir` | Output image collection | Output | Collection | -| `--preview` | Generate a JSON file to view outputs | Output | Boolean | +| Name | Description | I/O | Type | +| --------------- | ------------------------------------ | ------ | ---------- | +| `--inpDir` | Input image collection | Input | Collection | +| `--filePattern` | Pattern to parse image files | Input | String | +| `--axis` | Dimension to stack images | Input | String | +| `--outDir` | Output image collection | Output | Collection | +| `--preview` | Generate a JSON file to view outputs | Output | Boolean | -### Run the Docker Container +## Run the Docker Container ```bash -docker run -v /path/to/data:/data polusai/image-dimension-stacking-plugin:0.1.2-dev \ - --inpDir "Path/To/Data" \ +docker run -v /path/to/data:/data polusai/image-dimension-stacking-tool:0.2.0-dev0 \ + --inpDir "path/to/data" \ --filePattern "tubhiswt_C1-z{z:d+}.ome.tif" \ + --axis "z" \ --outDir "Path/To/Output/Dir" ``` diff --git a/formats/image-dimension-stacking-tool/VERSION b/formats/image-dimension-stacking-tool/VERSION index d917d3e26..ce0f6f878 100644 --- a/formats/image-dimension-stacking-tool/VERSION +++ b/formats/image-dimension-stacking-tool/VERSION @@ -1 +1 @@ -0.1.2 +0.2.0-dev0 diff --git a/formats/image-dimension-stacking-tool/ict.yaml 
b/formats/image-dimension-stacking-tool/ict.yaml index 28281c6f0..171ddf8d8 100644 --- a/formats/image-dimension-stacking-tool/ict.yaml +++ b/formats/image-dimension-stacking-tool/ict.yaml @@ -1,8 +1,9 @@ author: - Nick Schaub - Hamdah Shafqat Abbasi +- Najib Ishaq contact: nick.schaub@nih.gov -container: polusai/image-dimension-stacking-tool:0.1.2 +container: polusai/image-dimension-stacking-tool:0.2.0-dev0 description: Stacking of image dimensions (c, z, t) to create a multidimensional image. entrypoint: python3 -m polus.images.formats.image_dimension_stacking inputs: @@ -18,6 +19,12 @@ inputs: name: filePattern required: true type: string +- description: The axis to stack the images on. One of 'c', 'z', or 't'. + format: + - string + name: axis + required: true + type: string - description: Generate an output preview format: - boolean @@ -44,8 +51,12 @@ ui: key: inputs.filePattern title: Filename pattern type: text +- description: The axis to stack the images on. One of 'c', 'z', or 't'. 
+ key: inputs.axis + title: Axis + type: text - description: Generate an output preview key: inputs.preview title: Generate preview type: boolean -version: 0.1.2-dev0 +version: 0.2.0-dev0 diff --git a/formats/image-dimension-stacking-tool/plugin.json b/formats/image-dimension-stacking-tool/plugin.json index 62632ca57..d3a19e7e4 100644 --- a/formats/image-dimension-stacking-tool/plugin.json +++ b/formats/image-dimension-stacking-tool/plugin.json @@ -1,13 +1,13 @@ { "name": "Image Dimension Stacking", - "version": "0.1.2", + "version": "0.2.0-dev0", "title": "Stacking of image dimensions (c, z, t) to create a multidimensional image.", "description": "Stacking of image dimensions (c, z, t) to create a multidimensional image.", "author": "Nick Schaub (nick.schaub@nih.gov), Hamdah Shafqat Abbasi (hamdahshafqat.abbasi@nih.gov)", "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", "repository": "https://github.com/PolusAI/image-tools", "website": "https://ncats.nih.gov/preclinical/core/informatics", - "containerId": "polusai/image-dimension-stacking-tool:0.1.2", + "containerId": "polusai/image-dimension-stacking-tool:0.2.0-dev0", "baseCommand": [ "python3", "-m", @@ -26,6 +26,13 @@ "description": "Filename pattern used to separate data.", "required": "True" }, + "axis": { + "type": "string", + "title": "Axis", + "description": "Axis to stack. One of 'c', 'z', 't'.", + "required": "False", + "default": "z" + }, "preview": { "type": "boolean", "title": "Preview", @@ -52,6 +59,13 @@ "description": "Filename pattern used to separate data.", "required": "True" }, + "axis": { + "type": "string", + "title": "Axis", + "description": "Axis to stack. 
One of 'c', 'z', 't'.", + "required": "False", + "default": "z" + }, "preview": { "type": "boolean", "title": "Preview example output of this plugin", diff --git a/formats/image-dimension-stacking-tool/pyproject.toml b/formats/image-dimension-stacking-tool/pyproject.toml index 4cc57c09d..25b846cd9 100644 --- a/formats/image-dimension-stacking-tool/pyproject.toml +++ b/formats/image-dimension-stacking-tool/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "polus-images-formats-image-dimension-stacking" -version = "0.1.2" +version = "0.2.0-dev0" description = "Stacking multi-dimensional images" authors = [ "Nick Schaub ", @@ -12,24 +12,20 @@ packages = [{include = "polus", from = "src"}] [tool.poetry.dependencies] python = ">=3.9,<3.12" -bfio = {version = "2.4.1", extras = ["all"]} +bfio = {version = "2.4.3", extras = ["all"]} typer = "^0.7.0" tqdm = "^4.66.1" filepattern = "^2.0.4" -pydantic = "^1.10.4" -preadator="0.4.0.dev2" -numpy = "<2.0.0" +numpy = ">=2.0,<2.1" [tool.poetry.group.dev.dependencies] bump2version = "^1.0.1" -flake8 = "^6.0.0" pre-commit = "^3.2.1" -flake8-docstrings = "^1.7.0" -black = "^23.3.0" -mypy = "^1.1.1" -pytest = "^7.2.2" -ruff = "^0.0.270" -scikit-image = "^0.22.0" +pytest = "^8.3.2" +pytest-sugar = "^1.0.0" +pytest-xdist = "^3.6.1" +scikit-image = "^0.24.0" +scipy = "<1.14" [build-system] requires = ["poetry-core"] diff --git a/formats/image-dimension-stacking-tool/run-plugin.sh b/formats/image-dimension-stacking-tool/run-plugin.sh index a1cc45c10..f0d078bb1 100644 --- a/formats/image-dimension-stacking-tool/run-plugin.sh +++ b/formats/image-dimension-stacking-tool/run-plugin.sh @@ -5,8 +5,9 @@ datapath=$(readlink --canonicalize ../data) # Inputs inpDir=/data/path_to_files - filePattern="tubhiswt_z{z:d+}_c{c:d+}_t{t:d+}.ome.tif" +axis="z" + # Output paths outDir=/data/path_to_output @@ -19,4 +20,5 @@ docker run --mount type=bind,source=${datapath},target=/data/ \ polusai/image-dimension-stacking-plugin:${version} \ --inpDir ${inpDir} 
\ --filePattern ${filePattern} \ + --axis ${axis} \ --outDir ${outDir} diff --git a/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/__init__.py b/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/__init__.py index 9433afbfe..d706b3b3b 100644 --- a/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/__init__.py +++ b/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/__init__.py @@ -1,3 +1,14 @@ """Image dimension stacking package.""" -__version__ = "0.1.2" +from . import utils +from .dimension_stacking import copy_stack +from .dimension_stacking import write_stack + +__version__ = "0.2.0-dev0" + +__all__ = [ + "utils", + "copy_stack", + "write_stack", + "__version__", +] diff --git a/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/__main__.py b/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/__main__.py index 165eb6c20..e82c30d22 100644 --- a/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/__main__.py +++ b/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/__main__.py @@ -1,14 +1,17 @@ """Image dimension stacking package.""" + import json import logging +import os +import pathlib import warnings -from os import environ -from pathlib import Path -from typing import Any -import filepattern as fp -import polus.images.formats.image_dimension_stacking.dimension_stacking as st +import filepattern +import tqdm import typer +from polus.images.formats.image_dimension_stacking import copy_stack +from polus.images.formats.image_dimension_stacking import utils +from polus.images.formats.image_dimension_stacking import write_stack warnings.filterwarnings("ignore") @@ -16,95 +19,87 @@ format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", datefmt="%d-%b-%y 
%H:%M:%S", ) -POLUS_LOG = getattr(logging, environ.get("POLUS_LOG", "INFO")) -logger = logging.getLogger("polus.plugins.formats.image_dimension_stacking") +POLUS_LOG = getattr(logging, os.environ.get("POLUS_LOG", "INFO")) +logger = logging.getLogger("polus.images.formats.image_dimension_stacking") logger.setLevel(POLUS_LOG) -logging.getLogger("bfio").setLevel(POLUS_LOG) - - -app = typer.Typer(help="Stack multi dimensional image into single image.") - -def generate_preview(out_dir: Path, file_pattern: str) -> None: - """Generate preview of the plugin outputs.""" - with Path.open(Path(out_dir, "preview.json"), "w") as jfile: - out_json: dict[str, Any] = { - "filepattern": file_pattern, - "outDir": [], - } - - fps = fp.FilePattern(out_dir, file_pattern) - out_name = fps.output_name() - out_json["outDir"].append(out_name) - json.dump(out_json, jfile, indent=2) +app = typer.Typer() @app.command() def main( - inp_dir: Path = typer.Option( + inp_dir: pathlib.Path = typer.Option( ..., "--inpDir", - "-i", help="Path to input directory containing binary images.", + exists=True, + readable=True, + resolve_path=True, + file_okay=False, + dir_okay=True, ), file_pattern: str = typer.Option( - ".*", + ..., "--filePattern", - "-f", help="Filename pattern used to separate data.", ), - out_dir: Path = typer.Option( + axis: utils.StackableAxis = typer.Option( + utils.StackableAxis.Z, + "--axis", + help="Axis to stack images along.", + ), + out_dir: pathlib.Path = typer.Option( ..., "--outDir", - "-o", help="Output collection.", + exists=True, + writable=True, + resolve_path=True, + file_okay=False, + dir_okay=True, ), preview: bool = typer.Option( False, "--preview", - "-p", help="Generate preview of expected outputs.", ), ) -> None: - """Image dimension stacking plugin.""" - logger.info(f"--inpDir: {inp_dir}") - logger.info(f"--filePattern: {file_pattern}") - logger.info(f"--outDir: {out_dir}") - - if not inp_dir.exists(): - msg = "inpDir does not exist" - raise ValueError(msg, 
inp_dir) - - if not out_dir.exists(): - msg = "outDir does not exist" - raise ValueError(msg, out_dir) - - fps = fp.FilePattern(inp_dir, file_pattern) - list_val = ["c", "t", "z"] - variables = sorted([f for f in fps.get_variables() if f in list_val]) - - if len(variables) == 0: - msg = "Could not detect c, t or z variables in a pattern" + """Image dimension stacking tool.""" + # Get the file pattern + fp = filepattern.FilePattern(inp_dir, file_pattern) + variables = fp.get_variables() + + # Check if the axis is present among the variables + if axis.value not in variables: + msg = f"Axis {axis} not present among the variables {variables}." + logger.error(msg) raise ValueError(msg) - if variables == list_val or variables == ["z"]: - group_by = "z" - - if variables == ["c", "t"] or variables == ["c"]: - group_by = "c" + # Collect the files into groups to stack + groups: dict[pathlib.Path, list[pathlib.Path]] = {} - if variables == ["t"]: - group_by = "t" + # Group the files by all variables except the axis + group_by = [v for v in variables if v != axis.value] + for _, files in fp(group_by=group_by): + out_path = out_dir / fp.output_name(files) + groups[out_path] = [p for _, [p] in files] if preview: - generate_preview(out_dir=out_dir, file_pattern=file_pattern) - - st.dimension_stacking( - inp_dir=inp_dir, - file_pattern=file_pattern, - group_by=group_by, - out_dir=out_dir, - ) + with (out_dir / "preview.json").open("w") as f: + preview_data = {"outDir": [str(p) for p in groups]} + json.dump(preview_data, f, indent=2) + return + + # TODO: Use some parallelism here + for out_path, inp_paths in tqdm.tqdm( + groups.items(), + desc="Stacking groups", + total=len(groups), + ): + if str(out_path).endswith(".ome.zarr"): + copy_stack(inp_paths, axis, out_path) + else: + write_stack(inp_paths, axis, out_path) if __name__ == "__main__": diff --git a/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/dimension_stacking.py 
b/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/dimension_stacking.py index b05e25ad4..6bd69e3c1 100644 --- a/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/dimension_stacking.py +++ b/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/dimension_stacking.py @@ -1,218 +1,186 @@ -"""Image dimension stacking package.""" -import logging -import os -import re -import time -from concurrent.futures import as_completed -from multiprocessing import cpu_count -from pathlib import Path +"""Stacking images along a given dimension.""" -import filepattern as fp -import numpy as np -import preadator -from bfio import BioReader -from bfio import BioWriter -from tqdm import tqdm +import pathlib +import shutil -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -POLUS_IMG_EXT = os.environ.get("POLUS_IMG_EXT", ".ome.zarr") +import bfio -chunk_size = 1024 +from . import utils -num_workers = max([cpu_count(), 2]) +logger = utils.make_logger(__name__) -# Units for conversion -UNITS = { - "m": 10**9, - "cm": 10**7, - "mm": 10**6, - "µm": 10**3, - "nm": 1, - "Å": 10**-1, -} - - -def z_distance(file: Path) -> tuple[float, str]: - """Get physical z-distance. +def write_stack( + inp_paths: list[pathlib.Path], + axis: utils.StackableAxis, + out_path: pathlib.Path, +) -> None: + """Stack the input images along the given axis. - This estimates zdistance if not provided by averaging physical distances of x and y. + This will read all the images from the input directory and stack them along + the given axis. The output will be written to the output directory. Args: - file : Path to input image file - Returns: - A tuple of float and string values. + inp_paths: List of paths to input images. Should be sorted by filepattern. + axis: Axis to stack images along. + out_path: Path to output directory. 
""" - # Get some basic info about the files to stack - with BioReader(file) as br: - # Get the physical z-distance if available, set to physical x if not - ps_z = br.ps_z + logger.debug(f"Stacking images along {axis} axis.") + logger.debug(f"Input: {inp_paths}") + logger.debug(f"Output: {out_path}") + + # Get the metadata from the first image + with bfio.BioReader(inp_paths[0]) as reader: + metadata = reader.metadata + z_unit_distance = utils.z_unit_distance(reader) + + # Open all the input images + readers = [bfio.BioReader(p) for p in inp_paths] + + # Create the output writer + with bfio.BioWriter(out_path, metadata=metadata) as writer: + if axis.value == "z": + writer.Z = len(inp_paths) + writer.ps_z = z_unit_distance + elif axis.value == "c": + writer.C = len(inp_paths) + elif axis.value == "t": + writer.T = len(inp_paths) + + for y_min in range(0, writer.Y, utils.TILE_SIZE): + y_max = min(writer.Y, y_min + utils.TILE_SIZE) + + for x_min in range(0, writer.X, utils.TILE_SIZE): + x_max = min(writer.X, x_min + utils.TILE_SIZE) + + # Read the tiles from the input images + tiles = [ + axis.read_tile(r, (x_min, x_max), (y_min, y_max)) for r in readers + ] + + # Write the tiles to the output image + for i, tile in enumerate(tiles): + axis.write_tile(writer, (x_min, x_max), (y_min, y_max), tile, i) + + # Close the readers + [r.close() for r in readers] + + +def copy_stack( + inp_paths: list[pathlib.Path], + axis: utils.StackableAxis, + out_path: pathlib.Path, +) -> None: + """Copy the input images to the output directory. - # If the z-distances are undefined, average the x and y together - if None in ps_z: - # Get the size and units for x and y - x_val, xunits = br.ps_x - y_val, yunits = br.ps_y + This will copy the input images to the output directory without any stacking. - x_units = xunits.value - y_units = yunits.value + Args: + inp_paths: List of paths to input images. Should be sorted by filepattern. + axis: Axis to stack images along. 
+ out_path: Path to output directory. - # Convert x and y values to the same units and average - z_val = (x_val * UNITS[x_units] + y_val * UNITS[y_units]) / 2 + Raises: + ValueError: If any of the input images or the output image is not .ome.zarr. + """ + if not ( + all(p.name.endswith(".ome.zarr") for p in inp_paths) + and out_path.name.endswith(".ome.zarr") + ): + msg = "Cannot copy, not all files are .ome.zarr." + logger.error(msg) + raise ValueError(msg) + + logger.debug("Copying images.") + logger.debug(f"Input: {inp_paths}") + logger.debug(f"Output: {out_path}") + + for i, p in enumerate(inp_paths): + logger.info(f"Copying image {i} from {p.name} to {out_path.name}") + + # Get the metadata from the first image + with bfio.BioReader(inp_paths[0]) as reader: + metadata = reader.metadata + z_unit_distance = utils.z_unit_distance(reader) + + # Create the output writer + with bfio.BioWriter(out_path, metadata=metadata) as writer: + if axis.value == "z": + writer.Z = len(inp_paths) + writer.ps_z = z_unit_distance + writer[:, :, 0, :, :] = reader[:, :, 0, :, :] + elif axis.value == "c": + writer.C = len(inp_paths) + writer[:, :, :, 0, :] = reader[:, :, :, 0, :] + elif axis.value == "t": + writer.T = len(inp_paths) + writer[:, :, :, :, 0] = reader[:, :, :, :, 0] + + # writer[0:utils.TILE_SIZE, 0:utils.TILE_SIZE, 0, 0, 0] = numpy.zeros( + + # for y_min in range(0, writer.Y, utils.TILE_SIZE): + + # for x_min in range(0, writer.X, utils.TILE_SIZE): + + for i, p in enumerate(inp_paths): + copy_zarr_stack(p, i, axis, out_path) + + logger.debug(f"Done copying {out_path}") + + +def copy_zarr_stack( + inp_path: pathlib.Path, + index: int, + axis: utils.StackableAxis, + out_path: pathlib.Path, +) -> None: + """Copy image stack. - # Set z units to the smaller of the units between x and y - z_units = x_units if UNITS[x_units] < UNITS[y_units] else y_units + This function works like write_stack except it copies rather than performs a + read and write operation. 
- # Convert z to the proper unit scale - z_val /= UNITS[z_units] - ps_z = (z_val, z_units) + This can only be used by .ome.zarr files using v0.4. - if not ps_z: - msg = f"Unable to find physical z-size {ps_z}" - raise ValueError( - msg, - ) + Args: + inp_path: Path to input image file. + index: Index along dimension being stacked. + axis: Name of the axis being stacked. + out_path: Path to output zarr file. + """ + base_path = inp_path / "0" + destination = out_path / "0" - return ps_z + for src in base_path.rglob("*"): + chunk = str(src.relative_to(base_path)) + if chunk.startswith("."): + logger.debug(f"Skipping {chunk}") + continue + logger.debug(f"src: {src}") + logger.debug(f"Chunk: {chunk}") -def write_image_stack(file: Path, di: int, group_by: str, bw: BioWriter) -> None: - """Write image stack. + dims = chunk.split("." if "." in chunk else "/") - This function writes stacked images of either dimensions (z, c, t). + logger.debug(f"dims: {dims}") - Args: - file : Path to input image file - di : Index of dimension - group_by : A single string variable to group filenames by - bw : bfio.BioWriter. 
+ if len(dims) >= 3: # noqa: PLR2004 + if axis.value == "z": + dims[2] = str(index) + elif axis.value == "c": + dims[1] = str(index) + elif axis.value == "t": + dims[0] = str(index) - """ - with BioReader(file, max_workers=num_workers) as br: - for t in range(br.T): - for c in range(br.C): - for z in range(br.Z): - for y in range(0, br.Y, chunk_size): - y_max = min([br.Y, y + chunk_size]) - for x in range(0, br.X, chunk_size): - x_max = min([br.X, x + chunk_size]) - if group_by == "c": - tile = br[y:y_max, x:x_max, 0, c : c + 1, 0] - - bw[y:y_max, x:x_max, 0, di : di + 1, 0] = tile - if group_by == "t": - tile = br[y:y_max, x:x_max, 0, t : t + 1, 0] - bw[y:y_max, x:x_max, 0, 0, di : di + 1] = tile - - if group_by == "z": - tile = br[y:y_max, x:x_max, z : z + 1, 0, 0] - bw[y:y_max, x:x_max, di : di + 1, 0, 0] = tile - - -def dimension_stacking( - inp_dir: Path, - file_pattern: str, - group_by: str, - out_dir: Path, -) -> None: - """Image dimension stacking. + new_slice = "/".join(dims) + logger.debug(f"New slice: {new_slice}") - This function enables to write stack image of dimensions (z, c, t). - inp_dir : Path to input directory containing images - file_pattern : Pattern to parse image files - group_by : A single string variable to group filenames by - out_dir : Path to output directory. 
+ destination.joinpath(new_slice).parent.mkdir(parents=True, exist_ok=True) + dest = destination.joinpath(new_slice) - """ - fps = fp.FilePattern(inp_dir, file_pattern) - groups = [fi[0] for fi, _ in fps(group_by=group_by)] - dimensions = [v for t in groups for v in t if isinstance(v, int)] - dim_min = min(dimensions) - dim_max = max(dimensions) - replace_value = f"({dim_min}-{dim_max})" - - # Get the number of layers to stack - dim_size = len(dimensions) - - group_range = np.unique([len(f) for gp, f in fps(group_by=group_by)])[0] - - for gi in range(0, group_range): - images = [f2[gi][1][0].name for f1, f2 in fps(group_by=group_by)] - input_files = [f2[gi][1][0] for f1, f2 in fps(group_by=group_by)] - pattern = fp.infer_pattern(files=images) - - if POLUS_IMG_EXT == ".ome.tif": - backend = "python" - if POLUS_IMG_EXT == ".ome.zarr": - backend = "tensorstore" - - out_name = re.sub(r"\{(.*?)\}", replace_value, pattern) - out_name = re.split(r"\.", out_name)[0] + POLUS_IMG_EXT - - with BioReader(input_files[0]) as br: - metadata = br.metadata - - z_size = 1 - t_size = 1 - c_size = 1 - - if group_by == "c": - c_size = dim_size - elif group_by == "t": - t_size = dim_size - elif group_by == "z": - z_size = dim_size + logger.debug(f"Copying {src} to {dest}") + if src.is_file(): + shutil.copyfile(src, dest) else: - pass - - with BioWriter( - out_dir.joinpath(out_name), - metadata=metadata, - max_workers=num_workers, - backend=backend, - Z=z_size, - C=c_size, - T=t_size, - ) as bw: - # Adjust the dimensions before writing - if group_by == "z": - bw.ps_z = z_distance(Path(input_files[0])) - - with preadator.ProcessManager( - name=f"Stacking images of {group_by} dimensions", - num_processes=num_workers, - threads_per_process=4, - ) as pm: - starttime = time.time() - threads = [] - for file, di in zip( - input_files, - range(0, dim_size), - ): - thread = pm.submit_thread( - write_image_stack, - file, - di=di, - group_by=group_by, - bw=bw, - ) - threads.append(thread) - - 
pm.join_threads() - - for f in tqdm( - as_completed(threads), - total=len(threads), - mininterval=1, - desc=f"Stacking images of {group_by} dimensions", - initial=0, - unit_scale=True, - colour="cyan", - ): - f.result() - - endtime = (time.time() - starttime) / 60 - logger.info( - f"Total time taken for execution: {endtime:.4f} minutes", - ) + shutil.copytree(src, dest, dirs_exist_ok=True) + + logger.debug(f"Done copying {inp_path} to {out_path}") diff --git a/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/utils.py b/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/utils.py new file mode 100644 index 000000000..64d0922e8 --- /dev/null +++ b/formats/image-dimension-stacking-tool/src/polus/images/formats/image_dimension_stacking/utils.py @@ -0,0 +1,113 @@ +"""Helpers for the tool.""" + +import enum +import logging +import os + +import bfio +import numpy + +POLUS_LOG = getattr(logging, os.environ.get("POLUS_LOG", "INFO")) +POLUS_IMG_EXT = os.environ.get("POLUS_IMG_EXT", ".ome.zarr") +TILE_SIZE = 1024 + +# Units for conversion +UNITS = { + "m": 10**9, + "cm": 10**7, + "mm": 10**6, + "µm": 10**3, + "nm": 1, + "Å": 10**-1, +} + + +def make_logger(name: str) -> logging.Logger: + """Create a logger with the given name.""" + logger = logging.getLogger(name) + logger.setLevel(POLUS_LOG) + return logger + + +logger = make_logger(__name__) + + +def z_unit_distance(reader: bfio.BioReader) -> tuple[float, str]: + """Get physical z-distance. + + This estimates zdistance if not provided by averaging physical distances of x and y. + + Args: + reader: BioReader object. + + Returns: + A tuple of float and string values. 
+ """ + # Get the physical z-distance if available, set to physical x if not + ps_z = reader.ps_z + + # If the z-distances are undefined, average the x and y together + if None in ps_z: + # Get the size and units for x and y + x_val, xunits = reader.ps_x + y_val, yunits = reader.ps_y + + x_units = xunits.value + y_units = yunits.value + + # Convert x and y values to the same units and average + z_val = (x_val * UNITS[x_units] + y_val * UNITS[y_units]) / 2 + + # Set z units to the smaller of the units between x and y + z_units = x_units if UNITS[x_units] < UNITS[y_units] else y_units + + # Convert z to the proper unit scale + z_val /= UNITS[z_units] + ps_z = (z_val, z_units) + + if not ps_z: + msg = f"Unable to find physical z-size {ps_z}" + raise ValueError( + msg, + ) + + return ps_z + + +class StackableAxis(str, enum.Enum): + """Axis along which images can be stacked.""" + + Z = "z" + C = "c" + T = "t" + + def read_tile( + self, + reader: bfio.BioReader, + x: tuple[int, int], + y: tuple[int, int], + ) -> numpy.ndarray: + """Read a tile from the reader.""" + if self == StackableAxis.Z: + return reader[y[0] : y[1], x[0] : x[1], 0, :, :] + if self == StackableAxis.C: + return reader[y[0] : y[1], x[0] : x[1], :, 0, :] + if self == StackableAxis.T: + return reader[y[0] : y[1], x[0] : x[1], :, :, 0] + return None + + def write_tile( # noqa: PLR0913 + self, + writer: bfio.BioWriter, + x: tuple[int, int], + y: tuple[int, int], + tile: numpy.ndarray, + index: int, + ) -> None: + """Write a tile to the writer.""" + if self == StackableAxis.Z: + writer[y[0] : y[1], x[0] : x[1], index, :, :] = tile + if self == StackableAxis.C: + writer[y[0] : y[1], x[0] : x[1], :, index, :] = tile + if self == StackableAxis.T: + writer[y[0] : y[1], x[0] : x[1], :, :, index] = tile diff --git a/formats/image-dimension-stacking-tool/tests/__init__.py b/formats/image-dimension-stacking-tool/tests/__init__.py index 39483ede7..399a9ac06 100644 --- 
a/formats/image-dimension-stacking-tool/tests/__init__.py +++ b/formats/image-dimension-stacking-tool/tests/__init__.py @@ -1 +1 @@ -"""Image dimension stacking package.""" +"""Tests for the tool.""" diff --git a/formats/image-dimension-stacking-tool/tests/conftest.py b/formats/image-dimension-stacking-tool/tests/conftest.py index e65f78b2b..5760cb96a 100644 --- a/formats/image-dimension-stacking-tool/tests/conftest.py +++ b/formats/image-dimension-stacking-tool/tests/conftest.py @@ -1,26 +1,6 @@ -"""Test fixtures. +"""Configuration for pytest.""" -Set up all data used in tests. -""" - -import shutil -import tempfile -from pathlib import Path -from typing import Union - -import numpy as np import pytest -from bfio import BioReader -from bfio import BioWriter -from skimage import filters -from skimage import io - - -def clean_directories() -> None: - """Remove all temporary directories.""" - for d in Path(".").cwd().iterdir(): - if d.is_dir() and d.name.startswith("tmp"): - shutil.rmtree(d) def pytest_addoption(parser: pytest.Parser) -> None: @@ -30,99 +10,5 @@ def pytest_addoption(parser: pytest.Parser) -> None: action="store_true", dest="slow", default=False, - help="run tests that download large data files", + help="run slow tests", ) - - -@pytest.fixture() -def output_directory() -> Union[str, Path]: - """Create output directory.""" - return Path(tempfile.mkdtemp(dir=Path.cwd())) - - -@pytest.fixture() -def inp_dir() -> Union[str, Path]: - """Create input directory.""" - return Path(tempfile.mkdtemp(dir=Path.cwd())) - - -@pytest.fixture( - params=[ - ("c", "image_x01_y01_c{c:d+}.ome.tif"), - ("z", "image_x01_y01_z{z:d+}.ome.tif"), - ("t", "image_x01_y01_t{t:d+}.ome.tif"), - ], -) -def get_params(request: pytest.FixtureRequest) -> pytest.FixtureRequest: - """To get the parameter of the fixture.""" - return request.param - - -@pytest.fixture() -def synthetic_images( - inp_dir: Union[str, Path], - get_params: pytest.FixtureRequest, -) -> tuple[Union[str, Path], 
str, str]: - """Generate random synthetic images.""" - image_sizes = 1024 - variable, pattern = get_params - for i in range(0, 10): - im = np.zeros((image_sizes, image_sizes)) - points = image_sizes * np.random.random((2, 10**2)) - im[(points[0]).astype(int), (points[1]).astype(int)] = 1 - im = filters.gaussian(im, sigma=image_sizes / (20.0 * 10)) - outname = f"image_x01_y01_{variable}{str(i).zfill(2)}.tif" - io.imsave(Path(inp_dir, outname), im) - - for inp in Path(inp_dir).iterdir(): - if inp.suffix == ".tif": - with BioReader(inp) as br: - img = br.read().squeeze() - outname = inp.stem + ".ome.tif" - with BioWriter( - file_path=Path(inp_dir, outname), - metadata=br.metadata, - ) as bw: - bw[:] = img - bw.close() - Path.unlink(inp) - - return inp_dir, variable, pattern - - -@pytest.fixture() -def synthetic_multi_images( - inp_dir: Union[str, Path], -) -> Union[str, Path]: - """Generate random synthetic images.""" - image_sizes = 1024 - - for i in range(0, 4): - im = np.zeros((image_sizes, image_sizes)) - points = image_sizes * np.random.random((2, 10**2)) - im[(points[0]).astype(int), (points[1]).astype(int)] = 1 - im = filters.gaussian(im, sigma=image_sizes / (20.0 * 10)) - outname_1 = f"tubhiswt_z00_c00_t{str(i).zfill(2)}.tif" - outname_2 = f"tubhiswt_z01_c00_t{str(i).zfill(2)}.tif" - outname_3 = f"tubhiswt_z00_c01_t{str(i).zfill(2)}.tif" - outname_4 = f"tubhiswt_z01_c01_t{str(i).zfill(2)}.tif" - - io.imsave(Path(inp_dir, outname_1), im) - io.imsave(Path(inp_dir, outname_2), im) - io.imsave(Path(inp_dir, outname_3), im) - io.imsave(Path(inp_dir, outname_4), im) - - for inp in Path(inp_dir).iterdir(): - if inp.suffix == ".tif": - with BioReader(inp) as br: - img = br.read().squeeze() - outname = inp.stem + ".ome.tif" - with BioWriter( - file_path=Path(inp_dir, outname), - metadata=br.metadata, - ) as bw: - bw[:] = img - bw.close() - Path.unlink(inp) - - return inp_dir diff --git a/formats/image-dimension-stacking-tool/tests/test_cli.py 
def gen_images(
    inp_dir: pathlib.Path,
    pattern: str,
    axis: str,
    num_images: int,
    size: int,
) -> list[pathlib.Path]:
    """Write a series of synthetic single-plane images and return their paths.

    Each image is a ``size`` x ``size`` ``uint8`` array containing a filled
    square (value 255) that spans the middle half of each side. Image ``i``
    is that square rotated by ``3 * i`` degrees, so consecutive images differ
    and a stacking order mistake is detectable.

    Args:
        inp_dir: Directory the images are written into.
        pattern: ``str.format`` template for the file name; it is formatted
            with a single keyword argument named after ``axis``.
        axis: Name of the format variable in ``pattern`` that receives the
            image index.
        num_images: Number of images to write.
        size: Width and height of every image, in pixels.

    Returns:
        The paths of the written images, in index order.
    """
    # Bounds of the centred square: from one quarter to three quarters.
    lower, upper = size // 4, 3 * size // 4
    base = numpy.zeros((size, size), dtype=numpy.uint8)
    base[lower:upper, lower:upper] = 255

    written = []
    for index in range(num_images):
        # Three degrees per step; reshape=False keeps the (size, size) shape.
        rotated = scipy.ndimage.rotate(base, index * 3, reshape=False)

        target = inp_dir / pattern.format(**{axis: index})
        written.append(target)

        with bfio.BioWriter(target) as writer:
            # Describe a single 2D plane before any pixel data is written.
            writer.X = size
            writer.Y = size
            writer.Z = 1
            writer.C = 1
            writer.T = 1
            writer.dtype = base.dtype
            writer.ps_x = (1, "mm")
            writer.ps_y = (1, "mm")

            writer[:] = rotated.astype(numpy.uint8)

    return written
@pytest.mark.parametrize("axis", ["z", "c", "t"])
@pytest.mark.parametrize("ext", ["ome.tif", "ome.zarr"])
def test_cli(
    axis: str,
    ext: str,
) -> None:
    """Stack synthetic images along ``axis`` via the CLI and verify the output.

    The test writes ``num_images`` single-plane images, invokes the CLI on
    them, and then checks that exactly one stacked image with the expected
    name exists, that its dimensions match, and that the planes of the stack
    are pixel-identical to the corresponding inputs.

    Args:
        axis: Dimension to stack along; one of ``"z"``, ``"c"`` or ``"t"``.
        ext: File extension (and thereby format) of the inputs and output.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    import tempfile

    num_images = 10
    size = 1024

    # Position of the stacked axis within the (Z, C, T) part of a bfio index.
    axis_position = "zct".index(axis)
    expected_zct = tuple(
        num_images if position == axis_position else 1 for position in range(3)
    )

    # A private temporary directory keeps the source tree clean, is removed
    # even when an assertion fails, and cannot collide with other
    # parametrized cases running concurrently.
    with tempfile.TemporaryDirectory(suffix="_data_dir") as tmp:
        data_dir = pathlib.Path(tmp)
        inp_dir = data_dir / "input"
        out_dir = data_dir / "output"
        for directory in (inp_dir, out_dir):
            directory.mkdir()

        # ``str.format`` template used to name the generated inputs,
        # e.g. "image_z{z:03d}.ome.tif".
        name_template = f"image_{axis}{{{axis}:03d}}.{ext}"
        inp_paths = gen_images(inp_dir, name_template, axis, num_images, size)
        assert len(inp_paths) == num_images

        # filepattern expression matching those names,
        # e.g. "image_z{z:d+}.ome.tif".
        file_pattern = f"image_{axis}{{{axis}:d+}}.{ext}"

        runner = typer.testing.CliRunner()
        result = runner.invoke(
            app,
            [
                "--inpDir",
                str(inp_dir),
                "--filePattern",
                file_pattern,
                "--axis",
                axis,
                "--outDir",
                str(out_dir),
            ],
        )

        # Surface the CLI output so a failure is diagnosable from the report.
        assert result.exit_code == 0, result.output

        # The output name carries the range of stacked indices.
        start = f"{0:03d}"
        end = f"{num_images - 1:03d}"
        out_path = out_dir / f"image_{axis}({start}-{end}).{ext}"
        assert out_path.exists()

        with bfio.BioReader(out_path) as out_reader:
            assert out_reader.X == size
            assert out_reader.Y == size
            assert (out_reader.Z, out_reader.C, out_reader.T) == expected_zct

            # NOTE(review): the first image (index 0) is not compared, as in
            # the original test — confirm whether that skip is intentional.
            for i, p in enumerate(inp_paths[1:], start=1):
                zct = [0, 0, 0]
                zct[axis_position] = i
                out_img = numpy.squeeze(out_reader[:, :, zct[0], zct[1], zct[2]])

                with bfio.BioReader(p) as inp_reader:
                    inp_img = numpy.squeeze(inp_reader[:, :, 0, 0, 0])

                # Check shapes first so a mismatch fails with a clear message
                # instead of a broadcasting error in the subtraction below.
                assert out_img.shape == inp_img.shape, (
                    f"Image {i} has shape {out_img.shape}, "
                    f"expected {inp_img.shape}"
                )

                # Widen to a signed type: subtracting uint8 arrays wraps
                # around modulo 256 and would misreport the error.
                error = numpy.mean(
                    numpy.abs(
                        out_img.astype(numpy.int64) - inp_img.astype(numpy.int64),
                    ),
                )

                numpy.testing.assert_array_equal(
                    inp_img,
                    out_img,
                    err_msg=f"Image {i} does not match. Error: {error}",
                )
test_write_image_stack( - synthetic_images: tuple[Union[str, Path], str, str], - output_directory: Path, -) -> None: - """Test writing stacked images.""" - inp_dir, variable, _ = synthetic_images - - for file in Path(inp_dir).iterdir(): - if file.name.endswith(".ome.tif"): - with BioReader(file) as br: - metadata = br.metadata - - with BioWriter( - output_directory.joinpath(file.name), - metadata=metadata, - ) as bw: - ds.write_image_stack(file=file, di=0, group_by=variable, bw=bw) - total_dimensions = 10 - assert len(list(output_directory.iterdir())) == total_dimensions - - -def test_z_distance(synthetic_images: tuple[Union[str, Path], str, str]) -> None: - """Test estimating z-distance.""" - inp_dir, _, _ = synthetic_images - distances = [] - for file in Path(inp_dir).iterdir(): - ps_z = ds.z_distance(file=file) - distances.append(ps_z) - assert all(distances) is not None - clean_directories()