diff --git a/src/labelformat/formats/__init__.py b/src/labelformat/formats/__init__.py
index c667f7f..844a330 100644
--- a/src/labelformat/formats/__init__.py
+++ b/src/labelformat/formats/__init__.py
@@ -29,6 +29,7 @@
 )
 from labelformat.formats.semantic_segmentation.pascalvoc import (
     PascalVOCSemanticSegmentationInput,
+    PascalVOCSemanticSegmentationOutput,
 )
 from labelformat.formats.yolov5 import (
     YOLOv5ObjectDetectionInput,
@@ -89,6 +90,7 @@
     "PascalVOCObjectDetectionInput",
     "PascalVOCObjectDetectionOutput",
     "PascalVOCSemanticSegmentationInput",
+    "PascalVOCSemanticSegmentationOutput",
     "RTDETRObjectDetectionInput",
     "RTDETRObjectDetectionOutput",
     "RTDETRv2ObjectDetectionInput",
diff --git a/src/labelformat/formats/semantic_segmentation/pascalvoc.py b/src/labelformat/formats/semantic_segmentation/pascalvoc.py
index 3da65ba..025ccf6 100644
--- a/src/labelformat/formats/semantic_segmentation/pascalvoc.py
+++ b/src/labelformat/formats/semantic_segmentation/pascalvoc.py
@@ -1,4 +1,4 @@
-"""Pascal VOC semantic segmentation input.
+"""Pascal VOC semantic segmentation input and output.
 
 Assumptions:
 - Masks live under a separate directory mirroring the images directory structure.
@@ -8,6 +8,7 @@
 
 from __future__ import annotations
 
+import json
 from argparse import ArgumentParser
 from collections.abc import Iterable, Mapping
 from dataclasses import dataclass
@@ -16,15 +17,20 @@
 import numpy as np
 from numpy.typing import NDArray
 from PIL import Image as PILImage
+from PIL import ImageDraw
 
 from labelformat import utils
+from labelformat.cli.registry import Task, cli_register
+from labelformat.model.binary_mask_segmentation import BinaryMaskSegmentation
 from labelformat.model.category import Category
 from labelformat.model.image import Image
 from labelformat.model.instance_segmentation import (
     ImageInstanceSegmentation,
     InstanceSegmentationInput,
+    InstanceSegmentationOutput,
     SingleInstanceSegmentation,
 )
+from labelformat.model.multipolygon import MultiPolygon
 from labelformat.model.semantic_segmentation import SemanticSegmentationMask
 
 """TODO(Malte, 11/2025):
@@ -150,6 +156,214 @@ def _get_mask(self, image_filepath: str) -> SemanticSegmentationMask:
         return SemanticSegmentationMask.from_array(array=mask_np)
 
 
+@cli_register(format="pascalvoc", task=Task.INSTANCE_SEGMENTATION)
+class PascalVOCSemanticSegmentationOutput(InstanceSegmentationOutput):
+    """Pascal VOC semantic segmentation output format.
+
+    Saves one semantic PNG mask per image to
+    ``{output_folder}/{masks_folder_name}/...`` and stores the class mapping as JSON in
+    ``{output_folder}/{class_map_filename}``.
+
+    Objects are painted in the order they are listed, so where objects overlap
+    the class ID of the later object wins. Pixels not covered by any object keep
+    the background class ID.
+    """
+
+    @staticmethod
+    def add_cli_arguments(parser: ArgumentParser) -> None:
+        parser.add_argument(
+            "--output-folder",
+            type=Path,
+            required=True,
+            help="Output folder for Pascal VOC semantic segmentation files.",
+        )
+        parser.add_argument(
+            "--masks-folder-name",
+            type=str,
+            default="SegmentationClass",
+            help="Subfolder name where semantic masks are written.",
+        )
+        parser.add_argument(
+            "--class-map-filename",
+            type=str,
+            default="class_id_to_name.json",
+            help="JSON filename for class ID to name mapping.",
+        )
+        parser.add_argument(
+            "--background-class-id",
+            type=int,
+            default=0,
+            help="Class ID used for unlabeled/background pixels.",
+        )
+
+    def __init__(
+        self,
+        output_folder: Path,
+        masks_folder_name: str = "SegmentationClass",
+        class_map_filename: str = "class_id_to_name.json",
+        background_class_id: int = 0,
+    ) -> None:
+        # Masks are stored as 8-bit palette PNGs, so every class ID must fit a byte.
+        if background_class_id < 0 or background_class_id > 255:
+            raise ValueError(
+                "background_class_id must be in [0,255] for Pascal VOC export."
+            )
+
+        self._output_folder = output_folder
+        self._masks_folder_name = masks_folder_name
+        self._class_map_filename = class_map_filename
+        self._background_class_id = background_class_id
+
+    def save(self, label_input: InstanceSegmentationInput) -> None:
+        """Write one palette PNG mask per image plus the class-ID-to-name JSON.
+
+        Raises ValueError if a category ID is outside [0, 255], if one ID maps
+        to two different names, if a label uses an ID that is missing from the
+        categories, or if a segmentation cannot be converted to a mask.
+        """
+        category_id_to_name = _get_category_id_to_name(
+            categories=label_input.get_categories(),
+            background_class_id=self._background_class_id,
+        )
+
+        masks_dir = self._output_folder / self._masks_folder_name
+        masks_dir.mkdir(parents=True, exist_ok=True)
+
+        for image_label in label_input.get_labels():
+            # Initialize an (H, W) mask where every pixel starts as background.
+            mask = np.full(
+                (image_label.image.height, image_label.image.width),
+                fill_value=self._background_class_id,
+                dtype=np.int_,
+            )
+            for obj in image_label.objects:
+                if obj.category.id not in category_id_to_name:
+                    raise ValueError(
+                        f"Category id {obj.category.id} is used in labels but "
+                        "missing from categories."
+                    )
+                obj_mask = _segmentation_to_binary_mask(
+                    segmentation=obj.segmentation, image=image_label.image
+                )
+                mask[obj_mask] = obj.category.id
+
+            # Mirror the image's relative path; only the extension changes to .png.
+            mask_path = (masks_dir / image_label.image.filename).with_suffix(".png")
+            mask_path.parent.mkdir(parents=True, exist_ok=True)
+            _save_mask(mask_path=mask_path, mask=mask)
+
+        class_map_path = self._output_folder / self._class_map_filename
+        with class_map_path.open("w", encoding="utf-8") as f:
+            json.dump(
+                {str(k): v for k, v in sorted(category_id_to_name.items())},
+                f,
+                indent=2,
+            )
+
+
+def _get_category_id_to_name(
+    categories: Iterable[Category], background_class_id: int
+) -> dict[int, str]:
+    """Build class-id mapping and validate duplicates.
+
+    Raises ValueError for an ID outside [0, 255] or for one ID that appears with
+    two different names. If no category uses ``background_class_id``, an entry
+    named "background" is added for it.
+    """
+    category_id_to_name: dict[int, str] = {}
+    for category in categories:
+        if not 0 <= category.id <= 255:
+            raise ValueError(
+                "Pascal VOC semantic segmentation export only supports class IDs "
+                f"in the range [0, 255]. Got: {category.id}"
+            )
+        existing_name = category_id_to_name.get(category.id)
+        if existing_name is not None and existing_name != category.name:
+            raise ValueError(
+                "Conflicting names for category id "
+                f"{category.id}: '{existing_name}' vs '{category.name}'."
+            )
+        category_id_to_name[category.id] = category.name
+
+    if background_class_id not in category_id_to_name:
+        category_id_to_name[background_class_id] = "background"
+    return category_id_to_name
+
+
+def _segmentation_to_binary_mask(
+    segmentation: BinaryMaskSegmentation | MultiPolygon, image: Image
+) -> NDArray[np.bool_]:
+    """Return a boolean (height, width) mask of the pixels covered by the object.
+
+    Raises ValueError for an unsupported segmentation type or when the mask shape
+    differs from the image dimensions.
+    """
+    if isinstance(segmentation, BinaryMaskSegmentation):
+        binary_mask = segmentation.get_binary_mask().astype(np.uint8, copy=False)
+    elif isinstance(segmentation, MultiPolygon):
+        binary_mask = _multipolygon_to_binary_mask(
+            multipolygon=segmentation,
+            width=image.width,
+            height=image.height,
+        )
+    else:
+        raise ValueError(f"Unsupported segmentation type: {type(segmentation)}")
+
+    expected_shape = (image.height, image.width)
+    if binary_mask.shape != expected_shape:
+        raise ValueError(
+            "Segmentation mask shape must match image dimensions for "
+            f"'{image.filename}': got {binary_mask.shape}, expected {expected_shape}."
+        )
+    return binary_mask > 0
+
+
+def _multipolygon_to_binary_mask(
+    multipolygon: MultiPolygon, width: int, height: int
+) -> NDArray[np.uint8]:
+    """Rasterize all polygons onto one (height, width) uint8 mask of 0s and 1s."""
+    mask_img = PILImage.new(mode="L", size=(width, height), color=0)
+    draw = ImageDraw.Draw(mask_img)
+    for polygon in multipolygon.polygons:
+        if len(polygon) < 3:
+            raise ValueError(
+                f"Polygon must contain at least 3 points, got {len(polygon)}."
+            )
+        draw.polygon(xy=polygon, fill=1, outline=1)
+    return np.asarray(mask_img, dtype=np.uint8)
+
+
+def _pascal_voc_palette() -> list[int]:
+    """Build the standard Pascal VOC palette (256 colors, RGB triples)."""
+    palette = [0] * (256 * 3)
+    for class_id in range(256):
+        label = class_id
+        red = 0
+        green = 0
+        blue = 0
+        bit_index = 0
+        # Spread the class ID's bits over the channels, three bits per round,
+        # filling each channel from its most significant bit downwards.
+        while label:
+            red |= ((label >> 0) & 1) << (7 - bit_index)
+            green |= ((label >> 1) & 1) << (7 - bit_index)
+            blue |= ((label >> 2) & 1) << (7 - bit_index)
+            bit_index += 1
+            label >>= 3
+        palette[(class_id * 3) : (class_id * 3 + 3)] = [red, green, blue]
+    return palette
+
+
+_PASCAL_VOC_PALETTE = _pascal_voc_palette()
+
+
+def _save_mask(mask_path: Path, mask: NDArray[np.int_]) -> None:
+    """Save ``mask`` as an 8-bit palette image using the Pascal VOC palette."""
+    mask_img = PILImage.fromarray(mask.astype(np.uint8), mode="P")
+    mask_img.putpalette(_PASCAL_VOC_PALETTE)
+    mask_img.save(mask_path)
+
+
 def _validate_mask(
     image_obj: Image, mask_np: NDArray[np.int_], valid_class_ids: set[int]
 ) -> None:
diff --git a/tests/unit/formats/semantic_segmentation/test_pascalvoc.py b/tests/unit/formats/semantic_segmentation/test_pascalvoc.py
index be77318..aa2a04f 100644
--- a/tests/unit/formats/semantic_segmentation/test_pascalvoc.py
+++ b/tests/unit/formats/semantic_segmentation/test_pascalvoc.py
@@ -1,8 +1,9 @@
 from __future__ import annotations
 
 import json
+from argparse import ArgumentParser
 from pathlib import Path
-from typing import Dict
+from typing import Dict, Iterable
 
 import cv2
 import numpy as np
@@ -12,9 +13,17 @@
 from labelformat.formats.semantic_segmentation import pascalvoc as pascalvoc_module
 from labelformat.formats.semantic_segmentation.pascalvoc import (
     PascalVOCSemanticSegmentationInput,
+    PascalVOCSemanticSegmentationOutput,
 )
 from labelformat.model.binary_mask_segmentation import BinaryMaskSegmentation
+from labelformat.model.category import Category
 from labelformat.model.image import Image
+from labelformat.model.instance_segmentation import (
+    ImageInstanceSegmentation,
+    InstanceSegmentationInput,
+    SingleInstanceSegmentation,
+)
+from labelformat.model.multipolygon import MultiPolygon
 from tests.unit.test_utils import FIXTURES_DIR
 
 FIXTURES_ROOT_PASCALVOC = FIXTURES_DIR / "semantic_segmentation/pascalvoc"
@@ -29,6 +38,102 @@ def _load_class_mapping_int_keys() -> Dict[int, str]:
     return {int(k): str(v) for k, v in data.items()}
 
 
+class _SimplePolygonInput(InstanceSegmentationInput):
+    @staticmethod
+    def add_cli_arguments(parser: ArgumentParser) -> None:
+        raise NotImplementedError()
+
+    def __init__(self) -> None:
+        self._image = Image(id=0, filename="nested/example.jpg", width=6, height=5)
+
+    def get_categories(self) -> Iterable[Category]:
+        return [
+            Category(id=1, name="car"),
+            Category(id=2, name="person"),
+        ]
+
+    def get_images(self) -> Iterable[Image]:
+        return [self._image]
+
+    def get_labels(self) -> Iterable[ImageInstanceSegmentation]:
+        return [
+            ImageInstanceSegmentation(
+                image=self._image,
+                objects=[
+                    SingleInstanceSegmentation(
+                        category=Category(id=1, name="car"),
+                        segmentation=MultiPolygon(
+                            polygons=[
+                                [
+                                    (1.0, 1.0),
+                                    (1.0, 3.0),
+                                    (3.0, 3.0),
+                                    (3.0, 1.0),
+                                ]
+                            ]
+                        ),
+                    ),
+                    SingleInstanceSegmentation(
+                        category=Category(id=2, name="person"),
+                        segmentation=MultiPolygon(
+                            polygons=[
+                                [
+                                    (2.0, 2.0),
+                                    (2.0, 4.0),
+                                    (4.0, 4.0),
+                                    (4.0, 2.0),
+                                ]
+                            ]
+                        ),
+                    ),
+                ],
+            )
+        ]
+
+
+class _SimpleRLEInput(InstanceSegmentationInput):
+    @staticmethod
+    def add_cli_arguments(parser: ArgumentParser) -> None:
+        raise NotImplementedError()
+
+    def __init__(self) -> None:
+        self._image = Image(id=0, filename="nested/rle_example.jpg", width=5, height=4)
+
+    def get_categories(self) -> Iterable[Category]:
+        return [
+            Category(id=1, name="car"),
+            Category(id=2, name="person"),
+        ]
+
+    def get_images(self) -> Iterable[Image]:
+        return [self._image]
+
+    def get_labels(self) -> Iterable[ImageInstanceSegmentation]:
+        return [
+            ImageInstanceSegmentation(
+                image=self._image,
+                objects=[
+                    SingleInstanceSegmentation(
+                        category=Category(id=1, name="car"),
+                        segmentation=BinaryMaskSegmentation.from_rle(
+                            rle_row_wise=[1, 2, 3, 1, 13],
+                            width=5,
+                            height=4,
+                        ),
+                    ),
+                    SingleInstanceSegmentation(
+                        category=Category(id=2, name="person"),
+                        segmentation=BinaryMaskSegmentation.from_rle(
+                            rle_row_wise=[13, 2, 3, 1, 1],
+                            width=5,
+                            height=4,
+                        ),
+                    ),
+                ],
+            )
+        ]
+
+
 class TestPascalVOCSemanticSegmentationInput:
     def test_from_dirs__builds_categories_and_images(self) -> None:
         mapping = _load_class_mapping_int_keys()
@@ -158,6 +263,135 @@ def test_get_labels(self, tmp_path: Path) -> None:
         ]
 
 
+class TestPascalVOCSemanticSegmentationOutput:
+    def test_save__writes_fixture_masks_and_class_mapping(self, tmp_path: Path) -> None:
+        mapping = _load_class_mapping_int_keys()
+        label_input = PascalVOCSemanticSegmentationInput.from_dirs(
+            images_dir=IMAGES_DIR,
+            masks_dir=MASKS_DIR,
+            class_id_to_name=mapping,
+        )
+
+        PascalVOCSemanticSegmentationOutput(output_folder=tmp_path).save(
+            label_input=label_input
+        )
+
+        class_map_json = json.loads((tmp_path / "class_id_to_name.json").read_text())
+        class_map = {int(k): str(v) for k, v in class_map_json.items()}
+        assert class_map == mapping
+
+        for image in label_input.get_images():
+            rel_mask_path = Path(image.filename).with_suffix(".png")
+            class_mask_path = tmp_path / "SegmentationClass" / rel_mask_path
+            with PILImage.open(class_mask_path) as class_mask_img:
+                actual_mask = np.asarray(class_mask_img, dtype=np.int_)
+            with PILImage.open(MASKS_DIR / rel_mask_path) as expected_mask_img:
+                expected_mask = np.asarray(expected_mask_img, dtype=np.int_)
+            assert np.array_equal(actual_mask, expected_mask)
+
+    def test_save__rasterizes_polygon_masks_and_adds_background_class(
+        self, tmp_path: Path
+    ) -> None:
+        PascalVOCSemanticSegmentationOutput(output_folder=tmp_path).save(
+            label_input=_SimplePolygonInput()
+        )
+
+        mask_path = tmp_path / "SegmentationClass" / "nested/example.png"
+        with PILImage.open(mask_path) as mask_img:
+            assert mask_img.mode == "P"
+            mask = np.asarray(mask_img, dtype=np.int_)
+        expected_mask = np.array(
+            [
+                [0, 0, 0, 0, 0, 0],
+                [0, 1, 1, 1, 0, 0],
+                [0, 1, 2, 2, 2, 0],
+                [0, 1, 2, 2, 2, 0],
+                [0, 0, 2, 2, 2, 0],
+            ],
+            dtype=np.int_,
+        )
+        assert np.array_equal(mask, expected_mask)
+
+        class_map_json = json.loads((tmp_path / "class_id_to_name.json").read_text())
+        class_map = {int(k): str(v) for k, v in class_map_json.items()}
+        assert class_map == {0: "background", 1: "car", 2: "person"}
+
+    def test_save__writes_rle_masks_and_adds_background_class(
+        self, tmp_path: Path
+    ) -> None:
+        PascalVOCSemanticSegmentationOutput(output_folder=tmp_path).save(
+            label_input=_SimpleRLEInput()
+        )
+
+        mask_path = tmp_path / "SegmentationClass" / "nested/rle_example.png"
+        with PILImage.open(mask_path) as mask_img:
+            assert mask_img.mode == "P"
+            mask = np.asarray(mask_img, dtype=np.int_)
+        expected_mask = np.array(
+            [
+                [0, 1, 1, 0, 0],
+                [0, 1, 0, 0, 0],
+                [0, 0, 0, 2, 2],
+                [0, 0, 0, 2, 0],
+            ],
+            dtype=np.int_,
+        )
+        assert np.array_equal(mask, expected_mask)
+
+        class_map_json = json.loads((tmp_path / "class_id_to_name.json").read_text())
+        class_map = {int(k): str(v) for k, v in class_map_json.items()}
+        assert class_map == {0: "background", 1: "car", 2: "person"}
+
+    @pytest.mark.parametrize("background_class_id", [-1, 256])
+    def test_init__background_class_id_out_of_range_raises(
+        self, tmp_path: Path, background_class_id: int
+    ) -> None:
+        with pytest.raises(
+            ValueError,
+            match=r"background_class_id must be in \[0,255\] for Pascal VOC export\.",
+        ):
+            PascalVOCSemanticSegmentationOutput(
+                output_folder=tmp_path,
+                background_class_id=background_class_id,
+            )
+
+    def test__segmentation_to_binary_mask__shape_mismatch_raises(self) -> None:
+        image = Image(id=0, filename="image.jpg", width=4, height=3)
+        bad_shape_segmentation = BinaryMaskSegmentation.from_rle(
+            rle_row_wise=[0, 4],
+            width=2,
+            height=2,
+        )
+        with pytest.raises(ValueError, match=r"Segmentation mask shape must match"):
+            pascalvoc_module._segmentation_to_binary_mask(
+                segmentation=bad_shape_segmentation,
+                image=image,
+            )
+
+    @pytest.mark.parametrize("category_id", [-1, 256])
+    def test__get_category_id_to_name__category_id_out_of_range_raises(
+        self, category_id: int
+    ) -> None:
+        with pytest.raises(ValueError, match=rf"range \[0, 255\].*Got: {category_id}"):
+            pascalvoc_module._get_category_id_to_name(
+                categories=[Category(id=category_id, name="out_of_range")],
+                background_class_id=0,
+            )
+
+    def test__segmentation_to_binary_mask__polygon_with_less_than_3_points_raises(
+        self,
+    ) -> None:
+        image = Image(id=0, filename="invalid_polygon.jpg", width=4, height=3)
+        invalid_polygon = MultiPolygon(polygons=[[(1.0, 1.0), (2.0, 2.0)]])
+        with pytest.raises(
+            ValueError, match=r"Polygon must contain at least 3 points, got 2"
+        ):
+            pascalvoc_module._segmentation_to_binary_mask(
+                segmentation=invalid_polygon,
+                image=image,
+            )
+
+
 def test__validate_mask__unknown_class_value_raises() -> None:
     # Arrange: simple image and a mask with out-of-vocabulary value
     img = Image(id=0, filename="foo.jpg", width=4, height=3)