diff --git a/tensorflow_datasets/image/__init__.py b/tensorflow_datasets/image/__init__.py index 0ee29769986..a6948609be8 100644 --- a/tensorflow_datasets/image/__init__.py +++ b/tensorflow_datasets/image/__init__.py @@ -29,6 +29,7 @@ from tensorflow_datasets.image.cifar import Cifar10 from tensorflow_datasets.image.cifar import Cifar100 from tensorflow_datasets.image.cifar10_corrupted import Cifar10Corrupted +from tensorflow_datasets.image.cityscapes import Cityscapes from tensorflow_datasets.image.clevr import CLEVR from tensorflow_datasets.image.coco import Coco from tensorflow_datasets.image.coco2014_legacy import Coco2014 @@ -47,6 +48,7 @@ from tensorflow_datasets.image.imagenet import Imagenet2012 from tensorflow_datasets.image.imagenet2012_corrupted import Imagenet2012Corrupted from tensorflow_datasets.image.kitti import Kitti +from tensorflow_datasets.image.lost_and_found import LostAndFound from tensorflow_datasets.image.lsun import Lsun from tensorflow_datasets.image.mnist import EMNIST from tensorflow_datasets.image.mnist import FashionMNIST diff --git a/tensorflow_datasets/image/cityscapes.py b/tensorflow_datasets/image/cityscapes.py new file mode 100644 index 00000000000..5c221c0c46e --- /dev/null +++ b/tensorflow_datasets/image/cityscapes.py @@ -0,0 +1,280 @@ +'''Cityscapes Datasets.''' + +import math +import os +import re + +import tensorflow as tf +import tensorflow_datasets.public_api as tfds +from tensorflow_datasets.core import api_utils + +_CITATION = '''\ +@inproceedings{Cordts2016Cityscapes, + title={The Cityscapes Dataset for Semantic Urban Scene Understanding}, + author={Cordts, Marius and Omran, Mohamed and Ramos, Sebastian and Rehfeld, Timo and Enzweiler, Markus and Benenson, Rodrigo and Franke, Uwe and Roth, Stefan and Schiele, Bernt}, + booktitle={Proc. 
of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, year={2016} } ''' + +_DESCRIPTION = '''\ + Cityscapes is a dataset consisting of diverse urban street scenes across 50 different cities + at varying times of the year as well as ground truths for several vision tasks including + semantic segmentation, instance level segmentation (TODO), and stereo pair disparity inference. + + + For segmentation tasks (default split, accessible via 'cityscapes/semantic_segmentation'), Cityscapes provides + dense pixel level annotations for 5000 images at 1024 * 2048 resolution pre-split into training (2975), + validation (500) and test (1525) sets. Label annotations for segmentation tasks span across 30+ classes + commonly encountered during driving scene perception. Detailed label information may be found here: + https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py#L52-L99 + + Cityscapes also provides coarse grain segmentation annotations (accessible via 'cityscapes/semantic_segmentation_extra') + for 19998 images in a 'train_extra' split which may prove useful for pretraining / data-heavy models. + + + Besides segmentation, cityscapes also provides stereo image pairs and ground truths for disparity inference + tasks on both the normal and extra splits (accessible via 'cityscapes/stereo_disparity' and + 'cityscapes/stereo_disparity_extra' respectively). + + Ignored examples: + - For 'cityscapes/stereo_disparity_extra': + - troisdorf_000000_000073_{*} images (no disparity map present) + + WARNING: this dataset requires users to set up a login and password in order to get the files. +''' + +# TODO add instance ids (might need to import cityScapesScripts) + +class CityscapesConfig(tfds.core.BuilderConfig): + '''BuilderConfig for Cityscapes + + Args: + right_images (bool): Enables right images for stereo image tasks. + segmentation_labels (bool): Enables image segmentation labels.
+ disparity_maps (bool): Enables disparity maps. + train_extra_split (bool): Enables train_extra split. This automatically + enables coarse grain segmentations, if segmentation labels are used. + ''' + + @api_utils.disallow_positional_args + def __init__(self, right_images=False, segmentation_labels=True, + disparity_maps=False, train_extra_split=False, **kwargs): + super().__init__(**kwargs) + + self.right_images = right_images + self.segmentation_labels = segmentation_labels + self.disparity_maps = disparity_maps + self.train_extra_split = train_extra_split + + self.ignored_ids = set() + + # Setup required zips and their root dir names + self.zip_root = {} + self.zip_root['images_left'] =\ + ('leftImg8bit_trainvaltest.zip', 'leftImg8bit') + + if self.train_extra_split: + self.zip_root['images_left/extra'] =\ + ('leftImg8bit_trainextra.zip', 'leftImg8bit') + + if self.right_images: + self.zip_root['images_right'] =\ + ('rightImg8bit_trainvaltest.zip', 'rightImg8bit') + if self.train_extra_split: + self.zip_root['images_right/extra'] =\ + ('rightImg8bit_trainextra.zip', 'rightImg8bit') + + if self.segmentation_labels: + if not self.train_extra_split: + self.zip_root['segmentation_labels'] =\ + ('gtFine_trainvaltest.zip', 'gtFine') + self.label_suffix = 'gtFine_labelIds' + else: + # The 'train extra' split only has coarse labels unlike train and val. + # Therefore, for consistency across splits, we also enable coarse labels + # using the train_extra_split flag. 
+ self.zip_root['segmentation_labels'] = ('gtCoarse.zip', 'gtCoarse') + self.zip_root['segmentation_labels/extra'] = \ + ('gtCoarse.zip', 'gtCoarse') + self.label_suffix = 'gtCoarse_labelIds' + + if self.disparity_maps: + self.zip_root['disparity_maps'] =\ + ('disparity_trainvaltest.zip', 'disparity') + if self.train_extra_split: + self.zip_root['disparity_maps/extra'] =\ + ('disparity_trainextra.zip', 'disparity') + self.ignored_ids.add('troisdorf_000000_000073') # No disparity for this file + + +class Cityscapes(tfds.core.GeneratorBasedBuilder): + '''Base class for Cityscapes datasets''' + + BUILDER_CONFIGS = [ + CityscapesConfig( + name='semantic_segmentation', + description='Cityscapes semantic segmentation dataset.', + version="1.0.0", + right_images=False, + segmentation_labels=True, + disparity_maps=False, + train_extra_split=False, + ), + CityscapesConfig( + name='semantic_segmentation_extra', + description='Cityscapes semantic segmentation dataset with train_extra split and coarse labels.', # pylint: disable=line-too-long + version="1.0.0", + right_images=False, + segmentation_labels=True, + disparity_maps=False, + train_extra_split=True, + ), + CityscapesConfig( + name='stereo_disparity', + description='Cityscapes stereo image and disparity maps dataset.', + version="1.0.0", + right_images=True, + segmentation_labels=False, + disparity_maps=True, + train_extra_split=False, + ), + CityscapesConfig( + name='stereo_disparity_extra', + description='Cityscapes stereo image and disparity maps dataset with train_extra split.', # pylint: disable=line-too-long + version="1.0.0", + right_images=True, + segmentation_labels=False, + disparity_maps=True, + train_extra_split=True, + ), + ] + + VERSION = tfds.core.Version('1.0.0') + + def _info(self): + # Enable features as necessary + features = {} + features['image_id'] = tfds.features.Text() + features['image_left'] =\ + tfds.features.Image(shape=(1024, 2048, 3), encoding_format='png') + + if 
self.builder_config.right_images: + features['image_right'] =\ + tfds.features.Image(shape=(1024, 2048, 3), encoding_format='png') + + if self.builder_config.segmentation_labels: + features['segmentation_label'] =\ + tfds.features.Image(shape=(1024, 2048, 1), encoding_format='png') + + if self.builder_config.disparity_maps: + features['disparity_map'] =\ + tfds.features.Image(shape=(1024, 2048, 1), encoding_format='png') + + return tfds.core.DatasetInfo( + builder=self, + description=(_DESCRIPTION), + features=tfds.features.FeaturesDict(features), + urls=['https://www.cityscapes-dataset.com', 'https://github.com/mcordts/cityscapesScripts'], + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + paths = {} + for split, (zip_file, zip_root) in self.builder_config.zip_root.items(): + paths[split] = os.path.join(dl_manager.manual_dir, zip_file) + + if any(not os.path.exists(z) for z in paths.values()): + msg = 'You must download the dataset files manually and place them in: ' + msg += ', '.join(paths.values()) + raise AssertionError(msg) + + for split, (_, zip_root) in self.builder_config.zip_root.items(): + paths[split] = os.path.join(dl_manager.extract(paths[split]), zip_root) + + splits = [ + tfds.core.SplitGenerator( + name=tfds.Split.TRAIN, + gen_kwargs={ + feat_dir: os.path.join(path, 'train') + for feat_dir, path in paths.items() + if not feat_dir.endswith('/extra') + }, + ), + tfds.core.SplitGenerator( + name=tfds.Split.VALIDATION, + gen_kwargs={ + feat_dir: os.path.join(path, 'val') + for feat_dir, path in paths.items() + if not feat_dir.endswith('/extra') + }, + ), + ] + + # Test split does not exist in coarse dataset + if not self.builder_config.train_extra_split: + splits.append(tfds.core.SplitGenerator( + name=tfds.Split.TEST, + gen_kwargs={ + feat_dir: os.path.join(path, 'test') + for feat_dir, path in paths.items() + if not feat_dir.endswith('/extra') + }, + )) + else: + splits.append(tfds.core.SplitGenerator( + name='train_extra', + 
gen_kwargs={ + feat_dir.replace('/extra', ''): os.path.join(path, 'train_extra') + for feat_dir, path in paths.items() + if feat_dir.endswith('/extra') + }, + )) + return splits + + def _generate_examples(self, **paths): + left_imgs_root = paths['images_left'] + for city_id in tf.io.gfile.listdir(left_imgs_root): + paths_city_root = {feat_dir: os.path.join(path, city_id) + for feat_dir, path in paths.items()} + + left_city_root = paths_city_root['images_left'] + for left_img in tf.io.gfile.listdir(left_city_root): + left_img_path = os.path.join(left_city_root, left_img) + image_id = _get_left_image_id(left_img) + + if image_id in self.builder_config.ignored_ids: + continue + + features = { + 'image_id': image_id, + 'image_left': left_img_path + } + + if self.builder_config.right_images: + features['image_right'] = os.path.join( + paths_city_root['images_right'], f'{image_id}_rightImg8bit.png') + + if self.builder_config.segmentation_labels: + features['segmentation_label'] = os.path.join( + paths_city_root['segmentation_labels'], + f'{image_id}_{self.builder_config.label_suffix}.png') + + if self.builder_config.disparity_maps: + features['disparity_map'] = os.path.join( + paths_city_root['disparity_maps'], f'{image_id}_disparity.png') + + yield image_id, features + +# Helper functions + +LEFT_IMAGE_FILE_RE = re.compile(r'([a-z\-]+)_(\d+)_(\d+)_leftImg8bit\.png') + +def _get_left_image_id(left_image): + '''Returns the id of an image file. Used to associate an image file + with its corresponding label. 
+ Example: + 'bonn_000001_000019_leftImg8bit' -> 'bonn_000001_000019' + ''' + match = LEFT_IMAGE_FILE_RE.match(left_image) + return f'{match.group(1)}_{match.group(2)}_{match.group(3)}' diff --git a/tensorflow_datasets/image/cityscapes_test.py b/tensorflow_datasets/image/cityscapes_test.py new file mode 100644 index 00000000000..ea729bf2ff7 --- /dev/null +++ b/tensorflow_datasets/image/cityscapes_test.py @@ -0,0 +1,53 @@ + +'''Tests for Cityscapes dataset module.''' + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow_datasets import testing +from tensorflow_datasets.image import cityscapes + +# TODO add tests for features and files per configuration +class CityscapesSegmentationTest(testing.DatasetBuilderTestCase): + DATASET_CLASS = cityscapes.Cityscapes + BUILDER_CONFIG_NAMES_TO_TEST = ['semantic_segmentation'] + SPLITS = { + 'train': 3, + 'validation': 1, + 'test': 2, + } + + +class CityscapesSegmentationExtraTest(testing.DatasetBuilderTestCase): + DATASET_CLASS = cityscapes.Cityscapes + BUILDER_CONFIG_NAMES_TO_TEST = ['semantic_segmentation_extra'] + SPLITS = { + 'train': 3, + 'train_extra': 4, + 'validation': 1, + } + + +class CityscapesStereoDisparityTest(testing.DatasetBuilderTestCase): + DATASET_CLASS = cityscapes.Cityscapes + BUILDER_CONFIG_NAMES_TO_TEST = ['stereo_disparity'] + SPLITS = { + 'train': 3, + 'validation': 1, + 'test': 2, + } + + +class CityscapesStereoDisparityExtraTest(testing.DatasetBuilderTestCase): + DATASET_CLASS = cityscapes.Cityscapes + BUILDER_CONFIG_NAMES_TO_TEST = ['stereo_disparity_extra'] + SPLITS = { + 'train': 3, + 'train_extra': 4, + 'validation': 1, + } + + +if __name__ == '__main__': + testing.test_main() diff --git a/tensorflow_datasets/image/lost_and_found.py b/tensorflow_datasets/image/lost_and_found.py new file mode 100644 index 00000000000..4e746bb65b2 --- /dev/null +++ b/tensorflow_datasets/image/lost_and_found.py @@ -0,0 +1,225 @@ +"""Lost 
and Found Road Hazard Dataset.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from os import path +import re + +import tensorflow as tf +import tensorflow_datasets as tfds +from tensorflow_datasets.core import api_utils + + +_CITATION = """ +@inproceedings{pinggera2016lost, + title={Lost and found: detecting small road hazards for self-driving vehicles}, + author={Pinggera, Peter and Ramos, Sebastian and Gehrig, Stefan and Franke, Uwe and Rother, Carsten and Mester, Rudolf}, + booktitle={2016 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, + year={2016} +} +""" + +_DESCRIPTION = """ +The LostAndFound Dataset addresses the problem of detecting unexpected small obstacles on +the road often caused by lost cargo. The dataset comprises 112 stereo video sequences +with 2104 annotated frames (picking roughly every tenth frame from the recorded data). + +The dataset is designed analogously to the 'Cityscapes' dataset. The dataset provides: +- stereo image pairs in either 8 or 16 bit color resolution +- precomputed disparity maps +- coarse semantic labels for objects and street + +Descriptions of the labels are given here: http://www.6d-vision.com/laf_table.pdf +""" + + +class LostAndFoundConfig(tfds.core.BuilderConfig): + '''BuilderConfig for 'Lost and Found' + + Args: + right_images (bool): Enables right images for stereo image tasks. + segmentation_labels (bool): Enables image segmentation labels. + instance_ids (bool): Enables instance-id labels. + disparity_maps (bool): Enables disparity maps. + use_16bit (bool): Loads 16 bit (rgb) images instead of 8bit.
+ ''' + + @api_utils.disallow_positional_args + def __init__(self, right_images=False, segmentation_labels=False, + instance_ids=False, disparity_maps=False, use_16bit=False, + **kwargs): + super().__init__(**kwargs) + + self.features = ['image_left'] + if right_images: + self.features.append('image_right') + if segmentation_labels: + self.features.append('segmentation_label') + if instance_ids: + self.features.append('instance_id') + if disparity_maps: + self.features.append('disparity_map') + + self.left_image_string = 'leftImg{}bit'.format('16' if use_16bit else '8') + self.right_image_string = 'rightImg{}bit'.format('16' if use_16bit else '8') + + +class LostAndFound(tfds.core.GeneratorBasedBuilder): + """Lost and Found Road Hazard Dataset.""" + + VERSION = tfds.core.Version('1.0.0') + + BUILDER_CONFIGS = [ + LostAndFoundConfig( + name='semantic_segmentation', + description='Lost and Found semantic segmentation dataset.', + version="1.0.0", + right_images=False, + segmentation_labels=True, + instance_ids=False, + disparity_maps=False, + use_16bit=False, + ), + LostAndFoundConfig( + name='stereo_disparity', + description='Lost and Found stereo images and disparity maps.', + version="1.0.0", + right_images=True, + segmentation_labels=False, + instance_ids=False, + disparity_maps=True, + use_16bit=False, + ), + LostAndFoundConfig( + name='full', + description='Full Lost and Found dataset.', + version="1.0.0", + right_images=True, + segmentation_labels=True, + instance_ids=True, + disparity_maps=True, + use_16bit=False, + ), + LostAndFoundConfig( + name='full_16bit', + description='Full Lost and Found dataset.', + version="1.0.0", + right_images=True, + segmentation_labels=True, + instance_ids=True, + disparity_maps=True, + use_16bit=True, + )] + + def _info(self): + possible_features = { + 'image_left': tfds.features.Image(shape=(1024, 2048, 3), + encoding_format='png'), + 'image_right': tfds.features.Image(shape=(1024, 2048, 3), + encoding_format='png'), + 
'segmentation_label': tfds.features.Image(shape=(1024, 2048, 1), + encoding_format='png'), + 'instance_id': tfds.features.Image(shape=(1024, 2048, 1), + encoding_format='png'), + 'disparity_map': tfds.features.Image(shape=(1024, 2048, 1), + encoding_format='png')} + return tfds.core.DatasetInfo( + builder=self, + # This is the description that will appear on the datasets page. + description=_DESCRIPTION, + # tfds.features.FeatureConnectors + features=tfds.features.FeaturesDict({ + 'image_id': tfds.features.Text(), + **{feat: possible_features[feat] + for feat in self.builder_config.features}}), + # Homepage of the dataset for documentation + urls=['http://www.6d-vision.com/lostandfounddataset'], + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + base_url = 'http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/{}.zip' + download_urls = { + 'image_left': base_url.format(self.builder_config.left_image_string)} + if 'image_right' in self.builder_config.features: + download_urls['image_right'] = base_url.format( + self.builder_config.right_image_string) + if 'segmentation_label' in self.builder_config.features \ + or 'instance_id' in self.builder_config.features: + download_urls['gt'] = base_url.format('gtCoarse') + if 'disparity_map' in self.builder_config.features: + download_urls['disparity_map'] = base_url.format('disparity') + # split into two steps to save space for testing data + dl_paths = dl_manager.download(download_urls) + dl_paths = dl_manager.extract(dl_paths) + + # point segmentation label and instance IDs both to directory for ground-truth + if 'gt' in dl_paths: + dl_paths['segmentation_label'] = dl_paths['gt'] + dl_paths['instance_id'] = dl_paths['gt'] + + # first directory in the zipfile, dependent on feature to load + sub_dirs = { + 'image_left': self.builder_config.left_image_string, + 'image_right': self.builder_config.right_image_string, + 'segmentation_label': 'gtCoarse', + 'instance_id': 
'gtCoarse', + 'disparity_map': 'disparity'} + + return [ + tfds.core.SplitGenerator( + name=tfds.Split.TRAIN, + # These kwargs will be passed to _generate_examples + gen_kwargs={feat: path.join(dl_paths[feat], sub_dirs[feat], 'train') + for feat in self.builder_config.features}, + ), + tfds.core.SplitGenerator( + name=tfds.Split.TEST, + # These kwargs will be passed to _generate_examples + gen_kwargs={feat: path.join(dl_paths[feat], sub_dirs[feat], 'test') + for feat in self.builder_config.features}, + ) + ] + + def _generate_examples(self, **paths): + """Yields examples.""" + # different file-suffixes dependent on the feature to load + file_suffix = { + 'image_left': self.builder_config.left_image_string, + 'image_right': self.builder_config.right_image_string, + 'segmentation_label': 'gtCoarse_labelIds', + 'instance_id': 'gtCoarse_instanceIds', + 'disparity_map': 'disparity'} + + for scene_id in tf.io.gfile.listdir(paths['image_left']): + paths_city_root = {feat: path.join(feat_dir, scene_id) + for feat, feat_dir in paths.items()} + + left_city_root = paths_city_root['image_left'] + for left_img in tf.io.gfile.listdir(left_city_root): + image_id = _get_id_from_left_image(left_img) + + features = { + 'image_id': image_id, + **{feat: path.join(paths_city_root[feat], + '{}_{}.png'.format(image_id, file_suffix[feat])) + for feat in paths}} + + yield image_id, features + +# Helper functions + + +LEFT_IMAGE_FILE_RE = re.compile(r'(.+)_leftImg(?:8|16)bit\.png') + + +def _get_id_from_left_image(left_image): + '''Returns the id of an image file. Used to associate an image file + with its corresponding label. 
+ Example: + 'bonn_000001_000019_leftImg8bit' -> 'bonn_000001_000019' + ''' + return LEFT_IMAGE_FILE_RE.match(left_image).group(1) diff --git a/tensorflow_datasets/image/lost_and_found_test.py b/tensorflow_datasets/image/lost_and_found_test.py new file mode 100644 index 00000000000..9600c30f569 --- /dev/null +++ b/tensorflow_datasets/image/lost_and_found_test.py @@ -0,0 +1,61 @@ +"""Tests for LostAndFound dataset module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from tensorflow_datasets import testing +from tensorflow_datasets.image import lost_and_found +from tensorflow_datasets.testing.cityscapes import generate_ids, create_zipfile + + +class LostAndFoundTest(testing.DatasetBuilderTestCase): + DATASET_CLASS = lost_and_found.LostAndFound + BUILDER_CONFIG_NAMES_TO_TEST = ['semantic_segmentation', 'full'] + SPLITS = { + "train": 4, # Number of fake train example + "test": 2, # Number of fake test example + } + # files as generated by fake data functions below + DL_EXTRACT_RESULT = { + 'image_left': 'leftImg8bit.zip', + 'image_right': 'rightImg8bit.zip', + 'disparity_map': 'disparity.zip', + 'gt': 'gtCoarse.zip'} + + +if __name__ == "__main__": + tf.compat.v1.enable_eager_execution() + + # create fake files + example_dir = ('tensorflow_datasets/testing/test_data/fake_examples/' + 'lost_and_found') + testing.test_utils.remake_dir(example_dir) + base_path = example_dir + '/{}.zip' + # generate image ids matching between zipfiles + train_ids = [*generate_ids('01_Turmstr_17'), + *generate_ids('02_Goethe_Str_6')] + test_ids = list(generate_ids('03_Schlossallee_1')) + splits = {'train': train_ids, 'test': test_ids} + with tf.Graph().as_default(): + create_zipfile(base_path.format('leftImg8bit'), + splits_with_ids=splits, + suffixes=['leftImg8bit']) + create_zipfile(base_path.format('gtCoarse'), + splits_with_ids=splits, + suffixes=['gtCoarse_instanceIds', + 
'gtCoarse_labelIds', + 'gtCoarse_color']) + create_zipfile(base_path.format('rightImg8bit'), + splits_with_ids=splits, + suffixes=['rightImg8bit']) + create_zipfile(base_path.format('disparity'), + splits_with_ids=splits, + suffixes=['disparity']) + + testing.test_main() + + # remove fake files + testing.test_utils.remake_dir(example_dir) diff --git a/tensorflow_datasets/testing/cityscapes.py b/tensorflow_datasets/testing/cityscapes.py new file mode 100644 index 00000000000..ce128c1303e --- /dev/null +++ b/tensorflow_datasets/testing/cityscapes.py @@ -0,0 +1,54 @@ +"""Helper functions to generate fake Cityscapes-like data for testing.""" + +import re +from os import path +from zipfile import ZipFile +from random import randint + +from tensorflow_datasets.testing.fake_data_utils import get_random_png + + +CITY_IN_ID_RE = re.compile(r'(.+)_[0-9]+_[0-9]+') + + +def generate_ids(city, num=2): + """ Generates image ids following the format of the cityscapes dataset. + + Args: + city (str): The city/scene the ids belong to, used as a prefix to the id. + num (int): Number of random ids to generate. + Returns: + Generator for id strings. + """ + for _ in range(num): + yield '{}_{:06d}_{:06d}'.format(city, randint(0, 999999), + randint(0, 999999)) + + +def create_zipfile(zip_filepath, splits_with_ids, suffixes, maindir=None): + """ + Generates a zipfile with a cityscapes-like file structure and random pngs. + + Args: + zip_filepath (str): filepath to the zip archive that will be created + splits_with_ids (Dict[str, List[str]]): data-splits like 'train' or 'val' + that map to a list of image ids + suffixes (List[str]): suffix per modality that should be created e.g. 
+ 'leftImg8bit' + maindir (str): name of the root directory of the zipfile, defaults to the + name of the zipfile + """ + with ZipFile(zip_filepath, 'w') as z: + for split, ids in splits_with_ids.items(): + if maindir is None: + maindir = path.basename(zip_filepath).strip('.zip') + split = path.join(maindir, split) + for img_id in ids: + city = CITY_IN_ID_RE.match(img_id).group(1) + for suffix in suffixes: + if 'Img' in suffix: + img = get_random_png(height=1024, width=2048, channels=3) + else: + img = get_random_png(height=1024, width=2048, channels=1) + z.write(img, + path.join(split, city, '{}_{}.png'.format(img_id, suffix))) diff --git a/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/disparity_trainextra.zip b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/disparity_trainextra.zip new file mode 100644 index 00000000000..e5696d97c37 Binary files /dev/null and b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/disparity_trainextra.zip differ diff --git a/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/disparity_trainvaltest.zip b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/disparity_trainvaltest.zip new file mode 100644 index 00000000000..98fd116e1af Binary files /dev/null and b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/disparity_trainvaltest.zip differ diff --git a/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/gtCoarse.zip b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/gtCoarse.zip new file mode 100644 index 00000000000..0124e0198ea Binary files /dev/null and b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/gtCoarse.zip differ diff --git a/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/gtFine_trainvaltest.zip b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/gtFine_trainvaltest.zip new file mode 100644 index 00000000000..2fd33b4493a Binary files /dev/null and 
b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/gtFine_trainvaltest.zip differ diff --git a/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/leftImg8bit_trainextra.zip b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/leftImg8bit_trainextra.zip new file mode 100644 index 00000000000..7df50ad56df Binary files /dev/null and b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/leftImg8bit_trainextra.zip differ diff --git a/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/leftImg8bit_trainvaltest.zip b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/leftImg8bit_trainvaltest.zip new file mode 100644 index 00000000000..a9d1b5ee4e3 Binary files /dev/null and b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/leftImg8bit_trainvaltest.zip differ diff --git a/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/rightImg8bit_trainextra.zip b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/rightImg8bit_trainextra.zip new file mode 100644 index 00000000000..6b1d5f954a5 Binary files /dev/null and b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/rightImg8bit_trainextra.zip differ diff --git a/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/rightImg8bit_trainvaltest.zip b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/rightImg8bit_trainvaltest.zip new file mode 100644 index 00000000000..dd5472eec36 Binary files /dev/null and b/tensorflow_datasets/testing/test_data/fake_examples/cityscapes/rightImg8bit_trainvaltest.zip differ diff --git a/tensorflow_datasets/url_checksums/lost_and_found.txt b/tensorflow_datasets/url_checksums/lost_and_found.txt new file mode 100644 index 00000000000..7fb974baaf3 --- /dev/null +++ b/tensorflow_datasets/url_checksums/lost_and_found.txt @@ -0,0 +1,6 @@ +http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/disparity.zip 1461824611 
1e06350d082f3bd686ff889940ad60ed85bfb1e8aa691a547a259c52fa3b60b1 +http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/gtCoarse.zip 37756896 53b6d3ab000f08b1fb59d70c1398eecc4d82a7baf4e9cf74fbf60d1858abe9ac +http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/leftImg16bit.zip 18039875634 f3530514163f30ccafc05210b643ea690c4ba17cdb4497d8a7d4f9c324c71da8 +http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/leftImg8bit.zip 5802953400 307f66002023ab597d309963b94990f5b9a8e5735ee729c3292647a66e9f2b18 +http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/rightImg16bit.zip 17938768019 db492fc9b9e0adf7a662cd589a6c8ea5cd8cf68d08600b2099d0cd7e0c58f6dd +http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/rightImg8bit.zip 5787134165 d5219f49e730a1ce064a9d118227e71cd39681bcc7f8a87ab4061c86cd7dc6fb