From 3de425d51782cb4395d634ed14e51efed2f4e28b Mon Sep 17 00:00:00 2001 From: Hermann Date: Tue, 27 Aug 2019 10:58:31 +0200 Subject: [PATCH 01/11] sort imports --- tensorflow_datasets/image/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow_datasets/image/__init__.py b/tensorflow_datasets/image/__init__.py index 625c2b3f495..f3344f86841 100644 --- a/tensorflow_datasets/image/__init__.py +++ b/tensorflow_datasets/image/__init__.py @@ -53,6 +53,7 @@ from tensorflow_datasets.image.imagenet2012_corrupted import Imagenet2012Corrupted from tensorflow_datasets.image.kitti import Kitti from tensorflow_datasets.image.lfw import LFW +from tensorflow_datasets.image.lost_and_found import LostAndFound from tensorflow_datasets.image.lsun import Lsun from tensorflow_datasets.image.malaria import Malaria from tensorflow_datasets.image.mnist import EMNIST From 818744cb23505f02898e785f20a045b9e2964ef8 Mon Sep 17 00:00:00 2001 From: Hermann Date: Tue, 27 Aug 2019 10:55:21 +0200 Subject: [PATCH 02/11] ititial working version of lostandfound --- tensorflow_datasets/image/__init__.py | 1 + tensorflow_datasets/image/lost_and_found.py | 228 ++++++++++++++++++ .../image/lost_and_found_test.py | 60 +++++ tensorflow_datasets/testing/cityscapes.py | 59 +++++ .../url_checksums/lost_and_found.txt | 6 + 5 files changed, 354 insertions(+) create mode 100644 tensorflow_datasets/image/lost_and_found.py create mode 100644 tensorflow_datasets/image/lost_and_found_test.py create mode 100644 tensorflow_datasets/testing/cityscapes.py create mode 100644 tensorflow_datasets/url_checksums/lost_and_found.txt diff --git a/tensorflow_datasets/image/__init__.py b/tensorflow_datasets/image/__init__.py index f3344f86841..ed9c567f9f2 100644 --- a/tensorflow_datasets/image/__init__.py +++ b/tensorflow_datasets/image/__init__.py @@ -83,3 +83,4 @@ from tensorflow_datasets.image.uc_merced import UcMerced from tensorflow_datasets.image.visual_domain_decathlon import VisualDomainDecathlon from 
tensorflow_datasets.image.voc import Voc2007 +from tensorflow_datasets.image.lost_and_found import LostAndFound # TODO(lost_and_found) Sort alphabetically diff --git a/tensorflow_datasets/image/lost_and_found.py b/tensorflow_datasets/image/lost_and_found.py new file mode 100644 index 00000000000..725a7c5c11e --- /dev/null +++ b/tensorflow_datasets/image/lost_and_found.py @@ -0,0 +1,228 @@ +"""TODO(lost_and_found): Add a description here.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from os import path, listdir +import re + +import tensorflow as tf +import tensorflow_datasets as tfds +from tensorflow_datasets.core import api_utils + + +_CITATION = """ +@inproceedings{pinggera2016lost, + title={Lost and found: detecting small road hazards for self-driving vehicles}, + author={Pinggera, Peter and Ramos, Sebastian and Gehrig, Stefan and Franke, Uwe and Rother, Carsten and Mester, Rudolf}, + booktitle={2016 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, + year={2016} +} +""" + +_DESCRIPTION = """ +The LostAndFound Dataset addresses the problem of detecting unexpected small obstacles on +the road often caused by lost cargo. The dataset comprises 112 stereo video sequences +with 2104 annotated frames (picking roughly every tenth frame from the recorded data). + +The dataset is designed analogous to the 'Cityscapes' dataset. The datset provides: +- stereo image pairs in either 8 or 16 bit color resolution +- precomputed disparity maps +- coarse semantic labels for objects and street +""" + + +class LostAndFoundConfig(tfds.core.BuilderConfig): + '''BuilderConfig for 'Lost and Found' + + Args: + right_images (bool): Enables right images for stereo image tasks. + segmentation_labels (bool): Enables image segmentation labels. + disparity_maps (bool): Enables disparity maps. + use_16bit (bool): Loads 16 bit (rgb) images instead of 8bit. 
+ ''' + + @api_utils.disallow_positional_args + def __init__(self, right_images=False, segmentation_labels=False, instance_ids=False, + disparity_maps=False, use_16bit=False, **kwargs): + super().__init__(**kwargs) + + self.ignored_ids = set() + + self.features = ['image_left'] + if right_images: + self.features.append('image_right') + # this image causes IO errors + # self.ignored_ids.add('06_Galgenbergstr_40_000000_000040') + if segmentation_labels: + self.features.append('segmentation_label') + if disparity_maps: + self.features.append('disparity_map') + + self.left_image_string = 'leftImg{}bit'.format('16' if use_16bit else '8') + self.right_image_string = 'rightImg{}bit'.format('16' if use_16bit else '8') + + +class LostAndFound(tfds.core.GeneratorBasedBuilder): + """TODO(lost_and_found): Short description of my dataset.""" + + VERSION = tfds.core.Version('1.0.0') + + BUILDER_CONFIGS = [ + LostAndFoundConfig( + name='semantic_segmentation', + description='Lost and Found semantic segmentation dataset.', + version="1.0.0", + right_images=False, + segmentation_labels=True, + instance_ids=False, + disparity_maps=False, + use_16bit=False, + ), + LostAndFoundConfig( + name='stereo_disparity', + description='Lost and Found stereo images and disparity maps.', + version="1.0.0", + right_images=True, + segmentation_labels=False, + instance_ids=False, + disparity_maps=True, + use_16bit=False, + ), + LostAndFoundConfig( + name='full', + description='Full Lost and Found dataset.', + version="1.0.0", + right_images=True, + segmentation_labels=True, + instance_ids=True, + disparity_maps=True, + use_16bit=False, + ), + LostAndFoundConfig( + name='full_16bit', + description='Full Lost and Found dataset.', + version="1.0.0", + right_images=True, + segmentation_labels=True, + instance_ids=True, + disparity_maps=True, + use_16bit=True, + )] + + def _info(self): + possible_features = { + 'image_left': tfds.features.Image(shape=(1024, 2048, 3), encoding_format='png'), + 
'image_right': tfds.features.Image(shape=(1024, 2048, 3), encoding_format='png'), + 'segmentation_label': tfds.features.Image(shape=(1024, 2048, 1), + encoding_format='png'), + 'disparity_map': tfds.features.Image(shape=(1024, 2048, 1), + encoding_format='png')} + return tfds.core.DatasetInfo( + builder=self, + # This is the description that will appear on the datasets page. + description=_DESCRIPTION, + # tfds.features.FeatureConnectors + features=tfds.features.FeaturesDict({ + 'image_id': tfds.features.Text(), + **{feat: possible_features[feat] for feat in self.builder_config.features}}), + # Homepage of the dataset for documentation + urls=['http://www.6d-vision.com/lostandfounddataset'], + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + # TODO(lost_and_found): Downloads the data and defines the splits + # dl_manager is a tfds.download.DownloadManager that can be used to + # download and extract URLs + base_url = 'http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/{}.zip' + download_urls = { + 'image_left': base_url.format(self.builder_config.left_image_string)} + if 'image_right' in self.builder_config.features: + download_urls['image_right'] = base_url.format( + self.builder_config.right_image_string) + if 'segmentation_label' in self.builder_config.features \ + or 'instance_id' in self.builder_config.features: + download_urls['gt'] = base_url.format('gtCoarse') + if 'disparity_map' in self.builder_config.features: + download_urls['disparity_map'] = base_url.format('disparity') + # split into two steps to save space for testing data + dl_paths = dl_manager.download(download_urls) + dl_paths = dl_manager.extract(dl_paths) + + # point segmentation label and instance IDs both to directory for ground-truth + if 'gt' in dl_paths: + dl_paths['segmentation_label'] = dl_paths['gt'] + dl_paths['instance_id'] = dl_paths['gt'] + + # first directory in the zipfile, dependent on feature to load + sub_dirs = { + 
'image_left': self.builder_config.left_image_string, + 'image_right': self.builder_config.right_image_string, + 'segmentation_label': 'gtCoarse', + 'instance_id': 'gtCoarse', + 'disparity_map': 'disparity'} + + return [ + tfds.core.SplitGenerator( + name=tfds.Split.TRAIN, + # These kwargs will be passed to _generate_examples + gen_kwargs={feat: path.join(dl_paths[feat], sub_dirs[feat], 'train') + for feat in self.builder_config.features}, + ), + tfds.core.SplitGenerator( + name=tfds.Split.TEST, + # These kwargs will be passed to _generate_examples + gen_kwargs={feat: path.join(dl_paths[feat], sub_dirs[feat], 'test') + for feat in self.builder_config.features}, + ) + ] + + def _generate_examples(self, **paths): + """Yields examples.""" + # different file-suffixes dependent on the feature to load + file_suffix = { + 'image_left': self.builder_config.left_image_string, + 'image_right': self.builder_config.right_image_string, + 'segmentation_label': 'gtCoarse_labelIds', + 'instance_id': 'gtCoarse_instanceIds', + 'disparity_map': 'disparity'} + + print(paths['image_left']) + + for scene_id in tf.io.gfile.listdir(paths['image_left']): + paths_city_root = {feat: path.join(feat_dir, scene_id) + for feat, feat_dir in paths.items()} + + left_city_root = paths_city_root['image_left'] + for left_img in tf.io.gfile.listdir(left_city_root): + image_id = _get_id_from_left_image(left_img) + + if image_id in self.builder_config.ignored_ids: + continue + + print(listdir(paths_city_root['segmentation_label'])) + + features = { + 'image_id': image_id, + **{feat: path.join(paths_city_root[feat], + '{}_{}.png'.format(image_id, file_suffix[feat])) + for feat in paths}} + + yield image_id, features + +# Helper functions + + +LEFT_IMAGE_FILE_RE = re.compile(r'(.+)_leftImg(?:8|16)bit\.png') + + +def _get_id_from_left_image(left_image): + '''Returns the id of an image file. Used to associate an image file + with its corresponding label. 
"""Tests for LostAndFound dataset module."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow_datasets import testing
from tensorflow_datasets.image import lost_and_found
from tensorflow_datasets.testing.cityscapes import generate_ids, create_zipfile


class LostAndFoundTest(testing.DatasetBuilderTestCase):
  """Builder smoke test: checks split sizes and download bookkeeping."""

  DATASET_CLASS = lost_and_found.LostAndFound
  BUILDER_CONFIG_NAMES_TO_TEST = ['semantic_segmentation', 'full']
  SPLITS = {
      "train": 4,  # Number of fake train examples.
      "test": 2,  # Number of fake test examples.
  }
  # Fake archive names as created in __main__ below; keys mirror the dict
  # passed to dl_manager.download() in LostAndFound._split_generators.
  DL_EXTRACT_RESULT = {
      'image_left': 'leftImg8bit.zip',
      'image_right': 'rightImg8bit.zip',
      'disparity_map': 'disparity.zip',
      'gt': 'gtCoarse.zip'}


if __name__ == "__main__":
  tf.compat.v1.enable_eager_execution()

  # Create the fake zip archives; they are removed again after the tests ran.
  example_dir = ('tensorflow_datasets/testing/test_data/fake_examples/'
                 'lost_and_found')
  testing.test_utils.remake_dir(example_dir)
  base_path = example_dir + '/{}.zip'
  # Image ids have to match across all four archives.
  train_ids = [*generate_ids('01_Turmstr_17'),
               *generate_ids('02_Goethe_Str_6')]
  test_ids = list(generate_ids('03_Schlossallee_1'))
  splits = {'train': train_ids, 'test': test_ids}
  # Graph mode: the fake-png helpers build TF ops and run them in a session.
  with tf.Graph().as_default():
    create_zipfile(base_path.format('leftImg8bit'),
                   splits_with_ids=splits,
                   suffixes=['leftImg8bit'])
    create_zipfile(base_path.format('gtCoarse'),
                   splits_with_ids=splits,
                   suffixes=['gtCoarse_instanceIds',
                             'gtCoarse_labelIds',
                             'gtCoarse_color'])
    create_zipfile(base_path.format('rightImg8bit'),
                   splits_with_ids=splits,
                   suffixes=['rightImg8bit'])
    create_zipfile(base_path.format('disparity'),
                   splits_with_ids=splits,
                   suffixes=['disparity'])

  testing.test_main()

  # Clean up the fake archives.
  testing.test_utils.remake_dir(example_dir)
+ +"""Helper functions to generate fake Cityscapes-like zip archives for testing.""" + +from tensorflow_datasets.testing.fake_data_utils import get_random_png + +import re +from os import path +from zipfile import ZipFile +from random import randint + + +CITY_IN_ID_RE = re.compile(r'(.+)_[0-9]+_[0-9]+') + + +def generate_ids(city, num=2): + for _ in range(num): + yield '{}_{:06d}_{:06d}'.format(city, randint(0, 999999), randint(0, 999999)) + + +def create_zipfile(zip_filepath, splits_with_ids, suffixes=['leftImg8bit'], + maindir=None): + """ + Generates a zipfile with a cityscapes-like file structure and random pngs. + + Args: + zip_filepath (str): filepath to the zip archive that will be created + splits_with_ids (Dict[str, List[str]]): data-splits like 'train' or 'val' that map to + a list of image ids + suffixes (List[str]): suffix per modality that should be created e.g. 'leftImg8bit' + maindir (str): name of the root directory of the zipfile, defaults to the name of the + zipfile + """ + with ZipFile(zip_filepath, 'w') as z: + for split, ids in splits_with_ids.items(): + if maindir is None: + maindir = path.basename(zip_filepath).strip('.zip') + split = path.join(maindir, split) + for img_id in ids: + city = CITY_IN_ID_RE.match(img_id).group(1) + for suffix in suffixes: + if 'Img' in suffix: + img = get_random_png(height=1024, width=2048, channels=3) + else: + img = get_random_png(height=1024, width=2048, channels=1) + z.write(img, path.join(split, city, '{}_{}.png'.format(img_id, suffix))) diff --git a/tensorflow_datasets/url_checksums/lost_and_found.txt b/tensorflow_datasets/url_checksums/lost_and_found.txt new file mode 100644 index 00000000000..7fb974baaf3 --- /dev/null +++ b/tensorflow_datasets/url_checksums/lost_and_found.txt @@ -0,0 +1,6 @@ +http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/disparity.zip 1461824611 1e06350d082f3bd686ff889940ad60ed85bfb1e8aa691a547a259c52fa3b60b1 
+http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/gtCoarse.zip 37756896 53b6d3ab000f08b1fb59d70c1398eecc4d82a7baf4e9cf74fbf60d1858abe9ac +http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/leftImg16bit.zip 18039875634 f3530514163f30ccafc05210b643ea690c4ba17cdb4497d8a7d4f9c324c71da8 +http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/leftImg8bit.zip 5802953400 307f66002023ab597d309963b94990f5b9a8e5735ee729c3292647a66e9f2b18 +http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/rightImg16bit.zip 17938768019 db492fc9b9e0adf7a662cd589a6c8ea5cd8cf68d08600b2099d0cd7e0c58f6dd +http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/rightImg8bit.zip 5787134165 d5219f49e730a1ce064a9d118227e71cd39681bcc7f8a87ab4061c86cd7dc6fb From 43d2f999fbe612ab2067606e845e765817b587da Mon Sep 17 00:00:00 2001 From: Hermann Date: Tue, 27 Aug 2019 11:06:28 +0200 Subject: [PATCH 03/11] refine docstrings, remove all todos --- tensorflow_datasets/image/lost_and_found.py | 12 +++++----- .../image/lost_and_found_test.py | 2 +- tensorflow_datasets/testing/cityscapes.py | 23 +++++++------------ 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/tensorflow_datasets/image/lost_and_found.py b/tensorflow_datasets/image/lost_and_found.py index 725a7c5c11e..38fdca151b9 100644 --- a/tensorflow_datasets/image/lost_and_found.py +++ b/tensorflow_datasets/image/lost_and_found.py @@ -1,4 +1,4 @@ -"""TODO(lost_and_found): Add a description here.""" +"""Lost and Found Road Hazard Dataset.""" from __future__ import absolute_import from __future__ import division @@ -30,6 +30,8 @@ - stereo image pairs in either 8 or 16 bit color resolution - precomputed disparity maps - coarse semantic labels for objects and street + +Descriptions of the labels are given here: http://www.6d-vision.com/laf_table.pdf """ @@ -39,6 +41,7 @@ class LostAndFoundConfig(tfds.core.BuilderConfig): Args: right_images (bool): Enables right images for stereo image tasks. 
segmentation_labels (bool): Enables image segmentation labels. + instance_ids (bool): Enables instance-id labels. disparity_maps (bool): Enables disparity maps. use_16bit (bool): Loads 16 bit (rgb) images instead of 8bit. ''' @@ -65,7 +68,7 @@ def __init__(self, right_images=False, segmentation_labels=False, instance_ids=F class LostAndFound(tfds.core.GeneratorBasedBuilder): - """TODO(lost_and_found): Short description of my dataset.""" + """Lost and Found Road Hazard Dataset.""" VERSION = tfds.core.Version('1.0.0') @@ -134,10 +137,7 @@ def _info(self): def _split_generators(self, dl_manager): """Returns SplitGenerators.""" - # TODO(lost_and_found): Downloads the data and defines the splits - # dl_manager is a tfds.download.DownloadManager that can be used to - # download and extract URLs - base_url = 'http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/{}.zip' + base_url = 'http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/{}.zip' download_urls = { 'image_left': base_url.format(self.builder_config.left_image_string)} if 'image_right' in self.builder_config.features: diff --git a/tensorflow_datasets/image/lost_and_found_test.py b/tensorflow_datasets/image/lost_and_found_test.py index cf9fd9b1423..3bb27f8b3bc 100644 --- a/tensorflow_datasets/image/lost_and_found_test.py +++ b/tensorflow_datasets/image/lost_and_found_test.py @@ -1,4 +1,4 @@ -"""TODO(lost_and_found): Add a description here.""" +"""Tests for LostAndFound dataset module.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow_datasets/testing/cityscapes.py b/tensorflow_datasets/testing/cityscapes.py index a5446f06f20..f428b42cd5a 100644 --- a/tensorflow_datasets/testing/cityscapes.py +++ b/tensorflow_datasets/testing/cityscapes.py @@ -1,18 +1,3 @@ -# coding=utf-8 -# Copyright 2019 The TensorFlow Datasets Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - """Helper functions to generate fake Cityscapes-like zip archives for testing.""" from tensorflow_datasets.testing.fake_data_utils import get_random_png @@ -27,6 +12,14 @@ def generate_ids(city, num=2): + """ Generates image ids following the format of the cityscapes dataset. + + Args: + city (str): The city/scene the ids belong to, used as a prefix to the id. + num (int): Number of random ids to generate. + Returns: + Generator for id strings. + """ for _ in range(num): yield '{}_{:06d}_{:06d}'.format(city, randint(0, 999999), randint(0, 999999)) From 37a8f28fe1788a64c38d29d825c8c6cc579928a6 Mon Sep 17 00:00:00 2001 From: Hermann Date: Tue, 27 Aug 2019 13:06:08 +0200 Subject: [PATCH 04/11] fix style errors --- tensorflow_datasets/image/lost_and_found.py | 35 +++++++-------- .../image/lost_and_found_test.py | 43 ++++++++++--------- tensorflow_datasets/testing/cityscapes.py | 26 +++++------ 3 files changed, 52 insertions(+), 52 deletions(-) diff --git a/tensorflow_datasets/image/lost_and_found.py b/tensorflow_datasets/image/lost_and_found.py index 38fdca151b9..4e746bb65b2 100644 --- a/tensorflow_datasets/image/lost_and_found.py +++ b/tensorflow_datasets/image/lost_and_found.py @@ -4,7 +4,7 @@ from __future__ import division from __future__ import print_function -from os import path, listdir +from os import path import re import tensorflow as tf @@ -47,19 +47,18 @@ class LostAndFoundConfig(tfds.core.BuilderConfig): ''' @api_utils.disallow_positional_args - def __init__(self, right_images=False, segmentation_labels=False, instance_ids=False, - 
disparity_maps=False, use_16bit=False, **kwargs): + def __init__(self, right_images=False, segmentation_labels=False, + instance_ids=False, disparity_maps=False, use_16bit=False, + **kwargs): super().__init__(**kwargs) - self.ignored_ids = set() - self.features = ['image_left'] if right_images: self.features.append('image_right') - # this image causes IO errors - # self.ignored_ids.add('06_Galgenbergstr_40_000000_000040') if segmentation_labels: self.features.append('segmentation_label') + if instance_ids: + self.features.append('instance_id') if disparity_maps: self.features.append('disparity_map') @@ -116,10 +115,14 @@ class LostAndFound(tfds.core.GeneratorBasedBuilder): def _info(self): possible_features = { - 'image_left': tfds.features.Image(shape=(1024, 2048, 3), encoding_format='png'), - 'image_right': tfds.features.Image(shape=(1024, 2048, 3), encoding_format='png'), + 'image_left': tfds.features.Image(shape=(1024, 2048, 3), + encoding_format='png'), + 'image_right': tfds.features.Image(shape=(1024, 2048, 3), + encoding_format='png'), 'segmentation_label': tfds.features.Image(shape=(1024, 2048, 1), encoding_format='png'), + 'instance_id': tfds.features.Image(shape=(1024, 2048, 1), + encoding_format='png'), 'disparity_map': tfds.features.Image(shape=(1024, 2048, 1), encoding_format='png')} return tfds.core.DatasetInfo( @@ -129,7 +132,8 @@ def _info(self): # tfds.features.FeatureConnectors features=tfds.features.FeaturesDict({ 'image_id': tfds.features.Text(), - **{feat: possible_features[feat] for feat in self.builder_config.features}}), + **{feat: possible_features[feat] + for feat in self.builder_config.features}}), # Homepage of the dataset for documentation urls=['http://www.6d-vision.com/lostandfounddataset'], citation=_CITATION, @@ -137,14 +141,14 @@ def _info(self): def _split_generators(self, dl_manager): """Returns SplitGenerators.""" - base_url = 'http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/{}.zip' + base_url = 
'http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/{}.zip' download_urls = { 'image_left': base_url.format(self.builder_config.left_image_string)} if 'image_right' in self.builder_config.features: download_urls['image_right'] = base_url.format( self.builder_config.right_image_string) if 'segmentation_label' in self.builder_config.features \ - or 'instance_id' in self.builder_config.features: + or 'instance_id' in self.builder_config.features: download_urls['gt'] = base_url.format('gtCoarse') if 'disparity_map' in self.builder_config.features: download_urls['disparity_map'] = base_url.format('disparity') @@ -190,8 +194,6 @@ def _generate_examples(self, **paths): 'instance_id': 'gtCoarse_instanceIds', 'disparity_map': 'disparity'} - print(paths['image_left']) - for scene_id in tf.io.gfile.listdir(paths['image_left']): paths_city_root = {feat: path.join(feat_dir, scene_id) for feat, feat_dir in paths.items()} @@ -200,11 +202,6 @@ def _generate_examples(self, **paths): for left_img in tf.io.gfile.listdir(left_city_root): image_id = _get_id_from_left_image(left_img) - if image_id in self.builder_config.ignored_ids: - continue - - print(listdir(paths_city_root['segmentation_label'])) - features = { 'image_id': image_id, **{feat: path.join(paths_city_root[feat], diff --git a/tensorflow_datasets/image/lost_and_found_test.py b/tensorflow_datasets/image/lost_and_found_test.py index 3bb27f8b3bc..9600c30f569 100644 --- a/tensorflow_datasets/image/lost_and_found_test.py +++ b/tensorflow_datasets/image/lost_and_found_test.py @@ -4,7 +4,6 @@ from __future__ import division from __future__ import print_function -from os import path, listdir, remove import tensorflow as tf from tensorflow_datasets import testing @@ -19,11 +18,7 @@ class LostAndFoundTest(testing.DatasetBuilderTestCase): "train": 4, # Number of fake train example "test": 2, # Number of fake test example } - - # If you are calling `download/download_and_extract` with a dict, like: - # 
dl_manager.download({'some_key': 'http://a.org/out.txt', ...}) - # then the tests needs to provide the fake output paths relative to the - # fake data directory + # files as generated by fake data functions below DL_EXTRACT_RESULT = { 'image_left': 'leftImg8bit.zip', 'image_right': 'rightImg8bit.zip', @@ -35,26 +30,32 @@ class LostAndFoundTest(testing.DatasetBuilderTestCase): tf.compat.v1.enable_eager_execution() # create fake files - testing.test_utils.remake_dir( - 'tensorflow_datasets/testing/test_data/fake_examples/lost_and_found') - base_path = 'tensorflow_datasets/testing/test_data/fake_examples/lost_and_found/{}.zip' + example_dir = ('tensorflow_datasets/testing/test_data/fake_examples/' + 'lost_and_found') + testing.test_utils.remake_dir(example_dir) + base_path = example_dir + '/{}.zip' # generate image ids matching between zipfiles - train_ids = [*generate_ids('01_Turmstr_17'), *generate_ids('02_Goethe_Str_6')] + train_ids = [*generate_ids('01_Turmstr_17'), + *generate_ids('02_Goethe_Str_6')] test_ids = list(generate_ids('03_Schlossallee_1')) splits = {'train': train_ids, 'test': test_ids} with tf.Graph().as_default(): - create_zipfile( - base_path.format('leftImg8bit'), splits_with_ids=splits, suffixes=['leftImg8bit']) - create_zipfile( - base_path.format('gtCoarse'), splits_with_ids=splits, - suffixes=['gtCoarse_instanceIds', 'gtCoarse_labelIds', 'gtCoarse_color']) - create_zipfile( - base_path.format('rightImg8bit'), splits_with_ids=splits, suffixes=['rightImg8bit']) - create_zipfile( - base_path.format('disparity'), splits_with_ids=splits, suffixes=['disparity']) + create_zipfile(base_path.format('leftImg8bit'), + splits_with_ids=splits, + suffixes=['leftImg8bit']) + create_zipfile(base_path.format('gtCoarse'), + splits_with_ids=splits, + suffixes=['gtCoarse_instanceIds', + 'gtCoarse_labelIds', + 'gtCoarse_color']) + create_zipfile(base_path.format('rightImg8bit'), + splits_with_ids=splits, + suffixes=['rightImg8bit']) + 
create_zipfile(base_path.format('disparity'), + splits_with_ids=splits, + suffixes=['disparity']) testing.test_main() # remove fake files - testing.test_utils.remake_dir( - 'tensorflow_datasets/testing/test_data/fake_examples/lost_and_found') + testing.test_utils.remake_dir(example_dir) diff --git a/tensorflow_datasets/testing/cityscapes.py b/tensorflow_datasets/testing/cityscapes.py index f428b42cd5a..ce128c1303e 100644 --- a/tensorflow_datasets/testing/cityscapes.py +++ b/tensorflow_datasets/testing/cityscapes.py @@ -1,12 +1,12 @@ -"""Helper functions to generate fake Cityscapes-like zip archives for testing.""" - -from tensorflow_datasets.testing.fake_data_utils import get_random_png +"""Helper functions to generate fake Cityscapes-like data for testing.""" import re from os import path from zipfile import ZipFile from random import randint +from tensorflow_datasets.testing.fake_data_utils import get_random_png + CITY_IN_ID_RE = re.compile(r'(.+)_[0-9]+_[0-9]+') @@ -21,21 +21,22 @@ def generate_ids(city, num=2): Generator for id strings. """ for _ in range(num): - yield '{}_{:06d}_{:06d}'.format(city, randint(0, 999999), randint(0, 999999)) + yield '{}_{:06d}_{:06d}'.format(city, randint(0, 999999), + randint(0, 999999)) -def create_zipfile(zip_filepath, splits_with_ids, suffixes=['leftImg8bit'], - maindir=None): +def create_zipfile(zip_filepath, splits_with_ids, suffixes, maindir=None): """ Generates a zipfile with a cityscapes-like file structure and random pngs. Args: zip_filepath (str): filepath to the zip archive that will be created - splits_with_ids (Dict[str, List[str]]): data-splits like 'train' or 'val' that map to - a list of image ids - suffixes (List[str]): suffix per modality that should be created e.g. 
'leftImg8bit' - maindir (str): name of the root directory of the zipfile, defaults to the name of the - zipfile + splits_with_ids (Dict[str, List[str]]): data-splits like 'train' or 'val' + that map to a list of image ids + suffixes (List[str]): suffix per modality that should be created e.g. + 'leftImg8bit' + maindir (str): name of the root directory of the zipfile, defaults to the + name of the zipfile """ with ZipFile(zip_filepath, 'w') as z: for split, ids in splits_with_ids.items(): @@ -49,4 +50,5 @@ def create_zipfile(zip_filepath, splits_with_ids, suffixes=['leftImg8bit'], img = get_random_png(height=1024, width=2048, channels=3) else: img = get_random_png(height=1024, width=2048, channels=1) - z.write(img, path.join(split, city, '{}_{}.png'.format(img_id, suffix))) + z.write(img, + path.join(split, city, '{}_{}.png'.format(img_id, suffix))) From 347e3f1a1d16569ab86ea1a35e2345f0d095c139 Mon Sep 17 00:00:00 2001 From: Hermann Date: Tue, 27 Aug 2019 14:04:32 +0200 Subject: [PATCH 05/11] remove double import --- tensorflow_datasets/image/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow_datasets/image/__init__.py b/tensorflow_datasets/image/__init__.py index ed9c567f9f2..f3344f86841 100644 --- a/tensorflow_datasets/image/__init__.py +++ b/tensorflow_datasets/image/__init__.py @@ -83,4 +83,3 @@ from tensorflow_datasets.image.uc_merced import UcMerced from tensorflow_datasets.image.visual_domain_decathlon import VisualDomainDecathlon from tensorflow_datasets.image.voc import Voc2007 -from tensorflow_datasets.image.lost_and_found import LostAndFound # TODO(lost_and_found) Sort alphabetically From 46fb524e075d556d25cb7f4cfa31c295f9d8aabe Mon Sep 17 00:00:00 2001 From: Hermann Date: Wed, 28 Aug 2019 10:45:32 +0200 Subject: [PATCH 06/11] remove double blank lines --- tensorflow_datasets/image/lost_and_found.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow_datasets/image/lost_and_found.py 
b/tensorflow_datasets/image/lost_and_found.py index 4e746bb65b2..58e0f9be2d2 100644 --- a/tensorflow_datasets/image/lost_and_found.py +++ b/tensorflow_datasets/image/lost_and_found.py @@ -212,10 +212,8 @@ def _generate_examples(self, **paths): # Helper functions - LEFT_IMAGE_FILE_RE = re.compile(r'(.+)_leftImg(?:8|16)bit\.png') - def _get_id_from_left_image(left_image): '''Returns the id of an image file. Used to associate an image file with its corresponding label. From 02f23e818b8591985e7610c82e3afd6c56449a24 Mon Sep 17 00:00:00 2001 From: Hermann Date: Fri, 6 Sep 2019 11:42:11 +0200 Subject: [PATCH 07/11] reduce filesize of fake pngs --- tensorflow_datasets/testing/fake_data_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow_datasets/testing/fake_data_utils.py b/tensorflow_datasets/testing/fake_data_utils.py index c8506df2d33..970e1ec864c 100644 --- a/tensorflow_datasets/testing/fake_data_utils.py +++ b/tensorflow_datasets/testing/fake_data_utils.py @@ -54,7 +54,10 @@ def get_random_jpeg(height=None, width=None, channels=CHANNELS_NB): def get_random_png(height=None, width=None, channels=CHANNELS_NB): """Returns path to PNG picture.""" - image = get_random_picture(height, width, channels) + # Big randomly generated pngs take large amounts of diskspace. + # Instead, we resize a 4x4 random image to the png size. 
+ image = get_random_picture(4, 4, channels) + image = tf.image.resize_nearest_neighbor(tf.expand_dims(image, 0), (height, width))[0] png = tf.image.encode_png(image) with utils.nogpu_session() as sess: res = sess.run(png) From ce9de6a317e3ee3d95d3a388b70a9172e41e03d0 Mon Sep 17 00:00:00 2001 From: Hermann Date: Fri, 6 Sep 2019 11:43:01 +0200 Subject: [PATCH 08/11] add fakedata to repositoy instead of creating at test time --- .../image/lost_and_found_test.py | 36 +----------------- tensorflow_datasets/testing/lost_and_found.py | 30 +++++++++++++++ .../lost_and_found/disparity.zip | Bin 0 -> 27666 bytes .../fake_examples/lost_and_found/gtCoarse.zip | Bin 0 -> 83202 bytes .../lost_and_found/leftImg8bit.zip | Bin 0 -> 53484 bytes .../lost_and_found/rightImg8bit.zip | Bin 0 -> 53509 bytes 6 files changed, 31 insertions(+), 35 deletions(-) create mode 100644 tensorflow_datasets/testing/lost_and_found.py create mode 100644 tensorflow_datasets/testing/test_data/fake_examples/lost_and_found/disparity.zip create mode 100644 tensorflow_datasets/testing/test_data/fake_examples/lost_and_found/gtCoarse.zip create mode 100644 tensorflow_datasets/testing/test_data/fake_examples/lost_and_found/leftImg8bit.zip create mode 100644 tensorflow_datasets/testing/test_data/fake_examples/lost_and_found/rightImg8bit.zip diff --git a/tensorflow_datasets/image/lost_and_found_test.py b/tensorflow_datasets/image/lost_and_found_test.py index 9600c30f569..3e233126cf3 100644 --- a/tensorflow_datasets/image/lost_and_found_test.py +++ b/tensorflow_datasets/image/lost_and_found_test.py @@ -4,11 +4,8 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf - from tensorflow_datasets import testing from tensorflow_datasets.image import lost_and_found -from tensorflow_datasets.testing.cityscapes import generate_ids, create_zipfile class LostAndFoundTest(testing.DatasetBuilderTestCase): @@ -18,7 +15,7 @@ class LostAndFoundTest(testing.DatasetBuilderTestCase): 
"train": 4, # Number of fake train example "test": 2, # Number of fake test example } - # files as generated by fake data functions below + # files as generated by fake data functions in testing/lost_and_found.py DL_EXTRACT_RESULT = { 'image_left': 'leftImg8bit.zip', 'image_right': 'rightImg8bit.zip', @@ -27,35 +24,4 @@ class LostAndFoundTest(testing.DatasetBuilderTestCase): if __name__ == "__main__": - tf.compat.v1.enable_eager_execution() - - # create fake files - example_dir = ('tensorflow_datasets/testing/test_data/fake_examples/' - 'lost_and_found') - testing.test_utils.remake_dir(example_dir) - base_path = example_dir + '/{}.zip' - # generate image ids matching between zipfiles - train_ids = [*generate_ids('01_Turmstr_17'), - *generate_ids('02_Goethe_Str_6')] - test_ids = list(generate_ids('03_Schlossallee_1')) - splits = {'train': train_ids, 'test': test_ids} - with tf.Graph().as_default(): - create_zipfile(base_path.format('leftImg8bit'), - splits_with_ids=splits, - suffixes=['leftImg8bit']) - create_zipfile(base_path.format('gtCoarse'), - splits_with_ids=splits, - suffixes=['gtCoarse_instanceIds', - 'gtCoarse_labelIds', - 'gtCoarse_color']) - create_zipfile(base_path.format('rightImg8bit'), - splits_with_ids=splits, - suffixes=['rightImg8bit']) - create_zipfile(base_path.format('disparity'), - splits_with_ids=splits, - suffixes=['disparity']) - testing.test_main() - - # remove fake files - testing.test_utils.remake_dir(example_dir) diff --git a/tensorflow_datasets/testing/lost_and_found.py b/tensorflow_datasets/testing/lost_and_found.py new file mode 100644 index 00000000000..edf58752da1 --- /dev/null +++ b/tensorflow_datasets/testing/lost_and_found.py @@ -0,0 +1,30 @@ +"""Script to generate fake 'Lost and Found' data.""" +import tensorflow as tf + +from tensorflow_datasets.testing.cityscapes import generate_ids, create_zipfile + + +if __name__ == '__main__': + example_dir = ('tensorflow_datasets/testing/test_data/fake_examples/' + 'lost_and_found') + 
base_path = example_dir + '/{}.zip' + # generate image ids matching between zipfiles + train_ids = [*generate_ids('01_Turmstr_17'), + *generate_ids('02_Goethe_Str_6')] + test_ids = list(generate_ids('03_Schlossallee_1')) + splits = {'train': train_ids, 'test': test_ids} + with tf.Graph().as_default(): + create_zipfile(base_path.format('leftImg8bit'), + splits_with_ids=splits, + suffixes=['leftImg8bit']) + create_zipfile(base_path.format('gtCoarse'), + splits_with_ids=splits, + suffixes=['gtCoarse_instanceIds', + 'gtCoarse_labelIds', + 'gtCoarse_color']) + create_zipfile(base_path.format('rightImg8bit'), + splits_with_ids=splits, + suffixes=['rightImg8bit']) + create_zipfile(base_path.format('disparity'), + splits_with_ids=splits, + suffixes=['disparity']) diff --git a/tensorflow_datasets/testing/test_data/fake_examples/lost_and_found/disparity.zip b/tensorflow_datasets/testing/test_data/fake_examples/lost_and_found/disparity.zip new file mode 100644 index 0000000000000000000000000000000000000000..131b093e9754018fd296c342329ee4d8b7076027 GIT binary patch literal 27666 zcmeHQZA?>V6n;yAG8iiwhlYeNKL#Wl^t*)&uv?XnDwMB@n{Z)URUCtkmTfY@BAbyc ztHdxv78j^L*i0r&Tx2f5aBL%SI?y4wFd1qPrXfT|hf_0mUZjxTUdT%Jk5h6Bx4qnx zH1|FCd7kr}Q=OLYzKYOC=k{k(k6Njg$beo298Z5UDdjj1Yhp}Z#biGQlciE5HI-Cedd|y z3UAdB;y03{QD?q;Z2an}G&gupo#%DY_d-OcVytWO*rf}R;g$ZPwO{$fe|^n6uI=xe z)0HiQWh8^S!`sq+QXndQSxCq~^@Ny1c!a3o1P9#VBu_w~m2mF$goGp#vatpaz|R4% zhvl{|OchP5MYfKh&b}VV8O@M0_F_W7#E3TtyisxhhM4$%pSt@w!o1rwV{}8t2tpWv za0+21qGt{kVHvQwHO39_>&mu6#%KxZtQ8>nKoW*r6FIlD_RtXDCiJTx8hlF!@&`%U zl%i>RA@6|BL4rB5=fRC&kYDKLsstlb$faTlqfkky663N2bIUHlSkqqQ5smu0eU)81 zJ%dvNWk-%0GHWEqFMDQv-|G=JROAu%`)_|tf6(#((9vNVY5bY$Finv89;Kqpf^rk$ zWyH%Yyc|;jtm>O4fymYltZEcl6>@J5?u}wS7A^Yc3b{82_r?+* zmO7!L+whCN-F$*ViV4UV*ZTQ&lSo34gdhn)62c-O6kbjVbfLmEZUJWX!WzuL&5sL0 zK!2f|77m+JQtUPu36oH)H|^Fl8Q_FbM2Q^t87Y`x#3J}5Q^=&Q>oB8s9Y(yBPl)Ey 
znG+t*4x6q1*?UOBlUGASb3QTskg{!up=*EYxsT%CX$uU0&rYoFKoa=x~G9!RY1Y&e)i)MHx{f=h(L2Bnnbw(L&KkxS~Ww3X9#E&m2XFQxLR8aB%)kIbV_jgil4H-aYOaOLX1!Ut`55y zW+_69SSkSthLJ19Dw)D{A%<;;iL3gsA$aGZ&DYbDZ>xK?;UUk3M6As*+!%k!FnKZX z+tk`}s>49Js;@K_6lv69IP^F4g*p4emOF4*j)1zmr;L};YG+o>$gD6iU}C_EK6gcb zY@`%)ml16-%z16Iz9>wSx=mFmJEi7hsA(l7+EU1b10V28;kr@~Qz53xje|zU7&>!~ z4tyw%HAeupKyraYFR+M%Lob|e5Z%*I<*~OVmAxX&I}eV2Z2d(K;c_f_a%q+LIm{FD zUVio_;kZSfzGV?5W@v+d0JJ61ZHY>ZL?HoJ45Lu0dgXCus&;Kqn*eDrDbI>>INOW)H|7 zy?4(-Us5>~cZ{&rBecN5ASlA&M6m@7r0ryV-&3P0mcPz8sxUmaRs4m&DTy9SALdH> z>@+ZDY2TZq(>exa&jzNU=)+uD1#*xpK^K`6%nr&PhVwWi4Jug25!7)Mjw_>a*n!#2 zU-N-$WUdEZq!o-EnC*6&54>ixL*W0x`{WdxrSHwhKE7>9u@_PMbXpHjdVWv%=>x13 IUO@N$1^cWOS^xk5 literal 0 HcmV?d00001 diff --git a/tensorflow_datasets/testing/test_data/fake_examples/lost_and_found/gtCoarse.zip b/tensorflow_datasets/testing/test_data/fake_examples/lost_and_found/gtCoarse.zip new file mode 100644 index 0000000000000000000000000000000000000000..b6cc82aaf290f9403f57df8c82451d90a77bde6a GIT binary patch literal 83202 zcmeHQdsI~Q7CtZ_zIZ5>FA!9&nVHPMz>Fv;I$R?Gd6;h*NJpd)5QbDV8Sv`0GNZL{ z;a!H1%X^Kkg48n108uNJ*i}plyOL0tOllWJSw1qo`-gL&8Rig=`J=^rv_^zQ{Eojl zzkT-p_V;}|bgsy17{@+J7f+a15_Lf9G=$^WD~!Guqn@1*os_I{S0_cs#=DCp%K0ft zYm(JTN{LMWE2Wp*OD>iwr81dVHdV?0kILBiWOa1>3RU3Bi#1%&{FfY9Re? 
z+rSqT;T+eW{%Y0wCl?xbbvMU3Gz2RA=dXY7my1QgmK>)Da}A2VF*)|V^zfzMe{p!s znfjrhKVS0WaoLGoGuCZ#n)vG?`>1PWW6G;jd^PzOQaSGCV=MZN1L!vorC;X8ajq|L z+{{dl)3{o2oIm}_4EzB00r!Ksor}ENjj0w9b=NI|d}~CzQaQ^E}L)><4dE0J;Yt*5Qj!tUTm)7H_JlM8zXNK%@Np_8+s)A z+%qV_QGy$k;CLk9k&sh&-8$%dYF7F1gcuZ7UXo#%12^1Zr_-Q({q@c8L{O zet>KN*#d|I5N8C4bDqI63mtLp8bLD8x)=#5*g7LyhdRAuosMFqPZukXEDlJAoJsuW z^tXgzn9!^b#XJlX);$h0wz@lblpJ()s@C(`spn$!ly@Y19mQL1r+7{?pO8>}d>q6{I~iZdim zI?fn$1vcUoh?C4sorYW(xp0@o%2oH*5Ehe5Sj-sKDq zxkp_~rnYb)MT`Q97#j?;54a+r?}NS%`aT5GJ`mlz=UFB&pl3+4%P@MlHf51tFW6*rd$cqD3ul@98pjs_Vhq+lEa z;~35=C#`1C?gz|;?h}d_tkq4q1!Af}Oa(pzKSC><3;)J!(lYL4Z60b3r!0r76sR42 zn%aRg-wUhNDi?{1-OYo8ZAJ-0F{7I;#XJ<#O?i?46w@tvQacV4MkX-|f4?o;UAGP$ z^e&e%ZoG%MJjkl*b5Z~8g9_GvHn3n*a>XS_6=M)H44U=1TS~@o+`}2*Lx2x~jszrX z2Kk}(=pn*k$nT+*KBq+A3RNYlN;LHan)<0zS+roDQX2@hJx*B$P+Qt|{qC;dHZTsiVnz<^Gdu0ok`exAJ{X4Rb*1uW& z@A^vFx{~9euMr5{+PZ2af=uCc3$FaJBS;Y_wNPr|k!bKp3~8k16;m86CuKKFoG?Ji zV29A|=ta8&jH158D5|F>5@A5b_CT&156Fa@43O!rPN~f?Mu*0n+<(Hl z-{w92WLfin7N5wQhfHmMphkSoZ0KmbX7*PB9fasZwbn`EeG)y%#q<>Ppm= zhHAx}cL>p`Ms0!rd&L=C+u)Vwr@`E#Er~5hB0KAYUv!jV~{ce zz3mNp+lTSIotJ37bseu(qTxy?usB#VLW6>tlQeU(LxI(BjJ#vm^fgAkWrbEV?iAc9 zM(z~k8^|}*51I2fVYbzLcNp`nJH}L;6r7YkTwhpql@yiA`8zn+MFjsnUl@+@Sg<32 zhhuu6RuX_?dZku6xykk`*9>o4Ra`XZJ0dT5q~kxTC$Npiw%WKG(lgSWL zd^zr38r<(uVj*!x;%tyOKe6UyW)O@T(?oFt2@?{gE5rc*{c7SYf6ZGku0B0UkP`Xr2gfMDMJf#$`GlqT<`aB?zfk1}nX*_!7J@agk zL%}ZPl`n*`nDgs$&W<9b5_>JuqsFL`)$U?XWyFfraS6%E(Q$Dql~U62J4%ViR4=Jq zDf9C1^py2Vu=E2jSSlidjDx~I?XBIfEgNVp-{*ctGIiJDi`%DMtpCsa!>0r1t=qJB z;xtAOq`axs)*O?rIm7|P0poGN+=~b8dg@@MFf!e%G>P(V-Bl5Y_sE+NFA*<0#>=W( zi%8eVyg8YZZy7awnk~R2g8%^n1n4X}>MXRsyk4rwXQN#ildjc02GPP5(E`x|(ZWcy zu>Y@UNZeCLDj3kh1qOsD3T8!Rgw7RN4P)QnN*7O#sbjBUs2ZSOi+CzNccsBly<^+689WD4OOgY%E` z-|_t8x%lKAP7FMBI@%gP@s1%Uh?b8jVz7zVAjsgP z;G}dsDR_c#8Ig}@Q~>jhX(17G^&0B;*mqUB064rO9F8^-+C+W$T-ZCuI8+2?4Nhh7 zi*Z0EEpN&Dyq#}2{Zx#4c0zPgvdUfEOF*VudZl(R-(YH9E6#rsn_;QQ^iZ#uV>!v; zMEH=F2rs#I=b-p|P0t>^)wr|Jrt&h=DlMj-H#3mAK>xs-k3m7lm}A2+$H*DXK9_zn 
z0-=dun7sty{zkdePiSJHi3LawV@ZuTu4x|@MV42e(Y3tGU>*al77A?tE{S){F#lF> zK1k*B1osZi^=t{!XDd6uI2>b39cW-HxKd;dlZ z1whupB1}5JriJ6`@0;QCzzs4by(X*sgKkGsRdFC5TA_YU|I|NWA@HR zX8rJkFdXyl!i2!y>6HZFm>%nuiUM2pO5-i2*_`el>r_4~aa+*j^g8R>lV3mI{N~29 zR)r-c7b*s+X0YOlX_bm8Zz3TAcXN}mO#oOY0M-G$LaJ#VsbRPXB3^$PCKLK zP8`y@-2ckqX_qpaCSgEkz;EaJITJUAy?Uls5&&enrB}Mr#*A4*po${Q)ouUWg;v#E zhR?b=;VsqH{T#}}hu7Y$i%4uZ=bUR($%?8{oq0^k5|V6Qrom8nVij z6BOSkQa3N9)%Tl>IwA0en=xeS=wcd;e~q~`FNL}IwlZXx{Wt6&@c1<2$EVR#g1+Yy z0$$@g6fOKHTBNeMB4nOWiAez{!cc?(m@@**Rn2fXY!|=nj-Q1_upXMbrolSzt>jdvGIl=D-P)+DQwloFZ#S4uCrms~7WN@X&!Y^tDY zDRi#>@~@PQ{f0+_Lin}mvnKs5x}e@K|9h5>;o7IS))w*9pt*W$4;ycawc7wG!-Ye$ zJ{0o{>+ks-nx=Q$Gw=kt5ce5?5nP5QLhjthGz=On1qt5wPf zy4Ahi#rj_>J?So&i0PBmTk6^MeLs-v#$N(MKKTz#`d7s6;@>@Y^^>EfzhqZ0p|iao zw`zfc-+MwY+deeuOLmxQeZl=dbu=u6x2e+K|EoFOUR>fy0!gXTOD2(%P?F!dKf3ST z#;zUB|NHg#e?y_EE}^si_gJtafL~vK|NEaX)%t?_Z#YMffA=-%SzrHt@m`vS{Xyt4 zbdPVp3b!jJyo%1=1w+5I_DeWIqF4T2GM#&xVe~cB2e+>uU}MpP^#!^ifAB^a(qh}M rqI!f0ucEUz!mtt4ehI2cCcK2-oe1lGY>lDx--G@f*F23l58VF&Zu@*R literal 0 HcmV?d00001 diff --git a/tensorflow_datasets/testing/test_data/fake_examples/lost_and_found/leftImg8bit.zip b/tensorflow_datasets/testing/test_data/fake_examples/lost_and_found/leftImg8bit.zip new file mode 100644 index 0000000000000000000000000000000000000000..813b5942b1200ca4d27b557a3a5671a735cc13aa GIT binary patch literal 53484 zcmeI53s6+o8OJXVc|}U<`&Ays$Fx=1_pS{-M?(-{Np$dqxl8Q|MqUC&6bs8r6hoq| zwAw^vN$bQ`(`cfd7=#Gb=%g7%;~+#75r?Q%BMLSuF5B~+Lsf{ziFRZ&{CDPZxy$Og z`#aw`_dDPDpYKG4ySew0=v6Xnn9eVinXPgIT~-(Rv(ngWd5DYn#p*83UOmJ)@I`U91mXik0vuouL;&qV*a0|z0};UU zLfAo9;V^sCm@ARV%#--Mse(L-nYL_Op;Tt6Id|rGPpMyBo3}85ycza#_ZH`8hlum| z--)w?4+s$801k`*+J!I!Z~zA)faisZLIRe+Zi#Iq8FmCTmnXImvT=mWcWwNuuM2%Skjkty-y)bp|!5N}fbc^7kqJ zQooh4VcChBh{D zjJtL{<5Tytsr19=2`AoDX8U5G3<%0>W(J zHGu;-FanSO2r~eOhjEx4V?S-9IiJJH`?j(`Mcu_Ki)qg1NOSL-$u1Wg$# z^;&ss>3+AwXyw4%*mV&po;!Y-SAP4{!7Xu$0lvfZMVUKS_v>Dq`}2a(n1ioZq(wKL zZ&}ng(=%Y4bTjLf=y5zgQ{gyH^F0}J1IVwkToRrXlpDfA!2ukI02D034!{8%7y&F4 
z7iItsZ5)nXZ?&JH_{<&qDJ~;9QNNU?eWLvi@ufbP_90N6H~zq?U4+C$Qw0ZbAOZ-9 z2|EA>C*~0UVOr`~N=&lPHi|EM$#lF*ZpE6~B(5g$6!(r%Iyevkj826efCD%%0vMeN zGXMv0AOd&S{2WbfTBs$rV#bmx3Qz2;E!{-<&T0;J^r=`w(UT4&Xop z?slWdCe00^Gup*r<4a6VvWd${F!~kk(y2H!ck6HTPkO7aB#&hpB?*ZgI21w^fo-lHqJW}uzyTZ>0Zdd1GXMv0AOd)12|IY29Euw1uU3+z zWckTk3QB#pHdE$`)jLgRY3WafxTVhUzMaa`d}xLj;N3P zCWn!>HOX7`EHg<`OAYxEr?Qv-i2@bba|R+SJ%YRNns~PWZ$b2q-~bLp0N#SI18{f< zhutxWg?ww?ZyLBn^*@dhz7S?d!Bw~wa4XIJAQ& zW^CASd?3@QHebyko$A=6zI+MLV!MGH3xCm;p>YQXB7kwaumf-a2O{8<<6DfnTZzPeb{WCjhRufufnia7i{~5PQj7mh)1P5?n1h6Dd zm;pF+Du>hl`N2$9QY7}gVYkvL1M`j|3IvO|QQWPIqr(GU3(zkboUw)8f3T>;+VXbg zMJ6brnG8xJdAZia|22;3wtiNH_RD(yUFZraa2^Tka@O};(dCW z_IciDqqW5*^D<6HYHURifRjKFKo|r#fCD3dAb>CfZ~zA)@HB%VPA#{KzjeDYVVRrc z)|$1PPXHO`T;dK!>e3MCN-%FMx(jds2O@yTgs=l}00$y)HyVMI!rUP4wTt`>XB_WQ zwvsz%`Lg2Iva@V75mdD|8Ow@cH&|B8+Y98W90TOai1u?sz(lZU58&_+4!f(XOKi2w zkMOI}^DsrryhOfVuPvBwtcU%22^^xF!hFC19Ebp-y}}N_q0>0@?|o|m6P0Y?q7qEX zih6KL4i|j?Ic)IUk4^s?M>ly-SmNT^%dHo`?-G`;#Md5n;A6{6V>C?iC)gxx@1w?Ah2-z#d03h(AYWw1RAq|0n%`tQSZ^lw0mqTJuK@9Y`g!YZbVX*=RP^b_c%`t5> zE@`b;*=$p_CtCL<39==|mQsT$$L$bt0YelO4K8dWz2CixvNWDzo6ITyIp^_sa|Z66 z-}}CMzkBchelKLI!ofwNSMKc5(^I!q9=_@!Npu9U_u``?qY?vSB8~H+6MYin=SIi+ zsMK=U;`o?^#CTb4XniH?j5?!AD{Bn~mBCMb@{he2#YVmt5Gd=(gL;&_6>;N3V zfe4_T6m|d(U4w(yh-Ei|<0Ml-ap5_NB$`s1e)6)N#jfg&w9*06kc$t)h?Nckhf{xX zc7IKrE$(*W7kmI57y;M={DKIeeGqm44&Xopx>~;(Ro`f>y`z+r;i)fWWl05=vLl&S zyGzc-x9>W-nB0D8P;eju=wE~#fCD%Xfvz?PwV^P3Ro=NBl4SQM(gL7+ z67Fc592)u@jPML7YHGMbwG4bEIAHfA)5?$*O-TXMlpKmvZLef{5;{7eDbeb68nv&i zH)=>)@=ThNJyYDIVeigu&hS*`nEZCRzP~f4d-eQz`(G>_U4HDN+TQc``m1Ig{dKqB z4t96!7B{J`u(~$9@Qj>y*XN~To#dZgeX5A-N19(wr#MQM^Mj_d#h~MN#4Wm50r?0g z0U3=j2yg%gA^>@rumf-a2SxzuF<}N1PF}uQa+|OFjPu$-0@cMe5j0V=PCt{&Ngk?2 z;5Kj@xDCXH7+vso08zDQ4#5E&h(NpW;9%YT1YItZrIt9Zr#NEop~e&nD`L+yPiONL z7DsUFkQ+AraKnI@UHAt$fCCXgKP&729J)4#lb3IN*GRJ!N5kEMY2D+&=UhjU(QM3Q z%OTRkO|@7z6=nhs;6MbPuG7M9F>WaUB^(AE{udmKe{ptZnv!hNl+gc&%u7(VDGpnc z+=jXLe-QffdsMlOAsmQ88%O}@Q~|GCbj?$L8l 
zq2lNXHk#D6W?o$>t9;mg>r}-IwYrP-sCcfaGN&%JS z?p3h>R8Yn;qtMHy?v?w))hRSr;^u$@I4}aXsnbIu0&biblL;RMhyQC1Ti53ND)(wH z$$8fKX0BBAS)Wb|L5r3o`%{21Tzq>>aDxL8z{IYw18@KbBG7I@9Sg(UFwkEJ|LC$D z)->n)^Ymw@Zx+)=m|dG1DXh3v(QuzGlLa2gd%=MSbXhk+GGXoj=tf0501n{52%uXO zX3(C4SMS|fI}B|5%bmJtz77&6bA8E$^eYG}Fgizf${h#Y2)Yq;BepcE$lj71%_Jq; zxTK_gw}$On++l5ERyEh&)qZ&2PZwCeq&jshESXX{t+s-RO6cgMutcr#Ws(wEXHXlp z`ezfCOdK^3kM4Q+b#K$Qjr%?S-tGGQ=U*J_s!v!k`_nIy`V9K)c)zos6mIOf zsQcYL7si!zFSqVVx?OYqo550Zw)K1|Wsv>YnqZ(6pR$zYe1q}*N2yAxV4b#Oowygc z7mNTRJZ=WyATWRfI1mAwh%8TDHd_}jRY;0*!%ZGZR9=dwxti1En~)X zB5%8F!gX;ka4#4EEXQ&)00&zdZ64fzw~nVOnOS^-&sP}3`PP?;9aVIBOWZPO>cN2s zVE>b_18@KbB7kii!VbUz9Ed>67R)(c`@03vb9$@3dy{4>B0<@q_!tiJ~ORRgxugm0XbWqZn)~QO&35h-`jU4)B)WW!ggt-eBn$5Dh{X4SS*H5Q4$_AaU zMq_w3QHjTtQPPlAGk!NSCREmZWa|Iwv5~tc6t3NTXr1e3c>)2RSzcj8BC>fCD%X0h`FKW>Y(g&(12#AW=z0F%K-78gn+& z!}O{ft=LhFtuB}f;q4}-LPYlg4&cBDV7gYA0XY0S4s)!B9;8tIvwQvz_(VlT_2)EE zkulzla@AWL#UCOZ0|z32EkME!zyTZ>0nB0wGXMv0AObD3cPB4@yCjmr3h(mMT%;Ot zrHY3Yc^7DpEy4<6FX%?Vfe}FSDa-&Iz<~&~bR$1fir>YzmlrchNw&yS^5fUX7MR#v z9Zn8@d!XlyhRkWBsA`W@C3f~M3Kx1y%AGxWdg`{y!&jN6gpMHouJ~bDLFd+qOXBB7 z$NH$$a@gYdn1sZ5S#4;2CF_hjqe?4l4F;9LubsRkWa{Ih3s}*I;#AuoNm^PL9p?ET z7p>LnG-_X2Z`4o?o~*a`qS(l`MW-(qQ+ADgV>;-OKaXCP6(0t4T(qw|InJCIWtKzP z7uWl!T3^c=_MO!#c4ChK(mb{Jwoe>-TzpeRr;7jn=MT00_HEPCJT5-?y|%^wcaqh# ltwCEg)8iWaVP|J+@cr7Rkg45x!XBlhl Date: Fri, 6 Sep 2019 11:52:34 +0200 Subject: [PATCH 09/11] clean up code, address review comments --- tensorflow_datasets/image/lost_and_found.py | 39 ++++++++----------- .../image/lost_and_found_test.py | 3 +- .../testing/fake_data_utils.py | 3 +- 3 files changed, 20 insertions(+), 25 deletions(-) diff --git a/tensorflow_datasets/image/lost_and_found.py b/tensorflow_datasets/image/lost_and_found.py index 58e0f9be2d2..432447a85c4 100644 --- a/tensorflow_datasets/image/lost_and_found.py +++ b/tensorflow_datasets/image/lost_and_found.py @@ -142,44 +142,37 @@ def _info(self): def _split_generators(self, dl_manager): """Returns SplitGenerators.""" 
base_url = 'http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/{}.zip' - download_urls = { - 'image_left': base_url.format(self.builder_config.left_image_string)} - if 'image_right' in self.builder_config.features: - download_urls['image_right'] = base_url.format( - self.builder_config.right_image_string) - if 'segmentation_label' in self.builder_config.features \ - or 'instance_id' in self.builder_config.features: - download_urls['gt'] = base_url.format('gtCoarse') - if 'disparity_map' in self.builder_config.features: - download_urls['disparity_map'] = base_url.format('disparity') - # split into two steps to save space for testing data - dl_paths = dl_manager.download(download_urls) - dl_paths = dl_manager.extract(dl_paths) - - # point segmentation label and instance IDs both to directory for ground-truth - if 'gt' in dl_paths: - dl_paths['segmentation_label'] = dl_paths['gt'] - dl_paths['instance_id'] = dl_paths['gt'] - - # first directory in the zipfile, dependent on feature to load - sub_dirs = { + + # For each feature, this is the name of the zipfile and root-directory in the archive + zip_file_names = { 'image_left': self.builder_config.left_image_string, 'image_right': self.builder_config.right_image_string, 'segmentation_label': 'gtCoarse', 'instance_id': 'gtCoarse', 'disparity_map': 'disparity'} + download_urls = {feat: base_url.format(zip_file_names[feat]) + for feat in self.builder_config.features} + # Split donwload and extract in two functions such that mock-data can replace the + # result of the download function and is still used as input to extract. Therefore, + # fake_data can be compressed zip archives. 
+ dl_paths = dl_manager.extract(dl_manager.download(download_urls)) + return [ tfds.core.SplitGenerator( name=tfds.Split.TRAIN, # These kwargs will be passed to _generate_examples - gen_kwargs={feat: path.join(dl_paths[feat], sub_dirs[feat], 'train') + gen_kwargs={feat: path.join(dl_paths[feat], + zip_file_names[feat], + 'train') for feat in self.builder_config.features}, ), tfds.core.SplitGenerator( name=tfds.Split.TEST, # These kwargs will be passed to _generate_examples - gen_kwargs={feat: path.join(dl_paths[feat], sub_dirs[feat], 'test') + gen_kwargs={feat: path.join(dl_paths[feat], + zip_file_names[feat], + 'test') for feat in self.builder_config.features}, ) ] diff --git a/tensorflow_datasets/image/lost_and_found_test.py b/tensorflow_datasets/image/lost_and_found_test.py index 3e233126cf3..0f804e14a27 100644 --- a/tensorflow_datasets/image/lost_and_found_test.py +++ b/tensorflow_datasets/image/lost_and_found_test.py @@ -20,7 +20,8 @@ class LostAndFoundTest(testing.DatasetBuilderTestCase): 'image_left': 'leftImg8bit.zip', 'image_right': 'rightImg8bit.zip', 'disparity_map': 'disparity.zip', - 'gt': 'gtCoarse.zip'} + 'segmentation_label': 'gtCoarse.zip', + 'instance_id': 'gtCoarse.zip'} if __name__ == "__main__": diff --git a/tensorflow_datasets/testing/fake_data_utils.py b/tensorflow_datasets/testing/fake_data_utils.py index 970e1ec864c..076a161f81e 100644 --- a/tensorflow_datasets/testing/fake_data_utils.py +++ b/tensorflow_datasets/testing/fake_data_utils.py @@ -57,7 +57,8 @@ def get_random_png(height=None, width=None, channels=CHANNELS_NB): # Big randomly generated pngs take large amounts of diskspace. # Instead, we resize a 4x4 random image to the png size. 
image = get_random_picture(4, 4, channels) - image = tf.image.resize_nearest_neighbor(tf.expand_dims(image, 0), (height, width))[0] + image = tf.image.resize_nearest_neighbor(tf.expand_dims(image, 0), + (height, width))[0] png = tf.image.encode_png(image) with utils.nogpu_session() as sess: res = sess.run(png) From d94dec305257a0a5c0de8c8933391fb8b8854a3b Mon Sep 17 00:00:00 2001 From: Hermann Date: Thu, 31 Oct 2019 22:50:18 +0900 Subject: [PATCH 10/11] remove dictionary unpacking --- tensorflow_datasets/image/lost_and_found.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tensorflow_datasets/image/lost_and_found.py b/tensorflow_datasets/image/lost_and_found.py index 432447a85c4..173eadd64c6 100644 --- a/tensorflow_datasets/image/lost_and_found.py +++ b/tensorflow_datasets/image/lost_and_found.py @@ -125,15 +125,15 @@ def _info(self): encoding_format='png'), 'disparity_map': tfds.features.Image(shape=(1024, 2048, 1), encoding_format='png')} + features = {feat: possible_features[feat] + for feat in self.builder_config.features} + features['image_id'] = tfds.features.Text() return tfds.core.DatasetInfo( builder=self, # This is the description that will appear on the datasets page. 
description=_DESCRIPTION, # tfds.features.FeatureConnectors - features=tfds.features.FeaturesDict({ - 'image_id': tfds.features.Text(), - **{feat: possible_features[feat] - for feat in self.builder_config.features}}), + features=features, # Homepage of the dataset for documentation urls=['http://www.6d-vision.com/lostandfounddataset'], citation=_CITATION, @@ -195,11 +195,10 @@ def _generate_examples(self, **paths): for left_img in tf.io.gfile.listdir(left_city_root): image_id = _get_id_from_left_image(left_img) - features = { - 'image_id': image_id, - **{feat: path.join(paths_city_root[feat], - '{}_{}.png'.format(image_id, file_suffix[feat])) - for feat in paths}} + features = {feat: path.join(paths_city_root[feat], + '{}_{}.png'.format(image_id, file_suffix[feat])) + for feat in paths} + features['image_id'] = image_id yield image_id, features From 599c94fcfb156afe39b457af9fe580d11e41f0b2 Mon Sep 17 00:00:00 2001 From: Hermann Date: Fri, 1 Nov 2019 12:36:51 +0900 Subject: [PATCH 11/11] fix featuredict --- tensorflow_datasets/image/lost_and_found.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow_datasets/image/lost_and_found.py b/tensorflow_datasets/image/lost_and_found.py index 173eadd64c6..ad5a9d0e5ec 100644 --- a/tensorflow_datasets/image/lost_and_found.py +++ b/tensorflow_datasets/image/lost_and_found.py @@ -128,6 +128,7 @@ def _info(self): features = {feat: possible_features[feat] for feat in self.builder_config.features} features['image_id'] = tfds.features.Text() + features = tfds.features.FeaturesDict(features) return tfds.core.DatasetInfo( builder=self, # This is the description that will appear on the datasets page.