Skip to content
1 change: 1 addition & 0 deletions tensorflow_datasets/image/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from tensorflow_datasets.image.imagenet2012_corrupted import Imagenet2012Corrupted
from tensorflow_datasets.image.imagenet_resized import ImagenetResized
from tensorflow_datasets.image.lfw import LFW
from tensorflow_datasets.image.lost_and_found import LostAndFound
from tensorflow_datasets.image.lsun import Lsun
from tensorflow_datasets.image.malaria import Malaria
from tensorflow_datasets.image.mnist import EMNIST
Expand Down
216 changes: 216 additions & 0 deletions tensorflow_datasets/image/lost_and_found.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
"""Lost and Found Road Hazard Dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from os import path
import re

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow_datasets.core import api_utils


_CITATION = """
@inproceedings{pinggera2016lost,
title={Lost and found: detecting small road hazards for self-driving vehicles},
author={Pinggera, Peter and Ramos, Sebastian and Gehrig, Stefan and Franke, Uwe and Rother, Carsten and Mester, Rudolf},
booktitle={2016 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
year={2016}
}
"""

_DESCRIPTION = """
The LostAndFound Dataset addresses the problem of detecting unexpected small obstacles on
the road often caused by lost cargo. The dataset comprises 112 stereo video sequences
with 2104 annotated frames (picking roughly every tenth frame from the recorded data).

The dataset is designed analogous to the 'Cityscapes' dataset. The datset provides:
- stereo image pairs in either 8 or 16 bit color resolution
- precomputed disparity maps
- coarse semantic labels for objects and street

Descriptions of the labels are given here: http://www.6d-vision.com/laf_table.pdf
"""


class LostAndFoundConfig(tfds.core.BuilderConfig):
  """BuilderConfig for the 'Lost and Found' dataset.

  Args:
    right_images (bool): Enables right images for stereo image tasks.
    segmentation_labels (bool): Enables image segmentation labels.
    instance_ids (bool): Enables instance-id labels.
    disparity_maps (bool): Enables disparity maps.
    use_16bit (bool): Loads 16 bit (rgb) images instead of 8bit.
  """

  @api_utils.disallow_positional_args
  def __init__(self, right_images=False, segmentation_labels=False,
               instance_ids=False, disparity_maps=False, use_16bit=False,
               **kwargs):
    super().__init__(**kwargs)

    # The left image is always part of the dataset; all other modalities
    # are opt-in via the constructor flags (order is kept stable).
    optional_features = [
        ('image_right', right_images),
        ('segmentation_label', segmentation_labels),
        ('instance_id', instance_ids),
        ('disparity_map', disparity_maps),
    ]
    self.features = ['image_left'] + [
        name for name, enabled in optional_features if enabled]

    # Archive/directory name templates differ only in the bit depth.
    bit_depth = '16' if use_16bit else '8'
    self.left_image_string = 'leftImg{}bit'.format(bit_depth)
    self.right_image_string = 'rightImg{}bit'.format(bit_depth)


class LostAndFound(tfds.core.GeneratorBasedBuilder):
  """Lost and Found Road Hazard Dataset.

  Each builder config selects a subset of the available modalities (left/right
  images, segmentation labels, instance ids, disparity maps); only the zip
  archives for the selected modalities are downloaded.
  """

  VERSION = tfds.core.Version('1.0.0')

  BUILDER_CONFIGS = [
      LostAndFoundConfig(
          name='semantic_segmentation',
          description='Lost and Found semantic segmentation dataset.',
          version="1.0.0",
          right_images=False,
          segmentation_labels=True,
          instance_ids=False,
          disparity_maps=False,
          use_16bit=False,
      ),
      LostAndFoundConfig(
          name='stereo_disparity',
          description='Lost and Found stereo images and disparity maps.',
          version="1.0.0",
          right_images=True,
          segmentation_labels=False,
          instance_ids=False,
          disparity_maps=True,
          use_16bit=False,
      ),
      LostAndFoundConfig(
          name='full',
          description='Full Lost and Found dataset.',
          version="1.0.0",
          right_images=True,
          segmentation_labels=True,
          instance_ids=True,
          disparity_maps=True,
          use_16bit=False,
      ),
      LostAndFoundConfig(
          name='full_16bit',
          description='Full Lost and Found dataset.',
          version="1.0.0",
          right_images=True,
          segmentation_labels=True,
          instance_ids=True,
          disparity_maps=True,
          use_16bit=True,
      )]

  def _info(self):
    """Returns the DatasetInfo with the features of the current config."""
    # All modalities this dataset can provide. Shapes use the fixed
    # 1024x2048 camera resolution; labels and disparity are single-channel.
    possible_features = {
        'image_left': tfds.features.Image(shape=(1024, 2048, 3),
                                          encoding_format='png'),
        'image_right': tfds.features.Image(shape=(1024, 2048, 3),
                                           encoding_format='png'),
        'segmentation_label': tfds.features.Image(shape=(1024, 2048, 1),
                                                  encoding_format='png'),
        'instance_id': tfds.features.Image(shape=(1024, 2048, 1),
                                           encoding_format='png'),
        'disparity_map': tfds.features.Image(shape=(1024, 2048, 1),
                                             encoding_format='png')}
    # Keep only the features enabled by the selected builder config.
    features = {feat: possible_features[feat]
                for feat in self.builder_config.features}
    features['image_id'] = tfds.features.Text()
    features = tfds.features.FeaturesDict(features)
    return tfds.core.DatasetInfo(
        builder=self,
        # This is the description that will appear on the datasets page.
        description=_DESCRIPTION,
        # tfds.features.FeatureConnectors
        features=features,
        # Homepage of the dataset for documentation
        urls=['http://www.6d-vision.com/lostandfounddataset'],
        citation=_CITATION,
    )

  def _split_generators(self, dl_manager):
    """Returns SplitGenerators for the train and test splits.

    Args:
      dl_manager: tfds download manager used to fetch and extract the
        per-modality zip archives.
    """
    base_url = 'http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/{}.zip'

    # For each feature, this is the name of the zipfile and root-directory in
    # the archive. Note: segmentation labels and instance ids ship together
    # in the single 'gtCoarse' archive.
    zip_file_names = {
        'image_left': self.builder_config.left_image_string,
        'image_right': self.builder_config.right_image_string,
        'segmentation_label': 'gtCoarse',
        'instance_id': 'gtCoarse',
        'disparity_map': 'disparity'}

    download_urls = {feat: base_url.format(zip_file_names[feat])
                     for feat in self.builder_config.features}
    # Split download and extract in two functions such that mock-data can
    # replace the result of the download function and is still used as input
    # to extract. Therefore, fake_data can be compressed zip archives.
    dl_paths = dl_manager.extract(dl_manager.download(download_urls))

    return [
        tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            # These kwargs will be passed to _generate_examples
            gen_kwargs={feat: path.join(dl_paths[feat],
                                        zip_file_names[feat],
                                        'train')
                        for feat in self.builder_config.features},
        ),
        tfds.core.SplitGenerator(
            name=tfds.Split.TEST,
            # These kwargs will be passed to _generate_examples
            gen_kwargs={feat: path.join(dl_paths[feat],
                                        zip_file_names[feat],
                                        'test')
                        for feat in self.builder_config.features},
        )
    ]

  def _generate_examples(self, **paths):
    """Yields (image_id, example) pairs.

    Args:
      **paths: for every enabled feature, the directory of the corresponding
        extracted archive for the current split.
    """
    # Different file-suffixes dependent on the feature to load.
    file_suffix = {
        'image_left': self.builder_config.left_image_string,
        'image_right': self.builder_config.right_image_string,
        'segmentation_label': 'gtCoarse_labelIds',
        'instance_id': 'gtCoarse_instanceIds',
        'disparity_map': 'disparity'}

    # Directory layout is <feature-root>/<scene>/<files>. Scenes are
    # enumerated from the left images, which every config contains.
    for scene_id in tf.io.gfile.listdir(paths['image_left']):
      paths_city_root = {feat: path.join(feat_dir, scene_id)
                         for feat, feat_dir in paths.items()}

      left_city_root = paths_city_root['image_left']
      for left_img in tf.io.gfile.listdir(left_city_root):
        image_id = _get_id_from_left_image(left_img)

        # All other modalities share the id prefix of the left image and
        # differ only in their file suffix.
        features = {feat: path.join(paths_city_root[feat],
                                    '{}_{}.png'.format(image_id,
                                                       file_suffix[feat]))
                    for feat in paths}
        features['image_id'] = image_id

        yield image_id, features

# Helper functions

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use pylint. For TF code style check this link

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume you would like to reduce the 2 blank lines to 1?

I checked all files with pylint using the TF code style (following the provided script in oss_scripts/lint.sh), however there seems to be no linter (neither pylint nor pycodestyle) that actually checks for that. Fixed it anyways.

LEFT_IMAGE_FILE_RE = re.compile(r'(.+)_leftImg(?:8|16)bit\.png')

def _get_id_from_left_image(left_image):
'''Returns the id of an image file. Used to associate an image file
with its corresponding label.
Example:
'bonn_000001_000019_leftImg8bit' -> 'bonn_000001_000019'
'''
return LEFT_IMAGE_FILE_RE.match(left_image).group(1)
28 changes: 28 additions & 0 deletions tensorflow_datasets/image/lost_and_found_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Tests for LostAndFound dataset module."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow_datasets import testing
from tensorflow_datasets.image import lost_and_found


class LostAndFoundTest(testing.DatasetBuilderTestCase):
  """Builder test covering a labels-only and a full LostAndFound config."""

  DATASET_CLASS = lost_and_found.LostAndFound
  BUILDER_CONFIG_NAMES_TO_TEST = ['semantic_segmentation', 'full']
  # Number of fake examples per split.
  SPLITS = {
      'train': 4,
      'test': 2,
  }
  # Archive files as generated by the fake-data functions in
  # testing/lost_and_found.py, keyed by feature name.
  DL_EXTRACT_RESULT = {
      'image_left': 'leftImg8bit.zip',
      'image_right': 'rightImg8bit.zip',
      'disparity_map': 'disparity.zip',
      'segmentation_label': 'gtCoarse.zip',
      'instance_id': 'gtCoarse.zip',
  }


if __name__ == '__main__':
  testing.test_main()
54 changes: 54 additions & 0 deletions tensorflow_datasets/testing/cityscapes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Helper functions to generate fake Cityscapes-like data for testing."""

import re
from os import path
from zipfile import ZipFile
from random import randint

from tensorflow_datasets.testing.fake_data_utils import get_random_png


# Captures the city/scene prefix from an id such as 'bonn_000001_000019'.
CITY_IN_ID_RE = re.compile(r'(.+)_[0-9]+_[0-9]+')


def generate_ids(city, num=2):
  """Generates image ids following the format of the cityscapes dataset.

  Args:
    city (str): The city/scene the ids belong to, used as a prefix to the id.
    num (int): Number of random ids to generate.

  Yields:
    Id strings of the form '<city>_<6 digits>_<6 digits>'.
  """
  produced = 0
  while produced < num:
    # Two independent random 6-digit fields, zero-padded.
    sequence_nb = randint(0, 999999)
    frame_nb = randint(0, 999999)
    yield '{}_{:06d}_{:06d}'.format(city, sequence_nb, frame_nb)
    produced += 1


def create_zipfile(zip_filepath, splits_with_ids, suffixes, maindir=None):
  """Generates a zipfile with a cityscapes-like file structure and random pngs.

  Args:
    zip_filepath (str): filepath to the zip archive that will be created.
    splits_with_ids (Dict[str, List[str]]): data-splits like 'train' or 'val'
      that map to a list of image ids.
    suffixes (List[str]): suffix per modality that should be created e.g.
      'leftImg8bit'.
    maindir (str): name of the root directory of the zipfile, defaults to the
      name of the zipfile without its extension.
  """
  if maindir is None:
    # Bug fix: the previous `.strip('.zip')` removed any of the characters
    # '.', 'z', 'i', 'p' from BOTH ends, mangling names such as 'api.zip'
    # -> 'a'. splitext removes exactly the extension. Also hoisted out of
    # the loop so it is computed once.
    maindir = path.splitext(path.basename(zip_filepath))[0]
  with ZipFile(zip_filepath, 'w') as z:
    for split, ids in splits_with_ids.items():
      split_dir = path.join(maindir, split)
      for img_id in ids:
        city = CITY_IN_ID_RE.match(img_id).group(1)
        for suffix in suffixes:
          # Camera images ('...Img...') are 3-channel; label and disparity
          # maps are single-channel.
          if 'Img' in suffix:
            img = get_random_png(height=1024, width=2048, channels=3)
          else:
            img = get_random_png(height=1024, width=2048, channels=1)
          z.write(img,
                  path.join(split_dir, city,
                            '{}_{}.png'.format(img_id, suffix)))
6 changes: 5 additions & 1 deletion tensorflow_datasets/testing/fake_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,11 @@ def get_random_jpeg(height=None, width=None, channels=CHANNELS_NB):

def get_random_png(height=None, width=None, channels=CHANNELS_NB):
"""Returns path to PNG picture."""
image = get_random_picture(height, width, channels)
# Big randomly generated pngs take large amounts of diskspace.
# Instead, we resize a 4x4 random image to the png size.
image = get_random_picture(4, 4, channels)
image = tf.image.resize_nearest_neighbor(tf.expand_dims(image, 0),
(height, width))[0]
png = tf.image.encode_png(image)
with utils.nogpu_session() as sess:
res = sess.run(png)
Expand Down
30 changes: 30 additions & 0 deletions tensorflow_datasets/testing/lost_and_found.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Script to generate fake 'Lost and Found' data."""
import tensorflow as tf

from tensorflow_datasets.testing.cityscapes import generate_ids, create_zipfile


if __name__ == '__main__':
  example_dir = ('tensorflow_datasets/testing/test_data/fake_examples/'
                 'lost_and_found')
  base_path = example_dir + '/{}.zip'
  # Generate image ids that match between the different zipfiles.
  train_ids = [*generate_ids('01_Turmstr_17'),
               *generate_ids('02_Goethe_Str_6')]
  test_ids = list(generate_ids('03_Schlossallee_1'))
  splits = {'train': train_ids, 'test': test_ids}
  # One archive per modality; each entry lists the file suffixes that go
  # into that archive. Order matches the original creation order.
  archives = [
      ('leftImg8bit', ['leftImg8bit']),
      ('gtCoarse', ['gtCoarse_instanceIds',
                    'gtCoarse_labelIds',
                    'gtCoarse_color']),
      ('rightImg8bit', ['rightImg8bit']),
      ('disparity', ['disparity']),
  ]
  with tf.Graph().as_default():
    for archive_name, archive_suffixes in archives:
      create_zipfile(base_path.format(archive_name),
                     splits_with_ids=splits,
                     suffixes=archive_suffixes)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
6 changes: 6 additions & 0 deletions tensorflow_datasets/url_checksums/lost_and_found.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/disparity.zip 1461824611 1e06350d082f3bd686ff889940ad60ed85bfb1e8aa691a547a259c52fa3b60b1
http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/gtCoarse.zip 37756896 53b6d3ab000f08b1fb59d70c1398eecc4d82a7baf4e9cf74fbf60d1858abe9ac
http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/leftImg16bit.zip 18039875634 f3530514163f30ccafc05210b643ea690c4ba17cdb4497d8a7d4f9c324c71da8
http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/leftImg8bit.zip 5802953400 307f66002023ab597d309963b94990f5b9a8e5735ee729c3292647a66e9f2b18
http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/rightImg16bit.zip 17938768019 db492fc9b9e0adf7a662cd589a6c8ea5cd8cf68d08600b2099d0cd7e0c58f6dd
http://www.dhbw-stuttgart.de/~sgehrig/lostAndFoundDataset/rightImg8bit.zip 5787134165 d5219f49e730a1ce064a9d118227e71cd39681bcc7f8a87ab4061c86cd7dc6fb