operator-framework · kevinrizza · Apr 1, 2019 · Mar 25, 2019 · csomh · Mar 27, 2019
diff --git a/Pipfile b/Pipfile
@@ -10,6 +10,6 @@ requests = "*"
 pytest = "*"
 pyyaml = "*"
 twine = "*"
-
+semver = "*"
 
 [dev-packages]
diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/operatorcourier/api.py b/operatorcourier/api.py
@@ -15,6 +15,7 @@
 from operatorcourier.push import PushCmd
 from operatorcourier.format import format_bundle
 from operatorcourier.nest import nest_bundles
+from operatorcourier.flatten import flatten_bundles
 from operatorcourier.errors import OpCourierBadBundle
 
 logger = logging.getLogger(__name__)
@@ -136,3 +137,18 @@ def nest(source_dir, registry_dir):
 
     with TemporaryDirectory() as temp_dir:
         nest_bundles(yaml_files, registry_dir, temp_dir)
+
+
+def flatten(source_dir, dest_dir):
+    """
+    Given a directory containing different versions of operator bundles
+    (CRD, CSV, package) in separate version directories, this function
+    copies all files needed to the flattened directory. It is the inverse
+    of the `nest` function.
+
+    :param source_dir: the directory containing different versions
+    of operator bundles (CRD, CSV, package) in separate version directories
+    :param dest_dir: the flattened directory path where all necessary files are copied
+    """
+    os.makedirs(dest_dir, exist_ok=True)
+    flatten_bundles(source_dir, dest_dir)
diff --git a/operatorcourier/cli.py b/operatorcourier/cli.py
@@ -35,6 +35,7 @@ def parse(self):
     verify      Create a bundle and test it for correctness.
     push        Create a bundle, test it, and push it to an app registry.
     nest        Take a flat to-be-bundled directory and version nest it.
+    flatten     Create a flat directory from versioned operator bundle yaml files.
 ''')
         try:
             __version__ = pkg_resources.get_distribution('operator-courier').version
@@ -113,3 +114,24 @@ def nest(self):
 
         args, leftovers = parser.parse_known_args(sys.argv[2:])
         api.nest(args.source_dir, args.registry_dir)
+
+    # Parse the flatten command
+    def flatten(self):
+        parser = argparse.ArgumentParser(
+            usage='operator-courier flatten [-h] source_dir dest_dir',
+            description='Given a directory with different versions of '
+                        'operator bundles (CRD, CSV, package), this command extracts '
+                        'versioned CSVs and the latest version of each CRD along with '
+                        'the package file and creates a new flat directory '
+                        'of yaml files. See https://github.com/operator-framework/'
+                        'operator-registry#manifest-format to find out more about '
+                        'how nested bundles should be structured.')
+        parser.add_argument('source_dir',
+                            help='Path of the source directory that contains different '
+                                 'versions of operator bundles (CRD, CSV, package)')
+        parser.add_argument('dest_dir',
+                            help='The new flat directory that contains '
+                                 'extracted bundle files')
+
+        args, leftovers = parser.parse_known_args(sys.argv[2:])
+        api.flatten(args.source_dir, args.dest_dir)
diff --git a/operatorcourier/flatten.py b/operatorcourier/flatten.py
@@ -0,0 +1,170 @@
+import logging
+import os
+from yaml import safe_load, MarkedYAMLError
+from typing import Dict, Tuple
+from shutil import copyfile
+import semver
+from operatorcourier import identify
+from operatorcourier import errors
+
+logger = logging.getLogger(__name__)
+
+
+def flatten_bundles(source_dir: str, dest_dir: str):
+    file_paths_to_copy = get_flattened_files_info(source_dir)
+    for (src_file_path, new_file_name) in file_paths_to_copy:
+        copyfile(src_file_path, os.path.join(dest_dir, new_file_name))
+
+
+def get_flattened_files_info(source_dir: str) -> [(str, str)]:
+    """
+    :param source_dir: Path of the directory containing different versions
+    of operator bundles (CRD, CSV, package) in separate version directories
+    :return: A list of tuples where in each tuple, the first element is
+    the source file path to be copied to the flattened directory, and the second is
+    the new file name to be used in the file copy. The function returns an empty list if
+    the source_dir is already flat
+    """
+
+    # extract package file and version folders from source_dir
+    _, folder_names, file_names = next(os.walk(source_dir))
+    if not folder_names:
+        logger.info('The source directory is already flat.')
+        return []
+
+    file_paths_to_copy = []  # [ (SRC_FILE_PATH, NEW_FILE_NAME) ]
+
+    crd_dict = {}  # { CRD_NAME => (VERSION, CRD_PATH) }
+    csv_paths = []
+
+    for version_folder_name in folder_names:
+        parse_version_folder(source_dir, version_folder_name, csv_paths, crd_dict)
+
+    # add package in source_dir
+    package_path = get_package_path(source_dir, file_names)
+    file_paths_to_copy.append((package_path, os.path.basename(package_path)))
+
+    # add all CRDs with the latest version
+    for _, crd_path in crd_dict.values():
+        file_paths_to_copy.append((crd_path, os.path.basename(crd_path)))
+
+    # add all CSVs
+    for csv_file_name, csv_entries in create_csv_dict(csv_paths).items():
+        for (version, csv_path) in csv_entries:
+            basename, ext = os.path.splitext(csv_file_name)
+            file_paths_to_copy.append((csv_path, f'{basename}-v{version}{ext}'))
+
+    return file_paths_to_copy
+
+
+def parse_version_folder(base_dir: str, version_folder_name: str,
+                         csv_paths: list, crd_dict: Dict[str, Tuple[str, str]]):
+    """
+    Parse the version folder of the bundle and collect information of CSV and CRDs
+    in the bundle
+
+    :param base_dir: Path of the base directory where the version folder is located
+    :param version_folder_name: The name of the version folder containing bundle files
+    :param csv_paths: A list of CSV file paths inside version folders
+    :param crd_dict: dict that contains CRD info collected from different version folders,
+    where the key is the CRD name, and the value is a tuple where the first element is
+    the version of the bundle, and the second is the path of the CRD file
+    """
+    # parse each version folder and parse CRD, CSV files
+    try:
+        semver.parse(version_folder_name)
+    except ValueError:
+        logger.warning("Ignoring %s as it is not a valid semver. "
+                       "See https://semver.org for the semver specification.",
+                       version_folder_name)
+        return
+
+    logger.info('Parsing folder: %s...', version_folder_name)
+
+    contains_csv = False
+    version_folder_path = os.path.join(base_dir, version_folder_name)
+
+    for item in os.listdir(os.path.join(base_dir, version_folder_name)):
+        item_path = os.path.join(version_folder_path, item)
+
+        if not os.path.isfile(item_path):
+            logger.warning('Ignoring %s as it is not a regular file.', item)
+            continue
+        if not is_yaml_file(item_path):
+            logging.warning('Ignoring %s as the file does not end with .yaml or .yml',
+                            item_path)
+            continue
+
+        with open(item_path, 'r') as f:
+            file_content = f.read()
+
+        yaml_type = identify.get_operator_artifact_type(file_content)
+
+        if yaml_type == 'ClusterServiceVersion':
+            contains_csv = True
+            csv_paths.append(item_path)
+        elif yaml_type == 'CustomResourceDefinition':
+            try:
+                crd_name = safe_load(file_content)['metadata']['name']
+            except MarkedYAMLError:
+                msg = "Courier requires valid input YAML files"
+                logger.error(msg)
+                raise errors.OpCourierBadYaml(msg)
+            except KeyError:
+                msg = f'{item} is not a valid CRD file as "metadata.name" ' \
+                      f'field is required'
+                logger.error(msg)
+                raise errors.OpCourierBadBundle(msg)
+            # create new CRD type entry if not found in dict
+            if crd_name not in crd_dict:
+                crd_dict[crd_name] = (version_folder_name, item_path)
+            # update the CRD type entry with the file with the newest version
+            elif semver.compare(version_folder_name, crd_dict[crd_name][0]) > 0:
+                crd_dict[crd_name] = (crd_dict[crd_name][0], item_path)
+
+    if not contains_csv:
+        msg = 'This version directory does not contain any valid CSV file.'
+        logger.error(msg)
+        raise errors.OpCourierBadBundle(msg)
+
+
+def get_package_path(base_dir: str, file_names_in_base_dir: list) -> str:
+    packages = []
+
+    # add package file to file_paths_to_copy
+    # only 1 package yaml file is expected in file_names
+    for file_name in file_names_in_base_dir:
+        file_path = os.path.join(base_dir, file_name)
+        if not is_yaml_file(file_path):
+            logging.warning('Ignoring %s as the file does not end with .yaml or .yml',
+                            file_path)
+            continue
+
+        with open(file_path, 'r') as f:
+            file_content = f.read()
+        if identify.get_operator_artifact_type(file_content) != 'Package':
+            logger.warning('Ignoring %s as it is not a valid package file.', file_name)
+        elif not packages:
+            packages.append(file_path)
+        else:
+            msg = f'The input source directory expects only 1 valid package file.'
+            logger.error(msg)
+            raise errors.OpCourierBadBundle(msg)
+
+    return packages[0]
+
+
+# parse all CSVs and ensure those with same names are handled
+def create_csv_dict(csv_paths: list) -> dict:
+    csv_dict = {}  # { CSV_FILE_NAME => [ (v1, csv_path_1), ... , (vn, csv_path_n) ] }
+    for csv_path in csv_paths:
+        version_folder_path, csv_file_name = os.path.split(csv_path)
+        version = os.path.basename(version_folder_path)
+        val = (version, csv_path)
+        csv_dict.setdefault(csv_file_name, []).append(val)
+    return csv_dict
+
+
+def is_yaml_file(file_path: str) -> bool:
+    yaml_ext = ['.yaml', '.yml']
+    return os.path.splitext(file_path)[1] in yaml_ext
diff --git a/setup.py b/setup.py
@@ -24,7 +24,8 @@
   install_requires=[
     'pyyaml',
     'requests',
-    'validators'
+    'validators',
+    'semver'
   ],
   python_requires='>=3.6, <4',
   setup_requires=['pytest-runner'],

diff --git a/tests/test_files/bundles/flatten/etcd_valid_input_1/0.6.1/etcdcluster.crd.yaml b/tests/test_files/bundles/flatten/etcd_valid_input_1/0.6.1/etcdcluster.crd.yaml
@@ -0,0 +1,16 @@
+apiVersion: apiextensions.k8s.io/v1beta1
+kind: CustomResourceDefinition
+metadata:
+  name: etcdclusters.etcd.database.coreos.com
+spec:
+  group: etcd.database.coreos.com
+  version: v1beta2
+  scope: Namespaced
+  names:
+    plural: etcdclusters
+    singular: etcdcluster
+    kind: EtcdCluster
+    listKind: EtcdClusterList
+    shortNames:
+      - etcdclus
+      - etcd