Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ requests = "*"
pytest = "*"
pyyaml = "*"
twine = "*"

semver = "*"

[dev-packages]
66 changes: 44 additions & 22 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions operatorcourier/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from operatorcourier.push import PushCmd
from operatorcourier.format import format_bundle
from operatorcourier.nest import nest_bundles
from operatorcourier.flatten import flatten_bundles
from operatorcourier.errors import OpCourierBadBundle

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -136,3 +137,18 @@ def nest(source_dir, registry_dir):

with TemporaryDirectory() as temp_dir:
nest_bundles(yaml_files, registry_dir, temp_dir)


def flatten(source_dir, dest_dir):
"""
Given a directory containing different versions of operator bundles
(CRD, CSV, package) in separate version directories, this function
copies all files needed to the flattened directory. It is the inverse
of the `nest` function.

:param source_dir: the directory containing different versions
of operator bundles (CRD, CSV, package) in separate version directories
:param dest_dir: the flattened directory path where all necessary files are copied
"""
os.makedirs(dest_dir, exist_ok=True)
flatten_bundles(source_dir, dest_dir)
22 changes: 22 additions & 0 deletions operatorcourier/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def parse(self):
verify Create a bundle and test it for correctness.
push Create a bundle, test it, and push it to an app registry.
nest Take a flat to-be-bundled directory and version nest it.
flatten Create a flat directory from versioned operator bundle yaml files.
''')
try:
__version__ = pkg_resources.get_distribution('operator-courier').version
Expand Down Expand Up @@ -113,3 +114,24 @@ def nest(self):

args, leftovers = parser.parse_known_args(sys.argv[2:])
api.nest(args.source_dir, args.registry_dir)

# Parse the flatten command
def flatten(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think something is missing from the usage string of flatten -h:

$ operator-courier flatten -h
usage: operator-courier [-h] source_dir dest_dir

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@csomh This is strange... I am able to see the description on my side.

(operator-courier) 10:42 > operator-courier git:(add-flatten-command) ✗ operator-courier flatten -h
usage: operator-courier [-h] source_dir dest_dir

Given a directory with different versions of operator bundles (CRD, CSV,
package), this command extracts versioned CSVs and the latest version of each
CRD along with the package file and creates a new flat directory of yaml
files. See https://github.com/operator-framework/operator-registry#manifest-
format to find out more about how nested bundles should be structured.

positional arguments:
  source_dir  Path of the source directory that contains different versions of
              operator bundles (CRD, CSV, package)
  dest_dir    The new flat directory that contains extracted bundle files

optional arguments:
  -h, --help  show this help message and exit

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JEREMYLINLIN, it's not about the description, it's the usage string. For operator-courier flatten -h my expectation would be

operator-courier flatten [-h] source_dir dest_dir

but it's:

operator-courier [-h] source_dir dest_dir

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@csomh Gotcha. But it seems to be a general issue for all other subcommands. I fiddled around for some time and wasn't able to find a quick fix. Maybe we should open up an issue and make a separate PR for this? @kevinrizza

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a good point, but let's not check in more bad code. Let's do this one right and fix the others in a separate PR.

Copy link
Member Author

@jeremy-wl jeremy-wl Mar 27, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kevinrizza May I hard code the right usage for flatten for this time?

    def flatten(self):
        parser = argparse.ArgumentParser(
            usage='operator-courier flatten [-h] source_dir dest_dir',
            description='Given a directory with ...')

Because after reading some docs about subcommands and doing some experiments, it seems the current subcommands implementation is not the standard way of doing it. And it will be very hard to make the flatten help message work using the "right way" without breaking others.

I can also make a PR to fix the CLI subcommands before fixing this PR, but this may take a while to get through while the current PR is more urgent.

parser = argparse.ArgumentParser(
usage='operator-courier flatten [-h] source_dir dest_dir',
description='Given a directory with different versions of '
'operator bundles (CRD, CSV, package), this command extracts '
'versioned CSVs and the latest version of each CRD along with '
'the package file and creates a new flat directory '
'of yaml files. See https://github.com/operator-framework/'
'operator-registry#manifest-format to find out more about '
'how nested bundles should be structured.')
parser.add_argument('source_dir',
help='Path of the source directory that contains different '
'versions of operator bundles (CRD, CSV, package)')
parser.add_argument('dest_dir',
help='The new flat directory that contains '
'extracted bundle files')

args, leftovers = parser.parse_known_args(sys.argv[2:])
api.flatten(args.source_dir, args.dest_dir)
170 changes: 170 additions & 0 deletions operatorcourier/flatten.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import logging
import os
from yaml import safe_load, MarkedYAMLError
from typing import Dict, Tuple
from shutil import copyfile
import semver
from operatorcourier import identify
from operatorcourier import errors

logger = logging.getLogger(__name__)


def flatten_bundles(source_dir: str, dest_dir: str):
file_paths_to_copy = get_flattened_files_info(source_dir)
for (src_file_path, new_file_name) in file_paths_to_copy:
copyfile(src_file_path, os.path.join(dest_dir, new_file_name))


def get_flattened_files_info(source_dir: str) -> [(str, str)]:
"""
:param source_dir: Path of the directory containing different versions
of operator bundles (CRD, CSV, package) in separate version directories
:return: A list of tuples where in each tuple, the first element is
the source file path to be copied to the flattened directory, and the second is
the new file name to be used in the file copy. The function returns an empty list if
the source_dir is already flat
"""

# extract package file and version folders from source_dir
_, folder_names, file_names = next(os.walk(source_dir))
if not folder_names:
logger.info('The source directory is already flat.')
return []

file_paths_to_copy = [] # [ (SRC_FILE_PATH, NEW_FILE_NAME) ]

crd_dict = {} # { CRD_NAME => (VERSION, CRD_PATH) }
csv_paths = []

for version_folder_name in folder_names:
parse_version_folder(source_dir, version_folder_name, csv_paths, crd_dict)

# add package in source_dir
package_path = get_package_path(source_dir, file_names)
file_paths_to_copy.append((package_path, os.path.basename(package_path)))

# add all CRDs with the latest version
for _, crd_path in crd_dict.values():
file_paths_to_copy.append((crd_path, os.path.basename(crd_path)))

# add all CSVs
for csv_file_name, csv_entries in create_csv_dict(csv_paths).items():
for (version, csv_path) in csv_entries:
basename, ext = os.path.splitext(csv_file_name)
file_paths_to_copy.append((csv_path, f'{basename}-v{version}{ext}'))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So here we're renaming the CSV files to avoid naming overlap, is that right? In that case, this format for filename is nice.
Will the resulting file ext be of the format .clusterserviceversion.yaml?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So here we're renaming the CSV files to avoid naming overlap, is that right?

Yes.

Will the resulting file ext be of the format .clusterserviceversion.yaml?

It will append a version string before the extension. You can check the test cases to get a better sense.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why add the version in two spots of the file name? Is it because the first version was part of the file name?
To keep with conventions for yaml naming, the file format for CSVs should look something like:

{basename}{separator}v{version}.clusterserviceversion.yaml

For example:

etcd-operator.v0.9.2.clusterserviceversion.yaml

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah but that's not guaranteed @SamiSousa , it's just a convention. we don't want this utility to fail just because someone broke a convention.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@SamiSousa See this conversation and this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kevinrizza Where would this fail based on a convention? It seems like we're renaming the files anyways, so in a way aren't we just setting our own convention?
It looks like we're pulling the basename from the original filename, so that would explain why the version pops up twice. I don't mind if we have the version back to back like so:

etcd-operator.v0.9.2-v0.9.2.clusterserviceversion.yaml

The reason I'm insisting is because other tools may naively be depending on this filenaming convention to discern which files are CSVs, which are CRDs, etc.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because there's nothing stopping anyone from naming their files whatever they want. This command literally just renames them so we don't get collisions. No other tool besides courier should ever touch these temp files -- if they do that is a mistake and should be corrected.


return file_paths_to_copy


def parse_version_folder(base_dir: str, version_folder_name: str,
csv_paths: list, crd_dict: Dict[str, Tuple[str, str]]):
"""
Parse the version folder of the bundle and collect information of CSV and CRDs
in the bundle

:param base_dir: Path of the base directory where the version folder is located
:param version_folder_name: The name of the version folder containing bundle files
:param csv_paths: A list of CSV file paths inside version folders
:param crd_dict: dict that contains CRD info collected from different version folders,
where the key is the CRD name, and the value is a tuple where the first element is
the version of the bundle, and the second is the path of the CRD file
"""
# parse each version folder and parse CRD, CSV files
try:
semver.parse(version_folder_name)
except ValueError:
logger.warning("Ignoring %s as it is not a valid semver. "
"See https://semver.org for the semver specification.",
version_folder_name)
return

logger.info('Parsing folder: %s...', version_folder_name)

contains_csv = False
version_folder_path = os.path.join(base_dir, version_folder_name)

for item in os.listdir(os.path.join(base_dir, version_folder_name)):
item_path = os.path.join(version_folder_path, item)

if not os.path.isfile(item_path):
logger.warning('Ignoring %s as it is not a regular file.', item)
continue
if not is_yaml_file(item_path):
logging.warning('Ignoring %s as the file does not end with .yaml or .yml',
item_path)
continue

with open(item_path, 'r') as f:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested this PR and I got following error (because I had binary (zip) file in that directory by accident):

$ operator-courier flatten orig dest
'utf-8' codec can't decode byte 0xd8 in position 14: invalid continuation byte

There is no traceback, but I assume that this line the source of that error.

push and verify commands reads only files with *.yaml and *.yml extensions, IMHO flatten should work consistently with other commands.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 This is valuable feedback. I think there should be two takeways from this: we certainly limit our parsing and copying in this command to just yaml files. And we should also explicitly log and return an error if the folder contains files which are not of that type.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MartinBasti Thanks for the feedback! I've updated the code to handle that situation.

file_content = f.read()

yaml_type = identify.get_operator_artifact_type(file_content)

if yaml_type == 'ClusterServiceVersion':
contains_csv = True
csv_paths.append(item_path)
elif yaml_type == 'CustomResourceDefinition':
try:
crd_name = safe_load(file_content)['metadata']['name']
except MarkedYAMLError:
msg = "Courier requires valid input YAML files"
logger.error(msg)
raise errors.OpCourierBadYaml(msg)
except KeyError:
msg = f'{item} is not a valid CRD file as "metadata.name" ' \
f'field is required'
logger.error(msg)
raise errors.OpCourierBadBundle(msg)
# create new CRD type entry if not found in dict
if crd_name not in crd_dict:
crd_dict[crd_name] = (version_folder_name, item_path)
# update the CRD type entry with the file with the newest version
elif semver.compare(version_folder_name, crd_dict[crd_name][0]) > 0:
crd_dict[crd_name] = (crd_dict[crd_name][0], item_path)

if not contains_csv:
msg = 'This version directory does not contain any valid CSV file.'
logger.error(msg)
raise errors.OpCourierBadBundle(msg)


def get_package_path(base_dir: str, file_names_in_base_dir: list) -> str:
packages = []

# add package file to file_paths_to_copy
# only 1 package yaml file is expected in file_names
for file_name in file_names_in_base_dir:
file_path = os.path.join(base_dir, file_name)
if not is_yaml_file(file_path):
logging.warning('Ignoring %s as the file does not end with .yaml or .yml',
file_path)
continue

with open(file_path, 'r') as f:
file_content = f.read()
if identify.get_operator_artifact_type(file_content) != 'Package':
logger.warning('Ignoring %s as it is not a valid package file.', file_name)
elif not packages:
packages.append(file_path)
else:
msg = f'The input source directory expects only 1 valid package file.'
logger.error(msg)
raise errors.OpCourierBadBundle(msg)

return packages[0]


# parse all CSVs and ensure those with same names are handled
def create_csv_dict(csv_paths: list) -> dict:
csv_dict = {} # { CSV_FILE_NAME => [ (v1, csv_path_1), ... , (vn, csv_path_n) ] }
for csv_path in csv_paths:
version_folder_path, csv_file_name = os.path.split(csv_path)
version = os.path.basename(version_folder_path)
val = (version, csv_path)
csv_dict.setdefault(csv_file_name, []).append(val)
return csv_dict


def is_yaml_file(file_path: str) -> bool:
yaml_ext = ['.yaml', '.yml']
return os.path.splitext(file_path)[1] in yaml_ext
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
install_requires=[
'pyyaml',
'requests',
'validators'
'validators',
'semver'
],
python_requires='>=3.6, <4',
setup_requires=['pytest-runner'],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
name: etcdclusters.etcd.database.coreos.com
spec:
group: etcd.database.coreos.com
version: v1beta2
scope: Namespaced
names:
plural: etcdclusters
singular: etcdcluster
kind: EtcdCluster
listKind: EtcdClusterList
shortNames:
- etcdclus
- etcd
Loading