From 66c7e345794e8d44d8e5f12559a9cce8e95e570e Mon Sep 17 00:00:00 2001 From: Richard Gildea Date: Wed, 23 Mar 2022 17:37:47 +0000 Subject: [PATCH 1/8] Add basic support for deserializing yaml recipes --- .pre-commit-config.yaml | 2 +- requirements_dev.txt | 1 + setup.cfg | 1 + src/workflows/recipe/recipe.py | 4 ++- tests/recipe/test_recipe.py | 61 ++++++++++++++++++++++++++++++++++ 5 files changed, 67 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 71f9e404..1ca3bb56 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -48,4 +48,4 @@ repos: hooks: - id: mypy files: 'src/.*\.py$' - additional_dependencies: ['types-setuptools==57.0.2'] + additional_dependencies: ['types-setuptools==57.0.2', 'types-PyYAML'] diff --git a/requirements_dev.txt b/requirements_dev.txt index a4079d2a..852a5d6a 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -2,6 +2,7 @@ bidict==0.21.4 pytest==7.1.1 pytest-cov==3.0.0 pytest-timeout==2.1.0 +pyyamml setuptools==60.10.0 stomp.py==8.0.0 pika==1.2.0 diff --git a/setup.cfg b/setup.cfg index cb1bc074..52f6d779 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,6 +28,7 @@ project_urls = install_requires = bidict pika + pyyaml setuptools stomp.py>=7 packages = find: diff --git a/src/workflows/recipe/recipe.py b/src/workflows/recipe/recipe.py index d4744489..9242dd08 100644 --- a/src/workflows/recipe/recipe.py +++ b/src/workflows/recipe/recipe.py @@ -5,6 +5,8 @@ import string from typing import Any, Dict +import yaml + import workflows basestring = (str, bytes) @@ -29,7 +31,7 @@ def __init__(self, recipe=None): def deserialize(self, string): """Convert a recipe that has been stored as serialized json string to a data structure.""" - return self._sanitize(json.loads(string)) + return self._sanitize(yaml.safe_load(string)) @staticmethod def _sanitize(recipe): diff --git a/tests/recipe/test_recipe.py b/tests/recipe/test_recipe.py index c27eb8f2..e596655b 100644 --- a/tests/recipe/test_recipe.py +++ b/tests/recipe/test_recipe.py @@ -368,3 +368,64 @@ def test_merging_recipes(): C.recipe.values(), ) ) + + +def test_deserialize_json(): + healthy_recipe = """ + { + "1": { + "queue": "my.queue", + "parameters": { + "foo": "bar", + "ingredients": ["ham", "spam"], + "workingdir": "/path/to/workingdir", + "output_file": "out.txt" + } + }, + "start": [ + [1, []] + ] + } + """ + A = workflows.recipe.Recipe(healthy_recipe) + assert A.recipe == { + "start": [(1, [])], + 1: { + "queue": "my.queue", + "parameters": { + "foo": "bar", + "ingredients": ["ham", "spam"], + "workingdir": "/path/to/workingdir", + "output_file": "out.txt", + }, + }, + } + + +def test_deserialize_yaml(): + healthy_recipe = """ +1: + queue: my.queue + parameters: + foo: bar + ingredients: + - ham + - spam + workingdir: /path/to/workingdir + output_file: out.txt +start: +- [1, []] +""" + A = workflows.recipe.Recipe(healthy_recipe) + assert A.recipe == { + "start": [(1, [])], + 1: { + "queue": "my.queue", + "parameters": { + "foo": "bar", + "ingredients": ["ham", "spam"], + "workingdir": "/path/to/workingdir", + "output_file": "out.txt", + }, + }, + } From 7c5f8d35dd0a18f336e7c74c857451ef8769dcac Mon Sep 17 00:00:00 2001 From: Richard Gildea Date: Wed, 23 Mar 2022 18:01:23 +0000 Subject: [PATCH 2/8] Support for serializing recipes as yaml --- src/workflows/recipe/recipe.py | 14 +++++++++--- tests/recipe/test_recipe.py | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/src/workflows/recipe/recipe.py b/src/workflows/recipe/recipe.py index 9242dd08..16ccf12b 100644 --- a/src/workflows/recipe/recipe.py +++ b/src/workflows/recipe/recipe.py @@ -52,9 +52,17 @@ def _sanitize(recipe): recipe["start"] = [tuple(x) for x in recipe["start"]] return recipe - def serialize(self): - """Write out the current recipe as serialized json string.""" - return json.dumps(self.recipe) + def serialize(self, format: str = "json"): + """Write out the current recipe as serialized string. + + Supported serialization formats are "json" and "yaml". + """ + if format == "json": + return json.dumps(self.recipe) + elif format == "yaml": + return yaml.safe_dump(self.recipe) + else: + raise ValueError(f"Unsupported serialization format {format}") def pretty(self): """Write out the current recipe as serialized json string with pretty formatting.""" diff --git a/tests/recipe/test_recipe.py b/tests/recipe/test_recipe.py index e596655b..cc951f6b 100644 --- a/tests/recipe/test_recipe.py +++ b/tests/recipe/test_recipe.py @@ -1,8 +1,10 @@ from __future__ import annotations +import json from unittest import mock import pytest +import yaml import workflows import workflows.recipe @@ -100,6 +102,9 @@ def test_serializing_and_deserializing_recipes(): assert A.deserialize(A.serialize()) == A.recipe assert B.deserialize(B.serialize()) == B.recipe + assert A.deserialize(A.serialize(format="yaml")) == A.recipe + assert B.deserialize(B.serialize(format="yaml")) == B.recipe + def test_validate_tests_for_empty_recipe(): """Validating a recipe that has not been defined must throw an error.""" @@ -429,3 +434,40 @@ def test_deserialize_yaml(): }, }, } + + +def test_serialize_json(): + A, B = generate_recipes() + + assert ( + A.recipe + == workflows.recipe.Recipe(json.loads(A.serialize(format="json"))).recipe + ) + assert ( + B.recipe + == workflows.recipe.Recipe(json.loads(B.serialize(format="json"))).recipe + ) + + +def test_serialize_yaml(): + A, B = generate_recipes() + + # Verify that this definitely isn't json + with pytest.raises(json.JSONDecodeError): + json.loads(A.serialize(format="yaml")) + + assert ( + A.recipe + == workflows.recipe.Recipe(yaml.safe_load(A.serialize(format="yaml"))).recipe + ) + assert ( + B.recipe + == workflows.recipe.Recipe(yaml.safe_load(B.serialize(format="yaml"))).recipe + ) + + +def test_unsupported_serialization_format_raises_error(): + A, _ = generate_recipes() + + with pytest.raises(ValueError): + A.serialize(format="xml") From 6bebb2907fe75b940685efaa0c1feae3d4012245 Mon Sep 17 00:00:00 2001 From: Richard Gildea Date: Wed, 23 Mar 2022 18:37:03 +0000 Subject: [PATCH 3/8] Use ruamel.yaml instead of pyyaml Since this defaults to YAML 1.2 which makes JSON an official subset. This includes e.g. allowing tab characters as whitespace (included in some existing Zocalo recipes). --- .pre-commit-config.yaml | 2 +- requirements_dev.txt | 2 +- setup.cfg | 2 +- src/workflows/recipe/recipe.py | 12 +++++++++--- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1ca3bb56..71f9e404 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -48,4 +48,4 @@ repos: hooks: - id: mypy files: 'src/.*\.py$' - additional_dependencies: ['types-setuptools==57.0.2', 'types-PyYAML'] + additional_dependencies: ['types-setuptools==57.0.2'] diff --git a/requirements_dev.txt b/requirements_dev.txt index 852a5d6a..fe8e5a6d 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -2,7 +2,7 @@ bidict==0.21.4 pytest==7.1.1 pytest-cov==3.0.0 pytest-timeout==2.1.0 -pyyamml +ruamel.yaml setuptools==60.10.0 stomp.py==8.0.0 pika==1.2.0 diff --git a/setup.cfg b/setup.cfg index 52f6d779..371c1a9d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,7 +28,7 @@ project_urls = install_requires = bidict pika - pyyaml + ruamel.yaml setuptools stomp.py>=7 packages = find: diff --git a/src/workflows/recipe/recipe.py b/src/workflows/recipe/recipe.py index 16ccf12b..097b0478 100644 --- a/src/workflows/recipe/recipe.py +++ b/src/workflows/recipe/recipe.py @@ -1,11 +1,12 @@ from __future__ import annotations import copy +import io import json import string from typing import Any, Dict -import yaml +from ruamel.yaml import YAML import workflows @@ -31,7 +32,9 @@ def __init__(self, recipe=None): def deserialize(self, string): """Convert a recipe that has been stored as serialized json string to a data structure.""" - return self._sanitize(yaml.safe_load(string)) + with YAML(typ="safe") as yaml: + yaml.allow_duplicate_keys = True + return self._sanitize(yaml.load(string)) @staticmethod def _sanitize(recipe): @@ -60,7 +63,10 @@ def serialize(self, format: str = "json"): if format == "json": return json.dumps(self.recipe) elif format == "yaml": - return yaml.safe_dump(self.recipe) + buf = io.StringIO() + with YAML(output=buf, typ="safe") as yaml: + yaml.dump(self.recipe) + return buf.getvalue() else: raise ValueError(f"Unsupported serialization format {format}") From c6b0af5cf8c623a69f0ddbf36666764e6662e4b2 Mon Sep 17 00:00:00 2001 From: Richard Gildea Date: Wed, 23 Mar 2022 20:06:09 +0000 Subject: [PATCH 4/8] Catch different parsing error --- tests/recipe/test_validate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/recipe/test_validate.py b/tests/recipe/test_validate.py index 46bed6fc..67cc5c2a 100644 --- a/tests/recipe/test_validate.py +++ b/tests/recipe/test_validate.py @@ -4,11 +4,11 @@ from __future__ import annotations -import json import sys from unittest import mock import pytest +import ruamel.yaml import workflows from workflows.recipe.validate import main, validate_recipe @@ -64,8 +64,8 @@ def test_value_error_when_validating_bad_json(tmpdir): recipe_file = tmpdir.join("recipe.json") recipe_file.write(bad_json) - # Run validate with mock open, expect JSON error - with pytest.raises(json.JSONDecodeError): + # Run validate with mock open, expect parsing error + with pytest.raises(ruamel.yaml.parser.ParserError): validate_recipe(recipe_file.strpath) From 6c1a006b3a4eac7a69a9da3965204cf5f0029e5e Mon Sep 17 00:00:00 2001 From: Richard Gildea Date: Wed, 23 Mar 2022 20:06:40 +0000 Subject: [PATCH 5/8] Fix import when running test standalone --- tests/recipe/test_wrapped_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/recipe/test_wrapped_recipe.py b/tests/recipe/test_wrapped_recipe.py index bf4704f4..5219d46d 100644 --- a/tests/recipe/test_wrapped_recipe.py +++ b/tests/recipe/test_wrapped_recipe.py @@ -4,7 +4,7 @@ import pytest -import workflows +import workflows.transport.common_transport from workflows.recipe import Recipe from workflows.recipe.wrapper import RecipeWrapper From 688928c1ce36e9259eaab043cb95fa73e5a7a05a Mon Sep 17 00:00:00 2001 From: Richard Gildea Date: Wed, 23 Mar 2022 21:12:07 +0000 Subject: [PATCH 6/8] Test json recipe with pseudo comment --- tests/recipe/test_recipe.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/recipe/test_recipe.py b/tests/recipe/test_recipe.py index cc951f6b..db6b33dc 100644 --- a/tests/recipe/test_recipe.py +++ b/tests/recipe/test_recipe.py @@ -471,3 +471,21 @@ def test_unsupported_serialization_format_raises_error(): with pytest.raises(ValueError): A.serialize(format="xml") + + +def test_json_recipe_with_pseudo_comment(): + recipe = """ + { + "1": ["This is a comment"], + "1": { + "parameters": { + "foo": "bar" + } + }, + "start": [ + [1, []] + ] + } + """ + A = workflows.recipe.Recipe(recipe) + assert A.recipe == {"start": [(1, [])], 1: {"parameters": {"foo": "bar"}}} From 3204e90d67dfb270802d796656952a22e08667b9 Mon Sep 17 00:00:00 2001 From: Richard Gildea Date: Wed, 23 Mar 2022 22:02:01 +0000 Subject: [PATCH 7/8] Support parsing json files with pseudo comments --- src/workflows/recipe/recipe.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/workflows/recipe/recipe.py b/src/workflows/recipe/recipe.py index 097b0478..dbb66391 100644 --- a/src/workflows/recipe/recipe.py +++ b/src/workflows/recipe/recipe.py @@ -7,12 +7,27 @@ from typing import Any, Dict from ruamel.yaml import YAML +from ruamel.yaml.constructor import SafeConstructor import workflows basestring = (str, bytes) +def construct_yaml_map(self, node): + # Test if there are duplicate node keys + # In the case of duplicate keys, last wins + data = {} + yield data + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=True) + val = self.construct_object(value_node, deep=True) + data[key] = val + + +SafeConstructor.add_constructor("tag:yaml.org,2002:map", construct_yaml_map) + + class Recipe: """Object containing a processing recipe that can be passed to services. A recipe describes how all involved services are connected together, how From 87cc7f2414b496b4865fa4d748ee707dca6d80d7 Mon Sep 17 00:00:00 2001 From: Richard Gildea Date: Thu, 24 Mar 2022 11:45:14 +0000 Subject: [PATCH 8/8] Use ruamel.yaml not yaml in tests --- tests/recipe/test_recipe.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/recipe/test_recipe.py b/tests/recipe/test_recipe.py index db6b33dc..bf2690fa 100644 --- a/tests/recipe/test_recipe.py +++ b/tests/recipe/test_recipe.py @@ -4,7 +4,7 @@ from unittest import mock import pytest -import yaml +from ruamel.yaml import YAML import workflows import workflows.recipe @@ -456,14 +456,15 @@ def test_serialize_yaml(): with pytest.raises(json.JSONDecodeError): json.loads(A.serialize(format="yaml")) - assert ( - A.recipe - == workflows.recipe.Recipe(yaml.safe_load(A.serialize(format="yaml"))).recipe - ) - assert ( - B.recipe - == workflows.recipe.Recipe(yaml.safe_load(B.serialize(format="yaml"))).recipe - ) + with YAML(typ="safe") as yaml: + assert ( + A.recipe + == workflows.recipe.Recipe(yaml.load(A.serialize(format="yaml"))).recipe + ) + assert ( + B.recipe + == workflows.recipe.Recipe(yaml.load(B.serialize(format="yaml"))).recipe + ) def test_unsupported_serialization_format_raises_error():