From 1feddda4f25d6062339622b027953ff06e6b3b1f Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Wed, 10 Sep 2025 00:22:45 +0200 Subject: [PATCH 01/36] Add basic implementation of API class --- src/hermes/model/__init__.py | 2 ++ src/hermes/model/api.py | 10 ++++++++++ 2 files changed, 12 insertions(+) create mode 100644 src/hermes/model/api.py diff --git a/src/hermes/model/__init__.py b/src/hermes/model/__init__.py index faf5a2f5..4a4bca25 100644 --- a/src/hermes/model/__init__.py +++ b/src/hermes/model/__init__.py @@ -1,3 +1,5 @@ # SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR) # # SPDX-License-Identifier: Apache-2.0 + +from hermes.model.api import SoftwareMetadata diff --git a/src/hermes/model/api.py b/src/hermes/model/api.py new file mode 100644 index 00000000..6deba117 --- /dev/null +++ b/src/hermes/model/api.py @@ -0,0 +1,10 @@ +from hermes.model.types import ld_dict + +from hermes.model.types.ld_context import ALL_CONTEXTS + +class SoftwareMetadata(ld_dict): + + def __init__(self, data: dict=None, extra_vocabs: dict[str, str]=None) -> None: + ctx = ALL_CONTEXTS + [{**extra_vocabs}] if extra_vocabs is not None else ALL_CONTEXTS + super().__init__([data or {}], context=ctx) + From 74ba45df3890fd6b1cf38632961e7a6e39d47da3 Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Wed, 10 Sep 2025 00:24:00 +0200 Subject: [PATCH 02/36] Test initialization of API class --- test/hermes_test/model/test_api.py | 39 ++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 test/hermes_test/model/test_api.py diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py new file mode 100644 index 00000000..6b4b0d06 --- /dev/null +++ b/test/hermes_test/model/test_api.py @@ -0,0 +1,39 @@ +import pytest + +from hermes.model import SoftwareMetadata + +from hermes.model.types.ld_context import ALL_CONTEXTS + +EXTRA_VOCABS = {"foo": "https://bar.net/schema"} + +@pytest.fixture +def default_context(): + return 
{"@context": ALL_CONTEXTS} + +@pytest.fixture +def default_ld(): + return {"@context": ALL_CONTEXTS, "funding": "foo"} + +@pytest.fixture +def custom_context(): + return {"@context": ALL_CONTEXTS + [EXTRA_VOCABS]} + +@pytest.fixture +def custom_ld(): + return {"@context": ALL_CONTEXTS + [EXTRA_VOCABS], "funding": "foo"} + +@pytest.fixture +def none(): + return None + +@pytest.mark.parametrize("data,codemeta,full_context,expanded", [ + (SoftwareMetadata(), "default_context", "default_context", "none"), # FIXME: Replace none fixtures + (SoftwareMetadata({"funding": "foo"}), "default_ld", "none", "none"), # FIXME: Replace none fixtures + (SoftwareMetadata(extra_vocabs=EXTRA_VOCABS), "custom_context", "custom_context", "none"), # FIXME: Replace none fixtures + (SoftwareMetadata({"funding": "foo"}, extra_vocabs=EXTRA_VOCABS), "custom_ld", "none", "none"), # FIXME: Replace none fixtures +]) +def test_init(data, codemeta, full_context, expanded, request): + assert data.compact() == request.getfixturevalue(codemeta) + assert data.full_context == request.getfixturevalue(full_context)["@context"] + assert data.ld_value == request.getfixturevalue(expanded) + From 79575b8845123c4aabc8b35b6d6ab511f9dfb81f Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Wed, 17 Sep 2025 18:03:51 +0200 Subject: [PATCH 03/36] Test API object initiatlization with and without data --- test/hermes_test/model/test_api.py | 33 +++++++++++------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index 6b4b0d06..deb00d74 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -10,30 +10,21 @@ def default_context(): return {"@context": ALL_CONTEXTS} -@pytest.fixture -def default_ld(): - return {"@context": ALL_CONTEXTS, "funding": "foo"} - @pytest.fixture def custom_context(): return {"@context": ALL_CONTEXTS + [EXTRA_VOCABS]} -@pytest.fixture -def custom_ld(): - return 
{"@context": ALL_CONTEXTS + [EXTRA_VOCABS], "funding": "foo"} - -@pytest.fixture -def none(): - return None - -@pytest.mark.parametrize("data,codemeta,full_context,expanded", [ - (SoftwareMetadata(), "default_context", "default_context", "none"), # FIXME: Replace none fixtures - (SoftwareMetadata({"funding": "foo"}), "default_ld", "none", "none"), # FIXME: Replace none fixtures - (SoftwareMetadata(extra_vocabs=EXTRA_VOCABS), "custom_context", "custom_context", "none"), # FIXME: Replace none fixtures - (SoftwareMetadata({"funding": "foo"}, extra_vocabs=EXTRA_VOCABS), "custom_ld", "none", "none"), # FIXME: Replace none fixtures +@pytest.mark.parametrize("metadata,full_context", [ + (SoftwareMetadata(), "default_context"), + (SoftwareMetadata(extra_vocabs=EXTRA_VOCABS), "custom_context"), ]) -def test_init(data, codemeta, full_context, expanded, request): - assert data.compact() == request.getfixturevalue(codemeta) - assert data.full_context == request.getfixturevalue(full_context)["@context"] - assert data.ld_value == request.getfixturevalue(expanded) +def test_init_no_data(metadata, full_context, request): + assert metadata.full_context == request.getfixturevalue(full_context)["@context"] +@pytest.mark.parametrize("metadata,full_context", [ + (SoftwareMetadata({"funding": "foo"}), "default_context"), + (SoftwareMetadata({"funding": "foo"}, extra_vocabs=EXTRA_VOCABS), "custom_context"), +]) +def test_init_with_data(metadata, full_context, request): + assert metadata.full_context == request.getfixturevalue(full_context)["@context"] + assert metadata["funding"] == "foo" From 69f6a245dd1b3c52c4a77fd62a30de8c73ee02a2 Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Wed, 17 Sep 2025 18:09:03 +0200 Subject: [PATCH 04/36] Test API object initialization with nested object --- test/hermes_test/model/test_api.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index deb00d74..4ccf29ea 
100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -6,6 +6,7 @@ EXTRA_VOCABS = {"foo": "https://bar.net/schema"} + @pytest.fixture def default_context(): return {"@context": ALL_CONTEXTS} @@ -14,6 +15,7 @@ def default_context(): def custom_context(): return {"@context": ALL_CONTEXTS + [EXTRA_VOCABS]} + @pytest.mark.parametrize("metadata,full_context", [ (SoftwareMetadata(), "default_context"), (SoftwareMetadata(extra_vocabs=EXTRA_VOCABS), "custom_context"), @@ -21,6 +23,7 @@ def custom_context(): def test_init_no_data(metadata, full_context, request): assert metadata.full_context == request.getfixturevalue(full_context)["@context"] + @pytest.mark.parametrize("metadata,full_context", [ (SoftwareMetadata({"funding": "foo"}), "default_context"), (SoftwareMetadata({"funding": "foo"}, extra_vocabs=EXTRA_VOCABS), "custom_context"), @@ -28,3 +31,14 @@ def test_init_no_data(metadata, full_context, request): def test_init_with_data(metadata, full_context, request): assert metadata.full_context == request.getfixturevalue(full_context)["@context"] assert metadata["funding"] == "foo" + + +def test_init_full_object(): + my_software = {"foo:softwareName": "MySoftware", "foo:egg": "spam", "foo:ham": "eggs", + "maintainer": {"name": "Some Name", "email": "maintainer@example.com"}, + "author": [{"name": "Foo"}, {"name": "Bar"}]} + data = SoftwareMetadata(my_software, extra_vocabs={"foo": "https://foo.bar"}) + assert data["foo:softwareName"] == "MySoftware" + assert data["maintainer"]["name"] == "Some Name" + for author in data["author"]: + assert author["name"] in ["Foo", "Bar"] From 8e1a38bc942ebccf15e91c41e489560e7790de7f Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Wed, 17 Sep 2025 18:11:35 +0200 Subject: [PATCH 05/36] Test appending objects to model via API --- test/hermes_test/model/test_api.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/test/hermes_test/model/test_api.py 
b/test/hermes_test/model/test_api.py index 4ccf29ea..29ed3580 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -33,7 +33,7 @@ def test_init_with_data(metadata, full_context, request): assert metadata["funding"] == "foo" -def test_init_full_object(): +def test_init_nested_object(): my_software = {"foo:softwareName": "MySoftware", "foo:egg": "spam", "foo:ham": "eggs", "maintainer": {"name": "Some Name", "email": "maintainer@example.com"}, "author": [{"name": "Foo"}, {"name": "Bar"}]} @@ -42,3 +42,15 @@ def test_init_full_object(): assert data["maintainer"]["name"] == "Some Name" for author in data["author"]: assert author["name"] in ["Foo", "Bar"] + + +def test_append(): + data = SoftwareMetadata(extra_vocabs={"foo": "https://foo.bar"}) + author1 = {"name": "Foo"} + data["author"] = author1 + author2 = {"name": "Bar"} + data["author"].append(author2) + assert len(data["author"]) == 2 + assert data["author"][0]["name"] == "Foo" + assert data["author"][1]["name"] == "Bar" + From b65989e7bb91e1e604f6df61b0bee995b259bb56 Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Wed, 17 Sep 2025 18:35:42 +0200 Subject: [PATCH 06/36] Test model building via API object - Adds tests for the expected behaviour as described in https://github.com/softwarepub/hermes/issues/423#issuecomment-3271450262 --- test/hermes_test/model/test_api.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index 29ed3580..7b40bc35 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -45,12 +45,29 @@ def test_init_nested_object(): def test_append(): - data = SoftwareMetadata(extra_vocabs={"foo": "https://foo.bar"}) + data = SoftwareMetadata() author1 = {"name": "Foo"} data["author"] = author1 + assert type(data["author"]) is list author2 = {"name": "Bar"} data["author"].append(author2) assert len(data["author"]) == 2 
assert data["author"][0]["name"] == "Foo" assert data["author"][1]["name"] == "Bar" + +def test_iterative_assignment(): + # This tests iterative assignments/traversals to edit/appending values + # This requires SoftwareMetadata.__getitem__ to return a plain dict. SoftwareMetadata.__setitem__ can then + # implement the isinstanceof checks that @notactuallyfinn suggested. + data = SoftwareMetadata(extra_vocabs={"foo": "https://foo.bar"}) + data["author"] = {"name": "Foo"} + # Look, a squirrel! + authors = data["author"] + assert type(authors) is list + author1 = authors[0] + author1["email"] = "author@example.com" + authors[0] = author1 + assert len(authors) == 1 + authors.append({"name": "Bar", "email": "author2@example.com"}) + data["author"] = authors From 59180c75e09096f22e31bf32eafe0afa70a617a2 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Thu, 25 Sep 2025 14:43:14 +0200 Subject: [PATCH 07/36] added an add method to SoftwareMetadata and improved __init__ of it and rewrote a test --- src/hermes/model/api.py | 18 +++++++++++++++--- test/hermes_test/model/test_api.py | 26 +++++++++++++++++--------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/hermes/model/api.py b/src/hermes/model/api.py index 6deba117..eaee6a77 100644 --- a/src/hermes/model/api.py +++ b/src/hermes/model/api.py @@ -1,10 +1,22 @@ -from hermes.model.types import ld_dict +from hermes.model.types import ld_dict, ld_list from hermes.model.types.ld_context import ALL_CONTEXTS + class SoftwareMetadata(ld_dict): - def __init__(self, data: dict=None, extra_vocabs: dict[str, str]=None) -> None: + def __init__(self, data: dict = None, extra_vocabs: dict[str, str] = None) -> None: ctx = ALL_CONTEXTS + [{**extra_vocabs}] if extra_vocabs is not None else ALL_CONTEXTS - super().__init__([data or {}], context=ctx) + super().__init__([ld_dict.from_dict(data, context=ctx).data_dict if data else {}], context=ctx) + def add(self, key, value): + if key not in self: + self[key] = value + 
return + if isinstance(val := self[key], ld_list): + val.append(value) + else: + temp = ld_list([{"@list": []}], parent=self, key=self.ld_proc.expand_iri(self.active_ctx, key), + context=self.context) + temp.extend([val, value]) + self[key] = temp diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index 7b40bc35..d48efa47 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -1,6 +1,7 @@ import pytest from hermes.model import SoftwareMetadata +from hermes.model.types import ld_list, ld_dict from hermes.model.types.ld_context import ALL_CONTEXTS @@ -11,6 +12,7 @@ def default_context(): return {"@context": ALL_CONTEXTS} + @pytest.fixture def custom_context(): return {"@context": ALL_CONTEXTS + [EXTRA_VOCABS]} @@ -44,16 +46,22 @@ def test_init_nested_object(): assert author["name"] in ["Foo", "Bar"] -def test_append(): +def test_add(): + data = SoftwareMetadata() + data.add("foo", "a") + assert data["foo"] == "a" + data.add("foo", "b") + assert type(data["foo"]) is ld_list and data["foo"].item_list == [{"@value": "a"}, {"@value": "b"}] + data.add("foo", "c") + assert data["foo"].item_list == [{"@value": "a"}, {"@value": "b"}, {"@value": "c"}] data = SoftwareMetadata() - author1 = {"name": "Foo"} - data["author"] = author1 - assert type(data["author"]) is list - author2 = {"name": "Bar"} - data["author"].append(author2) - assert len(data["author"]) == 2 - assert data["author"][0]["name"] == "Foo" - assert data["author"][1]["name"] == "Bar" + # FIXME: #433 will fix this + data.add("foo", {"bar": "foo"}) + assert type(data["foo"]) is ld_dict and data["foo"].data_dict == {"bar": "foo"} + data.add("foo", {"bar": "foo"}) + assert type(data["foo"]) is ld_list and data["foo"].item_list == 2 * [{"bar": "foo"}] + data.add("foo", {"bar": "foo"}) + assert data["foo"].item_list == 3 * [{"bar": "foo"}] def test_iterative_assignment(): From daed5d36b48b50c5c8b0b13ee6a13c0941b2f9fc Mon Sep 17 00:00:00 2001 From: 
Stephan Druskat Date: Fri, 26 Sep 2025 09:48:17 +0200 Subject: [PATCH 08/36] Change existing test to assume returned lists --- test/hermes_test/model/test_api.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index d48efa47..44e541fd 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -40,10 +40,10 @@ def test_init_nested_object(): "maintainer": {"name": "Some Name", "email": "maintainer@example.com"}, "author": [{"name": "Foo"}, {"name": "Bar"}]} data = SoftwareMetadata(my_software, extra_vocabs={"foo": "https://foo.bar"}) - assert data["foo:softwareName"] == "MySoftware" - assert data["maintainer"]["name"] == "Some Name" + assert data["foo:softwareName"] == ["MySoftware"] + assert data["maintainer"]["name"] == ["Some Name"] for author in data["author"]: - assert author["name"] in ["Foo", "Bar"] + assert author["name"] in [["Foo"], ["Bar"]] def test_add(): @@ -72,10 +72,10 @@ def test_iterative_assignment(): data["author"] = {"name": "Foo"} # Look, a squirrel! 
authors = data["author"] - assert type(authors) is list + assert isinstance(authors, list) author1 = authors[0] author1["email"] = "author@example.com" authors[0] = author1 - assert len(authors) == 1 authors.append({"name": "Bar", "email": "author2@example.com"}) data["author"] = authors + assert len(authors) == 2 \ No newline at end of file From 45839153f059d4686188eb0822122e4aff7e5745 Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Fri, 26 Sep 2025 10:07:29 +0200 Subject: [PATCH 09/36] Add test for harvesting case --- test/hermes_test/model/test_api.py | 37 +++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index 44e541fd..817fc0b3 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -78,4 +78,39 @@ def test_iterative_assignment(): authors[0] = author1 authors.append({"name": "Bar", "email": "author2@example.com"}) data["author"] = authors - assert len(authors) == 2 \ No newline at end of file + assert len(authors) == 2 + + +### Application scenarios + +## Harvesting + +def test_harvest(): + data = SoftwareMetadata() + data["author"] = {"name": "Foo"} + data["author"].append({"name": "Bar"}) + data["author"][0]["email"] = "foo@bar.net" + data["author"][0]["email"].append("foo@baz.com") + assert len(data["author"]) == 2 + assert len(data["author"][1]["email"]) == 2 + assert len(data["author"][0]["email"]) == 0 + harvest = {"authors": [{"name": "Foo", "affiliations": ["Uni A", "Lab B"], "kw": ["a", "b", "c"]}, {"name": "Bar", "affiliations": ["Uni C"], "email": "bar@c.edu"}, {"name": "Baz", "affiliations": ["Lab E"]}]} + for author in harvest["authors"]: + for exist_author in data["author"]: + if author["name"] == exist_author["name"]: + exist_author["affiliation"] = author["affiliations"] + exist_author["email"].append(author["email"]) + exist_author["schema:knowsAbout"].append(kw for kw in author["kw"]) + 
assert len(data["author"]) == 3 + foo, bar, baz = data["author"] + assert foo["name"] == "Foo" + assert foo["affiliation"] == ["Uni A", "Lab B"] + assert foo["schema:knowsAbout"] == ["a", "b", "c"] + assert foo["email"] == ["foo@bar.net", "foo@baz.com"] + assert bar["name"] == "Bar" + assert bar["affiliation"] == ["Uni C"] + assert bar["email"] == ["bar@c.edu"] + assert baz["name"] == "Baz" + assert baz["affiliation"] == ["Lab E"] + assert baz["schema:knowsAbout"] is None + assert baz["email"] is None From 6808272435008df01d56b96c97540d1eaad2bf94 Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Fri, 26 Sep 2025 10:23:02 +0200 Subject: [PATCH 10/36] Add more comprehensive usage test --- test/hermes_test/model/test_api.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index 817fc0b3..7849b884 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -81,11 +81,7 @@ def test_iterative_assignment(): assert len(authors) == 2 -### Application scenarios - -## Harvesting - -def test_harvest(): +def test_usage(): data = SoftwareMetadata() data["author"] = {"name": "Foo"} data["author"].append({"name": "Bar"}) @@ -114,3 +110,11 @@ def test_harvest(): assert baz["affiliation"] == ["Lab E"] assert baz["schema:knowsAbout"] is None assert baz["email"] is None + assert data["@type"] == "SoftwareSourceCode" + assert data["@context"] == ALL_CONTEXTS + for author in data["author"]: + assert "name" in author + assert "email" in author + if "schema:knowsAbout" not in author: + author["schema:knowsAbout"] = None + author["schema:pronouns"] = "they/them" \ No newline at end of file From 2f7eadf6d7299bdcbf7a183cfc35692e5345a5f3 Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Fri, 26 Sep 2025 10:25:16 +0200 Subject: [PATCH 11/36] Add new license annotation for Python files --- REUSE.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/REUSE.toml b/REUSE.toml index b3033158..c81fab97 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -17,3 +17,9 @@ path = ["REUSE.toml"] precedence = "aggregate" SPDX-FileCopyrightText = "German Aerospace Center (DLR), Helmholtz-Zentrum Dresden-Rossendorf, Forschungszentrum Jülich" SPDX-License-Identifier = "CC0-1.0" + +[[annotations]] +path = ["src/**/*.py", "test/**/*.py"] +precedence = "aggregate" +SPDX-FileCopyrightText = "German Aerospace Center (DLR), Helmholtz-Zentrum Dresden-Rossendorf, Forschungszentrum Jülich" +SPDX-License-Identifier = "Apache-2.0" \ No newline at end of file From 0f324949c8be3f90cd1d5bc5c705b473e35ec055 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 26 Sep 2025 12:46:05 +0200 Subject: [PATCH 12/36] changed conversions of types to output ld_lists for every item in a dict --- src/hermes/model/types/__init__.py | 62 +++++++++++------------------- src/hermes/model/types/ld_list.py | 4 +- 2 files changed, 24 insertions(+), 42 deletions(-) diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index 6fff24b2..11270d43 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -19,57 +19,39 @@ lambda c: isinstance(c, ld_container), { "ld_container": lambda c, **_: c, - "json": lambda c, **_: c.compact(), "expanded_json": lambda c, **_: c.ld_value, - } + }, ), - # Wrap expanded_json to ld_container - (ld_container.is_ld_id, dict(python=lambda c, **_: c[0]['@id'])), - (ld_container.is_typed_ld_value, dict(python=ld_container.typed_ld_to_py)), - (ld_container.is_ld_value, dict(python=lambda c, **_: c[0]['@value'])), + # Wrap item from ld_dict in ld_list (ld_list.is_ld_list, dict(ld_container=ld_list)), - (ld_dict.is_ld_dict, dict(ld_container=ld_dict)), + (lambda c: isinstance(c, list) and all(isinstance(item, dict) for item in c), dict(ld_container=ld_list.from_list)), - # Expand and access JSON data - (ld_container.is_json_id, dict(python=lambda c: c["@id"], 
expanded_json=lambda c, **_: [c])), + # pythonize items from lists (expanded set is already handled above) + (ld_container.is_json_id, dict(python=lambda c, **_: c["@id"])), (ld_container.is_typed_json_value, dict(python=ld_container.typed_ld_to_py)), - (ld_container.is_json_value, dict(python=lambda c, **_: c["@value"], expanded_json=lambda c, **_: [c])), + (ld_container.is_json_value, dict(python=lambda c, **_: c["@value"])), (ld_list.is_container, dict(ld_container=lambda c, **kw: ld_list([c], **kw))), - - # FIXME: add conversion from list and json dict to expanded_json - # to parse nested dicts and lists when using for example __setitem__(key, value) from ld_dict - # where value is converted to expanded_json bevor adding it to data_dict - # Suggested: - # ( - # ld_dict.is_json_dict, - # { - # "ld_container": ld_dict.from_dict, - # "expanded_json": lambda c, **kw: kw["parent"]._to_expanded_json(kw["key"], ld_dict.from_dict(c, **kw)) - # } - # ), - # - # ( - # lambda c: isinstance(c, list), - # { - # "ld_container": ld_list.from_list, - # "expanded_json": lambda c, **kw: kw["parent"]._to_expanded_json(kw["key"], ld_list.from_list(c, **kw)) - # } - # ), (ld_dict.is_json_dict, dict(ld_container=ld_dict.from_dict)), - (lambda c: isinstance(c, list), dict(ld_container=ld_list.from_list)), - - # Wrap internal data types + # Convert internal data types to expanded_json + (lambda c: ld_container.is_json_id(c) or ld_container.is_json_value(c), dict(expanded_json=lambda c, **_: [c])), + (ld_dict.is_json_dict, dict(expanded_json=lambda c, **kw: ld_dict.from_dict(c, **kw).ld_value)), + (lambda c: isinstance(c, list), dict(expanded_json=lambda c, **kw: ld_list.from_list(c, **kw).ld_value)), (lambda v: isinstance(v, (int, float, str, bool)), dict(expanded_json=lambda v, **_: [{"@value": v}])), - - (lambda v: isinstance(v, datetime), - dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:DateTime"]}])), - (lambda v: isinstance(v, date), - 
dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Date"]}])), - (lambda v: isinstance(v, time), - dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Time"]}])), + ( + lambda v: isinstance(v, datetime), + dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:DateTime"]}]), + ), + ( + lambda v: isinstance(v, date), + dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Date"]}]), + ), + ( + lambda v: isinstance(v, time), + dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Time"]}]), + ), ] diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index 62a7e5f3..110b95f7 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -33,7 +33,7 @@ def __getitem__(self, index): if isinstance(index, slice): return [self[i] for i in [*range(len(self))][index]] - item = self._to_python(self.key, self.item_list[index:index + 1]) + item = self._to_python(self.key, self.item_list[index]) if isinstance(item, ld_container): item.index = index return item @@ -46,7 +46,7 @@ def __len__(self): def __iter__(self): for index, value in enumerate(self.item_list): - item = self._to_python(self.key, [value]) + item = self._to_python(self.key, value) if isinstance(item, ld_container): item.index = index yield item From 8298e49f86547b80e5388880f41206849c1b63fb Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 26 Sep 2025 12:46:42 +0200 Subject: [PATCH 13/36] added some tests for the conversions and formated to satisfy flake8 --- test/hermes_test/model/test_api.py | 20 +++++++--- test/hermes_test/model/types/test_ld_dict.py | 39 +++++++++++--------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index 7849b884..ce3fe4e7 100644 --- a/test/hermes_test/model/test_api.py +++ 
b/test/hermes_test/model/test_api.py @@ -36,9 +36,13 @@ def test_init_with_data(metadata, full_context, request): def test_init_nested_object(): - my_software = {"foo:softwareName": "MySoftware", "foo:egg": "spam", "foo:ham": "eggs", - "maintainer": {"name": "Some Name", "email": "maintainer@example.com"}, - "author": [{"name": "Foo"}, {"name": "Bar"}]} + my_software = { + "foo:softwareName": "MySoftware", + "foo:egg": "spam", + "foo:ham": "eggs", + "maintainer": {"name": "Some Name", "email": "maintainer@example.com"}, + "author": [{"name": "Foo"}, {"name": "Bar"}], + } data = SoftwareMetadata(my_software, extra_vocabs={"foo": "https://foo.bar"}) assert data["foo:softwareName"] == ["MySoftware"] assert data["maintainer"]["name"] == ["Some Name"] @@ -90,7 +94,13 @@ def test_usage(): assert len(data["author"]) == 2 assert len(data["author"][1]["email"]) == 2 assert len(data["author"][0]["email"]) == 0 - harvest = {"authors": [{"name": "Foo", "affiliations": ["Uni A", "Lab B"], "kw": ["a", "b", "c"]}, {"name": "Bar", "affiliations": ["Uni C"], "email": "bar@c.edu"}, {"name": "Baz", "affiliations": ["Lab E"]}]} + harvest = { + "authors": [ + {"name": "Foo", "affiliations": ["Uni A", "Lab B"], "kw": ["a", "b", "c"]}, + {"name": "Bar", "affiliations": ["Uni C"], "email": "bar@c.edu"}, + {"name": "Baz", "affiliations": ["Lab E"]}, + ] + } for author in harvest["authors"]: for exist_author in data["author"]: if author["name"] == exist_author["name"]: @@ -117,4 +127,4 @@ def test_usage(): assert "email" in author if "schema:knowsAbout" not in author: author["schema:knowsAbout"] = None - author["schema:pronouns"] = "they/them" \ No newline at end of file + author["schema:pronouns"] = "they/them" diff --git a/test/hermes_test/model/types/test_ld_dict.py b/test/hermes_test/model/types/test_ld_dict.py index 2fc0d5f3..7e7099bd 100644 --- a/test/hermes_test/model/types/test_ld_dict.py +++ b/test/hermes_test/model/types/test_ld_dict.py @@ -26,18 +26,22 @@ def 
test_malformed_input(): def test_build_in_get(): - di = ld_dict([{"name": [{"@value": "Manu Sporny"}], - "homepage": [{"@id": "http://manu.sporny.org/"}], - "foo": [{"foobar": "bar", "barfoo": "foo"}]}]) - assert di["name"] == "Manu Sporny" - assert di["homepage"] == "http://manu.sporny.org/" - assert di["foo"].data_dict == ld_dict([{"foobar": "bar", "barfoo": "foo"}]).data_dict + di = ld_dict([{"http://schema.org/name": [{"@value": "Manu Sporny"}], + "http://schema.org/homepage": [{"@id": "http://manu.sporny.org/"}], + "http://schema.org/foo": [{"http://schema.org/foobar": "bar", "http://schema.org/barfoo": "foo"}]}], + context=[{"schema": "http://schema.org/"}]) + assert isinstance(di["schema:name"], ld_list) and di["schema:name"].item_list == [{"@value": "Manu Sporny"}] + assert isinstance(di["schema:homepage"], ld_list) + assert di["schema:homepage"].item_list == [{"@id": "http://manu.sporny.org/"}] + assert isinstance(di["http://schema.org/foo"], ld_list) and isinstance(di["http://schema.org/foo"][0], ld_dict) + assert di["http://schema.org/foo"][0].data_dict == {"http://schema.org/foobar": [{"@value": "bar"}], + "http://schema.org/barfoo": [{"@value": "foo"}]} with pytest.raises(KeyError): di["bar"] di = ld_dict([{"http://xmlns.com/foaf/0.1/name": [{"@value": "Manu Sporny"}]}], context={"xmlns": "http://xmlns.com/foaf/0.1/"}) - assert di["xmlns:name"] == "Manu Sporny" + assert di["xmlns:name"].item_list == [{"@value": "Manu Sporny"}] def test_build_in_set(): @@ -95,7 +99,7 @@ def test_build_in_set(): }] }] } - assert isinstance(di["schema:result"]["schema:error"]["schema:name"], ld_list) + assert isinstance(di["schema:result"][0]["schema:error"][0]["schema:name"], ld_list) def test_build_in_delete(): @@ -117,8 +121,8 @@ def test_build_in_contains(): def test_get(): di = ld_dict([{"https://schema.org/name": [{"@value": "Manu Sporny"}]}], context={"schema": "https://schema.org/"}) - assert di.get("https://schema.org/name") == "Manu Sporny" - assert 
di.get("schema:name") == "Manu Sporny" + assert di.get("https://schema.org/name").item_list == [{"@value": "Manu Sporny"}] + assert di.get("schema:name").item_list == [{"@value": "Manu Sporny"}] assert di.get("bar", None) is None with pytest.raises(KeyError): di.get("bar") @@ -169,11 +173,12 @@ def test_items(): inner_di = ld_dict([{}], parent=di) inner_di.update({"xmlns:foobar": "bar", "http://xmlns.com/foaf/0.1/barfoo": {"@id": "foo"}}) di.update({"http://xmlns.com/foaf/0.1/name": "foo", "xmlns:homepage": {"@id": "bar"}, "xmlns:foo": inner_di}) - assert [*di.items()][0:2] == [("http://xmlns.com/foaf/0.1/name", "foo"), - ("http://xmlns.com/foaf/0.1/homepage", "bar")] - assert [*di.items()][2][0] == "http://xmlns.com/foaf/0.1/foo" - assert [*di.items()][2][1].data_dict == {"http://xmlns.com/foaf/0.1/foobar": [{"@value": "bar"}], - "http://xmlns.com/foaf/0.1/barfoo": [{"@id": "foo"}]} + items = [*di.items()] + assert (items[0][0], items[1][0]) == ("http://xmlns.com/foaf/0.1/name", "http://xmlns.com/foaf/0.1/homepage") + assert (items[0][1].item_list, items[1][1].item_list) == ([{"@value": "foo"}], [{"@id": "bar"}]) + assert items[2][0] == "http://xmlns.com/foaf/0.1/foo" and isinstance(items[2][1], ld_list) + assert items[2][1][0].data_dict == {"http://xmlns.com/foaf/0.1/foobar": [{"@value": "bar"}], + "http://xmlns.com/foaf/0.1/barfoo": [{"@id": "foo"}]} def test_ref(): @@ -191,8 +196,8 @@ def test_to_python(): inner_di = ld_dict([{}], parent=di) inner_di.update({"xmlns:foobar": "bar", "http://xmlns.com/foaf/0.1/barfoo": {"@id": "foo"}}) di.update({"http://xmlns.com/foaf/0.1/name": "foo", "xmlns:homepage": {"@id": "bar"}, "xmlns:foo": inner_di}) - assert di.to_python() == {"xmlns:name": "foo", "xmlns:homepage": "bar", - "xmlns:foo": {"xmlns:foobar": "bar", "xmlns:barfoo": "foo"}} + assert di.to_python() == {"xmlns:name": ["foo"], "xmlns:homepage": ["bar"], + "xmlns:foo": [{"xmlns:foobar": ["bar"], "xmlns:barfoo": ["foo"]}]} def test_from_dict(): From 
3a8bfbed119f5823ac9a69aacf7483137d154325 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 26 Sep 2025 12:55:27 +0200 Subject: [PATCH 14/36] added three more conversions for container to expanded json --- src/hermes/model/types/__init__.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index 11270d43..e533a069 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -38,6 +38,22 @@ # Convert internal data types to expanded_json (lambda c: ld_container.is_json_id(c) or ld_container.is_json_value(c), dict(expanded_json=lambda c, **_: [c])), (ld_dict.is_json_dict, dict(expanded_json=lambda c, **kw: ld_dict.from_dict(c, **kw).ld_value)), + (ld_dict.is_ld_dict, dict(expanded_json=lambda c, **kw: ld_dict.from_dict(c[0], **kw).ld_value)), + ( + ld_list.is_container, + dict( + expanded_json=lambda c, **kw: ld_list.from_list( + ld_list([c]).item_list, container=ld_list([c]).container, **kw + ).ld_value + ), + ), + ( + ld_list.is_ld_list, + dict( + expanded_json=lambda c, **kw: ld_list.from_list( + ld_list(c).item_list, container=ld_list(c).container, **kw + ).ld_value + ), ), (lambda c: isinstance(c, list), dict(expanded_json=lambda c, **kw: ld_list.from_list(c, **kw).ld_value)), (lambda v: isinstance(v, (int, float, str, bool)), dict(expanded_json=lambda v, **_: [{"@value": v}])), ( From 2ef89d397d1bc4154a7eb66239af56b3c42f127c Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 26 Sep 2025 14:33:18 +0200 Subject: [PATCH 15/36] always return a list when getting an item from ld_dict --- src/hermes/model/types/ld_dict.py | 18 +++++++++--------- test/hermes_test/model/types/test_ld_dict.py | 9 ++++----- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index 49461894..439e0043 100644 --- a/src/hermes/model/types/ld_dict.py +++ 
b/src/hermes/model/types/ld_dict.py @@ -21,7 +21,11 @@ def __init__(self, data, *, parent=None, key=None, index=None, context=None): def __getitem__(self, key): full_iri = self.ld_proc.expand_iri(self.active_ctx, key) - ld_value = self.data_dict[full_iri] + try: + ld_value = self.data_dict[full_iri] + except KeyError: + self.data_dict.update({full_iri: [{"@list": []}]}) + ld_value = self.data_dict[full_iri] return self._to_python(full_iri, ld_value) def __setitem__(self, key, value): @@ -35,16 +39,12 @@ def __delitem__(self, key): def __contains__(self, key): full_iri = self.ld_proc.expand_iri(self.active_ctx, key) - return full_iri in self.data_dict + return len(self[full_iri]) != 0 def get(self, key, default=_NO_DEFAULT): - try: - value = self[key] - return value - except KeyError as e: - if default is not ld_dict._NO_DEFAULT: - return default - raise e + if key not in self and default is not ld_dict._NO_DEFAULT: + return default + return self[key] def update(self, other): for key, value in other.items(): diff --git a/test/hermes_test/model/types/test_ld_dict.py b/test/hermes_test/model/types/test_ld_dict.py index 7e7099bd..b31c24a6 100644 --- a/test/hermes_test/model/types/test_ld_dict.py +++ b/test/hermes_test/model/types/test_ld_dict.py @@ -28,7 +28,8 @@ def test_malformed_input(): def test_build_in_get(): di = ld_dict([{"http://schema.org/name": [{"@value": "Manu Sporny"}], "http://schema.org/homepage": [{"@id": "http://manu.sporny.org/"}], - "http://schema.org/foo": [{"http://schema.org/foobar": "bar", "http://schema.org/barfoo": "foo"}]}], + "http://schema.org/foo": [{"http://schema.org/foobar": [{"@value": "bar"}], + "http://schema.org/barfoo": [{"@value": "foo"}]}]}], context=[{"schema": "http://schema.org/"}]) assert isinstance(di["schema:name"], ld_list) and di["schema:name"].item_list == [{"@value": "Manu Sporny"}] assert isinstance(di["schema:homepage"], ld_list) @@ -36,8 +37,7 @@ def test_build_in_get(): assert isinstance(di["http://schema.org/foo"], 
ld_list) and isinstance(di["http://schema.org/foo"][0], ld_dict) assert di["http://schema.org/foo"][0].data_dict == {"http://schema.org/foobar": [{"@value": "bar"}], "http://schema.org/barfoo": [{"@value": "foo"}]} - with pytest.raises(KeyError): - di["bar"] + assert isinstance(di["bar"], ld_list) and len(di["bar"]) == 0 di = ld_dict([{"http://xmlns.com/foaf/0.1/name": [{"@value": "Manu Sporny"}]}], context={"xmlns": "http://xmlns.com/foaf/0.1/"}) @@ -124,8 +124,7 @@ def test_get(): assert di.get("https://schema.org/name").item_list == [{"@value": "Manu Sporny"}] assert di.get("schema:name").item_list == [{"@value": "Manu Sporny"}] assert di.get("bar", None) is None - with pytest.raises(KeyError): - di.get("bar") + assert isinstance(di["bar"], ld_list) and len(di["bar"]) == 0 def test_update(): From 2db93cffe94a3a2234c9650bbb5448d4902abd00 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 26 Sep 2025 14:33:56 +0200 Subject: [PATCH 16/36] added tests and fixed issues --- src/hermes/model/types/__init__.py | 10 ++-- test/hermes_test/model/test_api.py | 83 ++++++++++++++++-------------- 2 files changed, 52 insertions(+), 41 deletions(-) diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index e533a069..6c02662c 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -26,14 +26,17 @@ # Wrap item from ld_dict in ld_list (ld_list.is_ld_list, dict(ld_container=ld_list)), - (lambda c: isinstance(c, list) and all(isinstance(item, dict) for item in c), dict(ld_container=ld_list.from_list)), + ( + lambda c: isinstance(c, list) and all(isinstance(item, dict) for item in c), + dict(ld_container=lambda c, **kw: ld_list([{"@list": c}], **kw)) + ), # pythonize items from lists (expanded set is already handled above) (ld_container.is_json_id, dict(python=lambda c, **_: c["@id"])), (ld_container.is_typed_json_value, dict(python=ld_container.typed_ld_to_py)), (ld_container.is_json_value, dict(python=lambda 
c, **_: c["@value"])), (ld_list.is_container, dict(ld_container=lambda c, **kw: ld_list([c], **kw))), - (ld_dict.is_json_dict, dict(ld_container=ld_dict.from_dict)), + (ld_dict.is_json_dict, dict(ld_container=lambda c, **kw: ld_dict([c], **kw))), # Convert internal data types to expanded_json (lambda c: ld_container.is_json_id(c) or ld_container.is_json_value(c), dict(expanded_json=lambda c, **_: [c])), @@ -53,7 +56,8 @@ expanded_json=lambda c, **kw: ld_list.from_list( ld_list(c).item_list, container=ld_list(c).container, **kw ).ld_value - ), ), + ), + ), (lambda c: isinstance(c, list), dict(expanded_json=lambda c, **kw: ld_list.from_list(c, **kw).ld_value)), (lambda v: isinstance(v, (int, float, str, bool)), dict(expanded_json=lambda v, **_: [{"@value": v}])), ( diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index ce3fe4e7..0ee910db 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -32,40 +32,42 @@ def test_init_no_data(metadata, full_context, request): ]) def test_init_with_data(metadata, full_context, request): assert metadata.full_context == request.getfixturevalue(full_context)["@context"] - assert metadata["funding"] == "foo" + assert metadata["funding"][0] == "foo" def test_init_nested_object(): my_software = { - "foo:softwareName": "MySoftware", + "schema:softwareName": "MySoftware", "foo:egg": "spam", "foo:ham": "eggs", "maintainer": {"name": "Some Name", "email": "maintainer@example.com"}, "author": [{"name": "Foo"}, {"name": "Bar"}], } data = SoftwareMetadata(my_software, extra_vocabs={"foo": "https://foo.bar"}) - assert data["foo:softwareName"] == ["MySoftware"] - assert data["maintainer"]["name"] == ["Some Name"] + assert data["schema:softwareName"][0] == "MySoftware" + assert data["maintainer"][0]["name"][0] == "Some Name" for author in data["author"]: - assert author["name"] in [["Foo"], ["Bar"]] + for name in author["name"]: + assert name in ["Foo", "Bar"] -def 
test_add(): +def test_append(): data = SoftwareMetadata() - data.add("foo", "a") - assert data["foo"] == "a" - data.add("foo", "b") + data["foo"].append("a") + assert type(data["foo"]) is ld_list and data["foo"][0] == "a" and data["foo"].item_list == [{"@value": "a"}] + data["foo"].append("b") assert type(data["foo"]) is ld_list and data["foo"].item_list == [{"@value": "a"}, {"@value": "b"}] - data.add("foo", "c") + data["foo"].append("c") assert data["foo"].item_list == [{"@value": "a"}, {"@value": "b"}, {"@value": "c"}] data = SoftwareMetadata() # FIXME: #433 will fix this - data.add("foo", {"bar": "foo"}) - assert type(data["foo"]) is ld_dict and data["foo"].data_dict == {"bar": "foo"} - data.add("foo", {"bar": "foo"}) - assert type(data["foo"]) is ld_list and data["foo"].item_list == 2 * [{"bar": "foo"}] - data.add("foo", {"bar": "foo"}) - assert data["foo"].item_list == 3 * [{"bar": "foo"}] + data["foo"].append({"schema:name": "foo"}) + assert type(data["foo"]) is ld_list and type(data["foo"][0]) is ld_dict + assert data["foo"][0].data_dict == {"http://schema.org/name": [{"@value": "foo"}]} + data["foo"].append({"schema:name": "foo"}) + assert type(data["foo"]) is ld_list and data["foo"].item_list == 2*[{"http://schema.org/name": [{"@value": "foo"}]}] + data["foo"].append({"schema:name": "foo"}) + assert data["foo"].item_list == 3 * [{"http://schema.org/name": [{"@value": "foo"}]}] def test_iterative_assignment(): @@ -76,7 +78,7 @@ def test_iterative_assignment(): data["author"] = {"name": "Foo"} # Look, a squirrel! 
authors = data["author"] - assert isinstance(authors, list) + assert isinstance(authors, ld_list) author1 = authors[0] author1["email"] = "author@example.com" authors[0] = author1 @@ -92,36 +94,41 @@ def test_usage(): data["author"][0]["email"] = "foo@bar.net" data["author"][0]["email"].append("foo@baz.com") assert len(data["author"]) == 2 - assert len(data["author"][1]["email"]) == 2 - assert len(data["author"][0]["email"]) == 0 + assert len(data["author"][0]["email"]) == 2 + assert len(data["author"][1]["email"]) == 0 harvest = { "authors": [ - {"name": "Foo", "affiliations": ["Uni A", "Lab B"], "kw": ["a", "b", "c"]}, - {"name": "Bar", "affiliations": ["Uni C"], "email": "bar@c.edu"}, - {"name": "Baz", "affiliations": ["Lab E"]}, + {"name": "Foo", "affiliation": ["Uni A", "Lab B"], "kw": ["a", "b", "c"]}, + {"name": "Bar", "affiliation": ["Uni C"], "email": "bar@c.edu"}, + {"name": "Baz", "affiliation": ["Lab E"]}, ] } for author in harvest["authors"]: for exist_author in data["author"]: - if author["name"] == exist_author["name"]: - exist_author["affiliation"] = author["affiliations"] - exist_author["email"].append(author["email"]) - exist_author["schema:knowsAbout"].append(kw for kw in author["kw"]) + if author["name"] == exist_author["name"][0]: + exist_author["affiliation"] = author["affiliation"] + if "email" in author: + exist_author["email"].append(author["email"]) + if "kw" in author: + exist_author["schema:knowsAbout"].extend(author["kw"]) + break + else: + data["author"].append(author) assert len(data["author"]) == 3 foo, bar, baz = data["author"] - assert foo["name"] == "Foo" - assert foo["affiliation"] == ["Uni A", "Lab B"] - assert foo["schema:knowsAbout"] == ["a", "b", "c"] - assert foo["email"] == ["foo@bar.net", "foo@baz.com"] - assert bar["name"] == "Bar" - assert bar["affiliation"] == ["Uni C"] - assert bar["email"] == ["bar@c.edu"] - assert baz["name"] == "Baz" - assert baz["affiliation"] == ["Lab E"] - assert baz["schema:knowsAbout"] is None 
- assert baz["email"] is None + assert foo["name"][0] == "Foo" + assert foo["affiliation"].to_python() == ["Uni A", "Lab B"] + assert foo["schema:knowsAbout"].to_python() == ["a", "b", "c"] + assert foo["email"].to_python() == ["foo@bar.net", "foo@baz.com"] + assert bar["name"][0] == "Bar" + assert bar["affiliation"].to_python() == ["Uni C"] + assert bar["email"].to_python() == ["bar@c.edu"] + assert baz["name"][0] == "Baz" + assert baz["affiliation"].to_python() == ["Lab E"] + assert len(baz["schema:knowsAbout"]) == 0 + assert len(baz["email"]) == 0 assert data["@type"] == "SoftwareSourceCode" - assert data["@context"] == ALL_CONTEXTS + assert data["@context"] == ALL_CONTEXTS # FIXME: #435 will solve this issue for author in data["author"]: assert "name" in author assert "email" in author From 17213251034ae8d177719d1f09d7d85e193ba1e4 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Thu, 2 Oct 2025 10:54:25 +0200 Subject: [PATCH 17/36] clean up --- src/hermes/model/api.py | 14 +------------- test/hermes_test/model/test_api.py | 2 -- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/src/hermes/model/api.py b/src/hermes/model/api.py index eaee6a77..8b079544 100644 --- a/src/hermes/model/api.py +++ b/src/hermes/model/api.py @@ -1,4 +1,4 @@ -from hermes.model.types import ld_dict, ld_list +from hermes.model.types import ld_dict from hermes.model.types.ld_context import ALL_CONTEXTS @@ -8,15 +8,3 @@ class SoftwareMetadata(ld_dict): def __init__(self, data: dict = None, extra_vocabs: dict[str, str] = None) -> None: ctx = ALL_CONTEXTS + [{**extra_vocabs}] if extra_vocabs is not None else ALL_CONTEXTS super().__init__([ld_dict.from_dict(data, context=ctx).data_dict if data else {}], context=ctx) - - def add(self, key, value): - if key not in self: - self[key] = value - return - if isinstance(val := self[key], ld_list): - val.append(value) - else: - temp = ld_list([{"@list": []}], parent=self, key=self.ld_proc.expand_iri(self.active_ctx, key), - 
context=self.context) - temp.extend([val, value]) - self[key] = temp diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index 0ee910db..fd2288d4 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -60,7 +60,6 @@ def test_append(): data["foo"].append("c") assert data["foo"].item_list == [{"@value": "a"}, {"@value": "b"}, {"@value": "c"}] data = SoftwareMetadata() - # FIXME: #433 will fix this data["foo"].append({"schema:name": "foo"}) assert type(data["foo"]) is ld_list and type(data["foo"][0]) is ld_dict assert data["foo"][0].data_dict == {"http://schema.org/name": [{"@value": "foo"}]} @@ -83,7 +82,6 @@ def test_iterative_assignment(): author1["email"] = "author@example.com" authors[0] = author1 authors.append({"name": "Bar", "email": "author2@example.com"}) - data["author"] = authors assert len(authors) == 2 From 1fb7574f9105817e90166ed54aa57bc4a8423723 Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Thu, 2 Oct 2025 23:43:39 +0200 Subject: [PATCH 18/36] Comment out local extension that breaks build --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 9ee616da..7ab7b582 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -102,7 +102,7 @@ def read_version_from_pyproject(): 'sphinx_togglebutton', 'sphinxcontrib.datatemplates', # Custom extensions, see `_ext` directory. 
- 'plugin_markup', + # 'plugin_markup', ] language = 'en' From 8d147a8ac0cb3708933d739ff64db645b74f10ce Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Thu, 2 Oct 2025 23:44:03 +0200 Subject: [PATCH 19/36] Document data model API --- docs/source/dev/data_model.md | 305 ++++++++++++++++++++++++++++++++-- 1 file changed, 292 insertions(+), 13 deletions(-) diff --git a/docs/source/dev/data_model.md b/docs/source/dev/data_model.md index a5f4c727..c51f3401 100644 --- a/docs/source/dev/data_model.md +++ b/docs/source/dev/data_model.md @@ -1,27 +1,306 @@ -# HERMES Data Model +# Data model -*hermes* uses an internal data model to store the output of the different stages. -All the data is collected in a directory called `.hermes` located in the root of the project directory. +`hermes`' internal data model acts like a contract between `hermes` and plugins. +It is based on [**JSON-LD (JSON Linked Data)**](https://json-ld.org/), and +the public API simplifies interaction with the data model through Python code. -You should not need to interact with this data directly. -Instead, use {class}`hermes.model.context.HermesContext` and respective subclasses to access the data in a consistent way. +Output of the different `hermes` commands consequently is valid JSON-LD, serialized as JSON, that is cached in +subdirectories of the `.hermes/` directory that is created in the root of the project directory. +The cache is purely for internal purposes, its data should not be interacted with. -## Harvest Data +As JSON-LD can be confusing to work with directly, the following sections provide documentation of the data model. +Depending on whether you develop a plugin for `hermes`, or you develop `hermes` itself, you need to know either _some_, +or _quite a few_ things about JSON-LD. -The data of the havesters is cached in the sub-directory `.hermes/harvest`. -Each harvester has a separate cache file to allow parallel harvesting. 
-The cache file is encoded in JSON and stored in `.hermes/harvest/HARVESTER_NAME.json` -where `HARVESTER_NAME` corresponds to the entry point name. +## The data model for plugin developers -{class}`hermes.model.context.HermesHarvestContext` encapsulates these harvester caches. +If you develop a plugin for `hermes`, you will only need to work with a single Python class and the public API +it provides: {class}`hermes.model.SoftwareMetadata`. + +Nevertheless, it is still necessary that you know _some_ things about JSON-LD. + +### JSON-LD for plugin developers + +```{attention} +Work in progress. +``` + + +### Working with the `hermes` data model in plugins + +> **Goal** +> Understand how plugins access and interact with the `hermes` data model. + +`hermes` aims to hide as much of the data model as possible behind a public API +to avoid that plugin developers have to deal with the complexities and intricacies of JSON-LD. + +#### Model instances in different types of plugin + +You can extend `hermes` with plugins for three different commands: `harvest`, `curate`, `deposit`. + +The commands differ in how they work with instances of the data model. + +- `harvest` plugins _create_ a single new model instance and return it. +- `curate` plugins are passed a single existing model instance (the output of `process`), +and return a single model instance. +- `deposit` plugins are passed a single existing model instance (the output of `curate`), +and return a single model instance. + +#### How plugins work with the API + +```{important} +Plugins access the data model _exclusively_ through the API class {class}`hermes.model.SoftwareMetadata`. +``` + +The following sections show how this class works. + +##### Creating a data model instance + +Model instances are primarily created in `harvest` plugins, but may also be created in other plugins to map +existing data into. 
+ +To create a new model instance, initialize {class}`hermes.model.SoftwareMetadata`: + +```{code-block} python +:caption: Initializing a default data model instance +from hermes.model import SoftwareMetadata + +data = SoftwareMetadata() +``` + +`SoftwareMetadata` objects initialized without arguments provide the default _context_ +(see [_JSON-LD for plugin developers_](#json-ld-for-plugin-developers)). +This means that now, you can use terms from the schemas included in the default context to describe software metadata. + +Terms from [_CodeMeta_](https://codemeta.github.io/terms/) can be used without a prefix: + +```{code-block} python +:caption: Using terms from the default schema +data["readme"] = ... +``` + +Terms from [_Schema.org_](https://schema.org/) can be used with the prefix `schema`: + +```{code-block} python +:caption: Using terms from a non-default schema +data["schema:copyrightNotice"] = ... +``` + +You can also use other linked data vocabularies. To do this, you need to identify them with a prefix and register them +with the data model by passing it `extra_vocabs` as a `dict` mapping prefixes to URLs where the vocabularies are +provided as JSON-LD: + +```{code-block} python +:caption: Injecting additional schemas +from hermes.model import SoftwareMetadata + +# Contents served at https://bar.net/schema.jsonld: +# { +# "@context": +# { +# "baz": "https://schema.org/Thing" +# } +# } + +data = SoftwareMetadata(extra_vocabs={"foo": "https://bar.net/schema.jsonld"}) + +data["foo:baz"] = ... 
+``` + +##### Adding data + +Once you have an instance of {class}`hermes.model.SoftwareMetadata`, you can add data to it, +i.e., metadata that describes software: + +```{code-block} python +:caption: Setting data values +data["name"] = "My Research Software" # A simple "Text"-type value +data["author"] = {"name": "Foo"} # An object value that uses terms available in the defined context +``` + +##### Accessing data + +You need to be able to access data in the data model instance to add, edit or remove data. +Data can be accessed by using term strings, similar to how values in Python `dict`s are accessed by keys. + +```{important} +When you access data from a data model instance, +it will always be returned in a **list**-like object! +``` + +The reason for providing data in list-like objects is that JSON-LD treats all property values as arrays. +Even if you add "single value" data to a `hermes` data model instance via the API, the underlying JSON-LD model +will treat it as an array, i.e., a list-like object: + +```{code-block} python +:caption: Internal data values are arrays +data["name"] = "My Research Software" # → [ "My Research Software" ] +data["author"] = {"name": "Foo"} # → [ { "name": [ "Foo" ] } ] +``` + +The fact that you will always be returned a list-like object has consequences for accessing and creating data: + +1. You need to access single values using indices, e.g., `data["name"][0]`. +2. You can use list-like API to interact with data objects, e.g., +`data["name"].append("Bar")`, `data["name"].extend(["Bar", "Baz"])`. + +##### Interacting with data + +The following longer example shows different ways that you can interact with `SoftwareMetadata` objects and the data API. + +```{code-block} python +:caption: Building the data model +from hermes.model import SoftwareMetadata + +data = SoftwareMetadata() + +# Let's create author metadata for our software! +# Below each line of code, the value of `data["author"]` is given. 
+ +data["author"] = {"name": "Foo"} +# → [{'name': ['Foo']}] + +data["author"].append({"name": "Bar"}) +# [{'name': ['Foo']}, {'name': ['Bar']}] + +data["author"][0]["email"] = "foo@baz.net" +# [{'name': ['Foo'], 'email': ['foo@baz.net']}, {'name': ['Bar']}] + +data["author"][1]["email"].append("bar@baz.net") +# [{'name': ['Foo'], 'email': ['foo@baz.net']}, {'name': ['Bar'], 'email': ['bar@baz.net']}] + +data["author"][1]["email"].extend(["bar@spam.org", "bar@eggs.com"]) +# [ +# {'name': ['Foo'], 'email': ['foo@baz.net']}, +# {'name': ['Bar'], 'email': ['bar@baz.net', 'bar@spam.org', 'bar@eggs.com']} +# ] +``` + +The example continues to show how to iterate through data. + +```{code-block} python +:caption: for-loop, containment check +for i, author in enumerate(data["author"]): + if author["name"][0] in ["Foo", "Bar"]: + print(f"Author {i + 1} has expected name.") + else: + raise ValueError("Unexpected author name found!", author["name"][0]) + +# Mock output: +# $> Author 1 has expected name. +# $> Author 2 has expected name. +``` + +```{code-block} python +:caption: Value check +for email in data["author"][0]["email"]: + if email.endswith(".edu"): + print("Author has an email address at an educational institution.") + else: + print("Cannot confirm affiliation with educational institution for author.") + +# Mock output +# $> Cannot confirm affiliation with educational institution for author. +``` + +```{code-block} python +:caption: Value check and list comprehension +if all(["bar" in email for email in data["author"][1]["email"]]): + print("Author has only emails with their name in it.") + +# Mock output +# $> Author has only emails with their name in it. +``` + +The example continues to show how to assert data values. + +As mentioned in the [introduction to the data model](#data-model), +`hermes` uses a JSON-LD-like internal data model.
+The API class {class}`hermes.model.SoftwareMetadata` hides many +of the more complex aspects of JSON-LD and makes it easy to work +with the data model. + +Assertions, however, operate on the internal model objects. +Therefore, they may not work as you would expect from plain +Python data: + +```{code-block} python +:caption: Naive containment assertion that raises +:emphasize-lines: 5,13 +try: + assert ( + {'name': ['Foo'], 'email': ['foo@baz.net']} + in + data["author"] + ) + print("The author was found!") +except AssertionError: + print("The author could not be found.") + raise + +# Mock output +# $> The author could not be found. +# $> AssertionError: +# assert +# {'email': ['foo@baz.net'], 'name': ['Foo']} +# in +# _LDList( +# {'@list': [ +# { +# 'http://schema.org/name': [{'@value': 'Foo'}], +# 'http://schema.org/email': [{'@value': 'foo@baz.net'}] +# }, +# { +# 'http://schema.org/name': [{'@value': 'Bar'}], +# 'http://schema.org/email': [ +# {'@list': [ +# {'@value': 'bar@baz.net'}, {'@value': 'bar@spam.org'}, {'@value': 'bar@eggs.com'} +# ]} +# ] +# }] +# } +# ) +``` + +The mock output in the example above shows the inequality of the expected and the actual value. +The actual value is an internal data type wrapping the more complex JSON-LD data. + +The complex data structure of JSON-LD is internally constructed in the `hermes` data +model, and to make it possible to work with only the data that is important - the actual terms +and their values - the internal data model types provide a function `.to_python()`. +This function can be used in assertions to assert full data integrity: + +```{code-block} python +:caption: Containment assertion with `to_python()` +:emphasize-lines: 5,13 +try: + assert ( + {'name': ['Foo'], 'email': ['foo@baz.net']} + in + data["author"].to_python() + ) + print("The author was found!") +except AssertionError: + print("The author could not be found.") + raise + +# Mock output +# $> The author was found! 
+``` + +--- + +## See Also + +- Reference: {class}`hermes.model.SoftwareMetadata` API From 7e1ac6441a440c4d79d87a817af486a6c312c24c Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Thu, 2 Oct 2025 23:51:43 +0200 Subject: [PATCH 20/36] Update dependency lock --- poetry.lock | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0eaf104b..34fa117f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "accessible-pygments" @@ -700,6 +700,19 @@ files = [ {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, ] +[[package]] +name = "isodate" +version = "0.7.2" +description = "An ISO 8601 date/time/duration parser and formatter" +optional = false +python-versions = ">=3.7" +groups = ["main"] +markers = "python_version == \"3.10\"" +files = [ + {file = "isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15"}, + {file = "isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6"}, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -1139,6 +1152,24 @@ files = [ dev = ["abi3audit", "black", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest-cov", "requests", "rstcheck", "ruff", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "vulture", "wheel"] test = ["pytest", "pytest-xdist", "setuptools"] +[[package]] +name = "pyaml" +version = "25.7.0" +description = "PyYAML-based module to produce a bit more pretty and readable YAML-serialized data" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pyaml-25.7.0-py3-none-any.whl", 
hash = "sha256:ce5d7867cc2b455efdb9b0448324ff7b9f74d99f64650f12ca570102db6b985f"}, + {file = "pyaml-25.7.0.tar.gz", hash = "sha256:e113a64ec16881bf2b092e2beb84b7dcf1bd98096ad17f5f14e8fb782a75d99b"}, +] + +[package.dependencies] +PyYAML = "*" + +[package.extras] +anchors = ["unidecode"] + [[package]] name = "pycodestyle" version = "2.9.1" @@ -1594,7 +1625,7 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" -groups = ["docs"] +groups = ["main", "docs"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -1651,6 +1682,29 @@ files = [ {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +[[package]] +name = "rdflib" +version = "7.2.1" +description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information." 
+optional = false +python-versions = ">=3.8.1" +groups = ["main"] +files = [ + {file = "rdflib-7.2.1-py3-none-any.whl", hash = "sha256:1a175bc1386a167a42fbfaba003bfa05c164a2a3ca3cb9c0c97f9c9638ca6ac2"}, + {file = "rdflib-7.2.1.tar.gz", hash = "sha256:cf9b7fa25234e8925da8b1fb09700f8349b5f0f100e785fb4260e737308292ac"}, +] + +[package.dependencies] +isodate = {version = ">=0.7.2,<1.0.0", markers = "python_version < \"3.11\""} +pyparsing = ">=2.1.0,<4" + +[package.extras] +berkeleydb = ["berkeleydb (>=18.1.0,<19.0.0)"] +html = ["html5rdf (>=1.2,<2)"] +lxml = ["lxml (>=4.3,<6.0)"] +networkx = ["networkx (>=2,<4)"] +orjson = ["orjson (>=3.9.14,<4)"] + [[package]] name = "requests" version = "2.32.4" @@ -1806,6 +1860,21 @@ files = [ {file = "ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f"}, ] +[[package]] +name = "schemaorg" +version = "0.1.1" +description = "Python functions for applied use of schema.org" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "schemaorg-0.1.1.tar.gz", hash = "sha256:567f1735df666221c893d2c206dd70f9cddcc983c8cdc39f3a7b7726884d2c51"}, +] + +[package.dependencies] +lxml = ">=4.1.1" +pyaml = ">=17.12.1" + [[package]] name = "setuptools" version = "80.9.0" @@ -2477,4 +2546,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.10, <4.0.0" -content-hash = "58304fd33d6ec1ce3400b43ecffb16b3f48a5621e513c3e8057f9e3e050835e8" +content-hash = "e76de51d1f5dd86486d4cc24a5cdf7d007b16ce5d9d0cc3f7d0f353cf0defff0" From 9be8041d613cca5492cb54d843bc0157e27a6921 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Mon, 6 Oct 2025 12:12:04 +0200 Subject: [PATCH 21/36] removed tests of unclear matters (@type and @context fields) and added fixme --- test/hermes_test/model/test_api.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index fd2288d4..4b2c614b 100644 
--- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -71,8 +71,6 @@ def test_append(): def test_iterative_assignment(): # This tests iterative assignments/traversals to edit/appending values - # This requires SoftwareMetadata.__getitem__ to return a plain dict. SoftwareMetadata.__setitem__ can then - # implement the isinstanceof checks that @notactuallyfinn suggested. data = SoftwareMetadata(extra_vocabs={"foo": "https://foo.bar"}) data["author"] = {"name": "Foo"} # Look, a squirrel! @@ -125,11 +123,10 @@ def test_usage(): assert baz["affiliation"].to_python() == ["Lab E"] assert len(baz["schema:knowsAbout"]) == 0 assert len(baz["email"]) == 0 - assert data["@type"] == "SoftwareSourceCode" - assert data["@context"] == ALL_CONTEXTS # FIXME: #435 will solve this issue for author in data["author"]: assert "name" in author assert "email" in author if "schema:knowsAbout" not in author: + # FIXME: None has to be discussed author["schema:knowsAbout"] = None author["schema:pronouns"] = "they/them" From dd854c7b9bb19547c8c6fb64e39974781e42f8a9 Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Fri, 17 Oct 2025 09:24:12 +0200 Subject: [PATCH 22/36] Track data in model in simplified form --- docs/source/dev/data_model.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/dev/data_model.md b/docs/source/dev/data_model.md index c51f3401..9d989eb9 100644 --- a/docs/source/dev/data_model.md +++ b/docs/source/dev/data_model.md @@ -126,7 +126,11 @@ i.e., metadata that describes software: ```{code-block} python :caption: Setting data values data["name"] = "My Research Software" # A simple "Text"-type value +# → Simplified model representation : { "name": [ "My Research Software" ] } +# Cf. "Accessing data" below data["author"] = {"name": "Foo"} # An object value that uses terms available in the defined context +# → Simplified model representation : { "name": [ "My Research Software" ], "author": [ { "name": [ "Foo" ] } ] } +# Cf.
"Accessing data" below ``` ##### Accessing data From f4c1e7d3eef9cef19693160f37d5fdd77a1cb8cc Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Fri, 17 Oct 2025 09:30:00 +0200 Subject: [PATCH 23/36] Link to dummy section --- docs/source/dev/data_model.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/dev/data_model.md b/docs/source/dev/data_model.md index 9d989eb9..de313e40 100644 --- a/docs/source/dev/data_model.md +++ b/docs/source/dev/data_model.md @@ -19,8 +19,7 @@ subdirectories of the `.hermes/` directory that is created in the root of the pr The cache is purely for internal purposes, its data should not be interacted with. -As JSON-LD can be confusing to work with directly, the following sections provide documentation of the data model. -Depending on whether you develop a plugin for `hermes`, or you develop `hermes` itself, you need to know either _some_, +Depending on whether you develop a plugin for `hermes`, or you develop `hermes` itself, you need to know either [_some_](#json-ld-for-plugin-developers), or _quite a few_ things about JSON-LD. ## The data model for plugin developers From 97ebad4cfaf984e44d3f26623196f6e297b6571f Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Fri, 17 Oct 2025 09:39:50 +0200 Subject: [PATCH 24/36] Make tone less intimidating, more neutral/positive --- docs/source/dev/data_model.md | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/docs/source/dev/data_model.md b/docs/source/dev/data_model.md index de313e40..8f008dc4 100644 --- a/docs/source/dev/data_model.md +++ b/docs/source/dev/data_model.md @@ -22,12 +22,16 @@ The cache is purely for internal purposes, its data should not be interacted wit Depending on whether you develop a plugin for `hermes`, or you develop `hermes` itself, you need to know either [_some_](#json-ld-for-plugin-developers), or _quite a few_ things about JSON-LD. +The following sections provide documentation of the data model. 
+They aim to help you get started with `hermes` plugin and core development, +even if you have no previous experience with JSON-LD. + ## The data model for plugin developers If you develop a plugin for `hermes`, you will only need to work with a single Python class and the public API it provides: {class}`hermes.model.SoftwareMetadata`. -Nevertheless, it is still necessary that you know _some_ things about JSON-LD. +To work with this class, it is necessary that you know _some_ things about JSON-LD. ### JSON-LD for plugin developers @@ -39,10 +43,10 @@ Work in progress. ### Working with the `hermes` data model in plugins > **Goal** -> Understand how plugins access and interact with the `hermes` data model. +> Understand how plugins access the `hermes` data model and interact with it. `hermes` aims to hide as much of the data model as possible behind a public API -to avoid that plugin developers have to deal with the complexities and intricacies of JSON-LD. +to avoid that plugin developers have to deal with some of the more complex features of JSON-LD. #### Model instances in different types of plugin @@ -152,11 +156,11 @@ data["name"] = "My Research Software" # → [ "My Research Software" ] data["author"] = {"name": "Foo"} # → [ { "name": [ "Foo" ] } ] ``` -The fact that you will always be returned a list-like object has consequences for accessing and creating data: +Therefore, you access data in the same way you would access data from a Python `list`: -1. You need to access single values using indices, e.g., `data["name"][0]`. -2. You can use list-like API to interact with data objects, e.g., -`data["name"].append("Bar")`, `data["name"].extend(["Bar", "Baz"])`. +1. You access single values using indices, e.g., `data["name"][0]`. +2. You can use a list-like API to interact with data objects, e.g., +`data["name"].append("Bar")`, `data["name"].extend(["Bar", "Baz"])`, `for name in data["name"]: ...`, etc. 
##### Interacting with data @@ -166,6 +170,7 @@ The following longer example shows different ways that you can interact with `So :caption: Building the data model from hermes.model import SoftwareMetadata +# Create the model object with the default context data = SoftwareMetadata() # Let's create author metadata for our software! @@ -306,4 +311,4 @@ except AssertionError: ## See Also -- Reference: {class}`hermes.model.SoftwareMetadata` API +- API reference: {class}`hermes.model.SoftwareMetadata` From 6f039e84e10b8b9e4134b2d30c858928010c7dba Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 19 Dec 2025 13:28:43 +0100 Subject: [PATCH 25/36] slightly adjusted tests and fixed miniature bugs in ld_container and ld_dict --- src/hermes/model/types/ld_container.py | 4 +- src/hermes/model/types/ld_dict.py | 21 ++++--- test/hermes_test/model/test_api.py | 64 +++++++++++--------- test/hermes_test/model/types/test_ld_dict.py | 6 +- 4 files changed, 53 insertions(+), 42 deletions(-) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index 88d92795..ef47715f 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -237,7 +237,7 @@ def _to_expanded_json( # while searching build a path such that it leads from the found ld_dicts ld_value to selfs data_dict/ item_list parent = self path = [] - while parent.__class__.__name__ != "ld_dict": + while parent.__class__.__name__ not in {"ld_dict", "SoftwareMetadata"}: if parent.container_type == "@list": path.extend(["@list", 0]) elif parent.container_type == "@graph": @@ -250,7 +250,7 @@ def _to_expanded_json( # if neither self nor any of its parents is a ld_dict: # create a dict with the key of the outer most parent of self and this parents ld_value as a value # this dict is stored in an ld_container and simulates the most minimal JSON-LD object possible - if parent.__class__.__name__ != "ld_dict": + if parent.__class__.__name__ not in 
{"ld_dict", "SoftwareMetadata"}: key = self.ld_proc.expand_iri(parent.active_ctx, parent.key) parent = ld_container([{key: parent._data}]) path.append(0) diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index 9b707864..e2a3eb1b 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -22,15 +22,16 @@ def __init__(self, data, *, parent=None, key=None, index=None, context=None): def __getitem__(self, key): full_iri = self.ld_proc.expand_iri(self.active_ctx, key) - try: - ld_value = self.data_dict[full_iri] - except KeyError: - self.data_dict.update({full_iri: [{"@list": []}]}) - ld_value = self.data_dict[full_iri] + if full_iri not in self.data_dict: + self[full_iri] = [] + ld_value = self.data_dict[full_iri] return self._to_python(full_iri, ld_value) def __setitem__(self, key, value): full_iri = self.ld_proc.expand_iri(self.active_ctx, key) + if value is None: + del self[full_iri] + return ld_value = self._to_expanded_json({full_iri: value}) self.data_dict.update(ld_value) @@ -40,7 +41,8 @@ def __delitem__(self, key): def __contains__(self, key): full_iri = self.ld_proc.expand_iri(self.active_ctx, key) - return len(self[full_iri]) != 0 + # FIXME: is that good? 
+ return full_iri in self.data_dict def __eq__(self, other): if not isinstance(other, (dict, ld_dict)): @@ -79,9 +81,12 @@ def __ne__(self, other): return not x def get(self, key, default=_NO_DEFAULT): - if key not in self and default is not ld_dict._NO_DEFAULT: + try: + return self[key] + except KeyError as e: + if default is self._NO_DEFAULT: + raise e return default - return self[key] def update(self, other): for key, value in other.items(): diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index 4b2c614b..b4bec276 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -44,8 +44,8 @@ def test_init_nested_object(): "author": [{"name": "Foo"}, {"name": "Bar"}], } data = SoftwareMetadata(my_software, extra_vocabs={"foo": "https://foo.bar"}) - assert data["schema:softwareName"][0] == "MySoftware" - assert data["maintainer"][0]["name"][0] == "Some Name" + assert data["schema:softwareName"] == ["MySoftware"] + assert len(data["maintainer"]) == 1 and data["maintainer"][0]["name"] == ["Some Name"] for author in data["author"]: for name in author["name"]: assert name in ["Foo", "Bar"] @@ -53,20 +53,23 @@ def test_init_nested_object(): def test_append(): data = SoftwareMetadata() - data["foo"].append("a") - assert type(data["foo"]) is ld_list and data["foo"][0] == "a" and data["foo"].item_list == [{"@value": "a"}] - data["foo"].append("b") - assert type(data["foo"]) is ld_list and data["foo"].item_list == [{"@value": "a"}, {"@value": "b"}] - data["foo"].append("c") - assert data["foo"].item_list == [{"@value": "a"}, {"@value": "b"}, {"@value": "c"}] + data["schema:foo"].append("a") + assert type(data["schema:foo"]) is ld_list + assert data["schema:foo"][0] == "a" and data["schema:foo"].item_list == [{"@value": "a"}] + data["schema:foo"].append("b") + assert type(data["schema:foo"]) is ld_list + assert data["schema:foo"] == [{"@value": "a"}, {"@value": "b"}] + data["schema:foo"].append("c") + assert 
data["schema:foo"] == [{"@value": "a"}, {"@value": "b"}, {"@value": "c"}] data = SoftwareMetadata() - data["foo"].append({"schema:name": "foo"}) - assert type(data["foo"]) is ld_list and type(data["foo"][0]) is ld_dict - assert data["foo"][0].data_dict == {"http://schema.org/name": [{"@value": "foo"}]} - data["foo"].append({"schema:name": "foo"}) - assert type(data["foo"]) is ld_list and data["foo"].item_list == 2*[{"http://schema.org/name": [{"@value": "foo"}]}] - data["foo"].append({"schema:name": "foo"}) - assert data["foo"].item_list == 3 * [{"http://schema.org/name": [{"@value": "foo"}]}] + data["schema:foo"].append({"schema:name": "bar"}) + assert type(data["schema:foo"]) is ld_list and type(data["schema:foo"][0]) is ld_dict + assert data["schema:foo"] == [{"http://schema.org/name": [{"@value": "bar"}]}] + data["schema:foo"].append({"schema:name": "bar"}) + assert type(data["schema:foo"]) is ld_list + assert data["schema:foo"] == 2 * [{"http://schema.org/name": [{"@value": "bar"}]}] + data["schema:foo"].append({"schema:name": "bar"}) + assert data["schema:foo"] == 3 * [{"http://schema.org/name": [{"@value": "bar"}]}] def test_iterative_assignment(): @@ -78,9 +81,10 @@ def test_iterative_assignment(): assert isinstance(authors, ld_list) author1 = authors[0] author1["email"] = "author@example.com" - authors[0] = author1 authors.append({"name": "Bar", "email": "author2@example.com"}) assert len(authors) == 2 + del authors[0] + assert len(authors) == 1 def test_usage(): @@ -95,38 +99,38 @@ def test_usage(): harvest = { "authors": [ {"name": "Foo", "affiliation": ["Uni A", "Lab B"], "kw": ["a", "b", "c"]}, - {"name": "Bar", "affiliation": ["Uni C"], "email": "bar@c.edu"}, + {"name": "Bar", "affiliation": ["Uni C"], "email": "bar@c.edu", "kw": "egg"}, {"name": "Baz", "affiliation": ["Lab E"]}, ] } for author in harvest["authors"]: for exist_author in data["author"]: - if author["name"] == exist_author["name"][0]: - exist_author["affiliation"] = 
author["affiliation"] - if "email" in author: - exist_author["email"].append(author["email"]) - if "kw" in author: - exist_author["schema:knowsAbout"].extend(author["kw"]) + if author["name"] in exist_author["name"]: + exist_author["affiliation"] = author.get("affiliation", []) + exist_author["email"].extend(email if isinstance((email := author.get("email", [])), list) else [email]) + exist_author["schema:knowsAbout"].extend(kw if isinstance((kw := author.get("kw", [])), list) else [kw]) break else: data["author"].append(author) assert len(data["author"]) == 3 foo, bar, baz = data["author"] assert foo["name"][0] == "Foo" - assert foo["affiliation"].to_python() == ["Uni A", "Lab B"] - assert foo["schema:knowsAbout"].to_python() == ["a", "b", "c"] - assert foo["email"].to_python() == ["foo@bar.net", "foo@baz.com"] + assert foo["affiliation"] == ["Uni A", "Lab B"] + assert foo["schema:knowsAbout"] == ["a", "b", "c"] + assert foo["email"] == ["foo@bar.net", "foo@baz.com"] assert bar["name"][0] == "Bar" - assert bar["affiliation"].to_python() == ["Uni C"] - assert bar["email"].to_python() == ["bar@c.edu"] + assert bar["affiliation"] == ["Uni C"] + assert bar["email"] == ["bar@c.edu"] assert baz["name"][0] == "Baz" - assert baz["affiliation"].to_python() == ["Lab E"] + assert baz["affiliation"] == ["Lab E"] assert len(baz["schema:knowsAbout"]) == 0 assert len(baz["email"]) == 0 for author in data["author"]: assert "name" in author assert "email" in author - if "schema:knowsAbout" not in author: + if author["schema:knowsAbout"] == ["egg"]: # FIXME: None has to be discussed + # json-ld processor just removes it in expansion author["schema:knowsAbout"] = None author["schema:pronouns"] = "they/them" + assert len(bar["schema:knowsAbout"]) == 0 diff --git a/test/hermes_test/model/types/test_ld_dict.py b/test/hermes_test/model/types/test_ld_dict.py index 4346f0f4..66095295 100644 --- a/test/hermes_test/model/types/test_ld_dict.py +++ 
b/test/hermes_test/model/types/test_ld_dict.py @@ -189,8 +189,10 @@ def test_get(): context=[{"schema": "https://schema.org/"}]) assert di.get("https://schema.org/name") == ["Manu Sporny"] assert di.get("schema:name") == ["Manu Sporny"] - assert di.get("bar", None) is None - assert isinstance(di["bar"], ld_list) and len(di["bar"]) == 0 + assert di.get("bar", None) is None # invalid key + with pytest.raises(KeyError): + di.get("bar") + assert isinstance(di.get("schema:bar", None), ld_list) and len(di.get("schema:bar", None)) == 0 def test_update(): From c2b9c4fa71b300c8d78df0f737fdae650b63b679 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 5 Jan 2026 09:07:04 +0100 Subject: [PATCH 26/36] cleaned up __init__.py --- src/hermes/model/types/__init__.py | 47 +++--------------------------- src/hermes/model/types/ld_dict.py | 7 +---- src/hermes/model/types/ld_list.py | 24 +++------------ 3 files changed, 9 insertions(+), 69 deletions(-) diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index 9e4b1bf5..df6aed71 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -5,29 +5,19 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche -from datetime import date, time, datetime - from .ld_container import ld_container -from .ld_list import ld_list from .ld_dict import ld_dict -from .ld_context import iri_map +from .ld_list import ld_list from .pyld_util import JsonLdProcessor _TYPEMAP = [ - # Conversion routines for ld_container - ( - lambda c: isinstance(c, ld_container), - { - "ld_container": lambda c, **_: c, - "json": lambda c, **_: c.compact(), - "expanded_json": lambda c, **_: c.ld_value, - }, - ), + # Conversion routine for ld_container + (lambda c: isinstance(c, ld_container), {"ld_container": lambda c, **_: c}), # Wrap item from ld_dict in ld_list (ld_list.is_ld_list, {"ld_container": ld_list}), - (lambda c: isinstance(c, list), {"ld_container": lambda c, 
**kw: ld_list(c, **kw)}), + (lambda c: isinstance(c, list), {"ld_container": ld_list}), # pythonize items from lists (expanded set is already handled above) (ld_container.is_json_id, {"python": lambda c, **_: c["@id"]}), @@ -36,35 +26,6 @@ (ld_list.is_container, {"ld_container": lambda c, **kw: ld_list([c], **kw)}), (ld_dict.is_json_dict, {"ld_container": lambda c, **kw: ld_dict([c], **kw)}), (lambda v: isinstance(v, str), {"python": lambda v, parent, **_: parent.ld_proc.compact_iri(parent.active_ctx, v)}), - - # Convert internal data types to expanded_json - (ld_container.is_json_id, {"expanded_json": lambda c, **_: [c]}), - (ld_container.is_ld_id, {"expanded_json": lambda c, **_: c}), - (ld_container.is_json_value, {"expanded_json": lambda c, **_: [c]}), - (ld_container.is_ld_value, {"expanded_json": lambda c, **_: c}), - (ld_dict.is_json_dict, {"expanded_json": lambda c, **kw: ld_dict.from_dict(c, **kw).ld_value}), - ( - ld_list.is_container, - {"expanded_json": lambda c, **kw: ld_list.from_list(ld_list.get_item_list_from_container(c), **kw).ld_value} - ), - ( - ld_list.is_ld_list, - {"expanded_json": lambda c, **kw: ld_list.from_list(ld_list.get_item_list_from_container(c[0]), **kw).ld_value} - ), - (lambda c: isinstance(c, list), {"expanded_json": lambda c, **kw: ld_list.from_list(c, **kw).ld_value}), - (lambda v: isinstance(v, (int, float, str, bool)), {"expanded_json": lambda v, **_: [{"@value": v}]}), - ( - lambda v: isinstance(v, datetime), - {"expanded_json": lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:DateTime"]}]} - ), - ( - lambda v: isinstance(v, date), - {"expanded_json": lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Date"]}]} - ), - ( - lambda v: isinstance(v, time), - {"expanded_json": lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Time"]}]} - ), ] diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index e2a3eb1b..7493b897 100644 --- 
a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -65,12 +65,7 @@ def __eq__(self, other): if unique_keys and unique_keys != {"@id"}: return False for key in keys_self.intersection(keys_other): - item = self[key] - other_item = other[key] - res = item.__eq__(other_item) - if res == NotImplemented: - res = other_item.__eq__(item) - if res is False or res == NotImplemented: # res is not True + if self[key] != other[key]: return False return True diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index c4d1c450..002bbd6d 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -312,16 +312,8 @@ def __eq__( if item["@id"] != other_item["@id"]: return False continue - # get the 'real' items (i.e. can also be ld_dicts or ld_lists) - item = self[index] - other_item = other[index] - # compare using the correct equals method - res = item.__eq__(other_item) - if res == NotImplemented: - # swap order if first try returned NotImplemented - res = other_item.__eq__(item) - # return false if the second comparison also fails or one of them returned false - if res is False or res == NotImplemented: + # compare the 'real' items (i.e. can also be ld_dicts or ld_lists) + if self[index] != other[index]: return False # return true because no unequal elements where found return True @@ -341,16 +333,8 @@ def __eq__( if item["@id"] == other_item["@id"]: equality_pairs[index] += [other_index] continue - # get the 'real' items (i.e. can also be ld_dicts or ld_lists) - item = self[index] - other_item = other[index] - # compare using the correct equals method - res = item.__eq__(other_item) - if res == NotImplemented: - # swap order if first try returned NotImplemented - res = other_item.__eq__(item) - # if one of both comparisons returned true the elements are equal - if res is not NotImplemented and res: + # compare the 'real' items (i.e. 
can also be ld_dicts or ld_lists) + if self[index] == other[other_index]: equality_pairs[index] += [other_index] if len(equality_pairs[index]) == 0: # there exists no element in other that is equal to item From bd1a19fd281502a04adc921970d3755390edb834 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 5 Jan 2026 09:14:04 +0100 Subject: [PATCH 27/36] ran 'poetry lock' --- poetry.lock | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 067f4087..476df72e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -700,6 +700,19 @@ files = [ {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, ] +[[package]] +name = "isodate" +version = "0.7.2" +description = "An ISO 8601 date/time/duration parser and formatter" +optional = false +python-versions = ">=3.7" +groups = ["main"] +markers = "python_version == \"3.10\"" +files = [ + {file = "isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15"}, + {file = "isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6"}, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -1139,6 +1152,24 @@ files = [ dev = ["abi3audit", "black", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest-cov", "requests", "rstcheck", "ruff", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "vulture", "wheel"] test = ["pytest", "pytest-xdist", "setuptools"] +[[package]] +name = "pyaml" +version = "25.7.0" +description = "PyYAML-based module to produce a bit more pretty and readable YAML-serialized data" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pyaml-25.7.0-py3-none-any.whl", hash = "sha256:ce5d7867cc2b455efdb9b0448324ff7b9f74d99f64650f12ca570102db6b985f"}, + {file = "pyaml-25.7.0.tar.gz", hash = 
"sha256:e113a64ec16881bf2b092e2beb84b7dcf1bd98096ad17f5f14e8fb782a75d99b"}, +] + +[package.dependencies] +PyYAML = "*" + +[package.extras] +anchors = ["unidecode"] + [[package]] name = "pycodestyle" version = "2.9.1" @@ -1594,7 +1625,7 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" -groups = ["docs"] +groups = ["main", "docs"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -1651,6 +1682,30 @@ files = [ {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +[[package]] +name = "rdflib" +version = "7.5.0" +description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information." 
+optional = false +python-versions = ">=3.8.1" +groups = ["main"] +files = [ + {file = "rdflib-7.5.0-py3-none-any.whl", hash = "sha256:b011dfc40d0fc8a44252e906dcd8fc806a7859bc231be190c37e9568a31ac572"}, + {file = "rdflib-7.5.0.tar.gz", hash = "sha256:663083443908b1830e567350d72e74d9948b310f827966358d76eebdc92bf592"}, +] + +[package.dependencies] +isodate = {version = ">=0.7.2,<1.0.0", markers = "python_version < \"3.11\""} +pyparsing = ">=2.1.0,<4" + +[package.extras] +berkeleydb = ["berkeleydb (>=18.1.0,<19.0.0)"] +html = ["html5rdf (>=1.2,<2)"] +lxml = ["lxml (>=4.3,<6.0)"] +networkx = ["networkx (>=2,<4)"] +orjson = ["orjson (>=3.9.14,<4)"] +rdf4j = ["httpx (>=0.28.1,<0.29.0)"] + [[package]] name = "requests" version = "2.32.4" @@ -1806,6 +1861,21 @@ files = [ {file = "ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f"}, ] +[[package]] +name = "schemaorg" +version = "0.1.1" +description = "Python functions for applied use of schema.org" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "schemaorg-0.1.1.tar.gz", hash = "sha256:567f1735df666221c893d2c206dd70f9cddcc983c8cdc39f3a7b7726884d2c51"}, +] + +[package.dependencies] +lxml = ">=4.1.1" +pyaml = ">=17.12.1" + [[package]] name = "setuptools" version = "80.9.0" @@ -2477,4 +2547,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.10, <4.0.0" -content-hash = "2e2405b30c3dee4416a6e77828c7cff1197a8be71665770bcbdb308c19ef4358" +content-hash = "e76de51d1f5dd86486d4cc24a5cdf7d007b16ce5d9d0cc3f7d0f353cf0defff0" From 9527e260b122229bdd62cd769ee722703853fa8a Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 5 Jan 2026 09:52:06 +0100 Subject: [PATCH 28/36] updated type hints to be supported by python 3.10 --- src/hermes/model/types/ld_container.py | 5 ++- src/hermes/model/types/ld_list.py | 55 +++++++++++++++----------- 2 files changed, 36 insertions(+), 24 deletions(-) diff --git 
a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index ef47715f..92e2ed11 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -8,7 +8,8 @@ from .pyld_util import JsonLdProcessor, bundled_loader from datetime import date, time, datetime -from typing import Union, Self, Any +from typing import Union, Any +from typing_extensions import Self JSON_LD_CONTEXT_DICT = dict[str, Union[str, "JSON_LD_CONTEXT_DICT"]] @@ -64,7 +65,7 @@ def __init__( self: Self, data: EXPANDED_JSON_LD_VALUE, *, - parent: Union["ld_container", None] = None, + parent: Union[Self, None] = None, key: Union[str, None] = None, index: Union[int, None] = None, context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index 002bbd6d..0b89ed00 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -17,7 +17,9 @@ BASIC_TYPE, ) -from typing import Generator, Hashable, Union, Self, Any +from collections.abc import Generator, Hashable +from typing import Union, Any +from typing_extensions import Self class ld_list(ld_container): @@ -32,10 +34,10 @@ class ld_list(ld_container): """ def __init__( - self: Self, + self: "ld_list", data: Union[list[str], list[dict[str, EXPANDED_JSON_LD_VALUE]]], *, - parent: Union["ld_container", None] = None, + parent: Union[ld_container, None] = None, key: Union[str, None] = None, index: Union[int, None] = None, context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, @@ -44,7 +46,7 @@ def __init__( Create a new ld_list container. :param self: The instance of ld_list to be initialized. - :type self: Self + :type self: ld_list :param data: The expanded json-ld data that is mapped (must be valid for @set, @list or @graph) :type data: list[str] | list[dict[str, BASIC_TYPE | EXPANDED_JSON_LD_VALUE]] :param parent: parent node of this container. 
@@ -100,7 +102,7 @@ def __getitem__( Get the item(s) at position index in a pythonized form. :param self: The ld_list the items are taken from. - :type self: Self + :type self: ld_list :param index: The positon(s) from which the item(s) is/ are taken. :type index: int | slice @@ -125,7 +127,7 @@ def __setitem__( All given values are expanded. If any are assimilated by self all items that would be added by this are added. :param self: The ld_list the items are set in. - :type self: Self + :type self: ld_list :param index: The positon(s) at which the item(s) is/ are set. :type index: int | slice :param value: The new value(s). @@ -161,7 +163,7 @@ def __delitem__(self: Self, index: Union[int, slice]) -> None: and not be modified afterwards. :param self: The ld_list the items are deleted from. - :type self: Self + :type self: ld_list :param index: The positon(s) at which the item(s) is/ are deleted. :type index: int | slice @@ -175,22 +177,22 @@ def __len__(self: Self) -> int: Returns the number of items in this ld_list. :param self: The ld_list whose length is to be returned. - :type self: Self + :type self: ld_list :return: The length of self. :rtype: int """ return len(self.item_list) - def __iter__(self: Self) -> Generator[Union[BASIC_TYPE | TIME_TYPE | ld_container], None, None]: + def __iter__(self: Self) -> Generator[Union[BASIC_TYPE, TIME_TYPE, ld_container], None, None]: """ Returns an iterator over the pythonized values contained in self. :param self: The ld_list over whose items is iterated. - :type self: Self + :type self: ld_list :return: The Iterator over self's values. 
- :rtype: Generator[Union[BASIC_TYPE | TIME_TYPE | ld_container], None, None] + :rtype: Generator[BASIC_TYPE | TIME_TYPE | ld_container, None, None] """ # return an Iterator over each value in self in its pythonized from for index, value in enumerate(self.item_list): @@ -211,7 +213,7 @@ def __contains__(self: Self, value: JSON_LD_VALUE) -> bool: has the same @id like it or it xor the object in the item_list has an id an all other values are the same. :param self: The ld_list that is checked if it contains value. - :type self: Self + :type self: ld_list :param value: The object being checked whether or not it is in self. :type value: JSON_LD_VALUE @@ -240,7 +242,7 @@ def __contains__(self: Self, value: JSON_LD_VALUE) -> bool: def __eq__( self: Self, other: Union[ - "ld_list", + Self, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]], dict[str, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]]], ], @@ -258,7 +260,7 @@ def __eq__( returns true. :param self: The ld_list other is compared to. - :type self: Self + :type self: ld_list :param other: The list/ container/ ld_list self is compared to. 
:type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container] | dict[str, list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container]] @@ -354,7 +356,10 @@ def __eq__( @classmethod def _bfs_step( - cls: Self, verticies1: set[Hashable], edges: dict[Hashable, tuple[Hashable]], matches: dict[Hashable, Hashable], + cls: type[Self], + verticies1: set[Hashable], + edges: dict[Hashable, tuple[Hashable]], + matches: dict[Hashable, Hashable], distances: dict[Hashable, Union[int, float]] ) -> bool: """ @@ -408,7 +413,10 @@ def _bfs_step( @classmethod def _dfs_step( - cls: Self, ver: Hashable, edges: dict[Hashable, tuple[Hashable]], matches: dict[Hashable, Hashable], + cls: type[Self], + ver: Hashable, + edges: dict[Hashable, tuple[Hashable]], + matches: dict[Hashable, Hashable], distances: dict[Hashable, Union[int, float]] ) -> bool: """ @@ -452,7 +460,10 @@ def _dfs_step( @classmethod def _hopcroft_karp( - cls: Self, verticies1: set[Hashable], verticies2: set[Hashable], edges: dict[Hashable, tuple[Hashable]] + cls: type[Self], + verticies1: set[Hashable], + verticies2: set[Hashable], + edges: dict[Hashable, tuple[Hashable]] ) -> int: """ Implementation of Hopcroft-Karp. I.e.:
@@ -495,7 +506,7 @@ def _hopcroft_karp( def __ne__( self: Self, other: Union[ - "ld_list", + Self, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]], dict[str, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]]], ], @@ -506,7 +517,7 @@ def __ne__( See ld_list.__eq__ for more details on the comparison.) :param self: The ld_list other is compared to. - :type self: Self + :type self: ld_list :param other: The list/ container/ ld_list self is compared to. :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container] | dict[str, list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container]] @@ -528,7 +539,7 @@ def append(self: Self, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_con The given value is expanded. If it is assimilated by self all items that would be added by this are added. :param self: The ld_list the item is appended to. - :type self: Self + :type self: ld_list :param value: The new value. :type value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container] @@ -543,7 +554,7 @@ def extend(self: Self, value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, l The given values are expanded. If any are assimilated by self all items that would be added by this are added. :param self: The ld_list the items are appended to. - :type self: Self + :type self: ld_list :param value: The new values. :type value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]] @@ -558,7 +569,7 @@ def to_python(self: Self) -> list[PYTHONIZED_LD_CONTAINER]: Return a fully pythonized version of this object where all ld_container are replaced by lists and dicts. :param self: The ld_list whose fully pythonized version is returned. - :type self: Self + :type self: ld_list :return: The fully pythonized version of self. 
:rtype: list[PYTHONIZED_LD_CONTAINER] From 97d9d95c0756e2380ef7239f5e42d04a751b2902 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 5 Jan 2026 11:00:28 +0100 Subject: [PATCH 29/36] update type hints and began commenting ld_dict --- src/hermes/model/types/ld_container.py | 6 ++- src/hermes/model/types/ld_dict.py | 56 ++++++++++++++++++++------ src/hermes/model/types/ld_list.py | 8 ++-- 3 files changed, 53 insertions(+), 17 deletions(-) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index 92e2ed11..766205a9 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -14,7 +14,11 @@ JSON_LD_CONTEXT_DICT = dict[str, Union[str, "JSON_LD_CONTEXT_DICT"]] BASIC_TYPE = Union[str, float, int, bool] -EXPANDED_JSON_LD_VALUE = list[dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]]] +EXPANDED_JSON_LD_VALUE = list[Union[ + dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]], + "EXPANDED_JSON_LD_VALUE", + str +]] COMPACTED_JSON_LD_VALUE = Union[ list[Union[dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], BASIC_TYPE]], dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index 7493b897..3d01c711 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -5,29 +5,56 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche -from .ld_container import ld_container - from .pyld_util import bundled_loader +from .ld_container import ( + ld_container, + JSON_LD_CONTEXT_DICT, + EXPANDED_JSON_LD_VALUE, + PYTHONIZED_LD_CONTAINER, + JSON_LD_VALUE, + TIME_TYPE, + BASIC_TYPE, +) + +from collections.abc import KeysView +from types import NotImplementedType +from typing import Union, Any +from typing_extensions import Self class ld_dict(ld_container): + """ + An JSON-LD container resembling a dict. 
+ See also :class:`ld_container` + + :cvar container_type: A type used as a placeholder to represent "no default". + :cvartype container_type: type[str] + """ _NO_DEFAULT = type("NO DEFAULT") - def __init__(self, data, *, parent=None, key=None, index=None, context=None): + def __init__( + self: Self, + data: list[dict[str, EXPANDED_JSON_LD_VALUE]], + *, + parent: Union[ld_container, None] = None, + key: Union[str, None] = None, + index: Union[int, None] = None, + context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None + ) -> None: if not self.is_ld_dict(data): raise ValueError("The given data does not represent a ld_dict.") super().__init__(data, parent=parent, key=key, index=index, context=context) self.data_dict = data[0] - def __getitem__(self, key): + def __getitem__(self: Self, key: str) -> list[Union[BASIC_TYPE, TIME_TYPE, ld_container]]: full_iri = self.ld_proc.expand_iri(self.active_ctx, key) if full_iri not in self.data_dict: self[full_iri] = [] ld_value = self.data_dict[full_iri] return self._to_python(full_iri, ld_value) - def __setitem__(self, key, value): + def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]) -> None: full_iri = self.ld_proc.expand_iri(self.active_ctx, key) if value is None: del self[full_iri] @@ -35,16 +62,16 @@ def __setitem__(self, key, value): ld_value = self._to_expanded_json({full_iri: value}) self.data_dict.update(ld_value) - def __delitem__(self, key): + def __delitem__(self: Self, key: str) -> None: full_iri = self.ld_proc.expand_iri(self.active_ctx, key) del self.data_dict[full_iri] - def __contains__(self, key): + def __contains__(self: Self, key: str) -> bool: full_iri = self.ld_proc.expand_iri(self.active_ctx, key) # FIXME: is that good? return full_iri in self.data_dict - def __eq__(self, other): + def __eq__(self: Self, other: Any) -> Union[bool, NotImplementedType]: # FIXME: give another type hint to other? 
if not isinstance(other, (dict, ld_dict)): return NotImplemented if ld_container.is_json_id(other): @@ -69,13 +96,15 @@ def __eq__(self, other): return False return True - def __ne__(self, other): + def __ne__(self: Self, other: Any) -> Union[bool, NotImplementedType]: # FIXME: give another type hint to other? x = self.__eq__(other) if x is NotImplemented: return NotImplemented return not x - def get(self, key, default=_NO_DEFAULT): + def get( + self: Self, key: str, default: Any = _NO_DEFAULT + ) -> Union[list[Union[BASIC_TYPE, TIME_TYPE, ld_container]], Any]: try: return self[key] except KeyError as e: @@ -83,11 +112,14 @@ def get(self, key, default=_NO_DEFAULT): raise e return default - def update(self, other): + def update( + self: Self, + other: Union[dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]], "ld_dict"] + ) -> None: for key, value in other.items(): self[key] = value - def keys(self): + def keys(self: Self) -> KeysView[str]: return self.data_dict.keys() def compact_keys(self): diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index 0b89ed00..e567dff8 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -34,8 +34,8 @@ class ld_list(ld_container): """ def __init__( - self: "ld_list", - data: Union[list[str], list[dict[str, EXPANDED_JSON_LD_VALUE]]], + self: Self, + data: EXPANDED_JSON_LD_VALUE, *, parent: Union[ld_container, None] = None, key: Union[str, None] = None, @@ -48,7 +48,7 @@ def __init__( :param self: The instance of ld_list to be initialized. :type self: ld_list :param data: The expanded json-ld data that is mapped (must be valid for @set, @list or @graph) - :type data: list[str] | list[dict[str, BASIC_TYPE | EXPANDED_JSON_LD_VALUE]] + :type data: EXPANDED_JSON_LD_VALUE :param parent: parent node of this container. :type parent: ld_container | None :param key: key into the parent container. 
@@ -253,7 +253,7 @@ def __eq__( For each index it is checked if the ids of the items at index in self and other match if both have one, if only one has or neither have an id all other values are compared.
Note that due to those circumstances equality is not transitve - meaning if a == b and b == c is is not guaranteed that a == c.
+ meaning if a == b and b == c it is not guaranteed that a == c.
If self or other is considered unordered the comparison is more difficult. All items in self are compared with all items in other. On the resulting graph given by the realtion == the Hopcroft-Karp algoritm is used to determine if there exists a bijection reordering self so that the ordered comparison of self with other From cd6e3d5564813e798217dbe62e1b0fa3ab1fe077 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 9 Jan 2026 13:17:20 +0100 Subject: [PATCH 30/36] added and updated comments --- src/hermes/model/types/ld_container.py | 64 +++--- src/hermes/model/types/ld_dict.py | 303 ++++++++++++++++++++++--- src/hermes/model/types/ld_list.py | 102 ++++----- 3 files changed, 355 insertions(+), 114 deletions(-) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index 766205a9..c8ab051f 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -5,33 +5,37 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche -from .pyld_util import JsonLdProcessor, bundled_loader +from __future__ import annotations -from datetime import date, time, datetime -from typing import Union, Any -from typing_extensions import Self - - -JSON_LD_CONTEXT_DICT = dict[str, Union[str, "JSON_LD_CONTEXT_DICT"]] -BASIC_TYPE = Union[str, float, int, bool] -EXPANDED_JSON_LD_VALUE = list[Union[ - dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]], - "EXPANDED_JSON_LD_VALUE", - str -]] -COMPACTED_JSON_LD_VALUE = Union[ - list[Union[dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], BASIC_TYPE]], - dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], -] -TIME_TYPE = Union[datetime, date, time] -JSON_LD_VALUE = Union[ - list[Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_container"]], - dict[str, Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_container"]], -] -PYTHONIZED_LD_CONTAINER = Union[ - list[Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], - dict[str, 
Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], -] +from .pyld_util import JsonLdProcessor, bundled_loader +from datetime import date, datetime, time + +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from .ld_dict import ld_dict + from .ld_list import ld_list + from typing import Any, TypeAlias, Union + from typing_extensions import Self + JSON_LD_CONTEXT_DICT: TypeAlias = dict[str, Union[str, "JSON_LD_CONTEXT_DICT"]] + BASIC_TYPE: TypeAlias = Union[str, float, int, bool] + EXPANDED_JSON_LD_VALUE: TypeAlias = list[Union[ + dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]], + "EXPANDED_JSON_LD_VALUE", + str + ]] + COMPACTED_JSON_LD_VALUE: TypeAlias = Union[ + list[Union[dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], BASIC_TYPE]], + dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], + ] + TIME_TYPE: TypeAlias = Union[datetime, date, time] + JSON_LD_VALUE: TypeAlias = Union[ + list[Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]], + dict[str, Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]], + ] + PYTHONIZED_LD_CONTAINER: TypeAlias = Union[ + list[Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], + dict[str, Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], + ] class ld_container: @@ -69,7 +73,7 @@ def __init__( self: Self, data: EXPANDED_JSON_LD_VALUE, *, - parent: Union[Self, None] = None, + parent: Union[ld_dict, ld_list, None] = None, key: Union[str, None] = None, index: Union[int, None] = None, context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, @@ -82,7 +86,7 @@ def __init__( :param data: The expanded json-ld data that is mapped. :type data: EXPANDED_JSON_LD_VALUE :param parent: parent node of this container. - :type parent: ld_container | None + :type parent: ld_dict | ld_list | None :param key: key into the parent container. :type key: str | None :param index: index into the parent container. 
@@ -182,7 +186,7 @@ def ld_value(self: Self) -> EXPANDED_JSON_LD_VALUE: def _to_python( self: Self, full_iri: str, ld_value: Union[list, dict, str] - ) -> Union["ld_container", BASIC_TYPE, TIME_TYPE]: + ) -> Union[ld_dict, ld_list, BASIC_TYPE, TIME_TYPE]: """ Returns a pythonized version of the given value pretending the value is in self and full_iri its key. @@ -195,7 +199,7 @@ def _to_python( :type ld_value: list | dict | str :return: The pythonized value of the ld_value. - :rtype: ld_container | BASIC_TYPE | TIME_TYPE + :rtype: ld_dict | ld_list | BASIC_TYPE | TIME_TYPE """ if full_iri == "@id": # values of key "@id" only have to be compacted diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index 3d01c711..ba29838c 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -5,21 +5,25 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche +from __future__ import annotations + from .pyld_util import bundled_loader -from .ld_container import ( - ld_container, - JSON_LD_CONTEXT_DICT, - EXPANDED_JSON_LD_VALUE, - PYTHONIZED_LD_CONTAINER, - JSON_LD_VALUE, - TIME_TYPE, - BASIC_TYPE, -) - -from collections.abc import KeysView -from types import NotImplementedType -from typing import Union, Any -from typing_extensions import Self +from .ld_container import ld_container + +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from collections.abc import Generator, Iterator, KeysView + from .ld_container import ( + JSON_LD_CONTEXT_DICT, + EXPANDED_JSON_LD_VALUE, + PYTHONIZED_LD_CONTAINER, + JSON_LD_VALUE, + TIME_TYPE, + BASIC_TYPE, + ) + from .ld_list import ld_list + from typing import Any, Union, Literal + from typing_extensions import Self class ld_dict(ld_container): @@ -27,6 +31,9 @@ class ld_dict(ld_container): An JSON-LD container resembling a dict. See also :class:`ld_container` + :ivar ref: A dict used to reference this object by its id. 
(Its form is {"@id": ...}) + :ivartype ref: dict[Literal["@id"], str] + :cvar container_type: A type used as a placeholder to represent "no default". :cvartype container_type: type[str] """ @@ -36,44 +43,143 @@ def __init__( self: Self, data: list[dict[str, EXPANDED_JSON_LD_VALUE]], *, - parent: Union[ld_container, None] = None, + parent: Union[ld_dict, ld_list, None] = None, key: Union[str, None] = None, index: Union[int, None] = None, context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None ) -> None: + """ + Create a new instance of an ld_dict. + + :param self: The instance of ld_container to be initialized. + :type self: Self + :param data: The expanded json-ld data that is mapped. + :type data: EXPANDED_JSON_LD_VALUE + :param parent: parent node of this container. + :type parent: ld_dict | ld_list | None + :param key: key into the parent container. + :type key: str | None + :param index: index into the parent container. + :type index: int | None + :param context: local context for this container. + :type context: list[str | JSON_LD_CONTEXT_DICT] | None + + :return: + :rtype: None + + :raises ValueError: If the given data doesn't represent an ld_dict. + """ + # check for validity of data if not self.is_ld_dict(data): raise ValueError("The given data does not represent a ld_dict.") + self.data_dict = data[0] + # call super constructor super().__init__(data, parent=parent, key=key, index=index, context=context) - self.data_dict = data[0] + def __getitem__(self: Self, key: str) -> ld_list: + """ + Get the item with the given key in a pythonized form. + If self contains no key, value pair with the given key, then an empty list is added as its value and returned. + + :param self: The ld_dict the item is taken from. + :type self: ld_dict + :param key: The key (compacted or expanded) to the item. + :type key: str - def __getitem__(self: Self, key: str) -> list[Union[BASIC_TYPE, TIME_TYPE, ld_container]]: + :return: The pythonized item at the key. 
+ :rtype: ld_list + """ full_iri = self.ld_proc.expand_iri(self.active_ctx, key) if full_iri not in self.data_dict: self[full_iri] = [] ld_value = self.data_dict[full_iri] return self._to_python(full_iri, ld_value) - def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]) -> None: + def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None: + """ + Set the item at the given key to the given value or delete it if value is None. + The given value is expanded. + + :param self: The ld_dict the item is set in. + :type self: ld_dict + :param key: The key at which the item is set. + :type key: str + :param value: The new value. + :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + + :return: + :rtype: None + """ + # expand key full_iri = self.ld_proc.expand_iri(self.active_ctx, key) + # if the value is None delete the entry instead of updating it if value is None: del self[full_iri] return + # expand the value and replace the key, value pair ld_value = self._to_expanded_json({full_iri: value}) self.data_dict.update(ld_value) def __delitem__(self: Self, key: str) -> None: + """ + Delete the key, value pair with the given value pair. + Note that if a deleted object is represented by an ld_container druing this process it will still exist + and not be modified afterwards. + + :param self: The ld_dict the key, value pair is deleted from. + :type self: ld_dict + :param key: The key (expanded or compacted) of the key, value pair that is deleted. + :type key: str + + :return: + :rtype: None + """ + # expand key and delete the key, value pair full_iri = self.ld_proc.expand_iri(self.active_ctx, key) del self.data_dict[full_iri] def __contains__(self: Self, key: str) -> bool: + """ + Returns whether or not self contains a key, value pair with the given key. + + :param self: The ld_dict that is checked if it a key, value pair with the given key. 
+ :type self: ld_dict + :param key: The key for which it is checked if a key, value pair is contained in self. + :type key: str + + :return: Whether or not self contains a key, value pair with the given key. + :rtype: bool + """ + # expand the key and check if self contains a key, value pair with it full_iri = self.ld_proc.expand_iri(self.active_ctx, key) # FIXME: is that good? return full_iri in self.data_dict - def __eq__(self: Self, other: Any) -> Union[bool, NotImplementedType]: # FIXME: give another type hint to other? + def __eq__( + self: Self, other: Union[ld_dict, dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] + ) -> bool: + """ + Returns whether or not self is considered to be equal to other.
+ If other is not an ld_dict, it is converted first. + If an id check is possible return its result otherwise: + For each key, value pair its value is compared to the value with the same key in other. + Note that due to those circumstances equality is not transitive + meaning if a == b and b == c it is not guaranteed that a == c.
+ + :param self: The ld_dict other is compared to. + :type self: ld_dict + :param other: The dict/ ld_dict self is compared to. + :type other: ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + + :return: Whether or not self and other are considered equal. + If other is of the wrong type return the NotImplemented singleton instead. + :rtype: bool + """ + # check if other has an acceptable type if not isinstance(other, (dict, ld_dict)): return NotImplemented + + # compare in the special case that other is a json_id or json_value if ld_container.is_json_id(other): if "@id" in self: return self["@id"] == other["@id"] @@ -82,29 +188,69 @@ def __eq__(self: Self, other: Any) -> Union[bool, NotImplementedType]: # FIXME: if {*self.keys()}.issubset({"@id", *other.keys()}): return ld_container.are_values_equal(self.data_dict, other) return False + + # convert into an ld_dict if other is not one if isinstance(other, dict): other = self.from_dict(other, parent=self.parent, key=self.key, context=self.context) + + # check for id equality if "@id" in self and "@id" in other: return self["@id"] == other["@id"] + + # test for value equality keys_self = {*self.keys()} keys_other = {*other.keys()} unique_keys = keys_self.symmetric_difference(keys_other) if unique_keys and unique_keys != {"@id"}: + # there is a key that isn't "@id" that is only in other or self return False + # check if the values with the same key are equal for key in keys_self.intersection(keys_other): if self[key] != other[key]: return False return True - def __ne__(self: Self, other: Any) -> Union[bool, NotImplementedType]: # FIXME: give another type hint to other? + def __ne__( + self: Self, other: Union[ld_dict, dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] + ) -> bool: + """ + Returns whether or not self and other not considered to be equal. + (Returns not self.__eq__(other) if the return type is bool. 
+ See ld_dict.__eq__ for more details on the comparison.) + + :param self: The ld_dict other is compared to. + :type self: ld_dict + :param other: The dict/ ld_dict self is compared to. + :type other: ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + + :return: Whether or not self and other are not considered equal. + If other is of the wrong type return the NotImplemented singleton instead. + :rtype: bool + """ + # compare self and other using __eq__ x = self.__eq__(other) + # return NotImplemented if __eq__ did so and else the inverted result of __eq__ if x is NotImplemented: return NotImplemented return not x def get( self: Self, key: str, default: Any = _NO_DEFAULT - ) -> Union[list[Union[BASIC_TYPE, TIME_TYPE, ld_container]], Any]: + ) -> Union[ld_list, Any]: + """ + Get the item with the given key in a pythonized form using __getitem__. + If a KeyError is raised, return the default or reraise it if no default is given. + + :param self: The ld_dict the item is taken from. + :type self: ld_dict + :param key: The key (compacted or expanded) to the item. + :type key: str + + :return: The pythonized item at the key. + :rtype: ld_list + + :raises KeyError: If __getitem__ raised a KeyError and no default is given. + """ try: return self[key] except KeyError as e: @@ -114,29 +260,75 @@ def get( def update( self: Self, - other: Union[dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]], "ld_dict"] + other: Union[ld_dict, dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] ) -> None: + """ + Set the items at the given keys to the given values (or delete an item if its value is None) by using __setitem__. + + :param self: The ld_dict the items are set in. + :type self: ld_dict + :param other: The key, value pairs giving the new values and their keys.
+ :type other: ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + + :return: + :rtype: None + """ for key, value in other.items(): self[key] = value def keys(self: Self) -> KeysView[str]: + """ + Return the keys of the key, value pairs of self. + + :param self: The ld_dict whose keys are returned. + :type self: ld_dict + """ return self.data_dict.keys() - def compact_keys(self): + def compact_keys(self: Self) -> Iterator[str]: + """ + Return an iterator of the compacted keys of the key, value pairs of self. + + :param self: The ld_dict whose compacted keys are returned. + :type self: ld_dict + """ return map( lambda k: self.ld_proc.compact_iri(self.active_ctx, k), self.data_dict.keys() ) - def items(self): + def items(self: Self) -> Generator[tuple[str, ld_list], None, None]: + """ + Return an generator of tuples of keys and their values in self. + + :param self: The ld_dict whose items are returned. + :type self: ld_dict + """ for k in self.data_dict.keys(): yield k, self[k] @property - def ref(self): + def ref(self: Self) -> dict[Literal["@id"], str]: + """ + Return the dict used to reference this object by its id. (Its form is {"@id": ...}) + + :param self: The ld_dict whose reference is returned. + :type self: ld_dict + + :raises KeyError: If self has no id. + """ return {"@id": self.data_dict['@id']} - def to_python(self): + def to_python(self: Self) -> dict[str, Union[BASIC_TYPE, TIME_TYPE, PYTHONIZED_LD_CONTAINER]]: + """ + Return a fully pythonized version of this object where all ld_container are replaced by lists and dicts. + + :param self: The ld_dict whose fully pythonized version is returned. + :type self: ld_dict + + :return: The fully pythonized version of self. 
+ :rtype: dict[str, BASIC_TYPE | TIME_TYPE | PYTHONIZED_LD_CONTAINER] + """ res = {} for key in self.compact_keys(): value = self[key] @@ -145,14 +337,42 @@ def to_python(self): res[key] = value return res + # FIXME: Allow from_dict to handle dicts containing ld_dicts and ld_lists @classmethod - def from_dict(cls, value, *, parent=None, key=None, context=None, ld_type=None): - ld_data = value.copy() + def from_dict( + cls: type[Self], + value: dict[str, PYTHONIZED_LD_CONTAINER], + *, + parent: Union[ld_dict, ld_list, None] = None, + key: Union[str, None] = None, + context: Union[str, JSON_LD_CONTEXT_DICT, list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, + ld_type: Union[str, list[str], None] = None + ) -> ld_dict: + """ + Creates a ld_dict from the given dict with the given parent, key, context and ld_type.
+ Uses the expansion of the JSON-LD Processor and not the one of ld_container. + + :param value: The dict of values the ld_dict should be created from. + :type value: dict[str, PYTHONIZED_LD_CONTAINER] + :param parent: The parent container of the new ld_dict. + :type parent: ld_dict | ld_list | None + :param key: The key into the inner most parent container representing a dict of the new ld_dict. + :type key: str | None + :param context: The context for the new dict (it will also inherit the context of parent). + :type context: str | JSON_LD_CONTEXT_DICT | list[str | JSON_LD_CONTEXT_DICT] | None + :param ld_type: Additional @type value(s) for the new dict. + :type ld_type: str | list[str] | None + :return: The new ld_dict built from value. + :rtype: ld_dict + """ + # make a copy of value and add the new type to it. + ld_data = value.copy() ld_type = ld_container.merge_to_list(ld_type or [], ld_data.get('@type', [])) if ld_type: ld_data["@type"] = ld_type + # generate the context from value, context and parent data_context = ld_data.pop('@context', []) merged_contexts = ld_container.merge_to_list(data_context, context or []) full_context = [] @@ -161,17 +381,40 @@ def from_dict(cls, value, *, parent=None, key=None, context=None, ld_type=None): elif parent is not None: full_context = parent.full_context + merged_contexts + # expand value and create an ld_dict from it ld_value = cls.ld_proc.expand(ld_data, {"expandContext": full_context, "documentLoader": bundled_loader}) ld_value = cls(ld_value, parent=parent, key=key, context=merged_contexts) return ld_value @classmethod - def is_ld_dict(cls, ld_value): + def is_ld_dict(cls: type[Self], ld_value: Any) -> bool: + """ + Returns whether the given value could represent an expanded json-ld dict.
+ I.e. if ld_value is a list containing a dict containing none of the keys "@set", "@graph", "@list" and "@value" + and not only the key "@id". + + :param ld_value: The value that is checked. + :type ld_value: Any + + :return: Whether or not ld_value could represent an expanded json-ld dict. + :rtype: bool + """ return cls.is_ld_node(ld_value) and cls.is_json_dict(ld_value[0]) @classmethod - def is_json_dict(cls, ld_value): + def is_json_dict(cls: type[Self], ld_value: Any) -> bool: + """ + Returns whether the given value could represent an expanded json-ld dict.
+ I.e. if ld_value is a dict containing none of the keys "@set", "@graph", "@list" and "@value" + and not only the key "@id". + + :param ld_value: The value that is checked. + :type ld_value: Any + + :returns: Wheter or not ld_value could represent an expanded json-ld dict. + :rtype: bool + """ if not isinstance(ld_value, dict): return False diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index e567dff8..23ebe4d0 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -5,21 +5,25 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche +from __future__ import annotations + +from .ld_container import ld_container from collections import deque -from types import NotImplementedType -from .ld_container import ( - ld_container, - JSON_LD_CONTEXT_DICT, - EXPANDED_JSON_LD_VALUE, - PYTHONIZED_LD_CONTAINER, - JSON_LD_VALUE, - TIME_TYPE, - BASIC_TYPE, -) - -from collections.abc import Generator, Hashable -from typing import Union, Any -from typing_extensions import Self + +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from collections.abc import Generator, Hashable + from .ld_dict import ld_dict + from .ld_container import ( + JSON_LD_CONTEXT_DICT, + EXPANDED_JSON_LD_VALUE, + PYTHONIZED_LD_CONTAINER, + JSON_LD_VALUE, + TIME_TYPE, + BASIC_TYPE, + ) + from typing import Any, Union + from typing_extensions import Self class ld_list(ld_container): @@ -37,20 +41,20 @@ def __init__( self: Self, data: EXPANDED_JSON_LD_VALUE, *, - parent: Union[ld_container, None] = None, + parent: Union[ld_dict, ld_list, None] = None, key: Union[str, None] = None, index: Union[int, None] = None, context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, ) -> None: """ - Create a new ld_list container. + Create a new instance of an ld_list. :param self: The instance of ld_list to be initialized. 
:type self: ld_list :param data: The expanded json-ld data that is mapped (must be valid for @set, @list or @graph) :type data: EXPANDED_JSON_LD_VALUE :param parent: parent node of this container. - :type parent: ld_container | None + :type parent: ld_dict | ld_list | None :param key: key into the parent container. :type key: str | None :param index: index into the parent container. @@ -97,7 +101,7 @@ def __init__( def __getitem__( self: Self, index: Union[int, slice] - ) -> Union[BASIC_TYPE, TIME_TYPE, ld_container, list[Union[BASIC_TYPE, TIME_TYPE, ld_container]]]: + ) -> Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list, list[Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]]: """ Get the item(s) at position index in a pythonized form. @@ -107,7 +111,7 @@ def __getitem__( :type index: int | slice :return: The pythonized item(s) at index. - :rtype: BASIC_TYPE | TIME_TYPE | ld_container | list[BASIC_TYPE | TIME_TYPE | ld_container]] + :rtype: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list | list[BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] """ # handle slices by applying them to a list of indices and then getting the items at those if isinstance(index, slice): @@ -120,7 +124,7 @@ def __getitem__( return item def __setitem__( - self: Self, index: Union[int, slice], value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container] + self: Self, index: Union[int, slice], value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] ) -> None: """ Set the item(s) at position index to the given value(s). @@ -131,7 +135,7 @@ def __setitem__( :param index: The positon(s) at which the item(s) is/ are set. :type index: int | slice :param value: The new value(s). 
- :type value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container] + :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list :return: :rtype: None @@ -184,7 +188,7 @@ def __len__(self: Self) -> int: """ return len(self.item_list) - def __iter__(self: Self) -> Generator[Union[BASIC_TYPE, TIME_TYPE, ld_container], None, None]: + def __iter__(self: Self) -> Generator[Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list], None, None]: """ Returns an iterator over the pythonized values contained in self. @@ -192,7 +196,7 @@ def __iter__(self: Self) -> Generator[Union[BASIC_TYPE, TIME_TYPE, ld_container] :type self: ld_list :return: The Iterator over self's values. - :rtype: Generator[BASIC_TYPE | TIME_TYPE | ld_container, None, None] + :rtype: Generator[BASIC_TYPE | TIME_TYPE | ld_dict | ld_list, None, None] """ # return an Iterator over each value in self in its pythonized from for index, value in enumerate(self.item_list): @@ -241,12 +245,9 @@ def __contains__(self: Self, value: JSON_LD_VALUE) -> bool: def __eq__( self: Self, - other: Union[ - Self, - list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]], - dict[str, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]]], - ], - ) -> Union[bool, NotImplementedType]: + other: Union[ld_list, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]], + dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] + ) -> bool: """ Returns wheter or not self is considered to be equal to other.
If other is not an ld_list, it is converted first. @@ -262,12 +263,11 @@ def __eq__( :param self: The ld_list other is compared to. :type self: ld_list :param other: The list/ container/ ld_list self is compared to. - :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container] - | dict[str, list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container]] + :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] :return: Whether or not self and other are considered equal. - If other is of the wrong type return NotImplemented instead. - :rtype: bool | NotImplementedType + If other is of the wrong type return the NotImplemented singleton instead. + :rtype: bool """ # check if other has an acceptable type if not (isinstance(other, (list, ld_list)) or ld_list.is_container(other)): @@ -504,13 +504,8 @@ def _hopcroft_karp( return matching_size def __ne__( - self: Self, - other: Union[ - Self, - list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]], - dict[str, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]]], - ], - ) -> Union[bool, NotImplementedType]: + self: Self, other: Union[ld_list, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] + ) -> bool: """ Returns whether or not self and other not considered to be equal. (Returns not self.__eq__(other) if the return type is bool. @@ -519,12 +514,11 @@ def __ne__( :param self: The ld_list other is compared to. :type self: ld_list :param other: The list/ container/ ld_list self is compared to. - :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container] - | dict[str, list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container]] + :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] :return: Whether or not self and other are not considered equal. - If other is of the wrong type return NotImplemented instead. 
- :rtype: bool | NotImplementedType + If other is of the wrong type return the NotImplemented singleton instead. + :rtype: bool """ # compare self and other using __eq__ x = self.__eq__(other) @@ -533,7 +527,7 @@ def __ne__( return NotImplemented return not x - def append(self: Self, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]) -> None: + def append(self: Self, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None: """ Append the item to the given ld_list self. The given value is expanded. If it is assimilated by self all items that would be added by this are added. @@ -541,14 +535,14 @@ def append(self: Self, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_con :param self: The ld_list the item is appended to. :type self: ld_list :param value: The new value. - :type value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container] + :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list :return: :rtype: None """ self.item_list.extend(self._to_expanded_json([value])) - def extend(self: Self, value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]]) -> None: + def extend(self: Self, value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]) -> None: """ Append the items in value to the given ld_list self. The given values are expanded. If any are assimilated by self all items that would be added by this are added. @@ -556,7 +550,7 @@ def extend(self: Self, value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, l :param self: The ld_list the items are appended to. :type self: ld_list :param value: The new values. 
+ :type value: list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] :return: :rtype: None @@ -564,7 +558,7 @@ def extend(self: Self, value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, l for item in value: self.append(item) - def to_python(self: Self) -> list[PYTHONIZED_LD_CONTAINER]: + def to_python(self: Self) -> list[Union[BASIC_TYPE, TIME_TYPE, PYTHONIZED_LD_CONTAINER]]: """ Return a fully pythonized version of this object where all ld_container are replaced by lists and dicts. @@ -572,7 +566,7 @@ def to_python(self: Self) -> list[PYTHONIZED_LD_CONTAINER]: :type self: ld_list :return: The fully pythonized version of self. - :rtype: list[PYTHONIZED_LD_CONTAINER] + :rtype: list[BASIC_TYPE | TIME_TYPE | PYTHONIZED_LD_CONTAINER] """ return [ item.to_python() if isinstance(item, ld_container) else item @@ -616,11 +610,11 @@ def from_list( cls: type[Self], value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE]], *, - parent: Union[ld_container, None] = None, + parent: Union[ld_dict, ld_list, None] = None, key: Union[str, None] = None, context: Union[str, JSON_LD_CONTEXT_DICT, list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, container_type: str = "@set" - ) -> "ld_list": + ) -> ld_list: """ Creates a ld_list from the given list with the given parent, key, context and container_type.
Note that only container_type '@set' is valid for key '@type'.
@@ -631,10 +625,10 @@ def from_list( :type value: list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE] :param parent: The parent container of the new ld_list.
If value is assimilated by parent druing JSON-LD expansion parent is extended by value and parent is returned. - :type parent: ld_container | None + :type parent: ld_dict | ld_list | None :param key: The key into the inner most parent container representing a dict of the new ld_list. :type: key: str | None - :param context: The context for the new list (is will also inherit the context of parent).
+ :param context: The context for the new list (it will also inherit the context of parent).
Note that this context won't be added to parent if value is assimilated by parent and parent is returned. :type context: str | JSON_LD_CONTEXT_DICT | list[str | JSON_LD_CONTEXT_DICT] | None :param container_type: The container type of the new list valid are '@set', '@list' and '@graph'.
From de457e3a11d4bca59bf6f304034654991b8e86a7 Mon Sep 17 00:00:00 2001 From: "Kernchen, Sophie" Date: Tue, 13 Jan 2026 11:32:41 +0100 Subject: [PATCH 31/36] Apply Style Changes (Author names instead of foo etc.) --- docs/source/dev/data_model.md | 56 +++++++++++++++++------------------ 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/docs/source/dev/data_model.md b/docs/source/dev/data_model.md index 8f008dc4..1a746a1e 100644 --- a/docs/source/dev/data_model.md +++ b/docs/source/dev/data_model.md @@ -112,13 +112,13 @@ from hermes.model import SoftwareMetadata # { # "@context": # { -# "baz": "https://schema.org/Thing" +# "name": "https://schema.org/name" # } # } data = SoftwareMetadata(extra_vocabs={"foo": "https://bar.net/schema.jsonld"}) -data["foo:baz"] = ... +data["foo:name"] = ... ``` ##### Adding data @@ -131,8 +131,8 @@ i.e., metadata that describes software: data["name"] = "My Research Software" # A simple "Text"-type value # → Simplified model representation : { "name": [ "My Research Software" ] } # Cf. "Accessing data" below -data["author"] = {"name": "Foo"} # An object value that uses terms available in the defined context -# → Simplified model representation : { "name": [ "My Research Software" ], "author": [ { "name": "Foo" } ] } +data["author"] = {"name": "Shakespeare"} # An object value that uses terms available in the defined context +# → Simplified model representation : { "name": [ "My Research Software" ], "author": [ { "name": "Shakespeare" } ] } # Cf. "Accessing data" below ``` @@ -153,14 +153,14 @@ will treat it as an array, i.e., a list-like object: ```{code-block} python :caption: Internal data values are arrays data["name"] = "My Research Software" # → [ "My Research Software" ] -data["author"] = {"name": "Foo"} # → [ { "name": [ "Foo" ] } ] +data["author"] = {"name": "Shakespeare"} # → [ { "name": [ "Shakespeare" ] } ] ``` Therefore, you access data in the same way you would access data from a Python `list`: 1. 
You access single values using indices, e.g., `data["name"][0]`. 2. You can use a list-like API to interact with data objects, e.g., -`data["name"].append("Bar")`, `data["name"].extend(["Bar", "Baz"])`, `for name in data["name"]: ...`, etc. +`data["name"].append("Hamilton")`, `data["name"].extend(["Hamilton", "Knuth"])`, `for name in data["name"]: ...`, etc. ##### Interacting with data @@ -176,22 +176,22 @@ data = SoftwareMetadata() # Let's create author metadata for our software! # Below each line of code, the value of `data["author"]` is given. -data["author"] = {"name": "Foo"} -# → [{'name': ['Foo']}] +data["author"] = {"name": "Shakespeare"} +# → [{'name': ['Shakespeare']}] -data["author"].append({"name": "Bar"}) -# [{'name': ['Foo']}, {'name': ['Bar']}] +data["author"].append({"name": "Hamilton"}) +# [{'name': ['Shakespeare']}, {'name': ['Hamilton']}] -data["author"][0]["email"] = "foo@baz.net" -# [{'name': ['Foo'], 'email': ['foo@baz.net']}, {'name': ['Bar']}] +data["author"][0]["email"] = "Shakespeare@baz.net" +# [{'name': ['Shakespeare'], 'email': ['shakespeare@baz.net']}, {'name': ['Hamilton']}] -data["author"][1]["email"].append("bar@baz.net") -# [{'name': ['Foo'], 'email': ['foo@baz.net']}, {'name': ['Bar'], 'email': ['bar@baz.net']}] +data["author"][1]["email"].append("Hamilton@baz.net") +# [{'name': ['Shakespeare'], 'email': ['shakespeare@baz.net']}, {'name': ['Hamilton'], 'email': ['hamilton@baz.net']}] -data["author"][1]["email"].extend(["bar@spam.org", "bar@eggs.com"]) +data["author"][1]["email"].extend(["hamilton@spam.org", "hamilton@eggs.com"]) # [ -# {'name': ['Foo'], 'email': ['foo@baz.net']}, -# {'name': ['Bar'], 'email': ['bar@baz.net', 'bar@spam.org', 'bar@eggs.com']} +# {'name': ['Shakespeare'], 'email': ['shakespeare@baz.net']}, +# {'name': ['Hamilton'], 'email': ['hamilton@baz.net', 'hamilton@spam.org', 'hamilton@eggs.com']} # ] ``` @@ -199,9 +199,9 @@ The example continues to show how to iterate through data. 
```{code-block} python :caption: for-loop, containment check -for i, author in enumerate(data["author"]): - if author["name"][0] in ["Foo", "Bar"]: - print(f"Author {i + 1} has expected name.") +for i, author in enumerate(data["author"], start=1): + if author["name"][0] in ["Shakespeare", "Hamilton"]: + print(f"Author {i} has expected name.") else: raise ValueError("Unexpected author name found!", author["name"][0]) @@ -224,7 +224,7 @@ for email in data["author"][0]["email"]: ```{code-block} python :caption: Value check and list comprehension -if ["bar" in email for email in data["author"][1]["email"]]: +if all(["hamilton" in email for email in data["author"][1]["email"]]): print("Author has only emails with their name in it.") # Mock output @@ -248,7 +248,7 @@ Python data: :emphasize-lines: 5,13 try: assert ( - {'name': ['Foo'], 'email': ['foo@baz.net']} + {'name': ['Shakespeare'], 'email': ['shakespeare@baz.net']} in data["author"] ) @@ -261,19 +261,19 @@ except AssertionError: # $> The author could not be found. 
# $> AssertionError: # assert -# {'email': ['foo@baz.net'], 'name': ['Foo']} +# {'email': ['shakespeare@baz.net'], 'name': ['Shakespeare']} # in # _LDList( # {'@list': [ # { -# 'http://schema.org/name': [{'@value': 'Foo'}], -# 'http://schema.org/email': [{'@value': 'foo@baz.net'}] +# 'http://schema.org/name': [{'@value': 'Shakespeare'}], +# 'http://schema.org/email': [{'@value': 'shakespeare@baz.net'}] # }, # { -# 'http://schema.org/name': [{'@value': 'Bar'}], +# 'http://schema.org/name': [{'@value': 'Hamilton'}], # 'http://schema.org/email': [ # {'@list': [ -# {'@value': 'bar@baz.net'}, {'@value': 'bar@spam.org'}, {'@value': 'bar@eggs.com'} +# {'@value': 'hamilton@baz.net'}, {'@value': 'hamilton@spam.org'}, {'@value': 'hamilton@eggs.com'} # ]} # ] # }] @@ -294,7 +294,7 @@ This function can be used in assertions to assert full data integrity: :emphasize-lines: 5,13 try: assert ( - {'name': ['Foo'], 'email': ['foo@baz.net']} + {'name': ['Shakespeare'], 'email': ['Shakespeare@baz.net']} in data["author"].to_python() ) From e9d010f0fabb1e87c74727d72f9080cfa0ab3fb8 Mon Sep 17 00:00:00 2001 From: "Kernchen, Sophie" Date: Tue, 13 Jan 2026 11:47:47 +0100 Subject: [PATCH 32/36] Correct lower letters for emails --- docs/source/dev/data_model.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/dev/data_model.md b/docs/source/dev/data_model.md index 1a746a1e..e788fd44 100644 --- a/docs/source/dev/data_model.md +++ b/docs/source/dev/data_model.md @@ -182,10 +182,10 @@ data["author"] = {"name": "Shakespeare"} data["author"].append({"name": "Hamilton"}) # [{'name': ['Shakespeare']}, {'name': ['Hamilton']}] -data["author"][0]["email"] = "Shakespeare@baz.net" +data["author"][0]["email"] = "shakespeare@baz.net" # [{'name': ['Shakespeare'], 'email': ['shakespeare@baz.net']}, {'name': ['Hamilton']}] -data["author"][1]["email"].append("Hamilton@baz.net") +data["author"][1]["email"].append("hamilton@baz.net") # [{'name': ['Shakespeare'], 'email': 
['shakespeare@baz.net']}, {'name': ['Hamilton'], 'email': ['hamilton@baz.net']}] data["author"][1]["email"].extend(["hamilton@spam.org", "hamilton@eggs.com"]) @@ -294,7 +294,7 @@ This function can be used in assertions to assert full data integrity: :emphasize-lines: 5,13 try: assert ( - {'name': ['Shakespeare'], 'email': ['Shakespeare@baz.net']} + {'name': ['Shakespeare'], 'email': ['shakespeare@baz.net']} in data["author"].to_python() ) From 605201d396a045a1880786e7e56f0501d0c05447 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 16 Jan 2026 10:54:09 +0100 Subject: [PATCH 33/36] fixed small bug in set_item of ld_dict --- src/hermes/model/types/ld_dict.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index ba29838c..a6b9de09 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -110,14 +110,12 @@ def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TI :return: :rtype: None """ - # expand key - full_iri = self.ld_proc.expand_iri(self.active_ctx, key) # if the value is None delete the entry instead of updating it if value is None: - del self[full_iri] + del self[self.ld_proc.expand_iri(self.active_ctx, key)] return - # expand the value and replace the key, value pair - ld_value = self._to_expanded_json({full_iri: value}) + # expand the key, value pair and update data_dict + ld_value = self._to_expanded_json({key: value}) self.data_dict.update(ld_value) def __delitem__(self: Self, key: str) -> None: From eb6f587126b42f343f75a545655cc414a8c27b54 Mon Sep 17 00:00:00 2001 From: "Kernchen, Sophie" Date: Fri, 16 Jan 2026 15:48:48 +0100 Subject: [PATCH 34/36] Fix compact_iri for schema elements with containers --- src/hermes/model/types/pyld_util.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/hermes/model/types/pyld_util.py b/src/hermes/model/types/pyld_util.py index 
3cf9862b..10b9b7c5 100644 --- a/src/hermes/model/types/pyld_util.py +++ b/src/hermes/model/types/pyld_util.py @@ -103,8 +103,12 @@ class JsonLdProcessor(jsonld.JsonLdProcessor): def expand_iri(self, active_ctx: t.Any, short_iri: str, vocab: bool = True) -> str: return self._expand_iri(active_ctx, short_iri, vocab=vocab) - def compact_iri(self, active_ctx: t.Any, long_iri: str, vocab: bool = True) -> str: - return self._compact_iri(active_ctx, long_iri, vocab=vocab) + def compact_iri(self, active_ctx: t.Any, long_iri: str, vocab: bool = True, value: list = None) -> str: + # print(active_ctx, long_iri, vocab) + for mapping in active_ctx["mappings"].values(): + if ("@container" in mapping and long_iri): + value = {x: "none" for x in mapping["@container"]} + return self._compact_iri(active_ctx, long_iri, vocab=vocab, value=value) def initial_ctx(self, local_ctx, options=None): return self.process_context(self._INITIAL_CONTEXT, local_ctx, options or {}) From 0284b01186b26670fbb0351d37445562420e6a50 Mon Sep 17 00:00:00 2001 From: "Kernchen, Sophie" Date: Tue, 20 Jan 2026 09:29:58 +0100 Subject: [PATCH 35/36] Correct Docs for newer functionality --- docs/source/dev/data_model.md | 44 +++++++---------------------------- 1 file changed, 8 insertions(+), 36 deletions(-) diff --git a/docs/source/dev/data_model.md b/docs/source/dev/data_model.md index e788fd44..9077b0d3 100644 --- a/docs/source/dev/data_model.md +++ b/docs/source/dev/data_model.md @@ -214,9 +214,9 @@ for i, author in enumerate(data["author"], start=1): :caption: Value check for email in data["author"][0]["email"]: if email.endswith(".edu"): - print("Author has an email address at an educational institution.") + print("Shakespeare has an email address at an educational institution.") else: - print("Cannot confirm affiliation with educational institution for author.") + print("Cannot confirm affiliation with educational institution for Shakespeare.") # Mock output # $> Cannot confirm affiliation with educational 
institution for author. @@ -239,8 +239,8 @@ The API class {class}`hermes.model.SoftwareMetadata` hides many of the more complex aspects of JSON-LD and makes it easy to work with the data model. -Assertions, however, operate on the internal model objects. -Therefore, they may not work as you would expect from plain +The API class also hides the internal model objects. +Therefore, assertions work as you would expect from plain Python data: ```{code-block} python @@ -258,12 +258,10 @@ except AssertionError: raise # Mock output -# $> The author could not be found. -# $> AssertionError: -# assert -# {'email': ['shakespeare@baz.net'], 'name': ['Shakespeare']} -# in -# _LDList( +# $> The author was found! +# +# +# Internal Model from data["author"]: # {'@list': [ # { # 'http://schema.org/name': [{'@value': 'Shakespeare'}], @@ -281,32 +279,6 @@ except AssertionError: # ) ``` -The mock output in the example above shows the inequality of the expected and the actual value. -The actual value is an internal data type wrapping the more complex JSON-LD data. - -The complex data structure of JSON-LD is internally constructed in the `hermes` data -model, and to make it possible to work with only the data that is important - the actual terms -and their values - the internal data model types provide a function `.to_python()`. -This function can be used in assertions to assert full data integrity: - -```{code-block} python -:caption: Containment assertion with `to_python()` -:emphasize-lines: 5,13 -try: - assert ( - {'name': ['Shakespeare'], 'email': ['shakespeare@baz.net']} - in - data["author"].to_python() - ) - print("The author was found!") -except AssertionError: - print("The author could not be found.") - raise - -# Mock output -# $> The author was found!
-``` - --- ## See Also From d46394e551b6265c0b7a34d301a2c4663537123b Mon Sep 17 00:00:00 2001 From: "Kernchen, Sophie" Date: Fri, 23 Jan 2026 09:13:20 +0100 Subject: [PATCH 36/36] Correct type of value --- src/hermes/model/types/pyld_util.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/hermes/model/types/pyld_util.py b/src/hermes/model/types/pyld_util.py index 10b9b7c5..8bcef253 100644 --- a/src/hermes/model/types/pyld_util.py +++ b/src/hermes/model/types/pyld_util.py @@ -103,10 +103,9 @@ class JsonLdProcessor(jsonld.JsonLdProcessor): def expand_iri(self, active_ctx: t.Any, short_iri: str, vocab: bool = True) -> str: return self._expand_iri(active_ctx, short_iri, vocab=vocab) - def compact_iri(self, active_ctx: t.Any, long_iri: str, vocab: bool = True, value: list = None) -> str: - # print(active_ctx, long_iri, vocab) + def compact_iri(self, active_ctx: t.Any, long_iri: str, vocab: bool = True, value: dict = None) -> str: for mapping in active_ctx["mappings"].values(): - if ("@container" in mapping and long_iri): + if "@container" in mapping and long_iri: value = {x: "none" for x in mapping["@container"]} return self._compact_iri(active_ctx, long_iri, vocab=vocab, value=value)