From 248ae33b8f094c361a8280b83241fc780f4629f7 Mon Sep 17 00:00:00 2001
From: notactuallyfinn <michael.fritzsche@dlr.de>
Date: Fri, 6 Feb 2026 13:12:19 +0100
Subject: [PATCH 1/7] added adjusted files from feature/153-refactor-datamodel
 for process

---
 src/hermes/commands/process/base.py    |  51 ++++-------
 src/hermes/model/merge/__init__.py     |   3 +
 src/hermes/model/merge/action.py       |  83 ++++++++++++++++++
 src/hermes/model/merge/container.py    | 116 +++++++++++++++++++++++++
 src/hermes/model/merge/match.py        |  17 ++++
 src/hermes/model/merge/strategy.py     |  42 +++++++++
 src/hermes/model/types/ld_container.py |   6 +-
 7 files changed, 279 insertions(+), 39 deletions(-)
 create mode 100644 src/hermes/model/merge/__init__.py
 create mode 100644 src/hermes/model/merge/action.py
 create mode 100644 src/hermes/model/merge/container.py
 create mode 100644 src/hermes/model/merge/match.py
 create mode 100644 src/hermes/model/merge/strategy.py

diff --git a/src/hermes/commands/process/base.py b/src/hermes/commands/process/base.py
index 9e29d1e6..83480056 100644
--- a/src/hermes/commands/process/base.py
+++ b/src/hermes/commands/process/base.py
@@ -5,13 +5,13 @@
 # SPDX-FileContributor: Michael Meinel
 
 import argparse
-import json
-import sys
 
 from pydantic import BaseModel
 
 from hermes.commands.base import HermesCommand, HermesPlugin
-from hermes.model.context import HermesHarvestContext, CodeMetaContext
+from hermes.model.api import SoftwareMetadata
+from hermes.model.context_manager import HermesContext
+from hermes.model.merge.container import ld_merge_dict
 
 
 class HermesProcessPlugin(HermesPlugin):
@@ -33,42 +33,21 @@ class HermesProcessCommand(HermesCommand):
 
     def __call__(self, args: argparse.Namespace) -> None:
         self.args = args
-        ctx = CodeMetaContext()
-
-        if not (ctx.hermes_dir / "harvest").exists():
-            self.log.error("You must run the harvest command before process")
-            sys.exit(1)
+        ctx = HermesContext()
+        merged_doc = ld_merge_dict([{}])
 
         # Get all harvesters
         harvester_names = self.root_settings.harvest.sources
-        harvester_names.reverse()   # Switch order for priority handling
 
+        ctx.prepare_step('harvest')
         for harvester in harvester_names:
             self.log.info("## Process data from %s", harvester)
-
-            harvest_context = HermesHarvestContext(ctx, harvester, {})
-            try:
-                harvest_context.load_cache()
-            # when the harvest step ran, but there is no cache file, this is a serious flaw
-            except FileNotFoundError:
-                self.log.warning("No output data from harvester %s found, skipping", harvester)
-                continue
-
-            ctx.merge_from(harvest_context)
-            ctx.merge_contexts_from(harvest_context)
-
-        if ctx._errors:
-            self.log.error('Errors during merge')
-            self.errors.extend(ctx._errors)
-
-            for ep, error in ctx._errors:
-                self.log.info("    - %s: %s", ep.name, error)
-
-        tags_path = ctx.get_cache('process', 'tags', create=True)
-        with tags_path.open('w') as tags_file:
-            json.dump(ctx.tags, tags_file, indent=2)
-
-        ctx.prepare_codemeta()
-
-        with open(ctx.get_cache("process", ctx.hermes_name, create=True), 'w') as codemeta_file:
-            json.dump(ctx._data, codemeta_file, indent=2)
+            merged_doc.update(SoftwareMetadata.load_from_cache(ctx, harvester))
+        ctx.finalize_step("harvest")
+
+        ctx.prepare_step("process")
+        with ctx["result"] as result_ctx:
+            result_ctx["codemeta"] = merged_doc.compact()
+            result_ctx["context"] = {"@context": merged_doc.full_context}
+            result_ctx["expanded"] = merged_doc.ld_value
+        ctx.finalize_step("process")
diff --git a/src/hermes/model/merge/__init__.py b/src/hermes/model/merge/__init__.py
new file mode 100644
index 00000000..1741dca8
--- /dev/null
+++ b/src/hermes/model/merge/__init__.py
@@ -0,0 +1,3 @@
+# SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR)
+#
+# SPDX-License-Identifier: Apache-2.0
\ No newline at end of file
diff --git a/src/hermes/model/merge/action.py b/src/hermes/model/merge/action.py
new file mode 100644
index 00000000..80f45591
--- /dev/null
+++ b/src/hermes/model/merge/action.py
@@ -0,0 +1,83 @@
+# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR)
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# SPDX-FileContributor: Michael Meinel
+
+from hermes.model.types import ld_list
+
+
+class MergeError(ValueError):
+    """ValueError subclass reserved for merge problems (not raised within this module)."""
+
+
+class MergeAction:  # base class: merge(target, key, value, update) returns the value to store
+    def merge(self, target, key, value, update):
+        raise NotImplementedError()  # concrete actions override this
+
+
+class Reject(MergeAction):  # keep the stored value; a differing update is recorded via target.reject()
+    @classmethod
+    def merge(cls, target, key, value, update):
+        if value != update:
+            target.reject(key, update)
+        return value
+
+
+class Replace(MergeAction):  # take the update; a differing stored value is recorded via target.replace()
+    @classmethod
+    def merge(cls, target, key, value, update):
+        if value != update:
+            target.replace(key, value)
+        return update
+
+
+class Concat(MergeAction):
+    """Merge by concatenation: the update is added behind the existing value(s) as is."""
+    @classmethod
+    def merge(cls, target, key, value, update):
+        return cls.merge_to_list(value, update)
+
+    @classmethod
+    def merge_to_list(cls, head, tail):
+        """Append/extend tail onto head (in place if head is already list-like) and return it."""
+        list_like = (list, ld_list)
+        merged = head if isinstance(head, list_like) else [head]
+        add = merged.extend if isinstance(tail, list_like) else merged.append
+        add(tail)
+        return merged
+
+
+class Collect(MergeAction):
+    """Merge by adding only those update items that match no item already present."""
+
+    def __init__(self, match):
+        # match(existing, candidate) is truthy when both count as the same entry.
+        self.match = match
+
+    def merge(self, target, key, value, update):
+        """Return value plus the unmatched items of update; a single result is returned unwrapped."""
+        collected = value if isinstance(value, list) else [value]
+        candidates = update if isinstance(update, list) else [update]
+
+        for candidate in candidates:
+            already_known = any(self.match(known, candidate) for known in collected)
+            if not already_known:
+                collected.append(candidate)
+
+        return collected[0] if len(collected) == 1 else collected
+
+
+class MergeSet(MergeAction):
+    """Update matched items on target in place; append unmatched ones (or all, if not merge_items)."""
+    def __init__(self, match, merge_items=True):
+        self.match, self.merge_items = match, merge_items
+
+    def merge(self, target, key, value, update):
+        for incoming in update:
+            existing = target.match(key[-1], incoming, self.match)
+            if not (existing and self.merge_items):
+                value.append(incoming)
+            else:
+                existing.update(incoming)
+        return value
diff --git a/src/hermes/model/merge/container.py b/src/hermes/model/merge/container.py
new file mode 100644
index 00000000..80395d87
--- /dev/null
+++ b/src/hermes/model/merge/container.py
@@ -0,0 +1,116 @@
+# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR)
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# SPDX-FileContributor: Michael Meinel
+
+from hermes.model.types import ld_context, ld_dict, ld_list
+
+from .strategy import CODEMETA_STRATEGY, PROV_STRATEGY, REPLACE_STRATEGY
+from ..types.pyld_util import bundled_loader
+
+
+class _ld_merge_container:
+    """Mixin that upgrades plain ld_dict/ld_list values to their merge-aware counterparts."""
+
+    def _to_python(self, full_iri, ld_value):
+        """Convert ld_value via the next class in the MRO and re-wrap plain containers.
+
+        The replacement is built from the same ld_value, parent, key, index and context.
+        """
+        value = super()._to_python(full_iri, ld_value)
+        # ld_merge_dict and ld_merge_list are defined further down; the names resolve at call time.
+        for plain_cls, merge_cls in ((ld_dict, ld_merge_dict), (ld_list, ld_merge_list)):
+            if isinstance(value, plain_cls) and not isinstance(value, merge_cls):
+                value = merge_cls(
+                    value.ld_value,
+                    parent=value.parent,
+                    key=value.key,
+                    index=value.index,
+                    context=value.context,
+                )
+        return value
+
+
+class ld_merge_list(_ld_merge_container, ld_list):  # ld_list with the merge-aware _to_python of the mixin
+    def __init__(self, data, *, parent=None, key=None, index=None, context=None):
+        super().__init__(data, parent=parent, key=key, index=index, context=context)
+
+
+class ld_merge_dict(_ld_merge_container, ld_dict):  # ld_dict that merges on assignment via per-type strategies
+    def __init__(self, data, *, parent=None, key=None, index=None, context=None):
+        super().__init__(data, parent=parent, key=key, index=index, context=context)
+
+        self.update_context(ld_context.HERMES_PROV_CONTEXT)  # presumably defines the hermes-rt terms - confirm
+
+        self.strategies = {**REPLACE_STRATEGY}  # shallow copy; add_strategy() rebinds entries, never mutates them
+        self.add_strategy(CODEMETA_STRATEGY)
+        self.add_strategy(PROV_STRATEGY)
+
+    def update_context(self, other_context):  # fold other_context into self.context, then rebuild active_ctx
+        if other_context:
+            if len(self.context) < 1 or not isinstance(self.context[-1], dict):
+                self.context.append({})  # dict entries below are merged into the last entry, so it must be a dict
+
+            if not isinstance(other_context, list):
+                other_context = [other_context]
+            for ctx in other_context:
+                if isinstance(ctx, dict):
+                    # FIXME: Shouldn't the dict be appended instead?
+                    # How it is implemented currently results in anomalies like this:
+                    # other_context = [{"codemeta": "https://doi.org/10.5063/schema/codemeta-1.0/"}]
+                    # self.context = [{"codemeta": "https://doi.org/10.5063/schema/codemeta-2.0/"}]
+                    # resulting context is only [{"codemeta": "https://doi.org/10.5063/schema/codemeta-1.0/"}]
+                    # values that start with "https://doi.org/10.5063/schema/codemeta-2.0/" can't be compacted anymore
+                    self.context[-1].update(ctx)
+                elif ctx not in self.context:
+                    self.context.insert(0, ctx)  # non-dict entries are put in front of the existing ones
+
+            self.active_ctx = self.ld_proc.initial_ctx(self.context, {"documentLoader": bundled_loader})
+
+    def update(self, other):  # adopt other's context first, then delegate to the parent class update()
+        if isinstance(other, ld_dict):
+            self.update_context(other.context)
+
+        super().update(other)
+
+    def add_strategy(self, strategy):  # entries already registered take precedence over those in strategy
+        for key, value in strategy.items():
+            self.strategies[key] = {**value, **self.strategies.get(key, {})}
+
+    def __setitem__(self, key, value):  # an existing key is merged through its strategy rather than overwritten
+        if key in self:
+            value = self._merge_item(key, value)
+        super().__setitem__(key, value)
+
+    def match(self, key, value, match):  # first item of self[key] accepted by match(item, value), else None
+        for index, item in enumerate(self[key]):
+            if match(item, value):
+                if isinstance(item, ld_dict) and not isinstance(item, ld_merge_dict):
+                    item = ld_merge_dict(
+                        item.ld_value, parent=item.parent, key=item.key, index=index, context=item.context
+                    )
+                elif isinstance(item, ld_list) and not isinstance(item, ld_merge_list):
+                    item = ld_merge_list(
+                        item.ld_value, parent=item.parent, key=item.key, index=index, context=item.context
+                    )
+                return item
+
+    def _merge_item(self, key, value):
+        strategy = {**self.strategies[None]}  # start from the default (None) action set
+        ld_types = self.data_dict.get('@type', [])
+        for ld_type in ld_types:
+            strategy.update(self.strategies.get(ld_type, {}))  # layer the actions registered for each @type on top
+
+        merger = strategy.get(key, strategy[None])  # per-property action, else the default one
+        return merger.merge(self, [*self.path, key], self[key], value)
+
+    def _add_related(self, rel, key, value):  # append (key, value) as a schema:PropertyValue to self[rel]
+        self.emplace(rel)
+        self[rel].append({"@type": "schema:PropertyValue", "schema:name": str(key), "schema:value": str(value)})
+
+    def reject(self, key, value):  # bookkeeping hook called by the Reject action
+        self._add_related("hermes-rt:reject", key, value)
+
+    def replace(self, key, value):  # bookkeeping hook called by the Replace action
+        self._add_related("hermes-rt:replace", key, value)
diff --git a/src/hermes/model/merge/match.py b/src/hermes/model/merge/match.py
new file mode 100644
index 00000000..03b9f9ef
--- /dev/null
+++ b/src/hermes/model/merge/match.py
@@ -0,0 +1,17 @@
+# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR)
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# SPDX-FileContributor: Michael Meinel
+
+
+def match_equals(a, b):  # matcher: two items are the same entry iff they compare equal
+    return a == b
+
+
+def match_keys(*keys):
+    """Build a matcher: true iff at least one key is in both operands and all shared keys agree."""
+    def match_func(left, right):
+        shared = [name for name in keys if name in left and name in right]
+        return bool(shared) and all([left[name] == right[name] for name in shared])
+    return match_func
diff --git a/src/hermes/model/merge/strategy.py b/src/hermes/model/merge/strategy.py
new file mode 100644
index 00000000..12681fe6
--- /dev/null
+++ b/src/hermes/model/merge/strategy.py
@@ -0,0 +1,42 @@
+# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR)
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# SPDX-FileContributor: Michael Meinel
+
+from hermes.model.types.ld_context import iri_map as iri
+
+from .action import Reject, Replace, Collect, Concat, MergeSet
+from .match import match_equals, match_keys
+
+
+REPLACE_STRATEGY = {  # default policy: an update overwrites the stored value
+    None: {  # outer key None: the action set used regardless of @type
+        None: Replace,  # inner key None: fallback for properties without their own action
+        "@type": Collect(match_equals),  # types are collected without duplicates
+    },
+}
+
+
+REJECT_STRATEGY = {  # alternative policy: the stored value is kept, a differing update is rejected
+    None: {
+        None: Reject,
+        "@type": Collect(match_equals),
+    },
+}
+
+
+PROV_STRATEGY = {  # the hermes-rt bookkeeping properties are concatenated instead of replaced
+    None: {
+        iri["hermes-rt:graph"]: Concat,
+        iri["hermes-rt:replace"]: Concat,
+        iri["hermes-rt:reject"]: Concat,
+    },
+}
+
+
+CODEMETA_STRATEGY = {  # authors match if they share '@id' and/or email and all shared ones are equal
+    iri["schema:SoftwareSourceCode"]: {
+        iri["schema:author"]: MergeSet(match_keys('@id', iri['schema:email'])),
+    },
+}
diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py
index a18c886d..f97868d9 100644
--- a/src/hermes/model/types/ld_container.py
+++ b/src/hermes/model/types/ld_container.py
@@ -237,7 +237,7 @@ def _to_expanded_json(
         # while searching build a path such that it leads from the found ld_dicts ld_value to selfs data_dict/ item_list
         parent = self
         path = []
-        while parent.__class__.__name__ not in ("ld_dict", "SoftwareMetadata"):
+        while parent.__class__.__name__ not in ("ld_dict", "SoftwareMetadata", "ld_merge_dict"):
             if parent.container_type == "@list":
                 path.extend(["@list", 0])
             elif parent.container_type == "@graph":
@@ -250,7 +250,7 @@ def _to_expanded_json(
         # if neither self nor any of its parents is a ld_dict:
         # create a dict with the key of the outer most parent of self and this parents ld_value as a value
         # this dict is stored in an ld_container and simulates the most minimal JSON-LD object possible
-        if parent.__class__.__name__ not in ("ld_dict", "SoftwareMetadata"):
+        if parent.__class__.__name__ not in ("ld_dict", "SoftwareMetadata", "ld_merge_dict"):
             key = self.ld_proc.expand_iri(parent.active_ctx, parent.key)
             parent = ld_container([{key: parent._data}])
         path.append(0)
@@ -277,7 +277,7 @@ def _to_expanded_json(
                     [(new_key, temp) for new_key in temp.keys() if isinstance(temp[new_key], special_types)]
                 )
             elif isinstance(temp, ld_container):
-                if temp.__class__.__name__ == "ld_list" and temp.container_type == "@set":
+                if temp.__class__.__name__ in ("ld_list", "ld_merge_list") and temp.container_type == "@set":
                     ref[key] = temp._data
                 else:
                     ref[key] = temp._data[0]

From ebebca4e5099c1a856acfbf755077ca5d0a2aa45 Mon Sep 17 00:00:00 2001
From: notactuallyfinn <michael.fritzsche@dlr.de>
Date: Fri, 6 Feb 2026 14:00:09 +0100
Subject: [PATCH 2/7] added first tests

---
 src/hermes/commands/__init__.py        |   2 +-
 src/hermes/commands/cli.py             |   4 +-
 test/hermes_test/model/test_api_e2e.py | 103 +++++++++++++++++++++++++
 3 files changed, 106 insertions(+), 3 deletions(-)

diff --git a/src/hermes/commands/__init__.py b/src/hermes/commands/__init__.py
index 278faddf..e1ddf036 100644
--- a/src/hermes/commands/__init__.py
+++ b/src/hermes/commands/__init__.py
@@ -14,6 +14,6 @@
 # from hermes.commands.init.base import HermesInitCommand
 # from hermes.commands.curate.base import HermesCurateCommand
 from hermes.commands.harvest.base import HermesHarvestCommand
-# from hermes.commands.process.base import HermesProcessCommand
+from hermes.commands.process.base import HermesProcessCommand
 from hermes.commands.deposit.base import HermesDepositCommand
 # from hermes.commands.postprocess.base import HermesPostprocessCommand
diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py
index 0ec2d1ae..d465f3b8 100644
--- a/src/hermes/commands/cli.py
+++ b/src/hermes/commands/cli.py
@@ -16,7 +16,7 @@
 # from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand,
 #                              HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand,
 #                              HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand)
-from hermes.commands import HermesDepositCommand, HermesHarvestCommand
+from hermes.commands import HermesDepositCommand, HermesHarvestCommand, HermesProcessCommand
 from hermes.commands.base import HermesCommand
 
 
@@ -43,7 +43,7 @@ def main() -> None:
             # HermesInitCommand(parser),
             # HermesCleanCommand(parser),
             HermesHarvestCommand(parser),
-            # HermesProcessCommand(parser),
+            HermesProcessCommand(parser),
             # HermesCurateCommand(parser),
             HermesDepositCommand(parser),
             # HermesPostprocessCommand(parser),
diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py
index 18dc973c..0eddc59b 100644
--- a/test/hermes_test/model/test_api_e2e.py
+++ b/test/hermes_test/model/test_api_e2e.py
@@ -475,3 +475,106 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_
 
     # TODO: compare to actually expected value
     assert result == invenio_metadata
+
+
+@pytest.mark.parametrize(
+    "metadata_in, metadata_out",
+    [
+        (
+            {
+                "cff": SoftwareMetadata({
+                    "@type": ["http://schema.org/SoftwareSourceCode"],
+                    "http://schema.org/description": [{"@value": "for testing"}],
+                    "http://schema.org/name": [{"@value": "Test"}],
+                    "http://schema.org/author": [{
+                        "@type": "http://schema.org/Person",
+                        "http://schema.org/familyName": [{"@value": "Test"}],
+                        "http://schema.org/givenName": [{"@value": "Testi"}]
+                    }],
+                    "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}]
+                })
+            },
+            SoftwareMetadata({
+                "@type": ["http://schema.org/SoftwareSourceCode"],
+                "http://schema.org/description": [{"@value": "for testing"}],
+                "http://schema.org/name": [{"@value": "Test"}],
+                "http://schema.org/author": [{
+                    "@type": "http://schema.org/Person",
+                    "http://schema.org/familyName": [{"@value": "Test"}],
+                    "http://schema.org/givenName": [{"@value": "Testi"}]
+                }],
+                "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}]
+            })
+        ),
+        (
+            {
+                "cff": SoftwareMetadata({
+                    "@type": ["http://schema.org/SoftwareSourceCode"],
+                    "http://schema.org/name": [{"@value": "Test"}],
+                    "http://schema.org/author": [{
+                        "@type": "http://schema.org/Person",
+                        "http://schema.org/familyName": [{"@value": "Test"}],
+                        "http://schema.org/givenName": [{"@value": "Testi"}],
+                        "http://schema.org/email": [{"@value": "test.testi@testis.tests"}]
+                    }],
+                    "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}]
+                }),
+                "codemeta": SoftwareMetadata({
+                    "@type": ["http://schema.org/SoftwareSourceCode"],
+                    "http://schema.org/description": [{"@value": "for testing"}],
+                    "http://schema.org/name": [{"@value": "Test"}],
+                    "http://schema.org/author": [{
+                        "@type": "http://schema.org/Person",
+                        "http://schema.org/familyName": [{"@value": "Test"}],
+                        "http://schema.org/givenName": [{"@value": "Testi"}],
+                        "http://schema.org/email": [{"@value": "test.testi@testis.tests"}]
+                    }]
+                })
+            },
+            SoftwareMetadata({
+                "@type": ["http://schema.org/SoftwareSourceCode"],
+                "http://schema.org/description": [{"@value": "for testing"}],
+                "http://schema.org/name": [{"@value": "Test"}],
+                "http://schema.org/author": [{
+                    "@type": "http://schema.org/Person",
+                    "http://schema.org/familyName": [{"@value": "Test"}],
+                    "http://schema.org/givenName": [{"@value": "Testi"}],
+                    "http://schema.org/email": [{"@value": "test.testi@testis.tests"}]
+                }],
+                "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}]
+            })
+        )
+    ]
+)
+def test_process(tmp_path, monkeypatch, metadata_in, metadata_out):
+    """Run ``hermes process`` on a pre-filled harvest cache and compare the merged result."""
+    monkeypatch.chdir(tmp_path)
+
+    manager = context_manager.HermesContext(tmp_path)
+    manager.prepare_step("harvest")
+    for harvester, result in metadata_in.items():
+        with manager[harvester] as cache:
+            cache["codemeta"] = result.compact()
+            cache["context"] = {"@context": result.full_context}
+            cache["expanded"] = result.ld_value
+    manager.finalize_step("harvest")
+
+    # Plain concatenation: nesting '"' or a backslash inside an f-string replacement field needs Python 3.12+.
+    config_file = tmp_path / "hermes.toml"
+    config_file.write_text("[harvest]\nsources = [" + ", ".join(f'"{name}"' for name in metadata_in) + "]")
+
+    monkeypatch.setattr(sys, "argv", ["hermes", "process", "--path", str(tmp_path), "--config", str(config_file)])
+    monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),))
+    try:
+        cli.main()
+    except SystemExit as e:
+        if e.code != 0:
+            raise
+
+    # Read the result only after a clean run, so a failing run is never masked by a cache error.
+    manager.prepare_step("process")
+    result = SoftwareMetadata.load_from_cache(manager, "result")
+    manager.finalize_step("process")
+
+    assert result.ld_value == metadata_out.ld_value
+    assert result == metadata_out

From f21df496ef85d61341dfa31ff15f4cbf54d42a87 Mon Sep 17 00:00:00 2001
From: Michael Fritzsche <michael.fritzsche@dlr.de>
Date: Mon, 9 Feb 2026 09:16:05 +0100
Subject: [PATCH 3/7] (re)added version and help commands to the available
 commands

---
 src/hermes/commands/__init__.py |  6 +++---
 src/hermes/commands/base.py     | 21 +++++++++++++++++++++
 src/hermes/commands/cli.py      |  8 +++++---
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/src/hermes/commands/__init__.py b/src/hermes/commands/__init__.py
index e1ddf036..d239cb0e 100644
--- a/src/hermes/commands/__init__.py
+++ b/src/hermes/commands/__init__.py
@@ -8,9 +8,9 @@
 # "unused import" errors.
 # flake8: noqa
 
-# from hermes.commands.base import HermesHelpCommand
-# from hermes.commands.base import HermesVersionCommand
-# from hermes.commands.clean.base import HermesCleanCommand
+from hermes.commands.base import HermesHelpCommand
+from hermes.commands.base import HermesVersionCommand
+from hermes.commands.clean.base import HermesCleanCommand
 # from hermes.commands.init.base import HermesInitCommand
 # from hermes.commands.curate.base import HermesCurateCommand
 from hermes.commands.harvest.base import HermesHarvestCommand
diff --git a/src/hermes/commands/base.py b/src/hermes/commands/base.py
index 2d182267..12e3c994 100644
--- a/src/hermes/commands/base.py
+++ b/src/hermes/commands/base.py
@@ -175,6 +175,7 @@ def __call__(self, command: HermesCommand) -> None:
 
 
 class HermesHelpSettings(BaseModel):
+    """Intentionally empty settings class for the help command."""
     pass
 
 
@@ -200,3 +201,23 @@ def __call__(self, args: argparse.Namespace) -> None:
             # Otherwise, simply show the general help and exit (cleanly).
             self.parser.print_help()
             self.parser.exit()
+
+
+class HermesVersionSettings(BaseModel):
+    """Intentionally empty settings class for the version command."""
+    pass
+
+
+class HermesVersionCommand(HermesCommand):
+    """Show HERMES version and exit."""
+
+    command_name = "version"
+    settings_class = HermesVersionSettings
+
+    def load_settings(self, args: argparse.Namespace):
+        """Pass loading settings as not necessary for this command."""
+        pass
+
+    def __call__(self, args: argparse.Namespace) -> None:
+        self.log.info(metadata.version("hermes"))
+        self.parser.exit()
diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py
index d465f3b8..debe6f62 100644
--- a/src/hermes/commands/cli.py
+++ b/src/hermes/commands/cli.py
@@ -16,7 +16,9 @@
 # from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand,
 #                              HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand,
 #                              HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand)
-from hermes.commands import HermesDepositCommand, HermesHarvestCommand, HermesProcessCommand
+from hermes.commands import (
+    HermesDepositCommand, HermesHarvestCommand, HermesHelpCommand, HermesProcessCommand, HermesVersionCommand
+)
 from hermes.commands.base import HermesCommand
 
 
@@ -38,8 +40,8 @@ def main() -> None:
     setting_types = {}
 
     for command in (
-            # HermesHelpCommand(parser),
-            # HermesVersionCommand(parser),
+            HermesHelpCommand(parser),
+            HermesVersionCommand(parser),
             # HermesInitCommand(parser),
             # HermesCleanCommand(parser),
             HermesHarvestCommand(parser),

From d4d9ca8d6e84edf137cf739483816a346139a151 Mon Sep 17 00:00:00 2001
From: Michael Fritzsche <michael.fritzsche@dlr.de>
Date: Mon, 9 Feb 2026 09:16:50 +0100
Subject: [PATCH 4/7] made test for process step more complex

---
 test/hermes_test/model/test_api_e2e.py | 37 +++++++++++++++++---------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py
index 0eddc59b..7a65098b 100644
--- a/test/hermes_test/model/test_api_e2e.py
+++ b/test/hermes_test/model/test_api_e2e.py
@@ -511,12 +511,18 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_
                 "cff": SoftwareMetadata({
                     "@type": ["http://schema.org/SoftwareSourceCode"],
                     "http://schema.org/name": [{"@value": "Test"}],
-                    "http://schema.org/author": [{
-                        "@type": "http://schema.org/Person",
-                        "http://schema.org/familyName": [{"@value": "Test"}],
-                        "http://schema.org/givenName": [{"@value": "Testi"}],
-                        "http://schema.org/email": [{"@value": "test.testi@testis.tests"}]
-                    }],
+                    "http://schema.org/author": [
+                        {
+                            "@type": "http://schema.org/Person",
+                            "http://schema.org/familyName": [{"@value": "Test"}],
+                            "http://schema.org/email": [{"@value": "test.testi@testis.tests"}]
+                        },
+                        {
+                            "@type": "http://schema.org/Person",
+                            "http://schema.org/familyName": [{"@value": "Tester"}],
+                            "http://schema.org/email": [{"@value": "test@tester.tests"}]
+                        }
+                    ],
                     "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}]
                 }),
                 "codemeta": SoftwareMetadata({
@@ -535,12 +541,19 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_
                 "@type": ["http://schema.org/SoftwareSourceCode"],
                 "http://schema.org/description": [{"@value": "for testing"}],
                 "http://schema.org/name": [{"@value": "Test"}],
-                "http://schema.org/author": [{
-                    "@type": "http://schema.org/Person",
-                    "http://schema.org/familyName": [{"@value": "Test"}],
-                    "http://schema.org/givenName": [{"@value": "Testi"}],
-                    "http://schema.org/email": [{"@value": "test.testi@testis.tests"}]
-                }],
+                "http://schema.org/author": [
+                    {
+                        "@type": "http://schema.org/Person",
+                        "http://schema.org/familyName": [{"@value": "Test"}],
+                        "http://schema.org/givenName": [{"@value": "Testi"}],
+                        "http://schema.org/email": [{"@value": "test.testi@testis.tests"}]
+                    },
+                    {
+                        "@type": "http://schema.org/Person",
+                        "http://schema.org/familyName": [{"@value": "Tester"}],
+                        "http://schema.org/email": [{"@value": "test@tester.tests"}]
+                    }
+                ],
                 "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}]
             })
         )

From 7cfa7bcc7be101dd6580ead1d933f762e768d280 Mon Sep 17 00:00:00 2001
From: Michael Fritzsche <michael.fritzsche@dlr.de>
Date: Mon, 9 Feb 2026 09:18:19 +0100
Subject: [PATCH 5/7] made process step and ld_container._to_expanded_json more
 robust

---
 src/hermes/commands/process/base.py    | 9 ++++++++-
 src/hermes/model/types/ld_container.py | 6 +++---
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/hermes/commands/process/base.py b/src/hermes/commands/process/base.py
index 83480056..1aae0dab 100644
--- a/src/hermes/commands/process/base.py
+++ b/src/hermes/commands/process/base.py
@@ -11,6 +11,7 @@
 from hermes.commands.base import HermesCommand, HermesPlugin
 from hermes.model.api import SoftwareMetadata
 from hermes.model.context_manager import HermesContext
+from hermes.model.error import HermesContextError
 from hermes.model.merge.container import ld_merge_dict
 
 
@@ -42,7 +43,13 @@ def __call__(self, args: argparse.Namespace) -> None:
         ctx.prepare_step('harvest')
         for harvester in harvester_names:
             self.log.info("## Process data from %s", harvester)
-            merged_doc.update(SoftwareMetadata.load_from_cache(ctx, harvester))
+            try:
+                metadata = SoftwareMetadata.load_from_cache(ctx, harvester)
+            except HermesContextError as e:
+                self.log.error("Error while trying to load data from harvest plugin '%s': %s", harvester, e)
+                self.errors.append(e)
+                continue
+            merged_doc.update(metadata)
         ctx.finalize_step("harvest")
 
         ctx.prepare_step("process")
diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py
index f97868d9..756f2033 100644
--- a/src/hermes/model/types/ld_container.py
+++ b/src/hermes/model/types/ld_container.py
@@ -237,7 +237,7 @@ def _to_expanded_json(
         # while searching build a path such that it leads from the found ld_dicts ld_value to selfs data_dict/ item_list
         parent = self
         path = []
-        while parent.__class__.__name__ not in ("ld_dict", "SoftwareMetadata", "ld_merge_dict"):
+        while not "ld_dict" in [sub_cls.__name__ for sub_cls in type(parent).mro()]:
             if parent.container_type == "@list":
                 path.extend(["@list", 0])
             elif parent.container_type == "@graph":
@@ -250,7 +250,7 @@ def _to_expanded_json(
         # if neither self nor any of its parents is a ld_dict:
         # create a dict with the key of the outer most parent of self and this parents ld_value as a value
         # this dict is stored in an ld_container and simulates the most minimal JSON-LD object possible
-        if parent.__class__.__name__ not in ("ld_dict", "SoftwareMetadata", "ld_merge_dict"):
+        if not "ld_dict" in [sub_cls.__name__ for sub_cls in type(parent).mro()]:
             key = self.ld_proc.expand_iri(parent.active_ctx, parent.key)
             parent = ld_container([{key: parent._data}])
         path.append(0)
@@ -277,7 +277,7 @@ def _to_expanded_json(
                     [(new_key, temp) for new_key in temp.keys() if isinstance(temp[new_key], special_types)]
                 )
             elif isinstance(temp, ld_container):
-                if temp.__class__.__name__ in ("ld_list", "ld_merge_list") and temp.container_type == "@set":
+                if "ld_list" in [sub_cls.__name__ for sub_cls in type(temp).mro()] and temp.container_type == "@set":
                     ref[key] = temp._data
                 else:
                     ref[key] = temp._data[0]

From 520ef39bf267643f32ab13da06d10db22a014565 Mon Sep 17 00:00:00 2001
From: Michael Fritzsche <michael.fritzsche@dlr.de>
Date: Mon, 9 Feb 2026 09:26:51 +0100
Subject: [PATCH 6/7] improved flake8 rating

---
 src/hermes/model/merge/__init__.py     | 2 +-
 src/hermes/model/types/ld_container.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/hermes/model/merge/__init__.py b/src/hermes/model/merge/__init__.py
index 1741dca8..faf5a2f5 100644
--- a/src/hermes/model/merge/__init__.py
+++ b/src/hermes/model/merge/__init__.py
@@ -1,3 +1,3 @@
 # SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR)
 #
-# SPDX-License-Identifier: Apache-2.0
\ No newline at end of file
+# SPDX-License-Identifier: Apache-2.0
diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py
index 756f2033..f30a212c 100644
--- a/src/hermes/model/types/ld_container.py
+++ b/src/hermes/model/types/ld_container.py
@@ -237,7 +237,7 @@ def _to_expanded_json(
         # while searching build a path such that it leads from the found ld_dicts ld_value to selfs data_dict/ item_list
         parent = self
         path = []
-        while not "ld_dict" in [sub_cls.__name__ for sub_cls in type(parent).mro()]:
+        while "ld_dict" not in [sub_cls.__name__ for sub_cls in type(parent).mro()]:
             if parent.container_type == "@list":
                 path.extend(["@list", 0])
             elif parent.container_type == "@graph":
@@ -250,7 +250,7 @@ def _to_expanded_json(
         # if neither self nor any of its parents is a ld_dict:
         # create a dict with the key of the outer most parent of self and this parents ld_value as a value
         # this dict is stored in an ld_container and simulates the most minimal JSON-LD object possible
-        if not "ld_dict" in [sub_cls.__name__ for sub_cls in type(parent).mro()]:
+        if "ld_dict" not in [sub_cls.__name__ for sub_cls in type(parent).mro()]:
             key = self.ld_proc.expand_iri(parent.active_ctx, parent.key)
             parent = ld_container([{key: parent._data}])
         path.append(0)

From bcdc82124a1a6f3cacd0398bcf3a978ae8a18b57 Mon Sep 17 00:00:00 2001
From: notactuallyfinn <michael.fritzsche@dlr.de>
Date: Fri, 13 Feb 2026 13:54:47 +0100
Subject: [PATCH 7/7] added lots of comments and fixed small inconsistencies

---
 src/hermes/commands/deposit/invenio.py |   4 +-
 src/hermes/model/merge/container.py    | 301 +++++++++++++++++++++++--
 src/hermes/model/merge/match.py        |  53 ++++-
 src/hermes/model/types/ld_container.py |   6 +-
 src/hermes/model/types/ld_list.py      |   4 +-
 test/hermes_test/model/test_api_e2e.py |   9 +-
 6 files changed, 342 insertions(+), 35 deletions(-)

diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py
index 3915d536..ba45c146 100644
--- a/src/hermes/commands/deposit/invenio.py
+++ b/src/hermes/commands/deposit/invenio.py
@@ -513,7 +513,7 @@ def _codemeta_to_invenio_deposition(self) -> dict:
 
         creators = []
         for author in metadata.get("author", []):
-            if not "Person" in author.get("@type", []):
+            if "Person" not in author.get("@type", []):
                 continue
             creator = {}
             if len(
@@ -527,7 +527,7 @@ def _codemeta_to_invenio_deposition(self) -> dict:
                 raise HermesValidationError(f"Author has too many family names: {author}")
             if len(author.get("familyName", [])) == 1:
                 given_names_str = " ".join(author.get("givenName", []))
-                name = f"{author["familyName"][0]}, {given_names_str}"
+                name = f"{author['familyName'][0]}, {given_names_str}"
             elif len(author.get("name", [])) != 1:
                 raise HermesValidationError(f"Author has too many or no names: {author}")
             else:
diff --git a/src/hermes/model/merge/container.py b/src/hermes/model/merge/container.py
index 80395d87..ec9fedd9 100644
--- a/src/hermes/model/merge/container.py
+++ b/src/hermes/model/merge/container.py
@@ -3,16 +3,49 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # SPDX-FileContributor: Michael Meinel
+# SPDX-FileContributor: Michael Fritzsche
 
-from hermes.model.types import ld_context, ld_dict, ld_list
+from typing import Callable, Union
+from typing_extensions import Self
+
+from hermes.model.merge.action import MergeAction
+from hermes.model.types import ld_container, ld_context, ld_dict, ld_list
+from hermes.model.types.ld_container import (
+    BASIC_TYPE, EXPANDED_JSON_LD_VALUE, JSON_LD_CONTEXT_DICT, JSON_LD_VALUE, TIME_TYPE
+)
 
 from .strategy import CODEMETA_STRATEGY, PROV_STRATEGY, REPLACE_STRATEGY
 from ..types.pyld_util import bundled_loader
 
 
 class _ld_merge_container:
-    def _to_python(self, full_iri, ld_value):
+    """
+    Abstract base class for ld_merge_dict and ld_merge_list,
+    providing the merge containers with overrides of ld_container._to_python().
+    See also :class:`ld_dict`, :class:`ld_list` and :class:`ld_container`.
+    """
+
+    def _to_python(
+        self: Self,
+        full_iri: str,
+        ld_value: Union[EXPANDED_JSON_LD_VALUE, dict[str, EXPANDED_JSON_LD_VALUE], list[str], str]
+    ) -> Union["ld_merge_dict", "ld_merge_list", BASIC_TYPE, TIME_TYPE]:
+        """
+        Returns a pythonized version of the given value pretending the value is in self and full_iri its key.
+
+        :param self: the ld_container ld_value is considered to be in.
+        :type self: Self
+        :param full_iri: The expanded iri of the key of ld_value / self (the latter if self is not a dictionary).
+        :type full_iri: str
+        :param ld_value: The value whose pythonized value is requested. ld_value has to be valid expanded JSON-LD if it
+            was embedded in self._data.
+        :type ld_value: EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE] | list[str] | str
+
+        :return: The pythonized value of the ld_value.
+        :rtype: ld_merge_dict | ld_merge_list | BASIC_TYPE | TIME_TYPE
+        """
         value = super()._to_python(full_iri, ld_value)
+        # replace ld_dicts with ld_merge_dicts
         if isinstance(value, ld_dict) and not isinstance(value, ld_merge_dict):
             value = ld_merge_dict(
                 value.ld_value,
@@ -21,6 +54,7 @@ def _to_python(self, full_iri, ld_value):
                 index=value.index,
                 context=value.context
             )
+        # replace ld_lists with ld_merge_lists
         if isinstance(value, ld_list) and not isinstance(value, ld_merge_list):
             value = ld_merge_list(
                 value.ld_value,
@@ -33,21 +67,108 @@ def _to_python(self, full_iri, ld_value):
 
 
 class ld_merge_list(_ld_merge_container, ld_list):
-    def __init__(self, data, *, parent=None, key=None, index=None, context=None):
+    """
+    ld_list wrapper to ensure the 'merge_container'-property does not get lost, while merging.
+    See also :class:`ld_list` and :class:`_ld_merge_container`.
+    """
+
+    def __init__(
+        self: "ld_merge_list",
+        data: Union[list[str], list[dict[str, EXPANDED_JSON_LD_VALUE]]],
+        *,
+        parent: Union[ld_container, None] = None,
+        key: Union[str, None] = None,
+        index: Union[int, None] = None,
+        context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None
+    ) -> None:
+        """
+        Create a new ld_merge_list.
+        For further information on this function and the errors it throws see :meth:`ld_list.__init__`.
+
+        :param self: The instance of ld_merge_list to be initialized.
+        :type self: Self
+        :param data: The expanded json-ld data that is mapped (must be valid for @set, @list or @graph)
+        :type data: list[str] | list[dict[str, EXPANDED_JSON_LD_VALUE]]
+        :param parent: parent node of this container.
+        :type parent: ld_container | None
+        :param key: key into the parent container.
+        :type key: str | None
+        :param index: index into the parent container.
+        :type index: int | None
+        :param context: local context for this container.
+        :type context: list[str | JSON_LD_CONTEXT_DICT] | None
+
+        :return:
+        :rtype: None
+        """
         super().__init__(data, parent=parent, key=key, index=index, context=context)
 
 
 class ld_merge_dict(_ld_merge_container, ld_dict):
-    def __init__(self, data, *, parent=None, key=None, index=None, context=None):
+    """
+    ld_dict wrapper providing methods to merge an object of this class with an ld_dict object.
+    See also :class:`ld_dict` and :class:`_ld_merge_container`.
+
+    :ivar strategies: The strategies for merging different types of values in the ld_dicts.
+    :ivartype strategies: dict[str | None, dict[str | None, MergeAction]]
+    """
+
+    def __init__(
+        self: Self,
+        data: list[dict[str, EXPANDED_JSON_LD_VALUE]],
+        *,
+        parent: Union[ld_dict, ld_list, None] = None,
+        key: Union[str, None] = None,
+        index: Union[int, None] = None,
+        context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None
+    ) -> None:
+        """
+        Create a new instance of an ld_merge_dict.
+        See also :meth:`ld_dict.__init__`.
+
+        :param self: The instance of ld_merge_dict to be initialized.
+        :type self: Self
+        :param data: The expanded json-ld data that is mapped.
+        :type data: list[dict[str, EXPANDED_JSON_LD_VALUE]]
+        :param parent: parent node of this container.
+        :type parent: ld_dict | ld_list | None
+        :param key: key into the parent container.
+        :type key: str | None
+        :param index: index into the parent container.
+        :type index: int | None
+        :param context: local context for this container.
+        :type context: list[str | JSON_LD_CONTEXT_DICT] | None
+
+        :return:
+        :rtype: None
+
+        :raises ValueError: If the given data doesn't represent an ld_dict.
+        """
         super().__init__(data, parent=parent, key=key, index=index, context=context)
 
+        # add provenance context
         self.update_context(ld_context.HERMES_PROV_CONTEXT)
 
+        # add strategies
         self.strategies = {**REPLACE_STRATEGY}
         self.add_strategy(CODEMETA_STRATEGY)
         self.add_strategy(PROV_STRATEGY)
 
-    def update_context(self, other_context):
+    def update_context(
+        self: Self, other_context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None]
+    ) -> None:
+        """
+        Updates selfs context with other_context.
+        JSON-LD processing prioritizes the context values in order (first least important, last most important).
+
+        :param self: The instance of the ld_merge_dict context is added to.
+        :type self: Self
+        :param other_context: The context object that is added to selfs context.
+        :type other_context: list[str | JSON_LD_CONTEXT_DICT] | None
+
+        :return:
+        :rtype: None
+        """
         if other_context:
             if len(self.context) < 1 or not isinstance(self.context[-1], dict):
                 self.context.append({})
@@ -56,7 +177,7 @@ def update_context(self, other_context):
                 other_context = [other_context]
             for ctx in other_context:
                 if isinstance(ctx, dict):
-                    # FIXME: Shouldn't the dict be appended instead?
+                    # FIXME #471: Shouldn't the dict be appended instead?
                     # How it is implemented currently results in anomalies like this:
                     # other_context = [{"codemeta": "https://doi.org/10.5063/schema/codemeta-1.0/"}]
                     # self.context = [{"codemeta": "https://doi.org/10.5063/schema/codemeta-2.0/"}]
@@ -64,53 +185,187 @@ def update_context(self, other_context):
                     # values that start with "https://doi.org/10.5063/schema/codemeta-2.0/" can't be compacted anymore
                     self.context[-1].update(ctx)
                 elif ctx not in self.context:
+                    # FIXME #471: If multiple string values are in self.context, the others are preferred
+                    # if the new one is inserted at the beginning. But with the dictionaries the order is reversed.
                     self.context.insert(0, ctx)
 
+            # update the active context that is used for compaction/ expansion
             self.active_ctx = self.ld_proc.initial_ctx(self.context, {"documentLoader": bundled_loader})
 
-    def update(self, other):
+    def update(self: Self, other: ld_dict) -> None:
+        """
+        Updates/ Merges this ld_merge_dict with the given ld_dict other.
+        This overrides :meth:`ld_dict.update`, and may cause unexpected behavior if not used carefully.
+
+        :param self: The ld_merge_dict that is updated with other.
+        :type self: Self
+        :param other: The ld_container that is merged into self.
+        :type other: ld_dict
+
+        :return:
+        :rtype: None
+        """
+        # add all new context from other
         if isinstance(other, ld_dict):
             self.update_context(other.context)
 
+        # add the actual values based on the MergeAction strategies
+        # this works implicitly because ld_dict.update invokes self.__setitem__ which is overridden by ld_merge_dict
         super().update(other)
 
-    def add_strategy(self, strategy):
+    def add_strategy(self: Self, strategy: dict[Union[str, None], dict[Union[str, None], MergeAction]]) -> None:
+        """
+        Adds the given strategy to the self.strategies.
+
+        :param self: The ld_merge_dict the strategy is added to.
+        :type self: Self
+        :param strategy: The object describing how which object types are supposed to be merged.
+        :type strategy: dict[str | None, dict[str | None, MergeAction]]
+        """
         for key, value in strategy.items():
             self.strategies[key] = {**value, **self.strategies.get(key, {})}
 
-    def __setitem__(self, key, value):
+    def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]):
+        """
+        Creates the new entry for self[key] using self.strategies on the values in self[key] and value.
+        Wraps :meth:`ld_dict.__setitem__`, and may cause unexpected behavior if not used carefully.
+
+        :param self: The ld_merge_dict whose value at key gets updated/ merged with value.
+        :type self: Self
+        :param key: The key at which the value is updated/ merged in self.
+        :type key: str
+        :param value: The value that is merged into self[key].
+        :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list
+        """
+        # create the new item if self[key] and value have to be merged.
         if key in self:
             value = self._merge_item(key, value)
+        # update the entry of self[key]
         super().__setitem__(key, value)
 
-    def match(self, key, value, match):
-        for index, item in enumerate(self[key]):
+    def match(
+        self: Self,
+        key: str,
+        value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list],
+        match: Union[
+            Callable[
+                [
+                    Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list],
+                    Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
+                ],
+                bool
+            ],
+            Callable[["ld_merge_dict", ld_dict], bool]
+        ]
+    ) -> Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list, None]:
+        """
+        Returns the first item in self[key] for which match(item, value) returns true.
+        If no such item is found None is returned instead.
+
+        :param self: The ld_merge_dict in whose entry for key a match for value is searched.
+        :type self: Self
+        :param key: The key to the items in self in which a match for value is searched.
+        :type key: str
+        :param value: The value a match is searched for in self[key].
+        :type value: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list
+        :param match: The method defining if two objects are a match.
+        :type match: Callable[
+            [
+                BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list,
+                BASIC_TYPE | TIME_TYPE | ld_dict | ld_list
+            ],
+            bool
+        ] | Callable[[ld_merge_dict, ld_dict], bool]
+
+        :return: The item in self[key] that is a match to value if one exists else None
+        :rtype: BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list | None
+        """
+        # iterate over all items in self[key] and return the first that is a match
+        for item in self[key]:
             if match(item, value):
-                if isinstance(item, ld_dict) and not isinstance(item, ld_merge_dict):
-                    item = ld_merge_dict(
-                        item.ld_value, parent=item.parent, key=item.key, index=index, context=item.context
-                    )
-                elif isinstance(item, ld_list) and not isinstance(item, ld_merge_list):
-                    item = ld_merge_list(
-                        item.ld_value, parent=item.parent, key=item.key, index=index, context=item.context
-                    )
                 return item
 
-    def _merge_item(self, key, value):
+    def _merge_item(
+        self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
+    ) -> Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list]:
+        """
+        Applies the most suitable merge strategy to merge self[key] and value and then returns the result.
+
+        :param self: The ld_merge_dict whose entry at key is to be merged with value.
+        :type self: Self
+        :param key: The key to the entry in self that is to be merged with value.
+        :type key: str
+        :param value: The value that is to be merged with self[key].
+        :type value: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list
+
+        :return: The result of the merge from self[key] with value.
+        :rtype: BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list
+        """
+        # search for all applicable strategies
         strategy = {**self.strategies[None]}
         ld_types = self.data_dict.get('@type', [])
         for ld_type in ld_types:
             strategy.update(self.strategies.get(ld_type, {}))
 
+        # choose one merge strategy and return the item returned by following the merge strategy
         merger = strategy.get(key, strategy[None])
         return merger.merge(self, [*self.path, key], self[key], value)
 
-    def _add_related(self, rel, key, value):
+    def _add_related(
+        self: Self, rel: str, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
+    ) -> None:
+        """
+        Adds an entry for rel to self containing which key and value is affected.
+
+        :param self: The ld_merge_container the special entry is added to.
+        :type self: Self
+        :param rel: The "type" of the special entry (used as the key).
+        :type rel: str
+        :param key: The key of the affected key, value pair in self.
+        :type key: str
+        :param value: The value of the affected key, value pair in self.
+        :type value:  BASIC_TYPE | TIME_TYPE | ld_dict | ld_list
+
+        :return:
+        :rtype: None
+        """
+        # make sure appending is possible
         self.emplace(rel)
+        # append the new entry
         self[rel].append({"@type": "schema:PropertyValue", "schema:name": str(key), "schema:value": str(value)})
 
-    def reject(self, key, value):
+    def reject(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None:
+        """
+        Adds an entry to self containing information that the key, value pair
+        key, value has been rejected in the merge.
+        For further information see :meth:`ld_merge_dict._add_related`.
+
+        :param self: The ld_merge_container the special entry is added to.
+        :type self: Self
+        :param key: The key of the rejected key, value pair in self.
+        :type key: str
+        :param value: The value of the rejected key, value pair in self.
+        :type value: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list
+
+        :return:
+        :rtype: None
+        """
         self._add_related("hermes-rt:reject", key, value)
 
-    def replace(self, key, value):
+    def replace(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None:
+        """
+        Adds an entry to self containing information that the key, value pair
+        key, value was replaced in the merge.
+        For further information see :meth:`ld_merge_dict._add_related`.
+
+        :param self: The ld_merge_container the special entry is added to.
+        :type self: Self
+        :param key: The key of the old key, value pair in self.
+        :type key: str
+        :param value: The value of the old key, value pair in self.
+        :type value: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list
+
+        :return:
+        :rtype: None
+        """
         self._add_related("hermes-rt:replace", key, value)
diff --git a/src/hermes/model/merge/match.py b/src/hermes/model/merge/match.py
index 03b9f9ef..77abca35 100644
--- a/src/hermes/model/merge/match.py
+++ b/src/hermes/model/merge/match.py
@@ -4,14 +4,61 @@
 
 # SPDX-FileContributor: Michael Meinel
 
+from typing import Any, Callable
 
-def match_equals(a, b):
+from hermes.model.merge.container import ld_merge_dict
+from hermes.model.types import ld_dict
+
+
+def match_equals(a: Any, b: Any) -> bool:
+    """
+    Wrapper method for normal == comparison.
+
+    :param a: First item for the comparison.
+    :type a: Any
+    :param b: Second item for the comparison.
+    :type b: Any
+
+    :return: Truth value of a == b.
+    :rtype: bool
+    """
     return a == b
 
 
-def match_keys(*keys):
-    def match_func(left, right):
+def match_keys(
+    *keys: str
+) -> Callable[[ld_merge_dict, ld_dict], bool]:
+    """
+    Creates a function taking two parameters that returns true
+    if both given parameters have at least one common key in the given list of keys
+    and for all common keys in the given list of keys the values of both objects are the same.
+
+    :param keys: The list of important keys for the comparison method.
+    :type keys: list[str]
+
+    :return: A function comparing two given objects values for the keys in keys.
+    :rtype: Callable[[ld_merge_dict, ld_dict], bool]
+    """
+
+    # create and return the match function using the given keys
+    def match_func(left: ld_merge_dict, right: ld_dict) -> bool:
+        """
+        Compares left to right by checking if a) they have at least one common key in a predetermined list of keys and
+        b) testing if both objects have equal values for all common keys in the predetermined key list.
+
+        :param left: The first object for the comparison.
+        :type left: ld_merge_dict
+        :param right: The second object for the comparison.
+        :type right: ld_dict
+
+        :return: The result of the comparison.
+        :rtype: bool
+        """
+        # create a list of all common important keys
         active_keys = [key for key in keys if key in left and key in right]
+        # check if both objects have the same values for all active keys
         pairs = [(left[key] == right[key]) for key in active_keys]
+        # return whether or not both objects had the same values for all active keys
+        # and there was at least one active key
         return len(active_keys) > 0 and all(pairs)
     return match_func
diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py
index f30a212c..b2456017 100644
--- a/src/hermes/model/types/ld_container.py
+++ b/src/hermes/model/types/ld_container.py
@@ -176,7 +176,9 @@ def ld_value(self: Self) -> EXPANDED_JSON_LD_VALUE:
         return self._data
 
     def _to_python(
-        self: Self, full_iri: str, ld_value: Union[list, dict, str]
+        self: Self,
+        full_iri: str,
+        ld_value: Union[EXPANDED_JSON_LD_VALUE, dict[str, EXPANDED_JSON_LD_VALUE], list[str], str]
     ) -> Union["ld_container", BASIC_TYPE, TIME_TYPE]:
         """
         Returns a pythonized version of the given value pretending the value is in self and full_iri its key.
@@ -187,7 +189,7 @@ def _to_python(
         :type full_iri: str
         :param ld_value: The value thats pythonized value is requested. ld_value has to be valid expanded JSON-LD if it
             was embeded in self._data.
-        :type ld_value: list | dict | str
+        :type ld_value: EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE] | list[str] | str
 
         :return: The pythonized value of the ld_value.
         :rtype: ld_container | BASIC_TYPE | TIME_TYPE
diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py
index c4d1c450..a76db3b6 100644
--- a/src/hermes/model/types/ld_list.py
+++ b/src/hermes/model/types/ld_list.py
@@ -23,7 +23,7 @@
 class ld_list(ld_container):
     """
     An JSON-LD container resembling a list ("@set", "@list" or "@graph").
-    See also :class:`ld_container`
+    See also :class:`ld_container`.
 
     :ivar container_type: The type of JSON-LD container the list is representing. ("@set", "@list", "graph")
     :ivartype container_type: str
@@ -35,7 +35,7 @@ def __init__(
         self: Self,
         data: Union[list[str], list[dict[str, EXPANDED_JSON_LD_VALUE]]],
         *,
-        parent: Union["ld_container", None] = None,
+        parent: Union[ld_container, None] = None,
         key: Union[str, None] = None,
         index: Union[int, None] = None,
         context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None,
diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py
index 7a65098b..f756f101 100644
--- a/test/hermes_test/model/test_api_e2e.py
+++ b/test/hermes_test/model/test_api_e2e.py
@@ -4,6 +4,7 @@
 
 # SPDX-FileContributor: Michael Fritzsche
 
+from datetime import date
 import json
 import pytest
 import sys
@@ -422,7 +423,7 @@ def test_file_deposit(tmp_path, monkeypatch, metadata):
             }),
             {
                 "upload_type": "software",
-                "publication_date": "2026-02-02",
+                "publication_date": date.today().isoformat(),
                 "title": "Test",
                 "creators": [{"name": "Test, Testi"}],
                 "description": "for testing",
@@ -445,6 +446,8 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_
         cache["codemeta"] = metadata.compact()
     manager.finalize_step("curate")
 
+    (tmp_path / "test.txt").write_text("Test, oh wonderful test!\n")
+
     config_file = tmp_path / "hermes.toml"
     config_file.write_text(f"""[deposit]
 target = "invenio"
@@ -452,7 +455,7 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_
 site_url = "https://sandbox.zenodo.org"
 access_right = "closed"
 auth_token = "{sandbox_auth}"
-files = ["hermes.toml"]
+files = ["test.txt"]
 [deposit.invenio.api_paths]
 licenses = "api/vocabularies/licenses"
 """)
@@ -572,7 +575,7 @@ def test_process(tmp_path, monkeypatch, metadata_in, metadata_out):
     manager.finalize_step("harvest")
 
     config_file = tmp_path / "hermes.toml"
-    config_file.write_text(f"[harvest]\nsources = [{", ".join(f"\"{harvester}\"" for harvester in metadata_in)}]")
+    config_file.write_text("[harvest]\nsources = [" + ", ".join(f'"{harvester}"' for harvester in metadata_in) + "]")
 
     orig_argv = sys.argv[:]
     sys.argv = ["hermes", "process", "--path", str(tmp_path), "--config", str(config_file)]