From a68f2ae8abb4a81efa09784da6abce0c4b65a97e Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Fri, 21 Nov 2025 11:25:12 +0100 Subject: [PATCH 01/10] Add test file --- test/hermes_test/model/test_api_e2e.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test/hermes_test/model/test_api_e2e.py diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py new file mode 100644 index 00000000..e69de29b From 95288a2f68ab090605b25aa58e25c82dca9ecb24 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 12 Jan 2026 10:59:59 +0100 Subject: [PATCH 02/10] added first e2e harvest test for SoftwareMetadata --- src/hermes/commands/__init__.py | 16 +++--- src/hermes/commands/base.py | 77 +++++++++----------------- src/hermes/commands/harvest/base.py | 44 +++++++-------- src/hermes/commands/harvest/cff.py | 33 +++++------ src/hermes/model/__init__.py | 4 ++ test/hermes_test/model/test_api_e2e.py | 56 +++++++++++++++++++ 6 files changed, 132 insertions(+), 98 deletions(-) diff --git a/src/hermes/commands/__init__.py b/src/hermes/commands/__init__.py index 5203ac18..14f77741 100644 --- a/src/hermes/commands/__init__.py +++ b/src/hermes/commands/__init__.py @@ -8,12 +8,12 @@ # "unused import" errors. # flake8: noqa -from hermes.commands.base import HermesHelpCommand -from hermes.commands.base import HermesVersionCommand -from hermes.commands.clean.base import HermesCleanCommand -from hermes.commands.init.base import HermesInitCommand -from hermes.commands.curate.base import HermesCurateCommand +# from hermes.commands.base import HermesHelpCommand +# from hermes.commands.base import HermesVersionCommand +# from hermes.commands.clean.base import HermesCleanCommand +# from hermes.commands.init.base import HermesInitCommand +# from hermes.commands.curate.base import HermesCurateCommand from hermes.commands.harvest.base import HermesHarvestCommand -from hermes.commands.process.base import HermesProcessCommand -from hermes.commands.deposit.base import HermesDepositCommand -from hermes.commands.postprocess.base import HermesPostprocessCommand +# from hermes.commands.process.base import HermesProcessCommand +# from hermes.commands.deposit.base import HermesDepositCommand +# from hermes.commands.postprocess.base import HermesPostprocessCommand diff --git a/src/hermes/commands/base.py b/src/hermes/commands/base.py index 3ae9030b..d64581de 100644 --- a/src/hermes/commands/base.py +++ b/src/hermes/commands/base.py @@ -9,19 +9,20 @@ import logging import pathlib from importlib import metadata -from typing import Dict, Optional, Type +from typing import Type, Union import toml from pydantic import BaseModel from pydantic_settings import BaseSettings, SettingsConfigDict -class _HermesSettings(BaseSettings): + +class HermesSettings(BaseSettings): """Root class for HERMES configuration model.""" model_config = SettingsConfigDict(env_file_encoding='utf-8') - logging: Dict = {} + logging: dict = {} class HermesCommand(abc.ABC): @@ -31,7 +32,7 @@ class HermesCommand(abc.ABC): """ command_name: str = "" - settings_class: Type = _HermesSettings + settings_class: Type = HermesSettings def __init__(self, parser: argparse.ArgumentParser): """Initialize a new instance of any HERMES command. @@ -45,28 +46,27 @@ def __init__(self, parser: argparse.ArgumentParser): self.log = logging.getLogger(f"hermes.{self.command_name}") self.errors = [] - @classmethod - def init_plugins(cls): + def init_plugins(self): """Collect and initialize the plugins available for the HERMES command.""" # Collect all entry points for this group (i.e., all valid plug-ins for the step) - entry_point_group = f"hermes.{cls.command_name}" - group_plugins = { - entry_point.name: entry_point.load() - for entry_point in metadata.entry_points(group=entry_point_group) - } - - # Collect the plug-in specific configurations - cls.derive_settings_class({ - plugin_name: plugin_class.settings_class - for plugin_name, plugin_class in group_plugins.items() - if hasattr(plugin_class, "settings_class") and plugin_class.settings_class is not None - }) + entry_point_group = f"hermes.{self.command_name}" + group_plugins = {} + group_settings = {} + + for entry_point in metadata.entry_points(group=entry_point_group): + plugin_cls = entry_point.load() + + group_plugins[entry_point.name] = plugin_cls + if hasattr(plugin_cls, 'settings_class') and plugin_cls.settings_class is not None: + group_settings[entry_point.name] = plugin_cls.settings_class + + self.derive_settings_class(group_settings) return group_plugins @classmethod - def derive_settings_class(cls, setting_types: Dict[str, Type]) -> None: + def derive_settings_class(cls, setting_types: dict[str, Type]) -> None: """Build a new Pydantic data model class for configuration. This will create a new class that includes all settings from the plugins available. @@ -131,13 +131,10 @@ def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: def load_settings(self, args: argparse.Namespace): """Load settings from the configuration file (passed in from command line).""" - try: - toml_data = toml.load(args.path / args.config) - self.root_settings = HermesCommand.settings_class.model_validate(toml_data) - self.settings = getattr(self.root_settings, self.command_name) - except FileNotFoundError as e: - self.log.error("hermes.toml was not found. Try to run 'hermes init' first or create one manually.") - raise e # This will lead to our default error message & sys.exit + + toml_data = toml.load(args.path / args.config) + self.root_settings = HermesCommand.settings_class.model_validate(toml_data) + self.settings = getattr(self.root_settings, self.command_name) def patch_settings(self, args: argparse.Namespace): """Process command line options for the settings.""" @@ -164,7 +161,9 @@ def __call__(self, args: argparse.Namespace): class HermesPlugin(abc.ABC): """Base class for all HERMES plugins.""" - settings_class: Optional[Type] = None + pluing_node = None + + settings_class: Union[Type, None] = None @abc.abstractmethod def __call__(self, command: HermesCommand) -> None: @@ -202,27 +201,3 @@ def __call__(self, args: argparse.Namespace) -> None: # Otherwise, simply show the general help and exit (cleanly). self.parser.print_help() self.parser.exit() - - def load_settings(self, args: argparse.Namespace): - """No settings are needed for the help command.""" - pass - - -class HermesVersionSettings(BaseModel): - """Intentionally empty settings class for the version command.""" - pass - - -class HermesVersionCommand(HermesCommand): - """Show HERMES version and exit.""" - - command_name = "version" - settings_class = HermesVersionSettings - - def load_settings(self, args: argparse.Namespace): - """Pass loading settings as not necessary for this command.""" - pass - - def __call__(self, args: argparse.Namespace) -> None: - self.log.info(metadata.version("hermes")) - self.parser.exit() diff --git a/src/hermes/commands/harvest/base.py b/src/hermes/commands/harvest/base.py index 59fad8f1..28a62301 100644 --- a/src/hermes/commands/harvest/base.py +++ b/src/hermes/commands/harvest/base.py @@ -5,14 +5,13 @@ # SPDX-FileContributor: Michael Meinel import argparse -import typing as t -from datetime import datetime from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.model.context import HermesContext, HermesHarvestContext -from hermes.model.error import HermesValidationError, HermesMergeError +from hermes.model.context_manager import HermesContext +from hermes.model.error import HermesValidationError +from hermes.model import SoftwareMetadata class HermesHarvestPlugin(HermesPlugin): @@ -21,11 +20,11 @@ class HermesHarvestPlugin(HermesPlugin): TODO: describe the harvesting process and how this is mapped to this plugin. """ - def __call__(self, command: HermesCommand) -> t.Tuple[t.Dict, t.Dict]: + def __call__(self, command: HermesCommand) -> tuple[SoftwareMetadata, dict]: pass -class _HarvestSettings(BaseModel): +class HarvestSettings(BaseModel): """Generic harvesting settings.""" sources: list[str] = [] @@ -35,32 +34,31 @@ class HermesHarvestCommand(HermesCommand): """ Harvest metadata from configured sources. """ command_name = "harvest" - settings_class = _HarvestSettings + settings_class = HarvestSettings def __call__(self, args: argparse.Namespace) -> None: self.args = args - ctx = HermesContext() # Initialize the harvest cache directory here to indicate the step ran - ctx.init_cache("harvest") + ctx = HermesContext() + ctx.prepare_step('harvest') for plugin_name in self.settings.sources: + plugin_cls = self.plugins[plugin_name] + try: - plugin_func = self.plugins[plugin_name]() - harvested_data, tags = plugin_func(self) - - with HermesHarvestContext(ctx, plugin_name) as harvest_ctx: - harvest_ctx.update_from(harvested_data, - plugin=plugin_name, - timestamp=datetime.now().isoformat(), **tags) - for _key, ((_value, _tag), *_trace) in harvest_ctx._data.items(): - if any(v != _value and t == _tag for v, t in _trace): - raise HermesMergeError(_key, None, _value) - - except KeyError as e: - self.log.error("Plugin '%s' not found.", plugin_name) - self.errors.append(e) + # Load plugin and run the harvester + plugin_func = plugin_cls() + harvested_data = plugin_func(self) + + with ctx[plugin_name] as plugin_ctx: + plugin_ctx["codemeta"] = harvested_data.compact() + plugin_ctx["context"] = {"@context": harvested_data.full_context} + + plugin_ctx["expanded"] = harvested_data.ld_value except HermesValidationError as e: self.log.error("Error while executing %s: %s", plugin_name, e) self.errors.append(e) + + ctx.finalize_step('harvest') diff --git a/src/hermes/commands/harvest/cff.py b/src/hermes/commands/harvest/cff.py index e333b27c..6c2b6594 100644 --- a/src/hermes/commands/harvest/cff.py +++ b/src/hermes/commands/harvest/cff.py @@ -9,16 +9,16 @@ import logging import pathlib import urllib.request -import typing as t from pydantic import BaseModel from ruamel.yaml import YAML import jsonschema from cffconvert import Citation +from typing import Any, Union -from hermes.model.context import ContextPath -from hermes.model.errors import HermesValidationError +from hermes.model.error import HermesValidationError from hermes.commands.harvest.base import HermesHarvestPlugin, HermesHarvestCommand +from hermes.model import SoftwareMetadata # TODO: should this be configurable via a CLI option? @@ -35,7 +35,7 @@ class CffHarvestSettings(BaseModel): class CffHarvestPlugin(HermesHarvestPlugin): settings_class = CffHarvestSettings - def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: + def __call__(self, command: HermesHarvestCommand) -> tuple[SoftwareMetadata, dict]: # Get source files cff_file = self._get_single_cff(command.args.path) if not cff_file: @@ -44,23 +44,24 @@ def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: # Read the content cff_data = cff_file.read_text() - - # Validate the content to be correct CFF cff_dict = self._load_cff_from_file(cff_data) - if command.settings.cff.enable_validation and not self._validate(cff_file, cff_dict): - raise HermesValidationError(cff_file) + if command.settings.cff.enable_validation: + # Validate the content to be correct CFF + if not self._validate(cff_file, cff_dict): + raise HermesValidationError(cff_file) # Convert to CodeMeta using cffconvert codemeta_dict = self._convert_cff_to_codemeta(cff_data) - # TODO Replace the following temp patch for #112 once there is a new cffconvert version with cffconvert#309 - codemeta_dict = self._patch_author_emails(cff_dict, codemeta_dict) if "version" in codemeta_dict: codemeta_dict["version"] = str(codemeta_dict["version"]) # Convert Version to string - return codemeta_dict, {'local_path': str(cff_file)} + # TODO Replace the following temp patch for #112 once there is a new cffconvert version with cffconvert#309 + codemeta_dict = self._patch_author_emails(cff_dict, codemeta_dict) + ld_codemeta = SoftwareMetadata(codemeta_dict, extra_vocabs={'legalName': {'@id': "http://schema.org/name"}}) + return ld_codemeta, {} - def _load_cff_from_file(self, cff_data: str) -> t.Any: + def _load_cff_from_file(self, cff_data: str) -> Any: yaml = YAML(typ='safe') yaml.constructor.yaml_constructors[u'tag:yaml.org,2002:timestamp'] = yaml.constructor.yaml_constructors[ u'tag:yaml.org,2002:str'] @@ -73,11 +74,11 @@ def _patch_author_emails(self, cff: dict, codemeta: dict) -> dict: codemeta["author"][i]["email"] = author["email"] return codemeta - def _convert_cff_to_codemeta(self, cff_data: str) -> t.Any: + def _convert_cff_to_codemeta(self, cff_data: str) -> Any: codemeta_str = Citation(cff_data).as_codemeta() return json.loads(codemeta_str) - def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool: + def _validate(self, cff_file: pathlib.Path, cff_dict: dict) -> bool: audit_log = logging.getLogger('audit.cff') cff_schema_url = f'https://citation-file-format.github.io/{_CFF_VERSION}/schema.json' @@ -93,7 +94,7 @@ def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool: audit_log.warning('!!! warning "%s is not valid according to <%s>"', cff_file, cff_schema_url) for error in errors: - path = ContextPath.make(error.absolute_path or ['root']) + path = error.absolute_path or ['root'] audit_log.info(' Invalid input for `%s`.', str(path)) audit_log.info(' !!! message "%s"', error.message) audit_log.debug(' !!! value "%s"', error.instance) @@ -108,7 +109,7 @@ def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool: audit_log.info('- Found valid Citation File Format file at: %s', cff_file) return True - def _get_single_cff(self, path: pathlib.Path) -> t.Optional[pathlib.Path]: + def _get_single_cff(self, path: pathlib.Path) -> Union[pathlib.Path, None]: # Find CFF files in directories and subdirectories cff_file = path / 'CITATION.cff' if cff_file.exists(): diff --git a/src/hermes/model/__init__.py b/src/hermes/model/__init__.py index 4a4bca25..febdb0ff 100644 --- a/src/hermes/model/__init__.py +++ b/src/hermes/model/__init__.py @@ -2,4 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 +# This is an interface file that only provides a public interface, hence linter is disabled to avoid +# "unused import" errors. +# flake8: noqa + from hermes.model.api import SoftwareMetadata diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index e69de29b..11101722 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -0,0 +1,56 @@ +import pytest +from hermes.commands.harvest.cff import CffHarvestPlugin, CffHarvestSettings +from hermes.model import SoftwareMetadata + + +@pytest.mark.parametrize( + "cff, res", + [ + ( + """cff-version: 1.2.0 +title: Temp\nmessage: >- + If you use this software, please cite it using the + metadata from this file. +type: software +authors: + - given-names: Max + family-names: Mustermann + email: max@muster.mann""", + SoftwareMetadata({ + "@type": "SoftwareSourceCode", + "schema:author": { + "@list": [{ + "@type": "Person", + "email": ["max@muster.mann"], + "familyName": ["Mustermann"], + "givenName": ["Max"] + }] + }, + "schema:name": ["Temp"] + }) + ) + ] +) +def test_cff_harvest(tmp_path, cff, res): + class Args: + def __init__(self, path): + self.path = path + + class Settings: + def __init__(self, cff_settings): + self.cff = cff_settings + + class Command: + def __init__(self, args, settings): + self.args = args + self.settings = settings + + command = Command(Args(tmp_path), Settings(CffHarvestSettings())) + + cff_file = tmp_path / "CITATION.cff" + cff_file.write_text(cff) + + result = CffHarvestPlugin().__call__(command) + # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts + # after merge with refactor/data-model and/or refactor/423-implement-public-api + assert result[0].data_dict == res.data_dict From 4920090d2db1793ccedd6fab6b710ed3ba1a24ee Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 16 Jan 2026 10:58:24 +0100 Subject: [PATCH 03/10] added more tests --- src/hermes/commands/harvest/codemeta.py | 11 ++- test/hermes_test/model/test_api_e2e.py | 122 +++++++++++++++++++++++- 2 files changed, 125 insertions(+), 8 deletions(-) diff --git a/src/hermes/commands/harvest/codemeta.py b/src/hermes/commands/harvest/codemeta.py index b75bb002..5f211222 100644 --- a/src/hermes/commands/harvest/codemeta.py +++ b/src/hermes/commands/harvest/codemeta.py @@ -8,15 +8,16 @@ import glob import json import pathlib -import typing as t +from typing import Union from hermes.commands.harvest.base import HermesHarvestCommand, HermesHarvestPlugin from hermes.commands.harvest.util.validate_codemeta import validate_codemeta -from hermes.model.errors import HermesValidationError +from hermes.model.error import HermesValidationError +from hermes.model import SoftwareMetadata class CodeMetaHarvestPlugin(HermesHarvestPlugin): - def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: + def __call__(self, command: HermesHarvestCommand) -> tuple[SoftwareMetadata, dict]: """ Implementation of a harvester that provides data from a codemeta.json file format. @@ -39,7 +40,7 @@ def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: raise HermesValidationError(codemeta_file) codemeta = json.loads(codemeta_str) - return codemeta, {'local_path': str(codemeta_file)} + return SoftwareMetadata(codemeta), {'local_path': str(codemeta_file)} def _validate(self, codemeta_file: pathlib.Path) -> bool: with open(codemeta_file, "r") as fi: @@ -55,7 +56,7 @@ def _validate(self, codemeta_file: pathlib.Path) -> bool: return True - def _get_single_codemeta(self, path: pathlib.Path) -> t.Optional[pathlib.Path]: + def _get_single_codemeta(self, path: pathlib.Path) -> Union[pathlib.Path, None]: # Find CodeMeta files in directories and subdirectories # TODO: Do we really want to search recursive? Maybe add another option to enable pointing to a single file? # (So this stays "convention over configuration") diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 11101722..67b40f7b 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -1,5 +1,12 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + import pytest from hermes.commands.harvest.cff import CffHarvestPlugin, CffHarvestSettings +from hermes.commands.harvest.codemeta import CodeMetaHarvestPlugin from hermes.model import SoftwareMetadata @@ -18,7 +25,7 @@ email: max@muster.mann""", SoftwareMetadata({ "@type": "SoftwareSourceCode", - "schema:author": { + "author": { "@list": [{ "@type": "Person", "email": ["max@muster.mann"], @@ -26,7 +33,80 @@ "givenName": ["Max"] }] }, - "schema:name": ["Temp"] + "name": ["Temp"] + }) + ), + ( + """# SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR), Helmholtz-Zentrum Dresden-Rossendorf +# +# SPDX-License-Identifier: CC0-1.0 + +# SPDX-FileContributor: Michael Meinel + +cff-version: 1.2.0 +title: hermes +message: >- + If you use this software, please cite it using the + metadata from this file. +version: 0.9.0 +license: "Apache-2.0" +abstract: "Tool to automate software publication. Not stable yet." +type: software +authors: + - given-names: Michael + family-names: Meinel + email: michael.meinel@dlr.de + affiliation: German Aerospace Center (DLR) + orcid: "https://orcid.org/0000-0001-6372-3853" + - given-names: Stephan + family-names: Druskat + email: stephan.druskat@dlr.de + affiliation: German Aerospace Center (DLR) + orcid: "https://orcid.org/0000-0003-4925-7248" +identifiers: + - type: doi + value: 10.5281/zenodo.13221384 + description: Version 0.8.1b1 +""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "https://orcid.org/0000-0001-6372-3853", + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "michael.meinel@dlr.de"}], + "http://schema.org/familyName": [{"@value": "Meinel"}], + "http://schema.org/givenName": [{"@value": "Michael"}] + }, + { + "@id": "https://orcid.org/0000-0003-4925-7248", + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "stephan.druskat@dlr.de"}], + "http://schema.org/familyName": [{"@value": "Druskat"}], + "http://schema.org/givenName": [{"@value": "Stephan"}] + } + ] + } + ], + "http://schema.org/description": [{"@value": "Tool to automate software publication. Not stable yet."}], + "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + "http://schema.org/name": [{"@value": "hermes"}], + "http://schema.org/version": [{"@value": "0.9.0"}] }) ) ] @@ -51,6 +131,42 @@ def __init__(self, args, settings): cff_file.write_text(cff) result = CffHarvestPlugin().__call__(command) - # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts + # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts (in multiple places) # after merge with refactor/data-model and/or refactor/423-implement-public-api assert result[0].data_dict == res.data_dict + + +@pytest.mark.parametrize( + "codemeta, res", + [ + ( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "description": "for testing", + "name": "Test" +}""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }) + ) + ] +) +def test_codemeta_harvest(tmp_path, codemeta, res): + class Args: + def __init__(self, path): + self.path = path + + class Command: + def __init__(self, args): + self.args = args + + command = Command(Args(tmp_path)) + + codemeta_file = tmp_path / "codemeta.json" + codemeta_file.write_text(codemeta) + + result = CodeMetaHarvestPlugin().__call__(command) + assert result[0].data_dict == res.data_dict From 38ef40e67e1a0dc0ab031eab7d28eab2dec0a49a Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 16 Jan 2026 14:01:15 +0100 Subject: [PATCH 04/10] refactored end to end tests --- src/hermes/commands/base.py | 1 - src/hermes/commands/cli.py | 24 +-- src/hermes/commands/harvest/base.py | 6 +- test/hermes_test/model/test_api_e2e.py | 249 +++++++++++++++++++++---- 4 files changed, 232 insertions(+), 48 deletions(-) diff --git a/src/hermes/commands/base.py b/src/hermes/commands/base.py index d64581de..2d182267 100644 --- a/src/hermes/commands/base.py +++ b/src/hermes/commands/base.py @@ -16,7 +16,6 @@ from pydantic_settings import BaseSettings, SettingsConfigDict - class HermesSettings(BaseSettings): """Root class for HERMES configuration model.""" diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index 06a18ca7..db109a5e 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -12,9 +12,11 @@ import sys from hermes import logger -from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, - HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, - HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) +# FIXME: remove comments after new implementation of modules is available +# from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, +# HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, +# HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) +from hermes.commands import HermesHarvestCommand from hermes.commands.base import HermesCommand @@ -36,15 +38,15 @@ def main() -> None: setting_types = {} for command in ( - HermesHelpCommand(parser), - HermesVersionCommand(parser), - HermesInitCommand(parser), - HermesCleanCommand(parser), + # HermesHelpCommand(parser), + # HermesVersionCommand(parser), + # HermesInitCommand(parser), + # HermesCleanCommand(parser), HermesHarvestCommand(parser), - HermesProcessCommand(parser), - HermesCurateCommand(parser), - HermesDepositCommand(parser), - HermesPostprocessCommand(parser), + # HermesProcessCommand(parser), + # HermesCurateCommand(parser), + # HermesDepositCommand(parser), + # HermesPostprocessCommand(parser), ): if command.settings_class is not None: setting_types[command.command_name] = command.settings_class diff --git a/src/hermes/commands/harvest/base.py b/src/hermes/commands/harvest/base.py index 28a62301..19ccc623 100644 --- a/src/hermes/commands/harvest/base.py +++ b/src/hermes/commands/harvest/base.py @@ -52,10 +52,10 @@ def __call__(self, args: argparse.Namespace) -> None: harvested_data = plugin_func(self) with ctx[plugin_name] as plugin_ctx: - plugin_ctx["codemeta"] = harvested_data.compact() - plugin_ctx["context"] = {"@context": harvested_data.full_context} + plugin_ctx["codemeta"] = harvested_data[0].compact() + plugin_ctx["context"] = {"@context": harvested_data[0].full_context} - plugin_ctx["expanded"] = harvested_data.ld_value + plugin_ctx["expanded"] = harvested_data[0].ld_value except HermesValidationError as e: self.log.error("Error while executing %s: %s", plugin_name, e) diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 67b40f7b..650747e0 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -5,9 +5,9 @@ # SPDX-FileContributor: Michael Fritzsche import pytest -from hermes.commands.harvest.cff import CffHarvestPlugin, CffHarvestSettings -from hermes.commands.harvest.codemeta import CodeMetaHarvestPlugin -from hermes.model import SoftwareMetadata +import sys +from hermes.model import context_manager, SoftwareMetadata +from hermes.commands import cli @pytest.mark.parametrize( @@ -108,32 +108,93 @@ "http://schema.org/name": [{"@value": "hermes"}], "http://schema.org/version": [{"@value": "0.9.0"}] }) + ), + ( + """cff-version: 1.2.0 +title: Test +message: None +type: software +authors: + - given-names: Test + family-names: Testi + email: test.testi@test.testi + affiliation: German Aerospace Center (DLR) +identifiers: + - type: url + value: "https://arxiv.org/abs/2201.09015" + - type: doi + value: 10.5281/zenodo.13221384 +repository-code: "https://github.com/softwarepub/hermes" +abstract: for testing +url: "https://docs.software-metadata.pub/en/latest" +keywords: + - testing + - more testing +license: Apache-2.0 +version: 9.0.1 +date-released: "2026-01-16" """, + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@list": [ + { + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/url": [ + {"@id": 'https://arxiv.org/abs/2201.09015'}, + {"@id": "https://docs.software-metadata.pub/en/latest"} + ], + "http://schema.org/version": [{"@value": "9.0.1"}] + }) ) ] ) -def test_cff_harvest(tmp_path, cff, res): - class Args: - def __init__(self, path): - self.path = path - - class Settings: - def __init__(self, cff_settings): - self.cff = cff_settings - - class Command: - def __init__(self, args, settings): - self.args = args - self.settings = settings - - command = Command(Args(tmp_path), Settings(CffHarvestSettings())) - +def test_cff_harvest(tmp_path, monkeypatch, cff, res): + monkeypatch.chdir(tmp_path) cff_file = tmp_path / "CITATION.cff" cff_file.write_text(cff) - result = CffHarvestPlugin().__call__(command) + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"cff\" ]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit: + manager = context_manager.HermesContext() + manager.prepare_step("harvest") + with manager["cff"] as cache: + result = SoftwareMetadata(cache["expanded"][0], cache["context"]["@context"][1]) + manager.finalize_step("harvest") + finally: + sys.argv = orig_argv + # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts (in multiple places) # after merge with refactor/data-model and/or refactor/423-implement-public-api - assert result[0].data_dict == res.data_dict + assert result.data_dict == res.data_dict @pytest.mark.parametrize( @@ -151,22 +212,144 @@ def __init__(self, args, settings): "http://schema.org/description": [{"@value": "for testing"}], "http://schema.org/name": [{"@value": "Test"}] }) + ), + ( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "applicationCategory": "Testing", + "author": [ + { + "id": "_:author_1", + "type": "Person", + "email": "test.testi@test.testi", + "familyName": "Testi", + "givenName": "Test" + } + ], + "codeRepository": "https://github.com/softwarepub/hermes", + "contributor": { + "id": "_:contributor_1", + "type": "Person", + "email": "test.testi@test.testi", + "familyName": "Testi", + "givenName": "Test" + }, + "dateCreated": "2026-01-16", + "dateModified": "2026-01-16", + "datePublished": "2026-01-16", + "description": "for testing", + "funder": { + "type": "Organization", + "name": "TestsTests" + }, + "keywords": [ + "testing", + "more testing" + ], + "license": [ + "https://spdx.org/licenses/Adobe-2006", + "https://spdx.org/licenses/Abstyles", + "https://spdx.org/licenses/AGPL-1.0-only" + ], + "name": "Test", + "operatingSystem": "Windows", + "programmingLanguage": [ + "Python", + "Python 3" + ], + "relatedLink": "https://docs.software-metadata.pub/en/latest", + "schema:releaseNotes": "get it now", + "version": "1.1.1", + "developmentStatus": "abandoned", + "funding": "none :(", + "codemeta:isSourceCodeOf": { + "id": "HERMES" + }, + "issueTracker": "https://github.com/softwarepub/hermes/issues", + "referencePublication": "https://arxiv.org/abs/2201.09015" +}""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/applicationCategory": [{"@id": "Testing"}], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "_:author_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/contributor": [ + { + "@id": "_:contributor_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ], + "http://schema.org/dateCreated": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/dateModified": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/funder": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "TestsTests"}] + } + ], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [ + {"@id": "https://spdx.org/licenses/Adobe-2006"}, + {"@id": "https://spdx.org/licenses/Abstyles"}, + {"@id": "https://spdx.org/licenses/AGPL-1.0-only"} + ], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/operatingSystem": [{"@value": "Windows"}], + "http://schema.org/programmingLanguage": [{"@value": "Python"}, {"@value": "Python 3"}], + "http://schema.org/relatedLink": [{"@id": "https://docs.software-metadata.pub/en/latest"}], + "http://schema.org/releaseNotes": [{"@value": "get it now"}], + "http://schema.org/version": [{"@value": "1.1.1"}], + "https://codemeta.github.io/terms/developmentStatus": [{"@id": "abandoned"}], + "https://codemeta.github.io/terms/funding": [{"@value": "none :("}], + "https://codemeta.github.io/terms/isSourceCodeOf": [{"@id": "HERMES"}], + "https://codemeta.github.io/terms/issueTracker": [ + {"@id": "https://github.com/softwarepub/hermes/issues"} + ], + "https://codemeta.github.io/terms/referencePublication": [{"@id": "https://arxiv.org/abs/2201.09015"}] + }) ) ] ) -def test_codemeta_harvest(tmp_path, codemeta, res): - class Args: - def __init__(self, path): - self.path = path - - class Command: - def __init__(self, args): - self.args = args - - command = Command(Args(tmp_path)) +def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): + monkeypatch.chdir(tmp_path) codemeta_file = tmp_path / "codemeta.json" codemeta_file.write_text(codemeta) - result = CodeMetaHarvestPlugin().__call__(command) - assert result[0].data_dict == res.data_dict + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"codemeta\" ]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit: + manager = context_manager.HermesContext() + manager.prepare_step("harvest") + with manager["codemeta"] as cache: + result = SoftwareMetadata(cache["expanded"][0], cache["context"]["@context"][1]) + manager.finalize_step("harvest") + finally: + sys.argv = orig_argv + + assert result.data_dict == res.data_dict From ddcd26a44777c3d6d8ab58afa3179fb0101ced3c Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 19 Jan 2026 08:31:36 +0100 Subject: [PATCH 05/10] updated creation of SoftwareMetadata objects in e2e tests --- test/hermes_test/model/test_api_e2e.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 650747e0..f4ec7fd6 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -187,7 +187,7 @@ def test_cff_harvest(tmp_path, monkeypatch, cff, res): manager = context_manager.HermesContext() manager.prepare_step("harvest") with manager["cff"] as cache: - result = SoftwareMetadata(cache["expanded"][0], cache["context"]["@context"][1]) + result = SoftwareMetadata(cache["codemeta"]) manager.finalize_step("harvest") finally: sys.argv = orig_argv @@ -347,7 +347,7 @@ def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): manager = context_manager.HermesContext() manager.prepare_step("harvest") with manager["codemeta"] as cache: - result = SoftwareMetadata(cache["expanded"][0], cache["context"]["@context"][1]) + result = SoftwareMetadata(cache["codemeta"]) manager.finalize_step("harvest") finally: sys.argv = orig_argv From 6c3ba13f4dcb6ff714fbc76c4bca596e91116abd Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 19 Jan 2026 11:00:38 +0100 Subject: [PATCH 06/10] started to add support for deposit step and added useful method for SoftwareMetadata --- src/hermes/commands/__init__.py | 2 +- src/hermes/commands/cli.py | 4 +- src/hermes/commands/deposit/base.py | 41 ++++++---------- src/hermes/commands/deposit/file.py | 9 +--- src/hermes/commands/deposit/invenio.py | 68 ++++++++++++-------------- src/hermes/error.py | 2 +- src/hermes/model/api.py | 21 +++++++- 7 files changed, 72 insertions(+), 75 deletions(-) diff --git a/src/hermes/commands/__init__.py b/src/hermes/commands/__init__.py index 14f77741..278faddf 100644 --- a/src/hermes/commands/__init__.py +++ b/src/hermes/commands/__init__.py @@ -15,5 +15,5 @@ # from hermes.commands.curate.base import HermesCurateCommand from hermes.commands.harvest.base import HermesHarvestCommand # from hermes.commands.process.base import HermesProcessCommand -# from hermes.commands.deposit.base import HermesDepositCommand +from hermes.commands.deposit.base import HermesDepositCommand # from hermes.commands.postprocess.base import HermesPostprocessCommand diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index db109a5e..0ec2d1ae 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -16,7 +16,7 @@ # from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, # HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, # HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) -from hermes.commands import HermesHarvestCommand +from hermes.commands import HermesDepositCommand, HermesHarvestCommand from hermes.commands.base import HermesCommand @@ -45,7 +45,7 @@ def main() -> None: HermesHarvestCommand(parser), # HermesProcessCommand(parser), # HermesCurateCommand(parser), - # HermesDepositCommand(parser), + HermesDepositCommand(parser), # HermesPostprocessCommand(parser), ): if command.settings_class is not None: diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index 75018579..800c15e9 100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -7,15 +7,13 @@ import abc import argparse -import json -import sys from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.model.context import CodeMetaContext -from hermes.model.path import ContextPath -from hermes.model.errors import HermesValidationError +from hermes.model.context_manager import HermesContext +from hermes.model import SoftwareMetadata +from hermes.model.error import HermesValidationError class BaseDepositPlugin(HermesPlugin): @@ -24,16 +22,19 @@ class BaseDepositPlugin(HermesPlugin): TODO: describe workflow... needs refactoring to be less stateful! """ - def __init__(self, command, ctx): - self.command = command - self.ctx = ctx - def __call__(self, command: HermesCommand) -> None: """Initiate the deposition process. This calls a list of additional methods on the class, none of which need to be implemented. """ self.command = command + self.ctx = HermesContext() + + self.ctx.prepare_step("curate") + self.metadata = SoftwareMetadata.load_from_cache(self.ctx, "result") + self.ctx.finalize_step("curate") + + self.ctx.prepare_step("deposit") self.prepare() self.map_metadata() @@ -106,7 +107,7 @@ def publish(self) -> None: pass -class _DepositSettings(BaseModel): +class DepositSettings(BaseModel): """Generic deposition settings.""" target: str = "" @@ -116,7 +117,7 @@ class HermesDepositCommand(HermesCommand): """ Deposit the curated metadata to repositories. """ command_name = "deposit" - settings_class = _DepositSettings + settings_class = DepositSettings def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: command_parser.add_argument('--file', '-f', nargs=1, action='append', @@ -128,26 +129,12 @@ def __call__(self, args: argparse.Namespace) -> None: self.args = args plugin_name = self.settings.target - ctx = CodeMetaContext() - codemeta_file = ctx.get_cache("curate", ctx.hermes_name) - if not codemeta_file.exists(): - self.log.error("You must run the 'curate' command before deposit") - sys.exit(1) - - codemeta_path = ContextPath("codemeta") - with open(codemeta_file) as codemeta_fh: - ctx.update(codemeta_path, json.load(codemeta_fh)) - try: - plugin_func = self.plugins[plugin_name](self, ctx) - + plugin_func = self.plugins[plugin_name]() + plugin_func(self) except KeyError as e: self.log.error("Plugin '%s' not found.", plugin_name) self.errors.append(e) - - try: - plugin_func(self) - except HermesValidationError as e: self.log.error("Error while executing %s: %s", plugin_name, e) self.errors.append(e) diff --git a/src/hermes/commands/deposit/file.py b/src/hermes/commands/deposit/file.py index 6c5d6419..5ce8d8e0 100644 --- a/src/hermes/commands/deposit/file.py +++ b/src/hermes/commands/deposit/file.py @@ -11,22 +11,17 @@ from pydantic import BaseModel from hermes.commands.deposit.base import BaseDepositPlugin -from hermes.model.path import ContextPath class FileDepositSettings(BaseModel): - filename: str = 'hermes.json' + filename: str = 'codemeta.json' class FileDepositPlugin(BaseDepositPlugin): settings_class = FileDepositSettings - def map_metadata(self) -> None: - self.ctx.update(ContextPath.parse('deposit.file'), self.ctx['codemeta']) - def publish(self) -> None: file_config = self.command.settings.file - output_data = self.ctx['deposit.file'] with open(file_config.filename, 'w') as deposition_file: - json.dump(output_data, deposition_file, indent=2) + json.dump(self.metadata.compact(), deposition_file, indent=2) diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index 69fb87a0..aafe51b7 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -17,11 +17,10 @@ import requests from pydantic import BaseModel -from hermes.commands.deposit.base import BaseDepositPlugin, HermesDepositCommand +from hermes.commands.deposit.base import BaseDepositPlugin from hermes.commands.deposit.error import DepositionUnauthorizedError from hermes.error import MisconfigurationError -from hermes.model.context import CodeMetaContext -from hermes.model.path import ContextPath +from hermes.model.context_manager import HermesContext from hermes.utils import hermes_doi, hermes_user_agent @@ -258,11 +257,13 @@ class InvenioDepositPlugin(BaseDepositPlugin): invenio_resolver_class = InvenioResolver settings_class = InvenioDepositSettings - def __init__(self, command: HermesDepositCommand, ctx: CodeMetaContext, client=None, resolver=None) -> None: - super().__init__(command, ctx) + def __init__(self) -> None: + super().__init__() - self.invenio_context_path = ContextPath.parse(f"deposit.{self.platform_name}") self.invenio_ctx = None + + def __call__(self, command, *, client=None, resolver=None): + self.command = command self.config = getattr(self.command.settings, self.platform_name) if client is None: @@ -292,7 +293,9 @@ def __init__(self, command: HermesDepositCommand, ctx: CodeMetaContext, client=N self.resolver = resolver or self.invenio_resolver_class(self.client) self.links = {} - # TODO: Populate some data structure here? Or move more of this into __init__? + super().__call__(command) + + # TODO: Populate some data structure here? Or move more of this into __init__.py? def prepare(self) -> None: """Prepare the deposition on an Invenio-based platform. @@ -305,49 +308,42 @@ def prepare(self) -> None: - check access modalities (access right, access conditions, embargo data, existence of license) - check whether required configuration options are present - - update ``self.ctx`` with metadata collected during the checks + - update ``self.metadata`` with metadata collected during the checks """ rec_id = self.config.record_id doi = self.config.doi - try: - codemeta_identifier = self.ctx["codemeta.identifier"] - except KeyError: - codemeta_identifier = None - + codemeta_identifier = self.metadata.get("identifier", None) rec_id, rec_meta = self.resolver.resolve_latest_id( record_id=rec_id, doi=doi, codemeta_identifier=codemeta_identifier ) - version = self.ctx["codemeta"].get("version") + version = self.metadata["version"] if rec_meta and (version == rec_meta.get("version")): raise ValueError(f"Version {version} already deposited.") - self.ctx.update(self.invenio_context_path['latestRecord'], {'id': rec_id, 'metadata': rec_meta}) - - license = self._get_license_identifier() - self.ctx.update(self.invenio_context_path["license"], license) - - communities = self._get_community_identifiers() - self.ctx.update(self.invenio_context_path["communities"], communities) + deposition_data = {} + deposition_data["latestRecord"] = {'id': rec_id, 'metadata': rec_meta} + deposition_data["license"] = self._get_license_identifier() + deposition_data["communities"] = self._get_community_identifiers() access_right, embargo_date, access_conditions = self._get_access_modalities(license) - self.ctx.update(self.invenio_context_path["access_right"], access_right) - self.ctx.update(self.invenio_context_path["embargo_date"], embargo_date) - self.ctx.update(self.invenio_context_path["access_conditions"], access_conditions) + deposition_data["access_right"] = access_right + deposition_data["embargo_date"] = embargo_date + deposition_data["access_conditions"] = access_conditions - self.invenio_ctx = self.ctx[self.invenio_context_path] + self.invenio_ctx = deposition_data def map_metadata(self) -> None: """Map the harvested metadata onto the Invenio schema.""" deposition_metadata = self._codemeta_to_invenio_deposition() - self.ctx.update(self.invenio_context_path["depositionMetadata"], deposition_metadata) - - # Store a snapshot of the mapped data within the cache, useful for analysis, debugging, etc - with open(self.ctx.get_cache("deposit", self.platform_name, create=True), 'w') as invenio_json: - json.dump(deposition_metadata, invenio_json, indent=' ') + ctx = HermesContext() + ctx.prepare_step("deposit") + with ctx[self.platform_name] as deposit_ctx: + deposit_ctx["deposit"] = deposition_metadata + ctx.finalize_step("deposit") def is_initial_publication(self) -> bool: latest_record_id = self.invenio_ctx.get("latestRecord", {}).get("id") @@ -426,7 +422,7 @@ def update_metadata(self) -> None: self.links.update(deposit["links"]) _log.debug("Created new version deposit: %s", self.links["html"]) - with open(self.ctx.get_cache('deposit', 'deposit', create=True), 'w') as deposit_file: + with open(self.metadata.get_cache('deposit', 'deposit', create=True), 'w') as deposit_file: json.dump(deposit, deposit_file, indent=4) def delete_artifacts(self) -> None: @@ -505,7 +501,7 @@ def _codemeta_to_invenio_deposition(self) -> dict: differences between Invenio-based platforms. """ - metadata = self.ctx["codemeta"] + metadata = self.metadata license = self.invenio_ctx["license"] communities = self.invenio_ctx["communities"] access_right = self.invenio_ctx["access_right"] @@ -520,7 +516,7 @@ def _codemeta_to_invenio_deposition(self) -> dict: "affiliation": author.get("affiliation", {"legalName": None}).get("legalName"), # Invenio wants "family, given". author.get("name") might not have this format. "name": f"{author.get('familyName')}, {author.get('givenName')}" - if author.get("familyName") and author.get("givenName") + if "familyName" in author and "givenName" in author else author.get("name"), # Invenio expects the ORCID without the URL part "orcid": author.get("@id", "").replace("https://orcid.org/", "") or None, @@ -538,7 +534,7 @@ def _codemeta_to_invenio_deposition(self) -> dict: "affiliation": contributor.get("affiliation", {"legalName": None}).get("legalName"), # Invenio wants "family, given". contributor.get("name") might not have this format. "name": f"{contributor.get('familyName')}, {contributor.get('givenName')}" - if contributor.get("familyName") and contributor.get("givenName") + if "familyName" in contributor and "givenName" in contributor else contributor.get("name"), # Invenio expects the ORCID without the URL part "orcid": contributor.get("@id", "").replace("https://orcid.org/", "") or None, @@ -604,7 +600,7 @@ def _get_license_identifier(self) -> t.Optional[str]: If no license is configured, ``None`` will be returned. """ - license_url = self.ctx["codemeta"].get("license") + license_url = self.metadata["license"] return self.resolver.resolve_license_id(license_url) def _get_community_identifiers(self): @@ -612,7 +608,7 @@ def _get_community_identifiers(self): This function gets the communities to be used for the deposition on an Invenio-based site from the config and checks their validity against the site's API. If one of the - identifiers can not be found on the site, a :class:`HermesMisconfigurationError` is + identifiers can not be found on the site, a :class:`MisconfigurationError` is raised. """ diff --git a/src/hermes/error.py b/src/hermes/error.py index e56c2499..1669ed39 100644 --- a/src/hermes/error.py +++ b/src/hermes/error.py @@ -4,5 +4,5 @@ # SPDX-FileContributor: David Pape -class HermesMisconfigurationError(Exception): +class MisconfigurationError(Exception): pass diff --git a/src/hermes/model/api.py b/src/hermes/model/api.py index 8b079544..24f1405e 100644 --- a/src/hermes/model/api.py +++ b/src/hermes/model/api.py @@ -1,6 +1,7 @@ +from hermes.model.context_manager import HermesContext, HermesContexError from hermes.model.types import ld_dict - from hermes.model.types.ld_context import ALL_CONTEXTS +from hermes.model.types.ld_dict import bundled_loader class SoftwareMetadata(ld_dict): @@ -8,3 +9,21 @@ class SoftwareMetadata(ld_dict): def __init__(self, data: dict = None, extra_vocabs: dict[str, str] = None) -> None: ctx = ALL_CONTEXTS + [{**extra_vocabs}] if extra_vocabs is not None else ALL_CONTEXTS super().__init__([ld_dict.from_dict(data, context=ctx).data_dict if data else {}], context=ctx) + + @classmethod + def load_from_cache(cls, ctx: HermesContext, source: str) -> "SoftwareMetadata": + with ctx[source] as cache: + try: + return SoftwareMetadata(cache["codemeta"]) + except Exception: + pass + try: + context = cache["context"]["@context"] + data = SoftwareMetadata() + data.active_ctx = data.ld_proc.initial_ctx(context, {"documentLoader": bundled_loader}) + data.context = context + for key, value in cache["expanded"][0]: + data[key] = value + return data + except Exception as e: + raise HermesContexError("There is no (valid) data stored in the cache.") from e From feeb16b9263849f14a0cfe9b34bfd6ab12b3e7b7 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 23 Jan 2026 14:18:39 +0100 Subject: [PATCH 07/10] worked on invenio deposit --- src/hermes/commands/deposit/base.py | 29 ++++--- src/hermes/commands/deposit/file.py | 5 +- src/hermes/commands/deposit/invenio.py | 94 ++++++++++++++------- test/hermes_test/model/test_api_e2e.py | 108 +++++++++++++++++++++++++ 4 files changed, 195 insertions(+), 41 deletions(-) diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index 800c15e9..4a996eaa 100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -34,17 +34,25 @@ def __call__(self, command: HermesCommand) -> None: self.metadata = SoftwareMetadata.load_from_cache(self.ctx, "result") self.ctx.finalize_step("curate") - self.ctx.prepare_step("deposit") - self.prepare() - self.map_metadata() + deposit = self.map_metadata() + self.ctx.prepare_step("deposit") + with self.ctx[command.settings.target] as cache: + cache["deposit"] = deposit.compact() + self.ctx.finalize_step("deposit") if self.is_initial_publication(): self.create_initial_version() else: self.create_new_version() - self.update_metadata() + deposit = self.update_metadata() + self.ctx.prepare_step("deposit") + with self.ctx[command.settings.target] as cache: + cache["codemeta"] = deposit.compact() + cache["expanded"] = deposit.ld_value + cache["context"] = {"@context": deposit.full_context} + self.ctx.finalize_step("deposit") self.delete_artifacts() self.upload_artifacts() self.publish() @@ -59,8 +67,8 @@ def prepare(self) -> None: pass @abc.abstractmethod - def map_metadata(self) -> None: - """Map the given metadata to the target schema of the deposition platform. + def map_metadata(self) -> SoftwareMetadata: + """Map the given metadata to the target schema of the deposition platform and return it. When mapping metadata, make sure to add traces to the HERMES software, e.g. via DataCite's ``relatedIdentifier`` using the ``isCompiledBy`` relation. Ideally, the value @@ -89,9 +97,9 @@ def create_new_version(self) -> None: """Create a new version of an existing publication on the target platform.""" pass - def update_metadata(self) -> None: - """Update the metadata of the newly created version.""" - pass + def update_metadata(self) -> SoftwareMetadata: + """Update the metadata of the newly created version and return it even if it hasn't changed.""" + return self.metadata def delete_artifacts(self) -> None: """Delete any superfluous artifacts taken from the previous version of the publication.""" @@ -131,10 +139,11 @@ def __call__(self, args: argparse.Namespace) -> None: try: plugin_func = self.plugins[plugin_name]() - plugin_func(self) except KeyError as e: self.log.error("Plugin '%s' not found.", plugin_name) self.errors.append(e) + try: + plugin_func(self) except HermesValidationError as e: self.log.error("Error while executing %s: %s", plugin_name, e) self.errors.append(e) diff --git a/src/hermes/commands/deposit/file.py b/src/hermes/commands/deposit/file.py index 5ce8d8e0..53876c53 100644 --- a/src/hermes/commands/deposit/file.py +++ b/src/hermes/commands/deposit/file.py @@ -11,7 +11,7 @@ from pydantic import BaseModel from hermes.commands.deposit.base import BaseDepositPlugin - +from hermes.model import SoftwareMetadata class FileDepositSettings(BaseModel): filename: str = 'codemeta.json' @@ -20,6 +20,9 @@ class FileDepositSettings(BaseModel): class FileDepositPlugin(BaseDepositPlugin): settings_class = FileDepositSettings + def map_metadata(self) -> SoftwareMetadata: + return self.metadata + def publish(self) -> None: file_config = self.command.settings.file diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index aafe51b7..2fd13f0d 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -6,21 +6,21 @@ # SPDX-FileContributor: Oliver Bertuch # SPDX-FileContributor: Michael Meinel -import json import logging import pathlib -import typing as t from datetime import date, datetime from pathlib import Path from urllib.parse import urlparse import requests from pydantic import BaseModel +from typing import Union from hermes.commands.deposit.base import BaseDepositPlugin from hermes.commands.deposit.error import DepositionUnauthorizedError from hermes.error import MisconfigurationError -from hermes.model.context_manager import HermesContext +from hermes.model import SoftwareMetadata +from hermes.model.error import HermesValidationError from hermes.utils import hermes_doi, hermes_user_agent @@ -108,7 +108,7 @@ def __init__(self, client=None): def resolve_latest_id( self, record_id=None, doi=None, codemeta_identifier=None - ) -> t.Tuple[t.Optional[str], dict]: + ) -> tuple[Union[str, None], dict]: """ Using the given metadata parameters, figure out the latest record id. @@ -166,7 +166,7 @@ def resolve_doi(self, doi) -> str: *_, record_id = page_url.path.split('/') return record_id - def resolve_record_id(self, record_id: str) -> t.Tuple[str, dict]: + def resolve_record_id(self, record_id: str) -> tuple[str, dict]: """ Find the latest version of a given record. @@ -185,7 +185,7 @@ def resolve_record_id(self, record_id: str) -> t.Tuple[str, dict]: res_json = res.json() return res_json['id'], res_json['metadata'] - def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]: + def resolve_license_id(self, license_url: Union[str, None]) -> Union[str, None]: """Get Invenio license representation from CodeMeta. The license to use is extracted from the ``license`` field in the @@ -218,7 +218,7 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]: parsed_url = urlparse(license_url) url_path = parsed_url.path.rstrip("/") - license_id = url_path.split("/")[-1] + license_id = str.lower(url_path.split("/")[-1]) response = self.client.get_license(license_id) if response.status_code == 404: @@ -230,7 +230,8 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]: @staticmethod def _extract_license_id_from_response(data: dict) -> str: - return data["metadata"]["id"] + # TODO: find correct key, data["metadata"]["id"] did not work for me but data["id"] does + return data["id"] class InvenioDepositSettings(BaseModel): @@ -242,7 +243,7 @@ class InvenioDepositSettings(BaseModel): access_right: str = None embargo_date: str = None access_conditions: str = None - api_paths: t.Dict = {} + api_paths: dict = {} auth_token: str = '' files: list[pathlib.Path] = [] @@ -335,15 +336,10 @@ def prepare(self) -> None: self.invenio_ctx = deposition_data - def map_metadata(self) -> None: - """Map the harvested metadata onto the Invenio schema.""" - - deposition_metadata = self._codemeta_to_invenio_deposition() - ctx = HermesContext() - ctx.prepare_step("deposit") - with ctx[self.platform_name] as deposit_ctx: - deposit_ctx["deposit"] = deposition_metadata - ctx.finalize_step("deposit") + def map_metadata(self) -> SoftwareMetadata: + """Map the harvested metadata onto the Invenio schema and return it.""" + self.invenio_ctx["depositionMetadata"] = self._codemeta_to_invenio_deposition() + return SoftwareMetadata(self.invenio_ctx["depositionMetadata"]) def is_initial_publication(self) -> bool: latest_record_id = self.invenio_ctx.get("latestRecord", {}).get("id") @@ -402,8 +398,8 @@ def related_identifiers(self): }, ] - def update_metadata(self) -> None: - """Update the metadata of a draft.""" + def update_metadata(self) -> SoftwareMetadata: + """Update the metadata of a draft and return it.""" draft_url = self.links["latest_draft"] @@ -422,8 +418,7 @@ def update_metadata(self) -> None: self.links.update(deposit["links"]) _log.debug("Created new version deposit: %s", self.links["html"]) - with open(self.metadata.get_cache('deposit', 'deposit', create=True), 'w') as deposit_file: - json.dump(deposit, deposit_file, indent=4) + return SoftwareMetadata(deposit.get("metadata", {})) def delete_artifacts(self) -> None: """Delete existing file artifacts. @@ -444,7 +439,10 @@ def upload_artifacts(self) -> None: bucket_url = self.links["bucket"] - files = *self.config.files, *[f[0] for f in self.command.args.file] + if self.command.args.file: + files = *self.config.files, *[f[0] for f in self.command.args.file] + else: + files = tuple(*self.config.files) for path_arg in files: path = Path(path_arg) @@ -508,7 +506,22 @@ def _codemeta_to_invenio_deposition(self) -> dict: embargo_date = self.invenio_ctx["embargo_date"] access_conditions = self.invenio_ctx["access_conditions"] - creators = [ + creators = [] + for author in metadata["author"]: + creator = {} + if len(affils := [name for affil in author["affiliation"] for name in affil["legalname"]]) != 0: + creator["affiliation"] = affils + given_names_str = " ".join(author["givenName"]) + names = [f"{family_name}, {given_names_str}" for family_name in author["familyName"]] + names.extend(author["names"]) + if len(names) != 0: + creator["name"] = names + if (id := author.get("@id", None)) is not None: + creator["orcid"] = id.replace("https://orcid.org/", "") + if creator: + creators.append(creator) + + """creators = [ # TODO: Distinguish between @type "Person" and others { k: v for k, v in { @@ -523,7 +536,7 @@ def _codemeta_to_invenio_deposition(self) -> dict: }.items() if v is not None } for author in metadata["author"] - ] + ]""" # This is not used at the moment. See comment below in `deposition_metadata` dict. contributors = [ # noqa: F841 @@ -546,6 +559,27 @@ def _codemeta_to_invenio_deposition(self) -> dict: for contributor in metadata.get("contributor", []) if contributor.get("name") != "GitHub" ] + if len(metadata["name"]) != 1: + _log.error("More than one or zero names for the Software are given.") + raise HermesValidationError("More than one or zerno names for the Software.") + name = metadata["name"][0] + + if len(metadata["schema:description"]) > 1: + _log.error("More than one descriptions of the Software are given.") + raise HermesValidationError("More than one descriptions of the Software are given.") + if len(metadata["schema:description"]) == 1: + description = metadata["schema:description"][0] + else: + description = None + + if len(metadata["schema:version"]) > 1: + _log.error("More than one version of the Software are given.") + raise HermesValidationError("More than one version of the Software are given.") + if len(metadata["schema:version"]) == 1: + version = metadata["schema:version"][0] + else: + version = None + # TODO: Use the fields currently set to `None`. # Some more fields are available but they most likely don't relate to software # publications targeted by hermes. @@ -559,12 +593,12 @@ def _codemeta_to_invenio_deposition(self) -> dict: # TODO: Maybe we want a different date? Then make this configurable. If not, # this can be removed as it defaults to today. "publication_date": date.today().isoformat(), - "title": metadata["name"], + "title": name, "creators": creators, # TODO: Use a real description here. Possible sources could be # `tool.poetry.description` from pyproject.toml or `abstract` from # CITATION.cff. This should then be stored in codemeta description field. - "description": metadata["name"], + "description": description, "access_right": access_right, "license": license, "embargo_date": embargo_date, @@ -590,17 +624,17 @@ def _codemeta_to_invenio_deposition(self) -> dict: "communities": communities, "grants": None, "subjects": None, - "version": metadata.get('version'), + "version": version, }.items() if v is not None} return deposition_metadata - def _get_license_identifier(self) -> t.Optional[str]: + def _get_license_identifier(self) -> Union[str, None]: """Get Invenio license identifier that matches the given license URL. If no license is configured, ``None`` will be returned. """ - license_url = self.metadata["license"] + license_url = self.metadata["license"][0] return self.resolver.resolve_license_id(license_url) def _get_community_identifiers(self): diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index f4ec7fd6..1202572e 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -4,10 +4,21 @@ # SPDX-FileContributor: Michael Fritzsche +import json import pytest import sys from hermes.model import context_manager, SoftwareMetadata from hermes.commands import cli +from pathlib import Path + + +@pytest.fixture +def sandbox_auth(): + path = Path("./../auth.txt") + if not path.exists(): + pytest.skip("Local auth token file does not exist.") + with path.open() as f: + yield f.read() @pytest.mark.parametrize( @@ -353,3 +364,100 @@ def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): sys.argv = orig_argv assert result.data_dict == res.data_dict + + +@pytest.mark.parametrize( + "deposit, res", + [ + 2 * ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }), + ) + ] +) +def test_file_deposit(tmp_path, monkeypatch, deposit, res): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("curate") + with manager["result"] as cache: + cache["codemeta"] = deposit.compact() + manager.finalize_step("curate") + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[deposit]\ntarget = \"file\"") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "deposit", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit: + with open('codemeta.json', 'r') as cache: + result = SoftwareMetadata(json.load(cache)) + finally: + sys.argv = orig_argv + + assert result.data_dict == res.data_dict + + +@pytest.mark.parametrize( + "metadata", + [ + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [{ + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}] + }], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/apache-2.0"}] + }), + ] +) +def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("curate") + with manager["result"] as cache: + cache["codemeta"] = metadata.compact() + manager.finalize_step("curate") + + config_file = tmp_path / "hermes.toml" + config_file.write_text(f"""[deposit] +target = \"invenio\" +[deposit.invenio] +site_url = \"https://sandbox.zenodo.org\" +access_right = \"closed\" +auth_token = \"{sandbox_auth}\" +file = [] +[deposit.invenio.api_paths] +licenses = "api/vocabularies/licenses" +""") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "deposit", "--path", str(tmp_path), "--config", str(config_file), "--initial"] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit: + manager.prepare_step("deposit") + result = SoftwareMetadata.load_from_cache(manager, "invenio") + manager.finalize_step("deposit") + finally: + sys.argv = orig_argv + + assert result.data_dict == metadata.data_dict + +# TODO: +# - handle get() on Softwaremetadata objects in invenio.py +# - Sophie genaueres bezüglich Zeiten für Arbeitszeiterhöhung und -zeitraumerweiterung schicken + From ed0916baa4b9c75983ad3ced5bf9da200b20d0ff Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Mon, 26 Jan 2026 10:12:20 +0100 Subject: [PATCH 08/10] fixed bugs in invenio.py --- src/hermes/commands/deposit/invenio.py | 19 +++++++++++++------ test/hermes_test/model/test_api_e2e.py | 22 ++++++++++------------ 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index 2fd13f0d..01211e5a 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -442,7 +442,8 @@ def upload_artifacts(self) -> None: if self.command.args.file: files = *self.config.files, *[f[0] for f in self.command.args.file] else: - files = tuple(*self.config.files) + files = tuple(self.config.files) + for path_arg in files: path = Path(path_arg) @@ -511,11 +512,17 @@ def _codemeta_to_invenio_deposition(self) -> dict: creator = {} if len(affils := [name for affil in author["affiliation"] for name in affil["legalname"]]) != 0: creator["affiliation"] = affils - given_names_str = " ".join(author["givenName"]) - names = [f"{family_name}, {given_names_str}" for family_name in author["familyName"]] - names.extend(author["names"]) - if len(names) != 0: - creator["name"] = names + if len(author["familyName"]) > 1: + raise HermesValidationError(f"Author has too many family names: {author.to_python()}") + if len(author["familyName"]) == 1: + given_names_str = " ".join(author["givenName"]) + name = f"{author["familyName"][0]}, {given_names_str}" + elif len(author["name"]) != 1: + raise HermesValidationError(f"Author has too many names: {author.to_python()}") + else: + name = author["name"][0] + if len(name) != 0: + creator["name"] = name if (id := author.get("@id", None)) is not None: creator["orcid"] = id.replace("https://orcid.org/", "") if creator: diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 1202572e..fa8f4ac8 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -205,7 +205,7 @@ def test_cff_harvest(tmp_path, monkeypatch, cff, res): # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts (in multiple places) # after merge with refactor/data-model and/or refactor/423-implement-public-api - assert result.data_dict == res.data_dict + assert result == res @pytest.mark.parametrize( @@ -363,7 +363,7 @@ def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): finally: sys.argv = orig_argv - assert result.data_dict == res.data_dict + assert result == res @pytest.mark.parametrize( @@ -402,7 +402,7 @@ def test_file_deposit(tmp_path, monkeypatch, deposit, res): finally: sys.argv = orig_argv - assert result.data_dict == res.data_dict + assert result == res @pytest.mark.parametrize( @@ -432,12 +432,12 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata): config_file = tmp_path / "hermes.toml" config_file.write_text(f"""[deposit] -target = \"invenio\" +target = "invenio" [deposit.invenio] -site_url = \"https://sandbox.zenodo.org\" -access_right = \"closed\" -auth_token = \"{sandbox_auth}\" -file = [] +site_url = "https://sandbox.zenodo.org" +access_right = "closed" +auth_token = "{sandbox_auth}" +files = ["hermes.toml"] [deposit.invenio.api_paths] licenses = "api/vocabularies/licenses" """) @@ -455,9 +455,7 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata): finally: sys.argv = orig_argv - assert result.data_dict == metadata.data_dict + assert result == metadata -# TODO: -# - handle get() on Softwaremetadata objects in invenio.py -# - Sophie genaueres bezüglich Zeiten für Arbeitszeiterhöhung und -zeitraumerweiterung schicken +# TODO: handle get() on Softwaremetadata objects in invenio.py From 382e2c3e3f55c95bf1a9908208cea061eaf7b17e Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 30 Jan 2026 14:07:09 +0100 Subject: [PATCH 09/10] fixed bug and adjusted tests --- src/hermes/commands/deposit/base.py | 15 ++--- src/hermes/commands/deposit/file.py | 9 ++- src/hermes/commands/deposit/invenio.py | 68 ++++++++++++-------- src/hermes/model/types/ld_dict.py | 27 ++++---- test/hermes_test/model/test_api.py | 20 +++--- test/hermes_test/model/test_api_e2e.py | 61 +++++++++--------- test/hermes_test/model/types/test_ld_dict.py | 19 ++++++ 7 files changed, 129 insertions(+), 90 deletions(-) diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index 4a996eaa..6fbf3625 100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -38,7 +38,7 @@ def __call__(self, command: HermesCommand) -> None: deposit = self.map_metadata() self.ctx.prepare_step("deposit") with self.ctx[command.settings.target] as cache: - cache["deposit"] = deposit.compact() + cache["deposit"] = deposit self.ctx.finalize_step("deposit") if self.is_initial_publication(): @@ -48,10 +48,8 @@ def __call__(self, command: HermesCommand) -> None: deposit = self.update_metadata() self.ctx.prepare_step("deposit") - with self.ctx[command.settings.target] as cache: - cache["codemeta"] = deposit.compact() - cache["expanded"] = deposit.ld_value - cache["context"] = {"@context": deposit.full_context} + with self.ctx["deposit"] as cache: + cache["result"] = deposit self.ctx.finalize_step("deposit") self.delete_artifacts() self.upload_artifacts() @@ -67,7 +65,7 @@ def prepare(self) -> None: pass @abc.abstractmethod - def map_metadata(self) -> SoftwareMetadata: + def map_metadata(self) -> dict: """Map the given metadata to the target schema of the deposition platform and return it. When mapping metadata, make sure to add traces to the HERMES software, e.g. via @@ -97,9 +95,10 @@ def create_new_version(self) -> None: """Create a new version of an existing publication on the target platform.""" pass - def update_metadata(self) -> SoftwareMetadata: + @abc.abstractmethod + def update_metadata(self) -> dict: """Update the metadata of the newly created version and return it even if it hasn't changed.""" - return self.metadata + pass def delete_artifacts(self) -> None: """Delete any superfluous artifacts taken from the previous version of the publication.""" diff --git a/src/hermes/commands/deposit/file.py b/src/hermes/commands/deposit/file.py index 53876c53..ed6bd570 100644 --- a/src/hermes/commands/deposit/file.py +++ b/src/hermes/commands/deposit/file.py @@ -11,7 +11,7 @@ from pydantic import BaseModel from hermes.commands.deposit.base import BaseDepositPlugin -from hermes.model import SoftwareMetadata + class FileDepositSettings(BaseModel): filename: str = 'codemeta.json' @@ -20,8 +20,11 @@ class FileDepositSettings(BaseModel): class FileDepositPlugin(BaseDepositPlugin): settings_class = FileDepositSettings - def map_metadata(self) -> SoftwareMetadata: - return self.metadata + def map_metadata(self) -> dict: + return self.metadata.compact() + + def update_metadata(self) -> dict: + return self.metadata.compact() def publish(self) -> None: file_config = self.command.settings.file diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index 01211e5a..9434beca 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -19,7 +19,6 @@ from hermes.commands.deposit.base import BaseDepositPlugin from hermes.commands.deposit.error import DepositionUnauthorizedError from hermes.error import MisconfigurationError -from hermes.model import SoftwareMetadata from hermes.model.error import HermesValidationError from hermes.utils import hermes_doi, hermes_user_agent @@ -320,7 +319,12 @@ def prepare(self) -> None: record_id=rec_id, doi=doi, codemeta_identifier=codemeta_identifier ) - version = self.metadata["version"] + if len(self.metadata.get("version", [])) > 1: + raise HermesValidationError("Too many licenses for invenio deposit.") + if len(self.metadata.get("version", [])) == 1: + version = self.metadata["version"][0] + else: + version = None if rec_meta and (version == rec_meta.get("version")): raise ValueError(f"Version {version} already deposited.") @@ -336,10 +340,10 @@ def prepare(self) -> None: self.invenio_ctx = deposition_data - def map_metadata(self) -> SoftwareMetadata: + def map_metadata(self) -> dict: """Map the harvested metadata onto the Invenio schema and return it.""" self.invenio_ctx["depositionMetadata"] = self._codemeta_to_invenio_deposition() - return SoftwareMetadata(self.invenio_ctx["depositionMetadata"]) + return self.invenio_ctx["depositionMetadata"] def is_initial_publication(self) -> bool: latest_record_id = self.invenio_ctx.get("latestRecord", {}).get("id") @@ -398,7 +402,7 @@ def related_identifiers(self): }, ] - def update_metadata(self) -> SoftwareMetadata: + def update_metadata(self) -> dict: """Update the metadata of a draft and return it.""" draft_url = self.links["latest_draft"] @@ -418,7 +422,7 @@ def update_metadata(self) -> SoftwareMetadata: self.links.update(deposit["links"]) _log.debug("Created new version deposit: %s", self.links["html"]) - return SoftwareMetadata(deposit.get("metadata", {})) + return deposit def delete_artifacts(self) -> None: """Delete existing file artifacts. @@ -508,21 +512,25 @@ def _codemeta_to_invenio_deposition(self) -> dict: access_conditions = self.invenio_ctx["access_conditions"] creators = [] - for author in metadata["author"]: + for author in metadata.get("author", []): creator = {} - if len(affils := [name for affil in author["affiliation"] for name in affil["legalname"]]) != 0: + if len( + affils := [ + name for affil in author.get("affiliation", []) for name in affil.get("legalname", []) + ] + ) != 0: creator["affiliation"] = affils - if len(author["familyName"]) > 1: - raise HermesValidationError(f"Author has too many family names: {author.to_python()}") - if len(author["familyName"]) == 1: - given_names_str = " ".join(author["givenName"]) + + if len(author.get("familyName", [])) > 1: + raise HermesValidationError(f"Author has too many family names: {author}") + if len(author.get("familyName", [])) == 1: + given_names_str = " ".join(author.get("givenName", [])) name = f"{author["familyName"][0]}, {given_names_str}" - elif len(author["name"]) != 1: - raise HermesValidationError(f"Author has too many names: {author.to_python()}") + elif len(author.get("name", [])) != 1: + raise HermesValidationError(f"Author has too many or no names: {author}") else: name = author["name"][0] - if len(name) != 0: - creator["name"] = name + creator["name"] = name if (id := author.get("@id", None)) is not None: creator["orcid"] = id.replace("https://orcid.org/", "") if creator: @@ -545,6 +553,7 @@ def _codemeta_to_invenio_deposition(self) -> dict: for author in metadata["author"] ]""" + # TODO: reimplement with new api # This is not used at the moment. See comment below in `deposition_metadata` dict. contributors = [ # noqa: F841 # TODO: Distinguish between @type "Person" and others @@ -566,27 +575,33 @@ def _codemeta_to_invenio_deposition(self) -> dict: for contributor in metadata.get("contributor", []) if contributor.get("name") != "GitHub" ] - if len(metadata["name"]) != 1: + if len(metadata.get("name", [])) != 1: _log.error("More than one or zero names for the Software are given.") raise HermesValidationError("More than one or zerno names for the Software.") name = metadata["name"][0] - if len(metadata["schema:description"]) > 1: + if len(metadata.get("schema:description", [])) > 1: _log.error("More than one descriptions of the Software are given.") raise HermesValidationError("More than one descriptions of the Software are given.") - if len(metadata["schema:description"]) == 1: + if len(metadata.get("schema:description", [])) == 1: description = metadata["schema:description"][0] else: description = None - if len(metadata["schema:version"]) > 1: + if len(metadata.get("schema:version", [])) > 1: _log.error("More than one version of the Software are given.") raise HermesValidationError("More than one version of the Software are given.") - if len(metadata["schema:version"]) == 1: + if len(metadata.get("schema:version", [])) == 1: version = metadata["schema:version"][0] else: version = None + keywords = metadata.get("schema:keywords", []) + if len(keywords) == 0: + keywords = None + else: + keywords = keywords.to_python() + # TODO: Use the fields currently set to `None`. # Some more fields are available but they most likely don't relate to software # publications targeted by hermes. @@ -602,9 +617,6 @@ def _codemeta_to_invenio_deposition(self) -> dict: "publication_date": date.today().isoformat(), "title": name, "creators": creators, - # TODO: Use a real description here. Possible sources could be - # `tool.poetry.description` from pyproject.toml or `abstract` from - # CITATION.cff. This should then be stored in codemeta description field. "description": description, "access_right": access_right, "license": license, @@ -618,8 +630,8 @@ def _codemeta_to_invenio_deposition(self) -> dict: # them. # TODO: Use the DOI we get back from this. "prereserve_doi": True, - # TODO: A good source for this could be `tool.poetry.keywords` in pyproject.toml. - "keywords": None, + "keywords": keywords, + # TODO: Is there a good codemeta/ schema field? "notes": None, "related_identifiers": self.related_identifiers(), # TODO: Use `contributors`. In the case of the hermes workflow itself, the @@ -641,6 +653,10 @@ def _get_license_identifier(self) -> Union[str, None]: If no license is configured, ``None`` will be returned. """ + if "license" not in self.metadata: + raise HermesValidationError("No license is given.") + if len(self.metadata["license"]) > 1: + raise HermesValidationError("Too many licenses for invenio deposit.") license_url = self.metadata["license"][0] return self.resolver.resolve_license_id(license_url) diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index 8311b67f..f368ec73 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -22,14 +22,7 @@ def __init__(self, data, *, parent=None, key=None, index=None, context=None): def __getitem__(self, key): full_iri = self.ld_proc.expand_iri(self.active_ctx, key) - if full_iri == "@id": - return self._to_python(full_iri, self.data_dict[full_iri]) - try: - ld_value = self.data_dict[full_iri] - except KeyError: - self[key] = [] - ld_value = self.data_dict[full_iri] - return self._to_python(full_iri, ld_value) + return self._to_python(full_iri, self.data_dict[full_iri]) def __setitem__(self, key, value): ld_value = self._to_expanded_json({key: value}) @@ -41,12 +34,7 @@ def __delitem__(self, key): def __contains__(self, key): full_iri = self.ld_proc.expand_iri(self.active_ctx, key) - if full_iri == "@id": - return "@id" in self.data_dict - try: - return len(self[full_iri]) != 0 - except KeyError: - return False + return full_iri in self.data_dict def __eq__(self, other): if not isinstance(other, (dict, ld_dict)): @@ -89,6 +77,15 @@ def get(self, key, default=_NO_DEFAULT): return default return self[key] + def setdefault(self, key, default): + if key not in self: + self[key] = default + return self[key] + + def emplace(self, key): + if key not in self: + self[key] = [] + def update(self, other): for key, value in other.items(): self[key] = value @@ -136,7 +133,7 @@ def from_dict(cls, value, *, parent=None, key=None, context=None, ld_type=None): full_context = parent.full_context + merged_contexts ld_value = cls.ld_proc.expand(ld_data, {"expandContext": full_context, "documentLoader": bundled_loader}) - ld_value = cls(ld_value, parent=parent, key=key, context=merged_contexts) + ld_value = ld_dict(ld_value, parent=parent, key=key, context=merged_contexts) return ld_value diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index 6845a210..895968d7 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -53,16 +53,18 @@ def test_init_nested_object(): def test_append(): data = SoftwareMetadata() + data.emplace("schema:name") data["schema:name"].append("a") assert type(data["schema:name"]) is ld_list assert data["schema:name"][0] == "a" and data["schema:name"].item_list == [{"@value": "a"}] data["schema:name"].append("b") assert type(data["schema:name"]) is ld_list and data["schema:name"].item_list == [{"@value": "a"}, {"@value": "b"}] + data.emplace("schema:name") data["schema:name"].append("c") assert data["schema:name"].item_list == [{"@value": "a"}, {"@value": "b"}, {"@value": "c"}] data = SoftwareMetadata() - data["schema:Person"].append({"schema:name": "foo"}) + data.setdefault("schema:Person", []).append({"schema:name": "foo"}) assert type(data["schema:Person"]) is ld_list and type(data["schema:Person"][0]) is ld_dict assert data["schema:Person"][0].data_dict == {"http://schema.org/name": [{"@value": "foo"}]} data["schema:Person"].append({"schema:name": "foo"}) @@ -94,7 +96,7 @@ def test_usage(): data["author"][0]["email"].append("foo@baz.com") assert len(data["author"]) == 2 assert len(data["author"][0]["email"]) == 2 - assert len(data["author"][1]["email"]) == 0 + assert len(data["author"][1].get("email", [])) == 0 harvest = { "authors": [ {"name": "Foo", "affiliation": ["Uni A", "Lab B"], "kw": ["a", "b", "c"]}, @@ -103,17 +105,19 @@ def test_usage(): ] } for author in harvest["authors"]: - for exist_author in data["author"]: - if author["name"] == exist_author["name"][0]: + for exist_author in data.get("author", []): + if author["name"] in exist_author.get("name", []): exist_author["affiliation"] = author["affiliation"] if "email" in author: + exist_author.emplace("email") exist_author["email"].append(author["email"]) if "kw" in author: + exist_author.emplace("schema:knowsAbout") exist_author["schema:knowsAbout"].extend(author["kw"]) break else: - data["author"].append(author) - assert len(data["author"]) == 3 + data.setdefault("author", []).append(author) + assert len(data.get("author", [])) == 3 foo, bar, baz = data["author"] assert foo["name"][0] == "Foo" assert foo["affiliation"].to_python() == ["Uni A", "Lab B"] @@ -124,8 +128,8 @@ def test_usage(): assert bar["email"].to_python() == ["bar@c.edu"] assert baz["name"][0] == "Baz" assert baz["affiliation"].to_python() == ["Lab E"] - assert len(baz["schema:knowsAbout"]) == 0 - assert len(baz["email"]) == 0 + assert len(baz.get("schema:knowsAbout", [])) == 0 + assert len(baz.get("email", [])) == 0 for author in data["author"]: assert "name" in author if "Baz" not in author["name"]: diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index fa8f4ac8..16302000 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -194,17 +194,16 @@ def test_cff_harvest(tmp_path, monkeypatch, cff, res): try: monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() - except SystemExit: + except SystemExit as e: + if e.code != 0: + raise e + finally: manager = context_manager.HermesContext() manager.prepare_step("harvest") - with manager["cff"] as cache: - result = SoftwareMetadata(cache["codemeta"]) + result = SoftwareMetadata.load_from_cache(manager, "cff") manager.finalize_step("harvest") - finally: sys.argv = orig_argv - # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts (in multiple places) - # after merge with refactor/data-model and/or refactor/423-implement-public-api assert result == res @@ -354,37 +353,36 @@ def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): try: monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() - except SystemExit: + except SystemExit as e: + if e.code != 0: + raise e + finally: manager = context_manager.HermesContext() manager.prepare_step("harvest") - with manager["codemeta"] as cache: - result = SoftwareMetadata(cache["codemeta"]) + result = SoftwareMetadata.load_from_cache(manager, "codemeta") manager.finalize_step("harvest") - finally: sys.argv = orig_argv assert result == res @pytest.mark.parametrize( - "deposit, res", + "metadata", [ - 2 * ( - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}] - }), - ) + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }), ] ) -def test_file_deposit(tmp_path, monkeypatch, deposit, res): +def test_file_deposit(tmp_path, monkeypatch, metadata): monkeypatch.chdir(tmp_path) manager = context_manager.HermesContext(tmp_path) manager.prepare_step("curate") with manager["result"] as cache: - cache["codemeta"] = deposit.compact() + cache["codemeta"] = metadata.compact() manager.finalize_step("curate") config_file = tmp_path / "hermes.toml" @@ -396,13 +394,15 @@ def test_file_deposit(tmp_path, monkeypatch, deposit, res): try: monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() - except SystemExit: + except SystemExit as e: + if e.code != 0: + raise e + finally: with open('codemeta.json', 'r') as cache: result = SoftwareMetadata(json.load(cache)) - finally: sys.argv = orig_argv - assert result == res + assert result == metadata @pytest.mark.parametrize( @@ -448,14 +448,15 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata): try: monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() - except SystemExit: + except SystemExit as e: + if e.code != 0: + raise e + finally: manager.prepare_step("deposit") - result = SoftwareMetadata.load_from_cache(manager, "invenio") + with manager["deposit"] as cache: + result = cache["result"] manager.finalize_step("deposit") - finally: sys.argv = orig_argv - assert result == metadata - -# TODO: handle get() on Softwaremetadata objects in invenio.py - + # TODO: compare to actually expected value + assert result == {} diff --git a/test/hermes_test/model/types/test_ld_dict.py b/test/hermes_test/model/types/test_ld_dict.py index c7a7a183..8736439d 100644 --- a/test/hermes_test/model/types/test_ld_dict.py +++ b/test/hermes_test/model/types/test_ld_dict.py @@ -197,6 +197,25 @@ def test_get(): di["bar"] +def test_setdefault(): + di = ld_dict([{"https://schema.org/name": [{"@value": "Manu Sporny"}]}], + context=[{"schema": "https://schema.org/"}]) + assert di.setdefault("schema:name", []) == [{"@value": "Manu Sporny"}] + assert di.setdefault("schema:email", []) == [] + assert di["schema:email"] == [] + + +def test_emplace(): + di = ld_dict([{"https://schema.org/name": [{"@value": "Manu Sporny"}]}], + context=[{"schema": "https://schema.org/"}]) + di.emplace("schema:name") + assert di["schema:name"] == [{"@value": "Manu Sporny"}] + with pytest.raises(KeyError): + di["schema:email"] + di.emplace("schema:email") + assert di["schema:email"] == [] + + def test_update(): di = ld_dict([{"http://xmlns.com/foaf/0.1/name": [{"@value": "Manu Sporny"}], "http://xmlns.com/foaf/0.1/homepage": [{"@id": "http://manu.sporny.org/"}]}], From 96861ec750f8ef4553a34c062e2b9604b021ff32 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Mon, 2 Feb 2026 10:45:50 +0100 Subject: [PATCH 10/10] adjusted invenio.py and its test a bit --- src/hermes/commands/deposit/invenio.py | 2 + src/hermes/commands/deposit/invenio_rdm.py | 14 ++++-- test/hermes_test/model/test_api_e2e.py | 51 ++++++++++++++-------- 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index 9434beca..3915d536 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -513,6 +513,8 @@ def _codemeta_to_invenio_deposition(self) -> dict: creators = [] for author in metadata.get("author", []): + if not "Person" in author.get("@type", []): + continue creator = {} if len( affils := [ diff --git a/src/hermes/commands/deposit/invenio_rdm.py b/src/hermes/commands/deposit/invenio_rdm.py index a381db90..01e08371 100644 --- a/src/hermes/commands/deposit/invenio_rdm.py +++ b/src/hermes/commands/deposit/invenio_rdm.py @@ -6,9 +6,8 @@ # SPDX-FileContributor: Oliver Bertuch # SPDX-FileContributor: Michael Meinel -import typing as t - from requests import HTTPError +from typing import Union from hermes.commands.deposit.invenio import InvenioClient, InvenioDepositPlugin, InvenioResolver @@ -27,7 +26,7 @@ def get_licenses(self): class InvenioRDMResolver(InvenioResolver): invenio_client_class = InvenioRDMClient - def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: + def resolve_license_id(self, license_url: Union[str, None]) -> Union[dict, None]: """Deliberately try to resolve the license URL to a valid InvenioRDM license information record from the vocabulary. @@ -47,6 +46,12 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: except HTTPError: pass + # FIXME: Why not get all license_cross_refs and then use a query parameter like this: + # ?q=props.url:("license_url" OR "license_cross_ref[1]" OR ...)&size=1000 + # That would be able to replace _search_license_info. + # FIXME: Some licenses in valid_licenses["hits"]["hits"]["props"]["url"] are only http although + # https://spdx.org/licenses/license.json lists them in crossRef as https + # If the easy "mapping" did not work, we really need to "search" for the correct license ID. response = self.client.get_licenses() response.raise_for_status() @@ -65,6 +70,7 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: if license_info is not None: break else: + # FIXME: Why is this only raised here and not always when license_info is None? raise RuntimeError(f"Could not resolve license URL {license_url} to a valid identifier.") return license_info @@ -73,7 +79,7 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: def _extract_license_id_from_response(data: dict) -> str: return data["id"] - def _search_license_info(self, _url: str, valid_licenses: dict) -> t.Optional[dict]: + def _search_license_info(self, _url: str, valid_licenses: dict) -> Union[dict, None]: for license_info in valid_licenses['hits']['hits']: try: if license_info['props']['url'] == _url: diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 16302000..18dc973c 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -172,7 +172,7 @@ def sandbox_auth(): "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], "http://schema.org/name": [{"@value": "Test"}], "http://schema.org/url": [ - {"@id": 'https://arxiv.org/abs/2201.09015'}, + {"@id": "https://arxiv.org/abs/2201.09015"}, {"@id": "https://docs.software-metadata.pub/en/latest"} ], "http://schema.org/version": [{"@value": "9.0.1"}] @@ -398,7 +398,7 @@ def test_file_deposit(tmp_path, monkeypatch, metadata): if e.code != 0: raise e finally: - with open('codemeta.json', 'r') as cache: + with open("codemeta.json", "r") as cache: result = SoftwareMetadata(json.load(cache)) sys.argv = orig_argv @@ -406,22 +406,37 @@ def test_file_deposit(tmp_path, monkeypatch, metadata): @pytest.mark.parametrize( - "metadata", + "metadata, invenio_metadata", [ - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/author": [{ - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Test"}], - "http://schema.org/givenName": [{"@value": "Testi"}] - }], - "http://schema.org/license": [{"@id": "https://spdx.org/licenses/apache-2.0"}] - }), + ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [{ + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}] + }], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] + }), + { + "upload_type": "software", + "publication_date": "2026-02-02", + "title": "Test", + "creators": [{"name": "Test, Testi"}], + "description": "for testing", + "access_right": "closed", + "license": "apache-2.0", + "prereserve_doi": True, + "related_identifiers": [ + {"identifier": "10.5281/zenodo.13311079", "relation": "isCompiledBy", "scheme": "doi"} + ] + } + ) ] ) -def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata): +def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_metadata): monkeypatch.chdir(tmp_path) manager = context_manager.HermesContext(tmp_path) @@ -453,10 +468,10 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata): raise e finally: manager.prepare_step("deposit") - with manager["deposit"] as cache: - result = cache["result"] + with manager["invenio"] as cache: + result = cache["deposit"] manager.finalize_step("deposit") sys.argv = orig_argv # TODO: compare to actually expected value - assert result == {} + assert result == invenio_metadata