diff --git a/src/hermes/commands/__init__.py b/src/hermes/commands/__init__.py index 5203ac18..d2116ef2 100644 --- a/src/hermes/commands/__init__.py +++ b/src/hermes/commands/__init__.py @@ -8,12 +8,12 @@ # "unused import" errors. # flake8: noqa -from hermes.commands.base import HermesHelpCommand -from hermes.commands.base import HermesVersionCommand -from hermes.commands.clean.base import HermesCleanCommand -from hermes.commands.init.base import HermesInitCommand +# from hermes.commands.base import HermesHelpCommand +# from hermes.commands.base import HermesVersionCommand +# from hermes.commands.clean.base import HermesCleanCommand +# from hermes.commands.init.base import HermesInitCommand from hermes.commands.curate.base import HermesCurateCommand from hermes.commands.harvest.base import HermesHarvestCommand -from hermes.commands.process.base import HermesProcessCommand -from hermes.commands.deposit.base import HermesDepositCommand -from hermes.commands.postprocess.base import HermesPostprocessCommand +# from hermes.commands.process.base import HermesProcessCommand +# from hermes.commands.deposit.base import HermesDepositCommand +# from hermes.commands.postprocess.base import HermesPostprocessCommand diff --git a/src/hermes/commands/base.py b/src/hermes/commands/base.py index 3ae9030b..2d182267 100644 --- a/src/hermes/commands/base.py +++ b/src/hermes/commands/base.py @@ -9,19 +9,19 @@ import logging import pathlib from importlib import metadata -from typing import Dict, Optional, Type +from typing import Type, Union import toml from pydantic import BaseModel from pydantic_settings import BaseSettings, SettingsConfigDict -class _HermesSettings(BaseSettings): +class HermesSettings(BaseSettings): """Root class for HERMES configuration model.""" model_config = SettingsConfigDict(env_file_encoding='utf-8') - logging: Dict = {} + logging: dict = {} class HermesCommand(abc.ABC): @@ -31,7 +31,7 @@ class HermesCommand(abc.ABC): """ command_name: str = "" - settings_class: Type = _HermesSettings + settings_class: Type = HermesSettings def __init__(self, parser: argparse.ArgumentParser): """Initialize a new instance of any HERMES command. @@ -45,28 +45,27 @@ def __init__(self, parser: argparse.ArgumentParser): self.log = logging.getLogger(f"hermes.{self.command_name}") self.errors = [] - @classmethod - def init_plugins(cls): + def init_plugins(self): """Collect and initialize the plugins available for the HERMES command.""" # Collect all entry points for this group (i.e., all valid plug-ins for the step) - entry_point_group = f"hermes.{cls.command_name}" - group_plugins = { - entry_point.name: entry_point.load() - for entry_point in metadata.entry_points(group=entry_point_group) - } - - # Collect the plug-in specific configurations - cls.derive_settings_class({ - plugin_name: plugin_class.settings_class - for plugin_name, plugin_class in group_plugins.items() - if hasattr(plugin_class, "settings_class") and plugin_class.settings_class is not None - }) + entry_point_group = f"hermes.{self.command_name}" + group_plugins = {} + group_settings = {} + + for entry_point in metadata.entry_points(group=entry_point_group): + plugin_cls = entry_point.load() + + group_plugins[entry_point.name] = plugin_cls + if hasattr(plugin_cls, 'settings_class') and plugin_cls.settings_class is not None: + group_settings[entry_point.name] = plugin_cls.settings_class + + self.derive_settings_class(group_settings) return group_plugins @classmethod - def derive_settings_class(cls, setting_types: Dict[str, Type]) -> None: + def derive_settings_class(cls, setting_types: dict[str, Type]) -> None: """Build a new Pydantic data model class for configuration. This will create a new class that includes all settings from the plugins available. @@ -131,13 +130,10 @@ def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: def load_settings(self, args: argparse.Namespace): """Load settings from the configuration file (passed in from command line).""" - try: - toml_data = toml.load(args.path / args.config) - self.root_settings = HermesCommand.settings_class.model_validate(toml_data) - self.settings = getattr(self.root_settings, self.command_name) - except FileNotFoundError as e: - self.log.error("hermes.toml was not found. Try to run 'hermes init' first or create one manually.") - raise e # This will lead to our default error message & sys.exit + + toml_data = toml.load(args.path / args.config) + self.root_settings = HermesCommand.settings_class.model_validate(toml_data) + self.settings = getattr(self.root_settings, self.command_name) def patch_settings(self, args: argparse.Namespace): """Process command line options for the settings.""" @@ -164,7 +160,9 @@ def __call__(self, args: argparse.Namespace): class HermesPlugin(abc.ABC): """Base class for all HERMES plugins.""" - settings_class: Optional[Type] = None + pluing_node = None + + settings_class: Union[Type, None] = None @abc.abstractmethod def __call__(self, command: HermesCommand) -> None: @@ -202,27 +200,3 @@ def __call__(self, args: argparse.Namespace) -> None: # Otherwise, simply show the general help and exit (cleanly). self.parser.print_help() self.parser.exit() - - def load_settings(self, args: argparse.Namespace): - """No settings are needed for the help command.""" - pass - - -class HermesVersionSettings(BaseModel): - """Intentionally empty settings class for the version command.""" - pass - - -class HermesVersionCommand(HermesCommand): - """Show HERMES version and exit.""" - - command_name = "version" - settings_class = HermesVersionSettings - - def load_settings(self, args: argparse.Namespace): - """Pass loading settings as not necessary for this command.""" - pass - - def __call__(self, args: argparse.Namespace) -> None: - self.log.info(metadata.version("hermes")) - self.parser.exit() diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index 06a18ca7..565381fc 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -12,9 +12,11 @@ import sys from hermes import logger -from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, - HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, - HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) +# FIXME: remove comments after new implementation of modules is available +# from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, +# HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, +# HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) +from hermes.commands import HermesCurateCommand, HermesHarvestCommand from hermes.commands.base import HermesCommand @@ -36,15 +38,15 @@ def main() -> None: setting_types = {} for command in ( - HermesHelpCommand(parser), - HermesVersionCommand(parser), - HermesInitCommand(parser), - HermesCleanCommand(parser), + # HermesHelpCommand(parser), + # HermesVersionCommand(parser), + # HermesInitCommand(parser), + # HermesCleanCommand(parser), HermesHarvestCommand(parser), - HermesProcessCommand(parser), + # HermesProcessCommand(parser), HermesCurateCommand(parser), - HermesDepositCommand(parser), - HermesPostprocessCommand(parser), + # HermesDepositCommand(parser), + # HermesPostprocessCommand(parser), ): if command.settings_class is not None: setting_types[command.command_name] = command.settings_class diff --git a/src/hermes/commands/curate/base.py b/src/hermes/commands/curate/base.py index 4c990bc7..15d7c8db 100644 --- a/src/hermes/commands/curate/base.py +++ b/src/hermes/commands/curate/base.py @@ -5,17 +5,16 @@ # SPDX-FileContributor: Michael Meinel import argparse -import os -import shutil -import sys from pydantic import BaseModel from hermes.commands.base import HermesCommand -from hermes.model.context import CodeMetaContext +from hermes.model import SoftwareMetadata +from hermes.model.context_manager import HermesContext +from hermes.model.error import HermesValidationError -class _CurateSettings(BaseModel): +class CurateSettings(BaseModel): """Generic deposition settings.""" pass @@ -25,23 +24,30 @@ class HermesCurateCommand(HermesCommand): """ Curate the unified metadata before deposition. """ command_name = "curate" - settings_class = _CurateSettings + settings_class = CurateSettings def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: pass def __call__(self, args: argparse.Namespace) -> None: - self.log.info("# Metadata curation") - ctx = CodeMetaContext() - process_output = ctx.hermes_dir / 'process' / (ctx.hermes_name + ".json") + ctx = HermesContext() + ctx.prepare_step("curate") + + ctx.prepare_step("process") + with ctx["result"] as process_ctx: + expanded_data = process_ctx["expanded"] + context_data = process_ctx["context"] + ctx.finalize_step("process") + + try: + data = SoftwareMetadata(expanded_data[0], context_data["@context"][1]) + except Exception as e: + raise HermesValidationError("The results of the process step are invalid.") from e - if not process_output.is_file(): - self.log.error( - "No processed metadata found. Please run `hermes process` before curation." - ) - sys.exit(1) + with ctx["result"] as curate_ctx: + curate_ctx["expanded"] = data.ld_value + curate_ctx["context"] = {"@context": data.full_context} - os.makedirs(ctx.hermes_dir / 'curate', exist_ok=True) - shutil.copy(process_output, ctx.hermes_dir / 'curate' / (ctx.hermes_name + '.json')) + ctx.finalize_step("curate") diff --git a/src/hermes/commands/harvest/base.py b/src/hermes/commands/harvest/base.py index 59fad8f1..19ccc623 100644 --- a/src/hermes/commands/harvest/base.py +++ b/src/hermes/commands/harvest/base.py @@ -5,14 +5,13 @@ # SPDX-FileContributor: Michael Meinel import argparse -import typing as t -from datetime import datetime from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.model.context import HermesContext, HermesHarvestContext -from hermes.model.error import HermesValidationError, HermesMergeError +from hermes.model.context_manager import HermesContext +from hermes.model.error import HermesValidationError +from hermes.model import SoftwareMetadata class HermesHarvestPlugin(HermesPlugin): @@ -21,11 +20,11 @@ class HermesHarvestPlugin(HermesPlugin): TODO: describe the harvesting process and how this is mapped to this plugin. """ - def __call__(self, command: HermesCommand) -> t.Tuple[t.Dict, t.Dict]: + def __call__(self, command: HermesCommand) -> tuple[SoftwareMetadata, dict]: pass -class _HarvestSettings(BaseModel): +class HarvestSettings(BaseModel): """Generic harvesting settings.""" sources: list[str] = [] @@ -35,32 +34,31 @@ class HermesHarvestCommand(HermesCommand): """ Harvest metadata from configured sources. """ command_name = "harvest" - settings_class = _HarvestSettings + settings_class = HarvestSettings def __call__(self, args: argparse.Namespace) -> None: self.args = args - ctx = HermesContext() # Initialize the harvest cache directory here to indicate the step ran - ctx.init_cache("harvest") + ctx = HermesContext() + ctx.prepare_step('harvest') for plugin_name in self.settings.sources: + plugin_cls = self.plugins[plugin_name] + try: - plugin_func = self.plugins[plugin_name]() - harvested_data, tags = plugin_func(self) - - with HermesHarvestContext(ctx, plugin_name) as harvest_ctx: - harvest_ctx.update_from(harvested_data, - plugin=plugin_name, - timestamp=datetime.now().isoformat(), **tags) - for _key, ((_value, _tag), *_trace) in harvest_ctx._data.items(): - if any(v != _value and t == _tag for v, t in _trace): - raise HermesMergeError(_key, None, _value) - - except KeyError as e: - self.log.error("Plugin '%s' not found.", plugin_name) - self.errors.append(e) + # Load plugin and run the harvester + plugin_func = plugin_cls() + harvested_data = plugin_func(self) + + with ctx[plugin_name] as plugin_ctx: + plugin_ctx["codemeta"] = harvested_data[0].compact() + plugin_ctx["context"] = {"@context": harvested_data[0].full_context} + + plugin_ctx["expanded"] = harvested_data[0].ld_value except HermesValidationError as e: self.log.error("Error while executing %s: %s", plugin_name, e) self.errors.append(e) + + ctx.finalize_step('harvest') diff --git a/src/hermes/commands/harvest/cff.py b/src/hermes/commands/harvest/cff.py index e333b27c..6c2b6594 100644 --- a/src/hermes/commands/harvest/cff.py +++ b/src/hermes/commands/harvest/cff.py @@ -9,16 +9,16 @@ import logging import pathlib import urllib.request -import typing as t from pydantic import BaseModel from ruamel.yaml import YAML import jsonschema from cffconvert import Citation +from typing import Any, Union -from hermes.model.context import ContextPath -from hermes.model.errors import HermesValidationError +from hermes.model.error import HermesValidationError from hermes.commands.harvest.base import HermesHarvestPlugin, HermesHarvestCommand +from hermes.model import SoftwareMetadata # TODO: should this be configurable via a CLI option? @@ -35,7 +35,7 @@ class CffHarvestSettings(BaseModel): class CffHarvestPlugin(HermesHarvestPlugin): settings_class = CffHarvestSettings - def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: + def __call__(self, command: HermesHarvestCommand) -> tuple[SoftwareMetadata, dict]: # Get source files cff_file = self._get_single_cff(command.args.path) if not cff_file: @@ -44,23 +44,24 @@ def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: # Read the content cff_data = cff_file.read_text() - - # Validate the content to be correct CFF cff_dict = self._load_cff_from_file(cff_data) - if command.settings.cff.enable_validation and not self._validate(cff_file, cff_dict): - raise HermesValidationError(cff_file) + if command.settings.cff.enable_validation: + # Validate the content to be correct CFF + if not self._validate(cff_file, cff_dict): + raise HermesValidationError(cff_file) # Convert to CodeMeta using cffconvert codemeta_dict = self._convert_cff_to_codemeta(cff_data) - # TODO Replace the following temp patch for #112 once there is a new cffconvert version with cffconvert#309 - codemeta_dict = self._patch_author_emails(cff_dict, codemeta_dict) if "version" in codemeta_dict: codemeta_dict["version"] = str(codemeta_dict["version"]) # Convert Version to string - return codemeta_dict, {'local_path': str(cff_file)} + # TODO Replace the following temp patch for #112 once there is a new cffconvert version with cffconvert#309 + codemeta_dict = self._patch_author_emails(cff_dict, codemeta_dict) + ld_codemeta = SoftwareMetadata(codemeta_dict, extra_vocabs={'legalName': {'@id': "http://schema.org/name"}}) + return ld_codemeta, {} - def _load_cff_from_file(self, cff_data: str) -> t.Any: + def _load_cff_from_file(self, cff_data: str) -> Any: yaml = YAML(typ='safe') yaml.constructor.yaml_constructors[u'tag:yaml.org,2002:timestamp'] = yaml.constructor.yaml_constructors[ u'tag:yaml.org,2002:str'] @@ -73,11 +74,11 @@ def _patch_author_emails(self, cff: dict, codemeta: dict) -> dict: codemeta["author"][i]["email"] = author["email"] return codemeta - def _convert_cff_to_codemeta(self, cff_data: str) -> t.Any: + def _convert_cff_to_codemeta(self, cff_data: str) -> Any: codemeta_str = Citation(cff_data).as_codemeta() return json.loads(codemeta_str) - def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool: + def _validate(self, cff_file: pathlib.Path, cff_dict: dict) -> bool: audit_log = logging.getLogger('audit.cff') cff_schema_url = f'https://citation-file-format.github.io/{_CFF_VERSION}/schema.json' @@ -93,7 +94,7 @@ def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool: audit_log.warning('!!! warning "%s is not valid according to <%s>"', cff_file, cff_schema_url) for error in errors: - path = ContextPath.make(error.absolute_path or ['root']) + path = error.absolute_path or ['root'] audit_log.info(' Invalid input for `%s`.', str(path)) audit_log.info(' !!! message "%s"', error.message) audit_log.debug(' !!! value "%s"', error.instance) @@ -108,7 +109,7 @@ def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool: audit_log.info('- Found valid Citation File Format file at: %s', cff_file) return True - def _get_single_cff(self, path: pathlib.Path) -> t.Optional[pathlib.Path]: + def _get_single_cff(self, path: pathlib.Path) -> Union[pathlib.Path, None]: # Find CFF files in directories and subdirectories cff_file = path / 'CITATION.cff' if cff_file.exists(): diff --git a/src/hermes/commands/harvest/codemeta.py b/src/hermes/commands/harvest/codemeta.py index b75bb002..5f211222 100644 --- a/src/hermes/commands/harvest/codemeta.py +++ b/src/hermes/commands/harvest/codemeta.py @@ -8,15 +8,16 @@ import glob import json import pathlib -import typing as t +from typing import Union from hermes.commands.harvest.base import HermesHarvestCommand, HermesHarvestPlugin from hermes.commands.harvest.util.validate_codemeta import validate_codemeta -from hermes.model.errors import HermesValidationError +from hermes.model.error import HermesValidationError +from hermes.model import SoftwareMetadata class CodeMetaHarvestPlugin(HermesHarvestPlugin): - def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: + def __call__(self, command: HermesHarvestCommand) -> tuple[SoftwareMetadata, dict]: """ Implementation of a harvester that provides data from a codemeta.json file format. @@ -39,7 +40,7 @@ def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: raise HermesValidationError(codemeta_file) codemeta = json.loads(codemeta_str) - return codemeta, {'local_path': str(codemeta_file)} + return SoftwareMetadata(codemeta), {'local_path': str(codemeta_file)} def _validate(self, codemeta_file: pathlib.Path) -> bool: with open(codemeta_file, "r") as fi: @@ -55,7 +56,7 @@ def _validate(self, codemeta_file: pathlib.Path) -> bool: return True - def _get_single_codemeta(self, path: pathlib.Path) -> t.Optional[pathlib.Path]: + def _get_single_codemeta(self, path: pathlib.Path) -> Union[pathlib.Path, None]: # Find CodeMeta files in directories and subdirectories # TODO: Do we really want to search recursive? Maybe add another option to enable pointing to a single file? # (So this stays "convention over configuration") diff --git a/src/hermes/model/__init__.py b/src/hermes/model/__init__.py index 4a4bca25..febdb0ff 100644 --- a/src/hermes/model/__init__.py +++ b/src/hermes/model/__init__.py @@ -2,4 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 +# This is an interface file that only provides a public interface, hence linter is disabled to avoid +# "unused import" errors. +# flake8: noqa + from hermes.model.api import SoftwareMetadata diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py new file mode 100644 index 00000000..3e43073d --- /dev/null +++ b/test/hermes_test/model/test_api_e2e.py @@ -0,0 +1,456 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import pytest +import sys +from hermes.model import SoftwareMetadata +from hermes.model.context_manager import HermesContext +from hermes.commands import cli + + +@pytest.mark.parametrize( + "cff, res", + [ + ( + """cff-version: 1.2.0 +title: Temp\nmessage: >- + If you use this software, please cite it using the + metadata from this file. +type: software +authors: + - given-names: Max + family-names: Mustermann + email: max@muster.mann""", + SoftwareMetadata({ + "@type": "SoftwareSourceCode", + "author": { + "@list": [{ + "@type": "Person", + "email": ["max@muster.mann"], + "familyName": ["Mustermann"], + "givenName": ["Max"] + }] + }, + "name": ["Temp"] + }) + ), + ( + """# SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR), Helmholtz-Zentrum Dresden-Rossendorf +# +# SPDX-License-Identifier: CC0-1.0 + +# SPDX-FileContributor: Michael Meinel + +cff-version: 1.2.0 +title: hermes +message: >- + If you use this software, please cite it using the + metadata from this file. +version: 0.9.0 +license: "Apache-2.0" +abstract: "Tool to automate software publication. Not stable yet." +type: software +authors: + - given-names: Michael + family-names: Meinel + email: michael.meinel@dlr.de + affiliation: German Aerospace Center (DLR) + orcid: "https://orcid.org/0000-0001-6372-3853" + - given-names: Stephan + family-names: Druskat + email: stephan.druskat@dlr.de + affiliation: German Aerospace Center (DLR) + orcid: "https://orcid.org/0000-0003-4925-7248" +identifiers: + - type: doi + value: 10.5281/zenodo.13221384 + description: Version 0.8.1b1 +""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "https://orcid.org/0000-0001-6372-3853", + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "michael.meinel@dlr.de"}], + "http://schema.org/familyName": [{"@value": "Meinel"}], + "http://schema.org/givenName": [{"@value": "Michael"}] + }, + { + "@id": "https://orcid.org/0000-0003-4925-7248", + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "stephan.druskat@dlr.de"}], + "http://schema.org/familyName": [{"@value": "Druskat"}], + "http://schema.org/givenName": [{"@value": "Stephan"}] + } + ] + } + ], + "http://schema.org/description": [{"@value": "Tool to automate software publication. Not stable yet."}], + "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + "http://schema.org/name": [{"@value": "hermes"}], + "http://schema.org/version": [{"@value": "0.9.0"}] + }) + ), + ( + """cff-version: 1.2.0 +title: Test +message: None +type: software +authors: + - given-names: Test + family-names: Testi + email: test.testi@test.testi + affiliation: German Aerospace Center (DLR) +identifiers: + - type: url + value: "https://arxiv.org/abs/2201.09015" + - type: doi + value: 10.5281/zenodo.13221384 +repository-code: "https://github.com/softwarepub/hermes" +abstract: for testing +url: "https://docs.software-metadata.pub/en/latest" +keywords: + - testing + - more testing +license: Apache-2.0 +version: 9.0.1 +date-released: "2026-01-16" """, + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@list": [ + { + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/url": [ + {"@id": 'https://arxiv.org/abs/2201.09015'}, + {"@id": "https://docs.software-metadata.pub/en/latest"} + ], + "http://schema.org/version": [{"@value": "9.0.1"}] + }) + ) + ] +) +def test_cff_harvest(tmp_path, monkeypatch, cff, res): + monkeypatch.chdir(tmp_path) + cff_file = tmp_path / "CITATION.cff" + cff_file.write_text(cff) + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"cff\" ]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit: + manager = HermesContext() + manager.prepare_step("harvest") + with manager["cff"] as cache: + result = SoftwareMetadata(cache["codemeta"]) + manager.finalize_step("harvest") + finally: + sys.argv = orig_argv + + # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts (in multiple places) + # after merge with refactor/data-model and/or refactor/423-implement-public-api + assert result.data_dict == res.data_dict + + +@pytest.mark.parametrize( + "codemeta, res", + [ + ( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "description": "for testing", + "name": "Test" +}""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }) + ), + ( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "applicationCategory": "Testing", + "author": [ + { + "id": "_:author_1", + "type": "Person", + "email": "test.testi@test.testi", + "familyName": "Testi", + "givenName": "Test" + } + ], + "codeRepository": "https://github.com/softwarepub/hermes", + "contributor": { + "id": "_:contributor_1", + "type": "Person", + "email": "test.testi@test.testi", + "familyName": "Testi", + "givenName": "Test" + }, + "dateCreated": "2026-01-16", + "dateModified": "2026-01-16", + "datePublished": "2026-01-16", + "description": "for testing", + "funder": { + "type": "Organization", + "name": "TestsTests" + }, + "keywords": [ + "testing", + "more testing" + ], + "license": [ + "https://spdx.org/licenses/Adobe-2006", + "https://spdx.org/licenses/Abstyles", + "https://spdx.org/licenses/AGPL-1.0-only" + ], + "name": "Test", + "operatingSystem": "Windows", + "programmingLanguage": [ + "Python", + "Python 3" + ], + "relatedLink": "https://docs.software-metadata.pub/en/latest", + "schema:releaseNotes": "get it now", + "version": "1.1.1", + "developmentStatus": "abandoned", + "funding": "none :(", + "codemeta:isSourceCodeOf": { + "id": "HERMES" + }, + "issueTracker": "https://github.com/softwarepub/hermes/issues", + "referencePublication": "https://arxiv.org/abs/2201.09015" +}""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/applicationCategory": [{"@id": "Testing"}], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "_:author_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/contributor": [ + { + "@id": "_:contributor_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ], + "http://schema.org/dateCreated": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/dateModified": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/funder": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "TestsTests"}] + } + ], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [ + {"@id": "https://spdx.org/licenses/Adobe-2006"}, + {"@id": "https://spdx.org/licenses/Abstyles"}, + {"@id": "https://spdx.org/licenses/AGPL-1.0-only"} + ], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/operatingSystem": [{"@value": "Windows"}], + "http://schema.org/programmingLanguage": [{"@value": "Python"}, {"@value": "Python 3"}], + "http://schema.org/relatedLink": [{"@id": "https://docs.software-metadata.pub/en/latest"}], + "http://schema.org/releaseNotes": [{"@value": "get it now"}], + "http://schema.org/version": [{"@value": "1.1.1"}], + "https://codemeta.github.io/terms/developmentStatus": [{"@id": "abandoned"}], + "https://codemeta.github.io/terms/funding": [{"@value": "none :("}], + "https://codemeta.github.io/terms/isSourceCodeOf": [{"@id": "HERMES"}], + "https://codemeta.github.io/terms/issueTracker": [ + {"@id": "https://github.com/softwarepub/hermes/issues"} + ], + "https://codemeta.github.io/terms/referencePublication": [{"@id": "https://arxiv.org/abs/2201.09015"}] + }) + ) + ] +) +def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): + monkeypatch.chdir(tmp_path) + + codemeta_file = tmp_path / "codemeta.json" + codemeta_file.write_text(codemeta) + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"codemeta\" ]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit: + manager = HermesContext() + manager.prepare_step("harvest") + with manager["codemeta"] as cache: + result = SoftwareMetadata(cache["codemeta"]) + manager.finalize_step("harvest") + finally: + sys.argv = orig_argv + + assert result.data_dict == res.data_dict + + +@pytest.mark.parametrize( + "process_result, res", + [ + 2 * ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }), + ), + 2 * ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/applicationCategory": [{"@id": "Testing"}], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "_:author_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/contributor": [ + { + "@id": "_:contributor_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ], + "http://schema.org/dateCreated": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/dateModified": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/funder": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "TestsTests"}] + } + ], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [ + {"@id": "https://spdx.org/licenses/Adobe-2006"}, + {"@id": "https://spdx.org/licenses/Abstyles"}, + {"@id": "https://spdx.org/licenses/AGPL-1.0-only"} + ], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/operatingSystem": [{"@value": "Windows"}], + "http://schema.org/programmingLanguage": [{"@value": "Python"}, {"@value": "Python 3"}], + "http://schema.org/relatedLink": [{"@id": "https://docs.software-metadata.pub/en/latest"}], + "http://schema.org/releaseNotes": [{"@value": "get it now"}], + "http://schema.org/version": [{"@value": "1.1.1"}], + "https://codemeta.github.io/terms/developmentStatus": [{"@id": "abandoned"}], + "https://codemeta.github.io/terms/funding": [{"@value": "none :("}], + "https://codemeta.github.io/terms/isSourceCodeOf": [{"@id": "HERMES"}], + "https://codemeta.github.io/terms/issueTracker": [ + {"@id": "https://github.com/softwarepub/hermes/issues"} + ], + "https://codemeta.github.io/terms/referencePublication": [{"@id": "https://arxiv.org/abs/2201.09015"}] + }), + ), + ] +) +def test_do_nothing_curate(tmp_path, monkeypatch, process_result, res): + monkeypatch.chdir(tmp_path) + + manager = HermesContext(tmp_path) + manager.prepare_step("process") + with manager["result"] as cache: + cache["expanded"] = process_result.ld_value + cache["context"] = {"@context": process_result.full_context} + manager.finalize_step("process") + + config_file = tmp_path / "hermes.toml" + config_file.write_text("") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "curate", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit: + manager.prepare_step("curate") + with manager["result"] as cache: + result = SoftwareMetadata(cache["expanded"][0], cache["context"]["@context"][1]) + manager.finalize_step("curate") + finally: + sys.argv = orig_argv + + assert result.data_dict == res.data_dict