diff --git a/REUSE.toml b/REUSE.toml index b3033158..c81fab97 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -17,3 +17,9 @@ path = ["REUSE.toml"] precedence = "aggregate" SPDX-FileCopyrightText = "German Aerospace Center (DLR), Helmholtz-Zentrum Dresden-Rossendorf, Forschungszentrum Jülich" SPDX-License-Identifier = "CC0-1.0" + +[[annotations]] +path = ["src/**/*.py", "test/**/*.py"] +precedence = "aggregate" +SPDX-FileCopyrightText = "German Aerospace Center (DLR), Helmholtz-Zentrum Dresden-Rossendorf, Forschungszentrum Jülich" +SPDX-License-Identifier = "Apache-2.0" \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 74361955..476df72e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -927,7 +927,7 @@ version = "3.0.2" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.9" -groups = ["dev", "docs"] +groups = ["docs"] files = [ {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, @@ -1574,21 +1574,6 @@ pytest = ">=4.6" [package.extras] testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] -[[package]] -name = "pytest-httpserver" -version = "1.1.3" -description = "pytest-httpserver is a httpserver for pytest" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "pytest_httpserver-1.1.3-py3-none-any.whl", hash = "sha256:5f84757810233e19e2bb5287f3826a71c97a3740abe3a363af9155c0f82fdbb9"}, - {file = "pytest_httpserver-1.1.3.tar.gz", hash = "sha256:af819d6b533f84b4680b9416a5b3f67f1df3701f1da54924afd4d6e4ba5917ec"}, -] - -[package.dependencies] -Werkzeug = ">=2.0.0" - [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1699,14 +1684,14 @@ files = [ [[package]] name = "rdflib" -version = "7.1.4" 
+version = "7.5.0" description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information." optional = false -python-versions = "<4.0.0,>=3.8.1" +python-versions = ">=3.8.1" groups = ["main"] files = [ - {file = "rdflib-7.1.4-py3-none-any.whl", hash = "sha256:72f4adb1990fa5241abd22ddaf36d7cafa5d91d9ff2ba13f3086d339b213d997"}, - {file = "rdflib-7.1.4.tar.gz", hash = "sha256:fed46e24f26a788e2ab8e445f7077f00edcf95abb73bcef4b86cefa8b62dd174"}, + {file = "rdflib-7.5.0-py3-none-any.whl", hash = "sha256:b011dfc40d0fc8a44252e906dcd8fc806a7859bc231be190c37e9568a31ac572"}, + {file = "rdflib-7.5.0.tar.gz", hash = "sha256:663083443908b1830e567350d72e74d9948b310f827966358d76eebdc92bf592"}, ] [package.dependencies] @@ -1719,6 +1704,7 @@ html = ["html5rdf (>=1.2,<2)"] lxml = ["lxml (>=4.3,<6.0)"] networkx = ["networkx (>=2,<4)"] orjson = ["orjson (>=3.9.14,<4)"] +rdf4j = ["httpx (>=0.28.1,<0.29.0)"] [[package]] name = "requests" @@ -2454,24 +2440,6 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] -[[package]] -name = "werkzeug" -version = "3.1.3" -description = "The comprehensive WSGI web application library." 
-optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e"}, - {file = "werkzeug-3.1.3.tar.gz", hash = "sha256:60723ce945c19328679790e3282cc758aa4a6040e4bb330f53d30fa546d44746"}, -] - -[package.dependencies] -MarkupSafe = ">=2.1.1" - -[package.extras] -watchdog = ["watchdog (>=2.3)"] - [[package]] name = "wheel" version = "0.45.1" @@ -2579,4 +2547,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.10, <4.0.0" -content-hash = "2e2405b30c3dee4416a6e77828c7cff1197a8be71665770bcbdb308c19ef4358" +content-hash = "e76de51d1f5dd86486d4cc24a5cdf7d007b16ce5d9d0cc3f7d0f353cf0defff0" diff --git a/pyproject.toml b/pyproject.toml index 49b64ee7..f9588a75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,7 +78,6 @@ pytest-cov = "^3.0.0" taskipy = "^1.10.3" flake8 = "^5.0.4" requests-mock = "^1.10.0" -pytest-httpserver = "^1.1.3" # Packages for developers for creating documentation [tool.poetry.group.docs] diff --git a/src/hermes/commands/__init__.py b/src/hermes/commands/__init__.py index 5203ac18..278faddf 100644 --- a/src/hermes/commands/__init__.py +++ b/src/hermes/commands/__init__.py @@ -8,12 +8,12 @@ # "unused import" errors. 
# flake8: noqa -from hermes.commands.base import HermesHelpCommand -from hermes.commands.base import HermesVersionCommand -from hermes.commands.clean.base import HermesCleanCommand -from hermes.commands.init.base import HermesInitCommand -from hermes.commands.curate.base import HermesCurateCommand +# from hermes.commands.base import HermesHelpCommand +# from hermes.commands.base import HermesVersionCommand +# from hermes.commands.clean.base import HermesCleanCommand +# from hermes.commands.init.base import HermesInitCommand +# from hermes.commands.curate.base import HermesCurateCommand from hermes.commands.harvest.base import HermesHarvestCommand -from hermes.commands.process.base import HermesProcessCommand +# from hermes.commands.process.base import HermesProcessCommand from hermes.commands.deposit.base import HermesDepositCommand -from hermes.commands.postprocess.base import HermesPostprocessCommand +# from hermes.commands.postprocess.base import HermesPostprocessCommand diff --git a/src/hermes/commands/base.py b/src/hermes/commands/base.py index 3ae9030b..2d182267 100644 --- a/src/hermes/commands/base.py +++ b/src/hermes/commands/base.py @@ -9,19 +9,19 @@ import logging import pathlib from importlib import metadata -from typing import Dict, Optional, Type +from typing import Type, Union import toml from pydantic import BaseModel from pydantic_settings import BaseSettings, SettingsConfigDict -class _HermesSettings(BaseSettings): +class HermesSettings(BaseSettings): """Root class for HERMES configuration model.""" model_config = SettingsConfigDict(env_file_encoding='utf-8') - logging: Dict = {} + logging: dict = {} class HermesCommand(abc.ABC): @@ -31,7 +31,7 @@ class HermesCommand(abc.ABC): """ command_name: str = "" - settings_class: Type = _HermesSettings + settings_class: Type = HermesSettings def __init__(self, parser: argparse.ArgumentParser): """Initialize a new instance of any HERMES command. 
@@ -45,28 +45,27 @@ def __init__(self, parser: argparse.ArgumentParser): self.log = logging.getLogger(f"hermes.{self.command_name}") self.errors = [] - @classmethod - def init_plugins(cls): + def init_plugins(self): """Collect and initialize the plugins available for the HERMES command.""" # Collect all entry points for this group (i.e., all valid plug-ins for the step) - entry_point_group = f"hermes.{cls.command_name}" - group_plugins = { - entry_point.name: entry_point.load() - for entry_point in metadata.entry_points(group=entry_point_group) - } - - # Collect the plug-in specific configurations - cls.derive_settings_class({ - plugin_name: plugin_class.settings_class - for plugin_name, plugin_class in group_plugins.items() - if hasattr(plugin_class, "settings_class") and plugin_class.settings_class is not None - }) + entry_point_group = f"hermes.{self.command_name}" + group_plugins = {} + group_settings = {} + + for entry_point in metadata.entry_points(group=entry_point_group): + plugin_cls = entry_point.load() + + group_plugins[entry_point.name] = plugin_cls + if hasattr(plugin_cls, 'settings_class') and plugin_cls.settings_class is not None: + group_settings[entry_point.name] = plugin_cls.settings_class + + self.derive_settings_class(group_settings) return group_plugins @classmethod - def derive_settings_class(cls, setting_types: Dict[str, Type]) -> None: + def derive_settings_class(cls, setting_types: dict[str, Type]) -> None: """Build a new Pydantic data model class for configuration. This will create a new class that includes all settings from the plugins available. 
@@ -131,13 +130,10 @@ def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: def load_settings(self, args: argparse.Namespace): """Load settings from the configuration file (passed in from command line).""" - try: - toml_data = toml.load(args.path / args.config) - self.root_settings = HermesCommand.settings_class.model_validate(toml_data) - self.settings = getattr(self.root_settings, self.command_name) - except FileNotFoundError as e: - self.log.error("hermes.toml was not found. Try to run 'hermes init' first or create one manually.") - raise e # This will lead to our default error message & sys.exit + + toml_data = toml.load(args.path / args.config) + self.root_settings = HermesCommand.settings_class.model_validate(toml_data) + self.settings = getattr(self.root_settings, self.command_name) def patch_settings(self, args: argparse.Namespace): """Process command line options for the settings.""" @@ -164,7 +160,9 @@ def __call__(self, args: argparse.Namespace): class HermesPlugin(abc.ABC): """Base class for all HERMES plugins.""" - settings_class: Optional[Type] = None + pluing_node = None + + settings_class: Union[Type, None] = None @abc.abstractmethod def __call__(self, command: HermesCommand) -> None: @@ -202,27 +200,3 @@ def __call__(self, args: argparse.Namespace) -> None: # Otherwise, simply show the general help and exit (cleanly). 
self.parser.print_help() self.parser.exit() - - def load_settings(self, args: argparse.Namespace): - """No settings are needed for the help command.""" - pass - - -class HermesVersionSettings(BaseModel): - """Intentionally empty settings class for the version command.""" - pass - - -class HermesVersionCommand(HermesCommand): - """Show HERMES version and exit.""" - - command_name = "version" - settings_class = HermesVersionSettings - - def load_settings(self, args: argparse.Namespace): - """Pass loading settings as not necessary for this command.""" - pass - - def __call__(self, args: argparse.Namespace) -> None: - self.log.info(metadata.version("hermes")) - self.parser.exit() diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index 06a18ca7..0ec2d1ae 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -12,9 +12,11 @@ import sys from hermes import logger -from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, - HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, - HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) +# FIXME: remove comments after new implementation of modules is available +# from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, +# HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, +# HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) +from hermes.commands import HermesDepositCommand, HermesHarvestCommand from hermes.commands.base import HermesCommand @@ -36,15 +38,15 @@ def main() -> None: setting_types = {} for command in ( - HermesHelpCommand(parser), - HermesVersionCommand(parser), - HermesInitCommand(parser), - HermesCleanCommand(parser), + # HermesHelpCommand(parser), + # HermesVersionCommand(parser), + # HermesInitCommand(parser), + # HermesCleanCommand(parser), HermesHarvestCommand(parser), - HermesProcessCommand(parser), - HermesCurateCommand(parser), + # 
HermesProcessCommand(parser), + # HermesCurateCommand(parser), HermesDepositCommand(parser), - HermesPostprocessCommand(parser), + # HermesPostprocessCommand(parser), ): if command.settings_class is not None: setting_types[command.command_name] = command.settings_class diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index 75018579..6fbf3625 100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -7,15 +7,13 @@ import abc import argparse -import json -import sys from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.model.context import CodeMetaContext -from hermes.model.path import ContextPath -from hermes.model.errors import HermesValidationError +from hermes.model.context_manager import HermesContext +from hermes.model import SoftwareMetadata +from hermes.model.error import HermesValidationError class BaseDepositPlugin(HermesPlugin): @@ -24,26 +22,35 @@ class BaseDepositPlugin(HermesPlugin): TODO: describe workflow... needs refactoring to be less stateful! """ - def __init__(self, command, ctx): - self.command = command - self.ctx = ctx - def __call__(self, command: HermesCommand) -> None: """Initiate the deposition process. This calls a list of additional methods on the class, none of which need to be implemented. 
""" self.command = command + self.ctx = HermesContext() + + self.ctx.prepare_step("curate") + self.metadata = SoftwareMetadata.load_from_cache(self.ctx, "result") + self.ctx.finalize_step("curate") self.prepare() - self.map_metadata() + deposit = self.map_metadata() + self.ctx.prepare_step("deposit") + with self.ctx[command.settings.target] as cache: + cache["deposit"] = deposit + self.ctx.finalize_step("deposit") if self.is_initial_publication(): self.create_initial_version() else: self.create_new_version() - self.update_metadata() + deposit = self.update_metadata() + self.ctx.prepare_step("deposit") + with self.ctx["deposit"] as cache: + cache["result"] = deposit + self.ctx.finalize_step("deposit") self.delete_artifacts() self.upload_artifacts() self.publish() @@ -58,8 +65,8 @@ def prepare(self) -> None: pass @abc.abstractmethod - def map_metadata(self) -> None: - """Map the given metadata to the target schema of the deposition platform. + def map_metadata(self) -> dict: + """Map the given metadata to the target schema of the deposition platform and return it. When mapping metadata, make sure to add traces to the HERMES software, e.g. via DataCite's ``relatedIdentifier`` using the ``isCompiledBy`` relation. Ideally, the value @@ -88,8 +95,9 @@ def create_new_version(self) -> None: """Create a new version of an existing publication on the target platform.""" pass - def update_metadata(self) -> None: - """Update the metadata of the newly created version.""" + @abc.abstractmethod + def update_metadata(self) -> dict: + """Update the metadata of the newly created version and return it even if it hasn't changed.""" pass def delete_artifacts(self) -> None: @@ -106,7 +114,7 @@ def publish(self) -> None: pass -class _DepositSettings(BaseModel): +class DepositSettings(BaseModel): """Generic deposition settings.""" target: str = "" @@ -116,7 +124,7 @@ class HermesDepositCommand(HermesCommand): """ Deposit the curated metadata to repositories. 
""" command_name = "deposit" - settings_class = _DepositSettings + settings_class = DepositSettings def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: command_parser.add_argument('--file', '-f', nargs=1, action='append', @@ -128,26 +136,13 @@ def __call__(self, args: argparse.Namespace) -> None: self.args = args plugin_name = self.settings.target - ctx = CodeMetaContext() - codemeta_file = ctx.get_cache("curate", ctx.hermes_name) - if not codemeta_file.exists(): - self.log.error("You must run the 'curate' command before deposit") - sys.exit(1) - - codemeta_path = ContextPath("codemeta") - with open(codemeta_file) as codemeta_fh: - ctx.update(codemeta_path, json.load(codemeta_fh)) - try: - plugin_func = self.plugins[plugin_name](self, ctx) - + plugin_func = self.plugins[plugin_name]() except KeyError as e: self.log.error("Plugin '%s' not found.", plugin_name) self.errors.append(e) - try: plugin_func(self) - except HermesValidationError as e: self.log.error("Error while executing %s: %s", plugin_name, e) self.errors.append(e) diff --git a/src/hermes/commands/deposit/file.py b/src/hermes/commands/deposit/file.py index 6c5d6419..ed6bd570 100644 --- a/src/hermes/commands/deposit/file.py +++ b/src/hermes/commands/deposit/file.py @@ -11,22 +11,23 @@ from pydantic import BaseModel from hermes.commands.deposit.base import BaseDepositPlugin -from hermes.model.path import ContextPath class FileDepositSettings(BaseModel): - filename: str = 'hermes.json' + filename: str = 'codemeta.json' class FileDepositPlugin(BaseDepositPlugin): settings_class = FileDepositSettings - def map_metadata(self) -> None: - self.ctx.update(ContextPath.parse('deposit.file'), self.ctx['codemeta']) + def map_metadata(self) -> dict: + return self.metadata.compact() + + def update_metadata(self) -> dict: + return self.metadata.compact() def publish(self) -> None: file_config = self.command.settings.file - output_data = self.ctx['deposit.file'] with open(file_config.filename, 
'w') as deposition_file: - json.dump(output_data, deposition_file, indent=2) + json.dump(self.metadata.compact(), deposition_file, indent=2) diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index 69fb87a0..3915d536 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -6,22 +6,20 @@ # SPDX-FileContributor: Oliver Bertuch # SPDX-FileContributor: Michael Meinel -import json import logging import pathlib -import typing as t from datetime import date, datetime from pathlib import Path from urllib.parse import urlparse import requests from pydantic import BaseModel +from typing import Union -from hermes.commands.deposit.base import BaseDepositPlugin, HermesDepositCommand +from hermes.commands.deposit.base import BaseDepositPlugin from hermes.commands.deposit.error import DepositionUnauthorizedError from hermes.error import MisconfigurationError -from hermes.model.context import CodeMetaContext -from hermes.model.path import ContextPath +from hermes.model.error import HermesValidationError from hermes.utils import hermes_doi, hermes_user_agent @@ -109,7 +107,7 @@ def __init__(self, client=None): def resolve_latest_id( self, record_id=None, doi=None, codemeta_identifier=None - ) -> t.Tuple[t.Optional[str], dict]: + ) -> tuple[Union[str, None], dict]: """ Using the given metadata parameters, figure out the latest record id. @@ -167,7 +165,7 @@ def resolve_doi(self, doi) -> str: *_, record_id = page_url.path.split('/') return record_id - def resolve_record_id(self, record_id: str) -> t.Tuple[str, dict]: + def resolve_record_id(self, record_id: str) -> tuple[str, dict]: """ Find the latest version of a given record. 
@@ -186,7 +184,7 @@ def resolve_record_id(self, record_id: str) -> t.Tuple[str, dict]: res_json = res.json() return res_json['id'], res_json['metadata'] - def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]: + def resolve_license_id(self, license_url: Union[str, None]) -> Union[str, None]: """Get Invenio license representation from CodeMeta. The license to use is extracted from the ``license`` field in the @@ -219,7 +217,7 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]: parsed_url = urlparse(license_url) url_path = parsed_url.path.rstrip("/") - license_id = url_path.split("/")[-1] + license_id = str.lower(url_path.split("/")[-1]) response = self.client.get_license(license_id) if response.status_code == 404: @@ -231,7 +229,8 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]: @staticmethod def _extract_license_id_from_response(data: dict) -> str: - return data["metadata"]["id"] + # TODO: find correct key, data["metadata"]["id"] did not work for me but data["id"] does + return data["id"] class InvenioDepositSettings(BaseModel): @@ -243,7 +242,7 @@ class InvenioDepositSettings(BaseModel): access_right: str = None embargo_date: str = None access_conditions: str = None - api_paths: t.Dict = {} + api_paths: dict = {} auth_token: str = '' files: list[pathlib.Path] = [] @@ -258,11 +257,13 @@ class InvenioDepositPlugin(BaseDepositPlugin): invenio_resolver_class = InvenioResolver settings_class = InvenioDepositSettings - def __init__(self, command: HermesDepositCommand, ctx: CodeMetaContext, client=None, resolver=None) -> None: - super().__init__(command, ctx) + def __init__(self) -> None: + super().__init__() - self.invenio_context_path = ContextPath.parse(f"deposit.{self.platform_name}") self.invenio_ctx = None + + def __call__(self, command, *, client=None, resolver=None): + self.command = command self.config = getattr(self.command.settings, self.platform_name) if client is None: @@ 
-292,7 +293,9 @@ def __init__(self, command: HermesDepositCommand, ctx: CodeMetaContext, client=N self.resolver = resolver or self.invenio_resolver_class(self.client) self.links = {} - # TODO: Populate some data structure here? Or move more of this into __init__? + super().__call__(command) + + # TODO: Populate some data structure here? Or move more of this into __init__.py? def prepare(self) -> None: """Prepare the deposition on an Invenio-based platform. @@ -305,49 +308,42 @@ def prepare(self) -> None: - check access modalities (access right, access conditions, embargo data, existence of license) - check whether required configuration options are present - - update ``self.ctx`` with metadata collected during the checks + - update ``self.metadata`` with metadata collected during the checks """ rec_id = self.config.record_id doi = self.config.doi - try: - codemeta_identifier = self.ctx["codemeta.identifier"] - except KeyError: - codemeta_identifier = None - + codemeta_identifier = self.metadata.get("identifier", None) rec_id, rec_meta = self.resolver.resolve_latest_id( record_id=rec_id, doi=doi, codemeta_identifier=codemeta_identifier ) - version = self.ctx["codemeta"].get("version") + if len(self.metadata.get("version", [])) > 1: + raise HermesValidationError("Too many licenses for invenio deposit.") + if len(self.metadata.get("version", [])) == 1: + version = self.metadata["version"][0] + else: + version = None if rec_meta and (version == rec_meta.get("version")): raise ValueError(f"Version {version} already deposited.") - self.ctx.update(self.invenio_context_path['latestRecord'], {'id': rec_id, 'metadata': rec_meta}) - - license = self._get_license_identifier() - self.ctx.update(self.invenio_context_path["license"], license) - - communities = self._get_community_identifiers() - self.ctx.update(self.invenio_context_path["communities"], communities) + deposition_data = {} + deposition_data["latestRecord"] = {'id': rec_id, 'metadata': rec_meta} + 
deposition_data["license"] = self._get_license_identifier() + deposition_data["communities"] = self._get_community_identifiers() access_right, embargo_date, access_conditions = self._get_access_modalities(license) - self.ctx.update(self.invenio_context_path["access_right"], access_right) - self.ctx.update(self.invenio_context_path["embargo_date"], embargo_date) - self.ctx.update(self.invenio_context_path["access_conditions"], access_conditions) - - self.invenio_ctx = self.ctx[self.invenio_context_path] + deposition_data["access_right"] = access_right + deposition_data["embargo_date"] = embargo_date + deposition_data["access_conditions"] = access_conditions - def map_metadata(self) -> None: - """Map the harvested metadata onto the Invenio schema.""" + self.invenio_ctx = deposition_data - deposition_metadata = self._codemeta_to_invenio_deposition() - self.ctx.update(self.invenio_context_path["depositionMetadata"], deposition_metadata) - - # Store a snapshot of the mapped data within the cache, useful for analysis, debugging, etc - with open(self.ctx.get_cache("deposit", self.platform_name, create=True), 'w') as invenio_json: - json.dump(deposition_metadata, invenio_json, indent=' ') + def map_metadata(self) -> dict: + """Map the harvested metadata onto the Invenio schema and return it.""" + self.invenio_ctx["depositionMetadata"] = self._codemeta_to_invenio_deposition() + return self.invenio_ctx["depositionMetadata"] def is_initial_publication(self) -> bool: latest_record_id = self.invenio_ctx.get("latestRecord", {}).get("id") @@ -406,8 +402,8 @@ def related_identifiers(self): }, ] - def update_metadata(self) -> None: - """Update the metadata of a draft.""" + def update_metadata(self) -> dict: + """Update the metadata of a draft and return it.""" draft_url = self.links["latest_draft"] @@ -426,8 +422,7 @@ def update_metadata(self) -> None: self.links.update(deposit["links"]) _log.debug("Created new version deposit: %s", self.links["html"]) - with 
open(self.ctx.get_cache('deposit', 'deposit', create=True), 'w') as deposit_file: - json.dump(deposit, deposit_file, indent=4) + return deposit def delete_artifacts(self) -> None: """Delete existing file artifacts. @@ -448,7 +443,11 @@ def upload_artifacts(self) -> None: bucket_url = self.links["bucket"] - files = *self.config.files, *[f[0] for f in self.command.args.file] + if self.command.args.file: + files = *self.config.files, *[f[0] for f in self.command.args.file] + else: + files = tuple(self.config.files) + for path_arg in files: path = Path(path_arg) @@ -505,14 +504,41 @@ def _codemeta_to_invenio_deposition(self) -> dict: differences between Invenio-based platforms. """ - metadata = self.ctx["codemeta"] + metadata = self.metadata license = self.invenio_ctx["license"] communities = self.invenio_ctx["communities"] access_right = self.invenio_ctx["access_right"] embargo_date = self.invenio_ctx["embargo_date"] access_conditions = self.invenio_ctx["access_conditions"] - creators = [ + creators = [] + for author in metadata.get("author", []): + if not "Person" in author.get("@type", []): + continue + creator = {} + if len( + affils := [ + name for affil in author.get("affiliation", []) for name in affil.get("legalname", []) + ] + ) != 0: + creator["affiliation"] = affils + + if len(author.get("familyName", [])) > 1: + raise HermesValidationError(f"Author has too many family names: {author}") + if len(author.get("familyName", [])) == 1: + given_names_str = " ".join(author.get("givenName", [])) + name = f"{author["familyName"][0]}, {given_names_str}" + elif len(author.get("name", [])) != 1: + raise HermesValidationError(f"Author has too many or no names: {author}") + else: + name = author["name"][0] + creator["name"] = name + if (id := author.get("@id", None)) is not None: + creator["orcid"] = id.replace("https://orcid.org/", "") + if creator: + creators.append(creator) + + """creators = [ # TODO: Distinguish between @type "Person" and others { k: v for k, v in { 
@@ -520,15 +546,16 @@ def _codemeta_to_invenio_deposition(self) -> dict: "affiliation": author.get("affiliation", {"legalName": None}).get("legalName"), # Invenio wants "family, given". author.get("name") might not have this format. "name": f"{author.get('familyName')}, {author.get('givenName')}" - if author.get("familyName") and author.get("givenName") + if "familyName" in author and "givenName" in author else author.get("name"), # Invenio expects the ORCID without the URL part "orcid": author.get("@id", "").replace("https://orcid.org/", "") or None, }.items() if v is not None } for author in metadata["author"] - ] + ]""" + # TODO: reimplement with new api # This is not used at the moment. See comment below in `deposition_metadata` dict. contributors = [ # noqa: F841 # TODO: Distinguish between @type "Person" and others @@ -538,7 +565,7 @@ def _codemeta_to_invenio_deposition(self) -> dict: "affiliation": contributor.get("affiliation", {"legalName": None}).get("legalName"), # Invenio wants "family, given". contributor.get("name") might not have this format. 
"name": f"{contributor.get('familyName')}, {contributor.get('givenName')}" - if contributor.get("familyName") and contributor.get("givenName") + if "familyName" in contributor and "givenName" in contributor else contributor.get("name"), # Invenio expects the ORCID without the URL part "orcid": contributor.get("@id", "").replace("https://orcid.org/", "") or None, @@ -550,6 +577,33 @@ def _codemeta_to_invenio_deposition(self) -> dict: for contributor in metadata.get("contributor", []) if contributor.get("name") != "GitHub" ] + if len(metadata.get("name", [])) != 1: + _log.error("More than one or zero names for the Software are given.") + raise HermesValidationError("More than one or zerno names for the Software.") + name = metadata["name"][0] + + if len(metadata.get("schema:description", [])) > 1: + _log.error("More than one descriptions of the Software are given.") + raise HermesValidationError("More than one descriptions of the Software are given.") + if len(metadata.get("schema:description", [])) == 1: + description = metadata["schema:description"][0] + else: + description = None + + if len(metadata.get("schema:version", [])) > 1: + _log.error("More than one version of the Software are given.") + raise HermesValidationError("More than one version of the Software are given.") + if len(metadata.get("schema:version", [])) == 1: + version = metadata["schema:version"][0] + else: + version = None + + keywords = metadata.get("schema:keywords", []) + if len(keywords) == 0: + keywords = None + else: + keywords = keywords.to_python() + # TODO: Use the fields currently set to `None`. # Some more fields are available but they most likely don't relate to software # publications targeted by hermes. @@ -563,12 +617,9 @@ def _codemeta_to_invenio_deposition(self) -> dict: # TODO: Maybe we want a different date? Then make this configurable. If not, # this can be removed as it defaults to today. 
"publication_date": date.today().isoformat(), - "title": metadata["name"], + "title": name, "creators": creators, - # TODO: Use a real description here. Possible sources could be - # `tool.poetry.description` from pyproject.toml or `abstract` from - # CITATION.cff. This should then be stored in codemeta description field. - "description": metadata["name"], + "description": description, "access_right": access_right, "license": license, "embargo_date": embargo_date, @@ -581,8 +632,8 @@ def _codemeta_to_invenio_deposition(self) -> dict: # them. # TODO: Use the DOI we get back from this. "prereserve_doi": True, - # TODO: A good source for this could be `tool.poetry.keywords` in pyproject.toml. - "keywords": None, + "keywords": keywords, + # TODO: Is there a good codemeta/ schema field? "notes": None, "related_identifiers": self.related_identifiers(), # TODO: Use `contributors`. In the case of the hermes workflow itself, the @@ -594,17 +645,21 @@ def _codemeta_to_invenio_deposition(self) -> dict: "communities": communities, "grants": None, "subjects": None, - "version": metadata.get('version'), + "version": version, }.items() if v is not None} return deposition_metadata - def _get_license_identifier(self) -> t.Optional[str]: + def _get_license_identifier(self) -> Union[str, None]: """Get Invenio license identifier that matches the given license URL. If no license is configured, ``None`` will be returned. 
""" - license_url = self.ctx["codemeta"].get("license") + if "license" not in self.metadata: + raise HermesValidationError("No license is given.") + if len(self.metadata["license"]) > 1: + raise HermesValidationError("Too many licenses for invenio deposit.") + license_url = self.metadata["license"][0] return self.resolver.resolve_license_id(license_url) def _get_community_identifiers(self): @@ -612,7 +667,7 @@ def _get_community_identifiers(self): This function gets the communities to be used for the deposition on an Invenio-based site from the config and checks their validity against the site's API. If one of the - identifiers can not be found on the site, a :class:`HermesMisconfigurationError` is + identifiers can not be found on the site, a :class:`MisconfigurationError` is raised. """ diff --git a/src/hermes/commands/deposit/invenio_rdm.py b/src/hermes/commands/deposit/invenio_rdm.py index a381db90..01e08371 100644 --- a/src/hermes/commands/deposit/invenio_rdm.py +++ b/src/hermes/commands/deposit/invenio_rdm.py @@ -6,9 +6,8 @@ # SPDX-FileContributor: Oliver Bertuch # SPDX-FileContributor: Michael Meinel -import typing as t - from requests import HTTPError +from typing import Union from hermes.commands.deposit.invenio import InvenioClient, InvenioDepositPlugin, InvenioResolver @@ -27,7 +26,7 @@ def get_licenses(self): class InvenioRDMResolver(InvenioResolver): invenio_client_class = InvenioRDMClient - def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: + def resolve_license_id(self, license_url: Union[str, None]) -> Union[dict, None]: """Deliberately try to resolve the license URL to a valid InvenioRDM license information record from the vocabulary. 
@@ -47,6 +46,12 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: except HTTPError: pass + # FIXME: Why not get all license_cross_refs and then use a query parameter like this: + # ?q=props.url:("license_url" OR "license_cross_ref[1]" OR ...)&size=1000 + # That would be able to replace _search_license_info. + # FIXME: Some licenses in valid_licenses["hits"]["hits"]["props"]["url"] are only http although + # https://spdx.org/licenses/license.json lists them in crossRef as https + # If the easy "mapping" did not work, we really need to "search" for the correct license ID. response = self.client.get_licenses() response.raise_for_status() @@ -65,6 +70,7 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: if license_info is not None: break else: + # FIXME: Why is this only raised here and not always when license_info is None? raise RuntimeError(f"Could not resolve license URL {license_url} to a valid identifier.") return license_info @@ -73,7 +79,7 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: def _extract_license_id_from_response(data: dict) -> str: return data["id"] - def _search_license_info(self, _url: str, valid_licenses: dict) -> t.Optional[dict]: + def _search_license_info(self, _url: str, valid_licenses: dict) -> Union[dict, None]: for license_info in valid_licenses['hits']['hits']: try: if license_info['props']['url'] == _url: diff --git a/src/hermes/commands/harvest/base.py b/src/hermes/commands/harvest/base.py index 59fad8f1..19ccc623 100644 --- a/src/hermes/commands/harvest/base.py +++ b/src/hermes/commands/harvest/base.py @@ -5,14 +5,13 @@ # SPDX-FileContributor: Michael Meinel import argparse -import typing as t -from datetime import datetime from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.model.context import HermesContext, HermesHarvestContext -from hermes.model.error import HermesValidationError, 
def __call__(self, args: argparse.Namespace) -> None:
    """Run all configured harvesters and store their results in the HERMES cache.

    Every name in ``self.settings.sources`` is looked up in ``self.plugins``. The
    plugin is instantiated and called with this command, and the first element of
    its return value is written to the plugin's cache entry under the keys
    ``codemeta`` (compacted), ``context`` and ``expanded``.

    Unknown plugin names and ``HermesValidationError`` raised by a plugin are logged
    and appended to ``self.errors``; harvesting continues with the next source.

    :param args: Parsed command line arguments, stored as ``self.args``.
    """
    self.args = args

    # Initialize the harvest cache directory here to indicate the step ran
    ctx = HermesContext()
    ctx.prepare_step('harvest')

    for plugin_name in self.settings.sources:
        # Guard only the lookup, so that a KeyError raised inside a plugin is not
        # misreported as a missing plugin.
        try:
            plugin_cls = self.plugins[plugin_name]
        except KeyError as e:
            self.log.error("Plugin '%s' not found.", plugin_name)
            self.errors.append(e)
            continue

        try:
            # Load plugin and run the harvester
            plugin_func = plugin_cls()
            metadata = plugin_func(self)[0]

            with ctx[plugin_name] as plugin_ctx:
                plugin_ctx["codemeta"] = metadata.compact()
                plugin_ctx["context"] = {"@context": metadata.full_context}
                plugin_ctx["expanded"] = metadata.ld_value

        except HermesValidationError as e:
            self.log.error("Error while executing %s: %s", plugin_name, e)
            self.errors.append(e)

    ctx.finalize_step('harvest')
def _convert_cff_to_codemeta(self, cff_data: str) -> Any:
    """Convert raw CFF text to CodeMeta and return the parsed JSON.

    :param cff_data: Content of the CFF file as text.
    :return: The CodeMeta document produced by cffconvert, decoded from JSON.
    """
    citation = Citation(cff_data)
    return json.loads(citation.as_codemeta())
value "%s"', error.instance) @@ -108,7 +109,7 @@ def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool: audit_log.info('- Found valid Citation File Format file at: %s', cff_file) return True - def _get_single_cff(self, path: pathlib.Path) -> t.Optional[pathlib.Path]: + def _get_single_cff(self, path: pathlib.Path) -> Union[pathlib.Path, None]: # Find CFF files in directories and subdirectories cff_file = path / 'CITATION.cff' if cff_file.exists(): diff --git a/src/hermes/commands/harvest/codemeta.py b/src/hermes/commands/harvest/codemeta.py index b75bb002..5f211222 100644 --- a/src/hermes/commands/harvest/codemeta.py +++ b/src/hermes/commands/harvest/codemeta.py @@ -8,15 +8,16 @@ import glob import json import pathlib -import typing as t +from typing import Union from hermes.commands.harvest.base import HermesHarvestCommand, HermesHarvestPlugin from hermes.commands.harvest.util.validate_codemeta import validate_codemeta -from hermes.model.errors import HermesValidationError +from hermes.model.error import HermesValidationError +from hermes.model import SoftwareMetadata class CodeMetaHarvestPlugin(HermesHarvestPlugin): - def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: + def __call__(self, command: HermesHarvestCommand) -> tuple[SoftwareMetadata, dict]: """ Implementation of a harvester that provides data from a codemeta.json file format. 
class MisconfigurationError(Exception):
    """Signal that the given configuration cannot be used as it is."""
class SoftwareMetadata(ld_dict):
    """JSON-LD dictionary for software metadata that is set up with ``ALL_CONTEXTS``."""

    def __init__(self, data: "dict | None" = None, extra_vocabs: "dict | None" = None) -> None:
        """Create the metadata object, optionally from existing data.

        :param data: Metadata to start with; it is expanded via ``ld_dict.from_dict``.
            ``None`` or an empty dict yields an empty metadata object.
        :param extra_vocabs: Additional term definitions that are appended to
            ``ALL_CONTEXTS`` as one further context entry.
        """
        if extra_vocabs is None:
            ctx = ALL_CONTEXTS
        else:
            ctx = ALL_CONTEXTS + [{**extra_vocabs}]

        expanded = ld_dict.from_dict(data, context=ctx).data_dict if data else {}
        super().__init__([expanded], context=ctx)

    @classmethod
    def load_from_cache(cls, ctx: HermesContext, source: str) -> "SoftwareMetadata":
        """Load the metadata that was cached for ``source``.

        The compacted ``codemeta`` entry is tried first. If it cannot be loaded, the
        object is rebuilt from the cached ``context`` and ``expanded`` entries.

        :param ctx: Context manager giving access to the cache of the current step.
        :param source: Name of the cache entry to read.
        :return: The metadata restored from the cache.
        :raises HermesContexError: If neither representation can be loaded.
        """
        with ctx[source] as cache:
            try:
                return cls(cache["codemeta"])
            except Exception:
                # Deliberate best effort: any failure here falls through to the
                # expanded representation below.
                pass

            try:
                context = cache["context"]["@context"]
                data = cls()
                data.active_ctx = data.ld_proc.initial_ctx(context, {"documentLoader": bundled_loader})
                data.context = context
                # The cached entry is a one-element list holding the expanded
                # dict; iterate its items (iterating the dict itself would only
                # yield keys and break the unpacking).
                for key, value in cache["expanded"][0].items():
                    data[key] = value
                return data
            except Exception as e:
                raise HermesContexError("There is no (valid) data stored in the cache.") from e
def get(self, key, default=_NO_DEFAULT):
    """Return the value stored for ``key``, or ``default`` if the key is absent.

    When no ``default`` is passed, a missing key is still looked up, so whatever
    ``self[key]`` raises for it reaches the caller.
    """
    if key not in self:
        if default is not ld_dict._NO_DEFAULT:
            return default
    return self[key]
def setdefault(self, key, default):
    """Return the value for ``key``, storing ``default`` first if the key is absent."""
    if key in self:
        return self[key]
    self[key] = default
    # Read the value back rather than returning ``default`` directly, so the caller
    # gets what the container actually holds for the key.
    return self[key]

def emplace(self, key):
    """Make sure ``key`` exists by storing an empty list for it if it is absent."""
    if key in self:
        return
    self[key] = []
def test_iterative_assignment():
    """Edit and extend metadata step by step through intermediate references."""
    # This tests iterative assignments/traversals to edit/appending values
    data = SoftwareMetadata(extra_vocabs={"foo": "https://foo.bar"})
    data["author"] = {"name": "Foo"}
    # The single dict assigned above is expected to be read back as an ld_list.
    authors = data["author"]
    assert isinstance(authors, ld_list)
    # Take the first author, add a field and write the item back at its index.
    author1 = authors[0]
    author1["email"] = "author@example.com"
    authors[0] = author1
    # Appending through the reference must grow the list to two authors.
    authors.append({"name": "Bar", "email": "author2@example.com"})
    assert len(authors) == 2
in author["name"]: + assert "email" in author + if "schema:knowsAbout" not in author: + # FIXME: None has to be discussed + author["schema:knowsAbout"] = None + author["schema:pronouns"] = "they/them" diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py new file mode 100644 index 00000000..18dc973c --- /dev/null +++ b/test/hermes_test/model/test_api_e2e.py @@ -0,0 +1,477 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import json +import pytest +import sys +from hermes.model import context_manager, SoftwareMetadata +from hermes.commands import cli +from pathlib import Path + + +@pytest.fixture +def sandbox_auth(): + path = Path("./../auth.txt") + if not path.exists(): + pytest.skip("Local auth token file does not exist.") + with path.open() as f: + yield f.read() + + +@pytest.mark.parametrize( + "cff, res", + [ + ( + """cff-version: 1.2.0 +title: Temp\nmessage: >- + If you use this software, please cite it using the + metadata from this file. +type: software +authors: + - given-names: Max + family-names: Mustermann + email: max@muster.mann""", + SoftwareMetadata({ + "@type": "SoftwareSourceCode", + "author": { + "@list": [{ + "@type": "Person", + "email": ["max@muster.mann"], + "familyName": ["Mustermann"], + "givenName": ["Max"] + }] + }, + "name": ["Temp"] + }) + ), + ( + """# SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR), Helmholtz-Zentrum Dresden-Rossendorf +# +# SPDX-License-Identifier: CC0-1.0 + +# SPDX-FileContributor: Michael Meinel + +cff-version: 1.2.0 +title: hermes +message: >- + If you use this software, please cite it using the + metadata from this file. +version: 0.9.0 +license: "Apache-2.0" +abstract: "Tool to automate software publication. Not stable yet." 
+type: software +authors: + - given-names: Michael + family-names: Meinel + email: michael.meinel@dlr.de + affiliation: German Aerospace Center (DLR) + orcid: "https://orcid.org/0000-0001-6372-3853" + - given-names: Stephan + family-names: Druskat + email: stephan.druskat@dlr.de + affiliation: German Aerospace Center (DLR) + orcid: "https://orcid.org/0000-0003-4925-7248" +identifiers: + - type: doi + value: 10.5281/zenodo.13221384 + description: Version 0.8.1b1 +""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "https://orcid.org/0000-0001-6372-3853", + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "michael.meinel@dlr.de"}], + "http://schema.org/familyName": [{"@value": "Meinel"}], + "http://schema.org/givenName": [{"@value": "Michael"}] + }, + { + "@id": "https://orcid.org/0000-0003-4925-7248", + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "stephan.druskat@dlr.de"}], + "http://schema.org/familyName": [{"@value": "Druskat"}], + "http://schema.org/givenName": [{"@value": "Stephan"}] + } + ] + } + ], + "http://schema.org/description": [{"@value": "Tool to automate software publication. 
Not stable yet."}], + "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + "http://schema.org/name": [{"@value": "hermes"}], + "http://schema.org/version": [{"@value": "0.9.0"}] + }) + ), + ( + """cff-version: 1.2.0 +title: Test +message: None +type: software +authors: + - given-names: Test + family-names: Testi + email: test.testi@test.testi + affiliation: German Aerospace Center (DLR) +identifiers: + - type: url + value: "https://arxiv.org/abs/2201.09015" + - type: doi + value: 10.5281/zenodo.13221384 +repository-code: "https://github.com/softwarepub/hermes" +abstract: for testing +url: "https://docs.software-metadata.pub/en/latest" +keywords: + - testing + - more testing +license: Apache-2.0 +version: 9.0.1 +date-released: "2026-01-16" """, + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@list": [ + { + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/url": [ + 
{"@id": "https://arxiv.org/abs/2201.09015"}, + {"@id": "https://docs.software-metadata.pub/en/latest"} + ], + "http://schema.org/version": [{"@value": "9.0.1"}] + }) + ) + ] +) +def test_cff_harvest(tmp_path, monkeypatch, cff, res): + monkeypatch.chdir(tmp_path) + cff_file = tmp_path / "CITATION.cff" + cff_file.write_text(cff) + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"cff\" ]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager = context_manager.HermesContext() + manager.prepare_step("harvest") + result = SoftwareMetadata.load_from_cache(manager, "cff") + manager.finalize_step("harvest") + sys.argv = orig_argv + + assert result == res + + +@pytest.mark.parametrize( + "codemeta, res", + [ + ( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "description": "for testing", + "name": "Test" +}""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }) + ), + ( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "applicationCategory": "Testing", + "author": [ + { + "id": "_:author_1", + "type": "Person", + "email": "test.testi@test.testi", + "familyName": "Testi", + "givenName": "Test" + } + ], + "codeRepository": "https://github.com/softwarepub/hermes", + "contributor": { + "id": "_:contributor_1", + "type": "Person", + "email": "test.testi@test.testi", + "familyName": "Testi", + "givenName": "Test" + }, + "dateCreated": "2026-01-16", + "dateModified": "2026-01-16", + "datePublished": "2026-01-16", + "description": 
"for testing", + "funder": { + "type": "Organization", + "name": "TestsTests" + }, + "keywords": [ + "testing", + "more testing" + ], + "license": [ + "https://spdx.org/licenses/Adobe-2006", + "https://spdx.org/licenses/Abstyles", + "https://spdx.org/licenses/AGPL-1.0-only" + ], + "name": "Test", + "operatingSystem": "Windows", + "programmingLanguage": [ + "Python", + "Python 3" + ], + "relatedLink": "https://docs.software-metadata.pub/en/latest", + "schema:releaseNotes": "get it now", + "version": "1.1.1", + "developmentStatus": "abandoned", + "funding": "none :(", + "codemeta:isSourceCodeOf": { + "id": "HERMES" + }, + "issueTracker": "https://github.com/softwarepub/hermes/issues", + "referencePublication": "https://arxiv.org/abs/2201.09015" +}""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/applicationCategory": [{"@id": "Testing"}], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "_:author_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/contributor": [ + { + "@id": "_:contributor_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ], + "http://schema.org/dateCreated": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/dateModified": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/funder": [ + { + 
"@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "TestsTests"}] + } + ], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [ + {"@id": "https://spdx.org/licenses/Adobe-2006"}, + {"@id": "https://spdx.org/licenses/Abstyles"}, + {"@id": "https://spdx.org/licenses/AGPL-1.0-only"} + ], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/operatingSystem": [{"@value": "Windows"}], + "http://schema.org/programmingLanguage": [{"@value": "Python"}, {"@value": "Python 3"}], + "http://schema.org/relatedLink": [{"@id": "https://docs.software-metadata.pub/en/latest"}], + "http://schema.org/releaseNotes": [{"@value": "get it now"}], + "http://schema.org/version": [{"@value": "1.1.1"}], + "https://codemeta.github.io/terms/developmentStatus": [{"@id": "abandoned"}], + "https://codemeta.github.io/terms/funding": [{"@value": "none :("}], + "https://codemeta.github.io/terms/isSourceCodeOf": [{"@id": "HERMES"}], + "https://codemeta.github.io/terms/issueTracker": [ + {"@id": "https://github.com/softwarepub/hermes/issues"} + ], + "https://codemeta.github.io/terms/referencePublication": [{"@id": "https://arxiv.org/abs/2201.09015"}] + }) + ) + ] +) +def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): + monkeypatch.chdir(tmp_path) + + codemeta_file = tmp_path / "codemeta.json" + codemeta_file.write_text(codemeta) + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"codemeta\" ]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager = context_manager.HermesContext() + manager.prepare_step("harvest") + result = SoftwareMetadata.load_from_cache(manager, 
@pytest.mark.parametrize(
    "metadata",
    [
        SoftwareMetadata({
            "@type": ["http://schema.org/SoftwareSourceCode"],
            "http://schema.org/description": [{"@value": "for testing"}],
            "http://schema.org/name": [{"@value": "Test"}]
        }),
    ]
)
def test_file_deposit(tmp_path, monkeypatch, metadata):
    """Run ``hermes deposit`` with the ``file`` target and check the written ``codemeta.json``.

    The ``curate`` step cache is seeded with the compacted ``metadata``, the CLI is invoked
    through ``cli.main()`` and the resulting ``codemeta.json`` in ``tmp_path`` is loaded back
    into a ``SoftwareMetadata`` that must equal the input.
    """
    monkeypatch.chdir(tmp_path)

    # Seed the cache the deposit step reads its input from.
    manager = context_manager.HermesContext(tmp_path)
    manager.prepare_step("curate")
    with manager["result"] as cache:
        cache["codemeta"] = metadata.compact()
    manager.finalize_step("curate")

    config_file = tmp_path / "hermes.toml"
    config_file.write_text("[deposit]\ntarget = \"file\"")

    # monkeypatch undoes both patches on teardown, also when the test fails,
    # so no manual save/restore of sys.argv is needed.
    monkeypatch.setattr(
        sys, "argv", ["hermes", "deposit", "--path", str(tmp_path), "--config", str(config_file)]
    )
    # Path.cwd() is a classmethod, so ``tmp_path.cwd()`` only worked because of the chdir above;
    # passing ``tmp_path`` states the intent directly.
    monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path,))

    try:
        cli.main()
    except SystemExit as e:
        # ``sys.exit()`` without an argument yields ``code=None`` and also means success.
        if e.code not in (0, None):
            raise

    # Read the result only after a successful run: doing this in ``finally`` would replace
    # the real failure with a FileNotFoundError when the CLI did not write the file.
    with open(tmp_path / "codemeta.json", "r") as deposited:
        result = SoftwareMetadata(json.load(deposited))

    assert result == metadata


@pytest.mark.parametrize(
    "metadata, invenio_metadata",
    [
        (
            SoftwareMetadata({
                "@type": ["http://schema.org/SoftwareSourceCode"],
                "http://schema.org/description": [{"@value": "for testing"}],
                "http://schema.org/name": [{"@value": "Test"}],
                # NOTE(review): "@type" is a plain string here but a list in the other
                # fixtures of this module -- confirm that both forms are intended.
                "http://schema.org/author": [{
                    "@type": "http://schema.org/Person",
                    "http://schema.org/familyName": [{"@value": "Test"}],
                    "http://schema.org/givenName": [{"@value": "Testi"}]
                }],
                "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}]
            }),
            {
                "upload_type": "software",
                # NOTE(review): hard-coded date; if the plugin fills in the current date this
                # expectation only holds on that day -- confirm and derive it if so.
                "publication_date": "2026-02-02",
                "title": "Test",
                "creators": [{"name": "Test, Testi"}],
                "description": "for testing",
                "access_right": "closed",
                "license": "apache-2.0",
                "prereserve_doi": True,
                "related_identifiers": [
                    {"identifier": "10.5281/zenodo.13311079", "relation": "isCompiledBy", "scheme": "doi"}
                ]
            }
        )
    ]
)
def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_metadata):
    """Run ``hermes deposit --initial`` with the ``invenio`` target and check the cached deposit.

    The ``curate`` step cache is seeded with the compacted ``metadata`` and a config pointing
    at ``https://sandbox.zenodo.org`` is written, using ``sandbox_auth`` as the auth token
    (presumably provided by a fixture defined elsewhere). After the CLI run, the ``deposit``
    entry of the ``invenio`` cache must equal ``invenio_metadata``.
    """
    monkeypatch.chdir(tmp_path)

    # Seed the cache the deposit step reads its input from.
    manager = context_manager.HermesContext(tmp_path)
    manager.prepare_step("curate")
    with manager["result"] as cache:
        cache["codemeta"] = metadata.compact()
    manager.finalize_step("curate")

    config_file = tmp_path / "hermes.toml"
    config_file.write_text(f"""[deposit]
target = "invenio"
[deposit.invenio]
site_url = "https://sandbox.zenodo.org"
access_right = "closed"
auth_token = "{sandbox_auth}"
files = ["hermes.toml"]
[deposit.invenio.api_paths]
licenses = "api/vocabularies/licenses"
""")

    # monkeypatch undoes both patches on teardown, also when the test fails.
    monkeypatch.setattr(
        sys,
        "argv",
        ["hermes", "deposit", "--path", str(tmp_path), "--config", str(config_file), "--initial"],
    )
    # Path.cwd() is a classmethod, so ``tmp_path.cwd()`` only worked because of the chdir above.
    monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path,))

    try:
        cli.main()
    except SystemExit as e:
        # ``sys.exit()`` without an argument yields ``code=None`` and also means success.
        if e.code not in (0, None):
            raise

    # Inspect the cache only after a successful run so that a CLI failure is reported as
    # such instead of being replaced by an error from the cache lookup.
    manager.prepare_step("deposit")
    with manager["invenio"] as cache:
        result = cache["deposit"]
    manager.finalize_step("deposit")

    # TODO: compare to actually expected value
    assert result == invenio_metadata
def test_get():
    """``get`` resolves full and prefixed keys and honours an explicit default."""
    person = ld_dict(
        [{"https://schema.org/name": [{"@value": "Manu Sporny"}]}],
        context=[{"schema": "https://schema.org/"}],
    )
    expected_name = [{"@value": "Manu Sporny"}]

    # The full IRI and the prefixed form must address the same entry.
    for key in ("https://schema.org/name", "schema:name"):
        assert person.get(key).item_list == expected_name

    assert person.get("bar", None) is None
    # Plain item access has no default to fall back to.
    with pytest.raises(KeyError):
        person["bar"]


def test_setdefault():
    """``setdefault`` returns the stored value and inserts the default for a new key."""
    person = ld_dict(
        [{"https://schema.org/name": [{"@value": "Manu Sporny"}]}],
        context=[{"schema": "https://schema.org/"}],
    )

    existing = person.setdefault("schema:name", [])
    assert existing == [{"@value": "Manu Sporny"}]

    inserted = person.setdefault("schema:email", [])
    assert inserted == []
    # The default must have been stored under the new key.
    assert person["schema:email"] == []


def test_emplace():
    """``emplace`` keeps an existing entry and creates an empty one for a missing key."""
    person = ld_dict(
        [{"https://schema.org/name": [{"@value": "Manu Sporny"}]}],
        context=[{"schema": "https://schema.org/"}],
    )

    # Emplacing a key that is already present leaves its value as it was.
    person.emplace("schema:name")
    assert person["schema:name"] == [{"@value": "Manu Sporny"}]

    # The key is absent before emplace and empty afterwards.
    with pytest.raises(KeyError):
        person["schema:email"]
    person.emplace("schema:email")
    assert person["schema:email"] == []