diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 048162f..bdd2716 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -90,14 +90,16 @@ jobs: uv run pytest -m "not integration_gitlab" \ --cov=src/sw_metadata_bot \ --cov-report=term-missing \ + --cov-report=xml:coverage.xml \ tests/ + + - name: Generate coverage badge + run: uv run genbadge coverage -i coverage.xml -o coverage.svg + continue-on-error: true - name: Run small analysis without publishing run: uv run sw-metadata-bot run-analysis --config-file assets/example_list_repo.json - - name: Generate coverage badge - run: uv run coverage-badge -o coverage.svg -f - continue-on-error: true - name: Commit coverage badge if: github.ref == 'refs/heads/main' && github.event_name == 'push' diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5dbb190 --- /dev/null +++ b/Makefile @@ -0,0 +1,15 @@ + +.PHONY: rsmetacheck-run clean + +# arguments + + +# Run rsmetacheck via the `uv` runner and place outputs under assets/example_analysis/ +rsmetacheck-run: + @mkdir -p assets/example_analysis + @echo "Running: uv run rsmetacheck --input https://github.com/SoftwareUnderstanding/rsmetacheck --analysis-output assets/example_analysis/" + @uv run rsmetacheck --input https://github.com/SoftwareUnderstanding/rsmetacheck --somef-output assets/example_analysis/somef/ --pitfalls-output assets/example_analysis/pitfalls/ --analysis-output assets/example_analysis/rsmetacheck_analysis.json + +clean: + @rm -rf assets/example_analysis + @echo "Cleaned assets/example_analysis" diff --git a/assets/config_su.json b/assets/config_su.json index c4fbb04..64ca990 100644 --- a/assets/config_su.json +++ b/assets/config_su.json @@ -1,25 +1,28 @@ { - "repositories": [ - "https://github.com/SoftwareUnderstanding/arxiv_category_pull", - "https://github.com/SoftwareUnderstanding/CodeMetaSoft_website", - "https://github.com/SoftwareUnderstanding/Metadata-Adoption-Quantify", - "https://github.com/SoftwareUnderstanding/ScientificSoftwareKG", - "https://github.com/SoftwareUnderstanding/auto-papers-with-artifacts", - "https://github.com/SoftwareUnderstanding/somef_server", - "https://github.com/SoftwareUnderstanding/inspect4py", - "https://github.com/SoftwareUnderstanding/SOMEF-Vider", - "https://github.com/SoftwareUnderstanding/completeR", - "https://github.com/SoftwareUnderstanding/rolf", - "https://github.com/SoftwareUnderstanding/software_types", - "https://github.com/SoftwareUnderstanding/c2t" - ], + "version": "1.0.0", + "analysis": { + "repositories": [ + "https://github.com/SoftwareUnderstanding/arxiv_category_pull", + "https://github.com/SoftwareUnderstanding/CodeMetaSoft_website", + "https://github.com/SoftwareUnderstanding/Metadata-Adoption-Quantify", + "https://github.com/SoftwareUnderstanding/ScientificSoftwareKG", + "https://github.com/SoftwareUnderstanding/auto-papers-with-artifacts", + "https://github.com/SoftwareUnderstanding/somef_server", + "https://github.com/SoftwareUnderstanding/inspect4py", + "https://github.com/SoftwareUnderstanding/SOMEF-Vider", + "https://github.com/SoftwareUnderstanding/completeR", + "https://github.com/SoftwareUnderstanding/rolf", + "https://github.com/SoftwareUnderstanding/software_types", + "https://github.com/SoftwareUnderstanding/c2t" + ] + }, "issues": { - "custom_message": "You are part of the Software Understanding metadata quality initiative. Several metadata issues were identified and could be addressed.", + "custom_issue_message": "You are part of the Software Understanding metadata quality initiative. Several metadata issues were identified and could be addressed.", "generate_codemeta_if_missing": true, "opt_outs": [] }, "outputs": { - "root_dir": "outputs", + "output_root_dir": "outputs", "run_name": "SoftwareUnderstanding_new", "snapshot_tag_format": "%Y%m%d" } diff --git a/assets/config_unsubscribe.json b/assets/config_unsubscribe.json new file mode 100644 index 0000000..1ca1509 --- /dev/null +++ b/assets/config_unsubscribe.json @@ -0,0 +1,18 @@ +{ + "version": "1.0.0", + "analysis": { + "repositories": [ + "https://github.com/SoftwareUnderstanding/sw-metadata-bot" + ] + }, + "issues": { + "custom_issue_message": "Unsubscribe list", + "opt_outs": [ + ] + }, + "outputs": { + "output_root_dir": "outputs", + "run_name": "unsubscribe", + "snapshot_tag_format": "%Y%m%d" + } +} \ No newline at end of file diff --git a/assets/example_analysis/pitfalls/output_1_pitfalls.jsonld b/assets/example_analysis/pitfalls/output_1_pitfalls.jsonld new file mode 100644 index 0000000..94bfb6b --- /dev/null +++ b/assets/example_analysis/pitfalls/output_1_pitfalls.jsonld @@ -0,0 +1,75 @@ +{ + "@context": "[IN PROCESS]", + "@type": "SoftwareQualityAssessment", + "name": "Quality Assessment for SoftwareUnderstanding/RsMetaCheck", + "description": "Automated tool to detect metadata quality pitfalls in software repositories (Python, Java, C++, etc.). Analyzes SoMEF output files for version mismatches, license issues, broken URLs, and more.", + "creator": { + "@type": "schema:Person", + "name": "Anonymous", + "email": "example@email.com" + }, + "dateCreated": "2026-05-29T14:41:56Z", + "license": { + "@id": "https://opensource.org/license/mit" + }, + "assessedSoftware": { + "@type": "schema:SoftwareApplication", + "name": "SoftwareUnderstanding/RsMetaCheck", + "softwareVersion": "0.3.2", + "url": "https://github.com/SoftwareUnderstanding/RsMetaCheck", + "schema:identifier": { + "@id": "https://doi.org/10.5281/zenodo.18956787" + }, + "commit_id": "e15a9e2aebb63d08b94cc10f12f5efd4a374a65d" + }, + "checkingSoftware": { + "@type": "schema:SoftwareApplication", + "name": "RSMetacheck", + "@id": "https://w3id.org/rsmetacheck", + "softwareVersion": "0.3.1" + }, + "checks": [ + { + "@type": "CheckResult", + "assessesIndicator": { + "@id": "https://w3id.org/rsmetacheck/catalog/#W001" + }, + "process": "Analyzes software requirements in metadata to see if they lack explicit version constraints.", + "status": { + "@id": "schema:CompletedActionStatus" + }, + "output": "true", + "evidence": "W001 detected: pyproject.toml contains software requirements without versions: poetry-core", + "suggestion": "Add version numbers to your dependencies. This provides stability for users and allows reproducibility across different environments.", + "checkId": "c6106e77f284408904dc592481cae5bfd5fcf66e9e4ecbfeab0c2dbbca4923d1" + }, + { + "@type": "CheckResult", + "assessesIndicator": { + "@id": "https://w3id.org/rsmetacheck/catalog/#W002" + }, + "process": "Compares the dateModified field against the last updated date of the actual repository.", + "status": { + "@id": "schema:CompletedActionStatus" + }, + "output": "true", + "evidence": "W002 detected: metadata files dateModified '2025-09-19T00:00:00' is outdated compared to repository date '2026-05-27T16:30:05'", + "suggestion": "The data in the metadata file should be updated to be aligned with the date of the latest release (2026-05-27T16:30:05).", + "checkId": "9fc5111ff6139c772042983c3b0611efaa23fec563d4edda52bc90caf0daf403" + }, + { + "@type": "CheckResult", + "assessesIndicator": { + "@id": "https://w3id.org/rsmetacheck/catalog/#W004" + }, + "process": "Checks programming language declarations in codemeta.json to see if they lack specific version numbers.", + "status": { + "@id": "schema:CompletedActionStatus" + }, + "output": "true", + "evidence": "W004 detected: metadata files Programming languages without versions: Python", + "suggestion": "Include version numbers for each programming language used. Defining these helps ensure reproducibility and compatibility across systems.", + "checkId": "d4846ed456a4db4a83afef9594d6fbec1e97a1ca5b54e3080ea1397c90c99b07" + } + ] +} \ No newline at end of file diff --git a/assets/example_analysis/rsmetacheck_analysis.json b/assets/example_analysis/rsmetacheck_analysis.json new file mode 100644 index 0000000..9cf4342 --- /dev/null +++ b/assets/example_analysis/rsmetacheck_analysis.json @@ -0,0 +1,263 @@ +{ + "summary": { + "total_repositories_analyzed": 1, + "repositories_with_target_languages": 1, + "individual_jsonld_files_created": 1, + "total_pitfalls_detected": 0, + "total_warnings_detected": 3, + "target_languages": [ + "Python", + "Java", + "C++", + "C", + "R", + "Rust" + ], + "evaluated_repositories": { + "SoftwareUnderstanding/RsMetaCheck": { + "url": "https://github.com/SoftwareUnderstanding/RsMetaCheck", + "commit_id": "e15a9e2aebb63d08b94cc10f12f5efd4a374a65d" + } + } + }, + "pitfalls & warnings": [ + { + "pitfall_code": "P001", + "pitfall_desc": "The metadata file (codemeta or other) has a version which does not correspond to the version used in the latest release", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P001" + }, + { + "pitfall_code": "P002", + "pitfall_desc": "LICENSE file contains template placeholders like , , that were not replaced", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P002" + }, + { + "pitfall_code": "P003", + "pitfall_desc": "Metadata files have multiple authors in single field instead of a list", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P003" + }, + { + "pitfall_code": "P004", + "pitfall_desc": "In codemeta.json README property pointing to their homepage/wiki instead of README file", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P004" + }, + { + "pitfall_code": "P005", + "pitfall_desc": "codemeta.json referencePublication refers to software archive instead of paper", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P005" + }, + { + "pitfall_code": "P006", + "pitfall_desc": "The metadata file has License pointing to a local file instead of stating the name", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P006" + }, + { + "pitfall_code": "P007", + "pitfall_desc": "CITATION.cff does not have referencePublication even though it's referenced in codemeta.json", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P007" + }, + { + "pitfall_code": "P008", + "pitfall_desc": "The metadata file softwareRequirement points to an invalid page", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P008" + }, + { + "pitfall_code": "P009", + "pitfall_desc": "The metadata file coderepository points to their homepage", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P009" + }, + { + "pitfall_code": "P010", + "pitfall_desc": "LICENSE file only contains copyright information without actual license terms", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P010" + }, + { + "pitfall_code": "P011", + "pitfall_desc": "codemeta.json IssueTracker violates the expected URL format", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P011" + }, + { + "pitfall_code": "P012", + "pitfall_desc": "codemeta.json downloadURL is outdated", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P012" + }, + { + "pitfall_code": "P013", + "pitfall_desc": "The metadata file License does not have the specific version", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P013" + }, + { + "pitfall_code": "P014", + "pitfall_desc": "codemeta.json uses bare DOIs in the identifier field instead of full https://doi.org/ URL", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P014" + }, + { + "pitfall_code": "P015", + "pitfall_desc": "In codemeta.json contIntegration link returns 404", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P015" + }, + { + "pitfall_code": "P016", + "pitfall_desc": "The metadata file codeRepository does not point to the same repository", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P016" + }, + { + "pitfall_code": "P017", + "pitfall_desc": "codemeta.json version does not match the package's", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P017" + }, + { + "pitfall_code": "P018", + "pitfall_desc": "codemeta.json Identifier uses raw SWHIDs without their resolvable URL", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P018" + }, + { + "pitfall_code": "P019", + "pitfall_desc": "Inconsistent author counts found across metadata files", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#P019" + }, + { + "warning_code": "W001", + "warning_desc": "Software requirements in metadata files don't have version specifications", + "count": 1, + "percentage": 100.0, + "languages": { + "Python": 1 + }, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#W001" + }, + { + "warning_code": "W002", + "warning_desc": "The dateModified in codemeta.json is outdated compared to the actual repository last update date", + "count": 1, + "percentage": 100.0, + "languages": { + "Python": 1 + }, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#W002" + }, + { + "warning_code": "W003", + "warning_desc": "Codemeta.json repository has multiple licenses but only one is listed", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#W003" + }, + { + "warning_code": "W004", + "warning_desc": "Programming languages in codemeta.json do not have versions", + "count": 1, + "percentage": 100.0, + "languages": { + "Python": 1 + }, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#W004" + }, + { + "warning_code": "W005", + "warning_desc": "The metadata file softwareRequirements have more than one req, but it's written as one string", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#W005" + }, + { + "warning_code": "W006", + "warning_desc": "codemeta.json Identifier is a name instead of a valid unique identifier, but an identifier exist", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#W006" + }, + { + "warning_code": "W007", + "warning_desc": "codemeta.json Identifier is empty", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#W007" + }, + { + "warning_code": "W008", + "warning_desc": "The metadata file GivenName is a list instead of a string", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#W008" + }, + { + "pitfall_code": "W009", + "pitfall_desc": "codemeta.json developmentStatus is a URL instead of a string", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#W009" + }, + { + "pitfall_code": "W010", + "pitfall_desc": "The metadata file codeRepository uses Git remote-style shorthand instead of full URL", + "count": 0, + "percentage": 0.0, + "languages": {}, + "pitfall": "https://w3id.org/rsmetacheck/catalog/#W010" + } + ] +} \ No newline at end of file diff --git a/assets/example_analysis/somef/output_1.json b/assets/example_analysis/somef/output_1.json new file mode 100644 index 0000000..adc7020 --- /dev/null +++ b/assets/example_analysis/somef/output_1.json @@ -0,0 +1 @@ +{"somef_provenance": {"somef_version": "0.10.3", "somef_schema_version": "1.0.0", "date": "2026-05-29 16:41:38"}, "code_repository": [{"result": {"value": "https://github.com/SoftwareUnderstanding/RsMetaCheck", "type": "Url"}, "confidence": 1, "technique": "GitHub_API"}, {"result": {"value": "git+https://github.com/SoftwareUnderstanding/RsMetaCheck.git", "type": "Url"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/codemeta.json"}], "owner": [{"result": {"value": "SoftwareUnderstanding", "type": "Organization"}, "confidence": 1, "technique": "GitHub_API"}], "date_created": [{"result": {"value": "2025-09-04T06:02:24Z", "type": "Date"}, "confidence": 1, "technique": "GitHub_API"}], "date_updated": [{"result": {"value": "2026-05-27T16:30:05Z", "type": "Date"}, "confidence": 1, "technique": "GitHub_API"}, {"result": {"value": "2025-09-19", "type": "String"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/codemeta.json"}], "license": [{"result": {"value": "MIT", "type": "License", "name": "MIT License", "url": "https://spdx.org/licenses/MIT", "spdx_id": "MIT", "identifier": "https://spdx.org/licenses/MIT"}, "confidence": 1, "technique": ["code_parser", "GitHub_API", "file_exploration"], "source": ["https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/LICENSE", "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/codemeta.json"]}], "description": [{"result": {"value": "Automated tool to detect metadata quality pitfalls in software repositories (Python, Java, C++, etc.). Analyzes SoMEF output files for version mismatches, license issues, broken URLs, and more.", "type": "String"}, "confidence": 1, "technique": ["code_parser", "GitHub_API"], "source": ["https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/codemeta.json"]}, {"result": {"value": "Detect metadata pitfalls in software repositories", "type": "String"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/pyproject.toml"}, {"result": {"type": "Text_excerpt", "value": "This project provides an automated tool for detecting common metadata quality issues (pitfalls & Warnings)\nin software repositories. The tool analyzes SoMEF (Software Metadata Extraction Framework) output\nfiles to identify various problems in repository metadata\nfiles such as `codemeta.json`, `package.json`, `setup.py`, `DESCRIPTION`, and others.\n \n", "original_header": "Research Software MetaCheck (a Pitfall/Warning Detection Tool)"}, "confidence": 0.9915292177525223, "technique": "supervised_classification", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"type": "Text_excerpt", "value": "- **Version-related pitfalls**: Version mismatches between metadata files and releases\n- **License-related pitfalls**: Template placeholders, copyright-only licenses, missing version specifications\n- **URL validation pitfalls**: Broken links for CI, software requirements, download URLs\n- **Metadata format pitfalls**: Improper field formatting, multiple authors in single fields, etc...\n- **Identifier pitfalls**: Invalid or missing unique identifiers, bare DOIs\n- **Repository reference pitfalls**: Mismatched code repositories, Git shorthand usage\n \n", "original_header": "Supported Pitfall Types"}, "confidence": 0.9547777160114155, "technique": "supervised_classification", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"type": "Text_excerpt", "value": "1. **\"There is no valid repository URL\" error**: Ensure the JSON file that contains the repositories\n has a valid structure and that you are inputing the correct path\n2. **Network timeouts**: Some pitfalls validate URLs and may time out this is normal behavior\n \n", "original_header": "Common Issues"}, "confidence": 0.804048052952073, "technique": "supervised_classification", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}], "name": [{"result": {"value": "RsMetaCheck", "type": "String"}, "confidence": 1, "technique": ["code_parser", "GitHub_API"], "source": ["https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/codemeta.json"]}], "full_name": [{"result": {"value": "SoftwareUnderstanding/RsMetaCheck", "type": "String"}, "confidence": 1, "technique": "GitHub_API"}], "issue_tracker": [{"result": {"value": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/issues", "type": "Url"}, "confidence": 1, "technique": "GitHub_API"}, {"result": {"value": "https://github.com/SoftwareUnderstanding/RsMetaCheck/issues", "type": "Url"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/codemeta.json"}], "forks_url": [{"result": {"value": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/forks", "type": "Url"}, "confidence": 1, "technique": "GitHub_API"}], "stargazers_count": [{"result": {"value": 3, "type": "Number"}, "confidence": 1, "technique": "GitHub_API"}], "keywords": [{"result": {"value": "fair-principles, fairness, metadata, metadata-extraction, pitfalls", "type": "String"}, "confidence": 1, "technique": "GitHub_API"}, {"result": {"value": ["codemeta", "pitfalls", "metadata", "analysis"], "type": "String"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/codemeta.json"}], "forks_count": [{"result": {"value": 2, "type": "Number"}, "confidence": 1, "technique": "GitHub_API"}], "homepage": [{"result": {"value": "https://rsmetacheck.readthedocs.io/en/latest/?badge=latest", "type": "Url"}, "confidence": 1, "technique": "GitHub_API"}], "download_url": [{"result": {"value": "https://github.com/SoftwareUnderstanding/rsmetacheck/releases", "type": "Url"}, "confidence": 1, "technique": "GitHub_API"}, {"result": {"value": "https://github.com/SoftwareUnderstanding/RsMetaCheck/archive/refs/heads/main.tar.gz", "type": "Url"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/codemeta.json"}], "programming_languages": [{"result": {"value": "Python", "name": "Python", "type": "Programming_language", "size": 660806}, "confidence": 1, "technique": ["code_parser", "GitHub_API"], "source": ["https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/codemeta.json"]}], "releases": [{"result": {"type": "Release", "value": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/318305648", "tag": "0.3.2", "name": "0.3.2 - Pitfalls & Warning Patches II", "author": {"name": "Anas-Elhounsri", "type": "User"}, "description": "- Patches for issues #78, #80 and #81\r\n- Now P001 is more flexible, taking into account best practices of preparing next release version in metadata before making new release.\r\n- W004 is more relaxed with versions that are not precise.", "tarball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/tarball/0.3.2", "zipball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/zipball/0.3.2", "html_url": "https://github.com/SoftwareUnderstanding/RsMetaCheck/releases/tag/0.3.2", "url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/318305648", "release_id": 318305648, "date_created": "2026-05-06T09:28:58Z", "date_published": "2026-05-06T09:39:55Z", "assets": []}, "confidence": 1, "technique": "GitHub_API"}, {"result": {"type": "Release", "value": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/314861320", "tag": "0.3.1", "name": "0.3.1 - Pitfalls & Warning Patches", "author": {"name": "Anas-Elhounsri", "type": "User"}, "description": "- Patches for false flags in P002, P013, W003, W005\r\n- Better report for P016\r\n- Bumped SoMEF to 0.10.3\r\n- Unit testing files refactored to match the new file format. Across all test files, there were occurrences of from metacheck, when the project was recently renamed to rsmetacheck, it caused extensive ModuleNotFoundError failures during the test collection phase", "tarball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/tarball/0.3.1", "zipball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/zipball/0.3.1", "html_url": "https://github.com/SoftwareUnderstanding/RsMetaCheck/releases/tag/0.3.1", "url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/314861320", "release_id": 314861320, "date_created": "2026-04-28T20:56:56Z", "date_published": "2026-04-28T21:51:22Z", "assets": []}, "confidence": 1, "technique": "GitHub_API"}, {"result": {"type": "Release", "value": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/311451109", "tag": "0.3.0", "name": "0.3.0 - User Experience Enhancement ", "author": {"name": "Anas-Elhounsri", "type": "User"}, "description": "- Users can now specify the branch for analysis of pitfalls\r\n- Users can now specify for RSMetaCheck to generate a codemeta file of the repository thanks to @francoto PR at #72 \r\n- Implemented dynamic suggestions tailored to the user's repository instead of general ones\r\n- Improved the evidence report pinpointing the files concerned\r\n- Ground truth repositories have commit IDs for better tracking and analysis", "tarball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/tarball/0.3.0", "zipball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/zipball/0.3.0", "html_url": "https://github.com/SoftwareUnderstanding/RsMetaCheck/releases/tag/0.3.0", "url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/311451109", "release_id": 311451109, "date_created": "2026-04-20T22:30:52Z", "date_published": "2026-04-20T22:39:55Z", "assets": []}, "confidence": 1, "technique": "GitHub_API"}, {"result": {"type": "Release", "value": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/295590507", "tag": "0.2.1", "name": "0.2.1 - Restructuring & Patches", "author": {"name": "Anas-Elhounsri", "type": "User"}, "description": "- Users can now specify the location for SoMEF outputs thanks to Tom report @francoto \r\n- Patches inconsistent flagging for P002, P008, W001\r\n- Improved the report of json-ld with @francoto suggestions\r\n- Added latest commit ID of the default branch in the analysis report and jsonld thanks to @francoto suggestion\r\n- SoMEF automatically configures to install the necessary packages without the need to run somef config unless needed", "tarball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/tarball/0.2.1", "zipball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/zipball/0.2.1", "html_url": "https://github.com/SoftwareUnderstanding/RsMetaCheck/releases/tag/0.2.1", "url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/295590507", "release_id": 295590507, "date_created": "2026-03-11T10:42:51Z", "date_published": "2026-03-11T10:55:04Z", "assets": []}, "confidence": 1, "technique": "GitHub_API"}, {"result": {"type": "Release", "value": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/278252816", "tag": "0.2.0", "name": "0.2.0 - Poetry Update & Patches", "author": {"name": "Anas-Elhounsri", "type": "User"}, "description": "- Now supports Poetry installation and package management\r\n- Updated the commands to work with poetry\r\n- Patches inconsistent flagging for W003, P013\r\n- Implemented P019: Inconsistent number of authors across metadata\r\n- Patched inconsistent number of warnings and pitfalls in report thank to Tom @francoto", "tarball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/tarball/0.2.0", "zipball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/zipball/0.2.0", "html_url": "https://github.com/SoftwareUnderstanding/RsMetaCheck/releases/tag/0.2.0", "url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/278252816", "release_id": 278252816, "date_created": "2026-01-20T12:50:40Z", "date_published": "2026-01-20T12:55:25Z", "assets": []}, "confidence": 1, "technique": "GitHub_API"}, {"result": {"type": "Release", "value": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/263596572", "tag": "0.1.1", "name": "0.1.1 \u2013 Stability and Refactoring", "author": {"name": "Anas-Elhounsri", "type": "User"}, "tarball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/tarball/0.1.1", "zipball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/zipball/0.1.1", "html_url": "https://github.com/SoftwareUnderstanding/RsMetaCheck/releases/tag/0.1.1", "url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/263596572", "release_id": 263596572, "date_created": "2025-11-19T11:19:55Z", "date_published": "2025-11-19T11:22:16Z", "assets": []}, "confidence": 1, "technique": "GitHub_API"}, {"result": {"type": "Release", "value": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/249641359", "tag": "0.1.0", "name": "0.1.0 \u2013 Alpha Release", "author": {"name": "Anas-Elhounsri", "type": "User"}, "tarball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/tarball/0.1.0", "zipball_url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/zipball/0.1.0", "html_url": "https://github.com/SoftwareUnderstanding/RsMetaCheck/releases/tag/0.1.0", "url": "https://api.github.com/repos/SoftwareUnderstanding/RsMetaCheck/releases/249641359", "release_id": 249641359, "date_created": "2025-09-22T01:37:46Z", "date_published": "2025-09-24T11:47:41Z", "assets": []}, "confidence": 1, "technique": "GitHub_API"}], "copyright_holder": [{"result": {"value": "Anas El Hounsri", "type": "Agent", "year": "2025"}, "confidence": 1, "technique": "file_exploration", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/LICENSE"}], "readme_url": [{"result": {"value": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md", "type": "Url"}, "confidence": 1, "technique": "file_exploration"}], "development_status": [{"result": {"value": "active", "type": "String"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/codemeta.json"}], "version": [{"result": {"value": "0.3.2", "type": "String", "tag": "0.3.2"}, "confidence": 1, "technique": ["code_parser"], "source": ["https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/pyproject.toml", "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/codemeta.json"]}], "author": [{"result": {"value": "Anas El Hounsri", "type": "Agent", "email": "a.elhounsri@upm.es", "affiliation": "Ontology Engineering Group, Universidad Politecnica de Madrid", "name": "Anas El Hounsri"}, "confidence": 1, "technique": ["code_parser"], "source": ["https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/pyproject.toml", "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/codemeta.json"]}], "has_build_file": [{"result": {"value": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/pyproject.toml", "type": "Url", "format": "pyproject.toml"}, "confidence": 1, "technique": "file_exploration", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/pyproject.toml"}], "has_package_file": [{"result": {"value": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/pyproject.toml", "type": "Url"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/pyproject.toml"}], "package_id": [{"result": {"value": "rsmetacheck", "type": "String"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/pyproject.toml"}], "requirements": [{"result": {"value": "python>=3.11,<3.13", "name": "python", "version": ">=3.11,<3.13", "type": "SoftwareDependency", "dependency_type": "runtime", "dependency_resolver": "python"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/pyproject.toml"}, {"result": {"value": "requests*", "name": "requests", "version": "*", "type": "SoftwareDependency", "dependency_type": "runtime", "dependency_resolver": "python"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/pyproject.toml"}, {"result": {"value": "somef0.10.3", "name": "somef", "version": "0.10.3", "type": "SoftwareDependency", "dependency_type": "runtime", "dependency_resolver": "python"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/pyproject.toml"}, {"result": {"value": "poetry-core", "name": "poetry-core", "version": "", "type": "SoftwareDependency", "dependency_type": "runtime", "dependency_resolver": "python"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/pyproject.toml"}, {"result": {"value": "- **Python 3.11**\n- Required Python packages:\n - `requests` (for URL validation)\n - `pathlib` (built-in)\n - `json` (built-in)\n - `re` (built-in)\n - `somef` (For extracting metadata from the repositories)\n", "type": "Text_excerpt", "original_header": "Requirements", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}], "runtime_platform": [{"result": {"value": "Python>=3.11,<3.13", "name": "Python", "version": ">=3.11,<3.13", "type": "String"}, "confidence": 1, "technique": "code_parser", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/pyproject.toml"}], "documentation": [{"result": {"value": "https://github.com/SoftwareUnderstanding/rsmetacheck/tree/main/docs", "type": "Url"}, "confidence": 1, "technique": "file_exploration"}, {"result": {"type": "Url", "value": "https://rsmetacheck.readthedocs.io/", "format": "readthedocs"}, "confidence": 1, "technique": ["regular_expression"], "source": ["https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"]}], "installation": [{"result": {"value": "# Installation\n\nRSMetaCheck requires **Python 3.11**.\n\n## Using Poetry (Recommended)\n\n1. **Clone the repository**:\n\n ```bash\n git clone https://github.com/SoftwareUnderstanding/RsMetaCheck.git\n cd RsMetaCheck\n ```\n\n2. **Install with Poetry**:\n\n ```bash\n poetry install\n ```\n\n3. **Configure SoMEF** (Optional but recommended):\n\n Initially, the installation process runs `somef configure -a` automatically. If you need to reconfigure it (e.g., to add a GitHub token to avoid rate limits), run:\n\n ```bash\n poetry run somef configure\n ```\n\n## Using pip\n\nAlternatively, you can install directly from GitHub:\n\n```bash\npip install git+https://github.com/SoftwareUnderstanding/RsMetaCheck.git\n```\n", "type": "File_dump"}, "confidence": 1, "technique": "file_exploration", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/docs/installation.md"}, {"result": {"value": "1. **Clone the repository**:\n\n ```bash\n git clone https://github.com/SoftwareUnderstanding/RsMetaCheck.git\n cd RsMetaCheck\n ```\n\n2. **Install with Poetry**:\n\n ```bash\n poetry install\n ```\n\n3. **Configure SoMEF** (optional but recommended):\n Initially, the installation process will run `somef configure -a` to automatically set it up and install the necessary packages but the rate limit will be low. If you need more, you should reconfigure SoMEF, you can run the following command:\n ```bash\n poetry run somef configure\n ```\n Then add your GitHub authentication token to avoid API rate limits when analyzing repositories in batches.\n", "type": "Text_excerpt", "original_header": "Using Poetry (Recommended)", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Installation"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "Alternatively, you can install directly from GitHub:\n\n```bash\npip install git+https://github.com/SoftwareUnderstanding/RsMetaCheck.git\n```\n", "type": "Text_excerpt", "original_header": "Using pip", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Installation"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}], "continuous_integration": [{"result": {"value": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/.github/workflows/ci.yml", "type": "Url"}, "confidence": 1, "technique": "file_exploration"}, {"result": {"value": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/.github/workflows/pypi-publish.yml", "type": "Url"}, "confidence": 1, "technique": "file_exploration"}], "usage": [{"result": {"value": "RsMetaCheck can be easily integrated into your CI/CD pipelines as a GitHub Action. We have set it up in GitHub Action in the following repository: [rs-metacheck-action](https://github.com/SoftwareUnderstanding/rs-metacheck-action) and is up in GitHub MarketPlace at [rsmetacheck actions](https://github.com/marketplace/actions/rsmetacheck).\n\nThe action will generate `all_pitfalls_results.json`, along with the `pitfalls/` and `somef_outputs/` directories directly in your workflow workspace.\n", "type": "Text_excerpt", "original_header": "GitHub Action", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "```bash\npoetry run rsmetacheck --input https://github.com/tidyverse/tidyverse\n```\n", "type": "Text_excerpt", "original_header": "Analyze a Single Repository", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "You can analyze a specific branch of a repository by using the `--branch` or `-b` flag:\n\n```bash\npoetry run rsmetacheck --input https://github.com/tidyverse/tidyverse --branch develop\n```\n", "type": "Text_excerpt", "original_header": "Analyze a Specific Branch", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "```bash\npoetry run rsmetacheck --input repositories.json\n```\n\nThe `repositories.json` file should be structured as follows:\n\n```json\n{\n \"repositories\": [\n \"https://gitlab.com/example/example_repo_1\",\n \"https://gitlab.com/example/example_repo_2\",\n \"https://github.com/example/example_repo_3\"\n ]\n}\n```\n", "type": "Text_excerpt", "original_header": "Analyze Multiple Repositories from a JSON File", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "```bash\npoetry run rsmetacheck --input repositories.json \\\n --somef-output ./results/somef \\\n --pitfalls-output ./results/pitfalls \\\n --analysis-output ./results/summary.json \\\n --notes-output ./results/notes.json\n```\n", "type": "Text_excerpt", "original_header": "Customize Output Paths", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "When a metadata version differs from the release version by a small margin (all version components differ by less than 2, e.g., `0.4.3.dev1` vs `0.4.2`), MetaCheck records a **note** rather than a full pitfall. To capture these observations, use the `--notes-output` flag:\n\n```bash\npoetry run rsmetacheck --input https://github.com/example/repo --notes-output ./notes.json\n```\n\nThe notes file is only created when there are observations to report and the `--notes-output` path is specified. Its structure is:\n\n```json\n{\n \"total_notes\": 1,\n \"notes\": [\n {\n \"repository\": \"example/repo\",\n \"file_name\": \"repo_output.json\",\n \"code\": \"P001\",\n \"note\": \"Version discrepancy: metadata '0.4.3.dev1' vs release '0.4.2'\"\n }\n ]\n}\n```\n\nIf the version difference is significant (any component differs by 2 or more, e.g., `0.12.4` vs `0.12.1`), it is still flagged as a pitfall.\n", "type": "Text_excerpt", "original_header": "Version Discrepancy Notes", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "If you've already run SoMEF separately:\n\n```bash\npoetry run rsmetacheck --skip-somef --input somef_outputs/*.json\n```\n\nOr for multiple paths:\n\n```bash\npoetry run rsmetacheck --skip-somef --input my_somef_outputs_1/*.json my_somef_outputs_2/*.json\n```\n", "type": "Text_excerpt", "original_header": "Skip SoMEF and Analyze Existing Outputs", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "By default, the JSON-LD files generated by RsMetaCheck will only contain information about pitfalls and warnings that were actually detected. If you want to include all tests in the final JSON-LD, even tests that the repository successfully passed, use the `--verbose` flag:\n\n```bash\npoetry run rsmetacheck --input https://github.com/tidyverse/tidyverse --verbose\n```\n", "type": "Text_excerpt", "original_header": "Verbose Output for Passed Checks", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "You can configure RsMetaCheck with a TOML file at the repository root named `.rsmetacheck.toml` (auto-detected), or pass a custom path with `--config`.\n\nSupported options:\n\n- `ignore`: warnings/pitfalls to ignore (e.g. `P001`, `W002`)\n- `exclude_files`: metadata sources to ignore (glob, filename, or substring match)\n- `parameters`: per-check parameters for configurable checks\n- `profiles`: alternate configurations such as `unstable` or `prerelease`\n\nExample:\n\n```toml\nignore = [\"W002\"]\nexclude_files = [\"**/generated/**\", \"tmp_metadata.json\"]\n\n[parameters.P001]\nahead_significant_diff = 2\n\n[parameters.W002]\nstale_after_days = 3\n\n[profiles.unstable]\nignore = [\"W002\", \"P017\"]\n\n[profiles.unstable.parameters.P001]\nahead_significant_diff = 10\n\n[profiles.prerelease]\nignore = []\n\n[profiles.prerelease.parameters.P001]\nahead_significant_diff = 1\n```\n\nUse a specific profile:\n\n```bash\npoetry run rsmetacheck --input https://github.com/example/repo --config-profile unstable\n```\n\nUse a custom config path:\n\n```bash\npoetry run rsmetacheck --input https://github.com/example/repo --config ./ci/rsmetacheck.toml\n```\n", "type": "Text_excerpt", "original_header": "Configure Analysis with a Root Config File", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "The tool will:\n\n- Process all JSON files in the SoMEF output directory (by default `somef_outputs` created by the tool)\n- Display progress messages showing detected pitfalls\n- Generate JSON-LD files of detailed Pitfalls and Warnings detected by the tool in `output_1_pitfalls.jsonld`,\n `output_2_pitfalls.jsonld`, etc... in `pitfalls` (by default created by the tool) directory\n- Generate a comprehensive report in `all_pitfalls_results.json`\n\nThe output file contains:\n\n- EVERSE standardized JSON-LD output of each repository\n- Summary statistics of analyzed repositories\n- Count and percentage for each pitfall type\n- Language-specific breakdown for repositories with target languages\n", "type": "Text_excerpt", "original_header": "Output", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}], "run": [{"result": {"value": "```bash\npoetry run rsmetacheck --input https://github.com/tidyverse/tidyverse\n```\n", "type": "Text_excerpt", "original_header": "Analyze a Single Repository", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "You can analyze a specific branch of a repository by using the `--branch` or `-b` flag:\n\n```bash\npoetry run rsmetacheck --input https://github.com/tidyverse/tidyverse --branch develop\n```\n", "type": "Text_excerpt", "original_header": "Analyze a Specific Branch", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "```bash\npoetry run rsmetacheck --input repositories.json\n```\n\nThe `repositories.json` file should be structured as follows:\n\n```json\n{\n \"repositories\": [\n \"https://gitlab.com/example/example_repo_1\",\n \"https://gitlab.com/example/example_repo_2\",\n \"https://github.com/example/example_repo_3\"\n ]\n}\n```\n", "type": "Text_excerpt", "original_header": "Analyze Multiple Repositories from a JSON File", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "```bash\npoetry run rsmetacheck --input repositories.json \\\n --somef-output ./results/somef \\\n --pitfalls-output ./results/pitfalls \\\n --analysis-output ./results/summary.json \\\n --notes-output ./results/notes.json\n```\n", "type": "Text_excerpt", "original_header": "Customize Output Paths", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "When a metadata version differs from the release version by a small margin (all version components differ by less than 2, e.g., `0.4.3.dev1` vs `0.4.2`), MetaCheck records a **note** rather than a full pitfall. To capture these observations, use the `--notes-output` flag:\n\n```bash\npoetry run rsmetacheck --input https://github.com/example/repo --notes-output ./notes.json\n```\n\nThe notes file is only created when there are observations to report and the `--notes-output` path is specified. Its structure is:\n\n```json\n{\n \"total_notes\": 1,\n \"notes\": [\n {\n \"repository\": \"example/repo\",\n \"file_name\": \"repo_output.json\",\n \"code\": \"P001\",\n \"note\": \"Version discrepancy: metadata '0.4.3.dev1' vs release '0.4.2'\"\n }\n ]\n}\n```\n\nIf the version difference is significant (any component differs by 2 or more, e.g., `0.12.4` vs `0.12.1`), it is still flagged as a pitfall.\n", "type": "Text_excerpt", "original_header": "Version Discrepancy Notes", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "If you've already run SoMEF separately:\n\n```bash\npoetry run rsmetacheck --skip-somef --input somef_outputs/*.json\n```\n\nOr for multiple paths:\n\n```bash\npoetry run rsmetacheck --skip-somef --input my_somef_outputs_1/*.json my_somef_outputs_2/*.json\n```\n", "type": "Text_excerpt", "original_header": "Skip SoMEF and Analyze Existing Outputs", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "By default, the JSON-LD files generated by RsMetaCheck will only contain information about pitfalls and warnings that were actually detected. If you want to include all tests in the final JSON-LD, even tests that the repository successfully passed, use the `--verbose` flag:\n\n```bash\npoetry run rsmetacheck --input https://github.com/tidyverse/tidyverse --verbose\n```\n", "type": "Text_excerpt", "original_header": "Verbose Output for Passed Checks", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}, {"result": {"value": "You can configure RsMetaCheck with a TOML file at the repository root named `.rsmetacheck.toml` (auto-detected), or pass a custom path with `--config`.\n\nSupported options:\n\n- `ignore`: warnings/pitfalls to ignore (e.g. `P001`, `W002`)\n- `exclude_files`: metadata sources to ignore (glob, filename, or substring match)\n- `parameters`: per-check parameters for configurable checks\n- `profiles`: alternate configurations such as `unstable` or `prerelease`\n\nExample:\n\n```toml\nignore = [\"W002\"]\nexclude_files = [\"**/generated/**\", \"tmp_metadata.json\"]\n\n[parameters.P001]\nahead_significant_diff = 2\n\n[parameters.W002]\nstale_after_days = 3\n\n[profiles.unstable]\nignore = [\"W002\", \"P017\"]\n\n[profiles.unstable.parameters.P001]\nahead_significant_diff = 10\n\n[profiles.prerelease]\nignore = []\n\n[profiles.prerelease.parameters.P001]\nahead_significant_diff = 1\n```\n\nUse a specific profile:\n\n```bash\npoetry run rsmetacheck --input https://github.com/example/repo --config-profile unstable\n```\n\nUse a custom config path:\n\n```bash\npoetry run rsmetacheck --input https://github.com/example/repo --config ./ci/rsmetacheck.toml\n```\n", "type": "Text_excerpt", "original_header": "Configure Analysis with a Root Config File", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)", "Usage", "Run the Detection Tool locally"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}], "contributing_guidelines": [{"result": {"value": "The system is designed with modularity in mind. Each pitfall detector is implemented as a\nseparate module in the `scripts/` directory, making it easy to add new pitfall types or modify\nexisting detection logic.", "type": "Text_excerpt", "original_header": "Contributing", "parent_header": ["Research Software MetaCheck (a Pitfall/Warning Detection Tool)"]}, "confidence": 1, "technique": "header_analysis", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}], "identifier": [{"result": {"type": "Url", "value": "https://doi.org/10.5281/zenodo.18956787"}, "confidence": 1, "technique": "regular_expression", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}], "full_title": [{"result": {"type": "String", "value": "Research Software MetaCheck (a Pitfall/Warning Detection Tool)"}, "confidence": 1, "technique": "regular_expression", "source": "https://raw.githubusercontent.com/SoftwareUnderstanding/rsmetacheck/main/README.md"}]} \ No newline at end of file diff --git a/assets/example_list_repo.json b/assets/example_list_repo.json index 16ee689..ce8369a 100644 --- a/assets/example_list_repo.json +++ b/assets/example_list_repo.json @@ -1,15 +1,18 @@ { - "repositories": [ - "https://github.com/SoftwareUnderstanding/sw-metadata-bot", - "https://github.com/SoftwareUnderstanding/RsMetaCheck" - ], + "version": "1.0.0", + "analysis": { + "repositories": [ + "https://github.com/SoftwareUnderstanding/sw-metadata-bot", + "https://github.com/SoftwareUnderstanding/RsMetaCheck" + ] + }, "issues": { - "custom_message": "This is a issue created for testing purposes. Several metadata issues were identified and could be addressed.", + "custom_issue_message": "This is a issue created for testing purposes. Several metadata issues were identified and could be addressed.", "opt_outs": [ ] }, "outputs": { - "root_dir": "assets", + "output_root_dir": "assets", "run_name": "example_run", "snapshot_tag_format": "%Y%m%d" } diff --git a/assets/existing_metacheck_analysis/example_pitfall_1.jsonld b/assets/existing_metacheck_analysis/example_pitfall_1.jsonld deleted file mode 100644 index 1e55cb3..0000000 --- a/assets/existing_metacheck_analysis/example_pitfall_1.jsonld +++ /dev/null @@ -1,130 +0,0 @@ -{ - "@context": "[IN PROCESS]", - "@type": "SoftwareQualityAssessment", - "name": "Quality Assessment for Unknown", - "description": "LOFAR Pipeline System", - "creator": { - "@type": "schema:Person", - "name": "Anonymous", - "email": "example@email.com" - }, - "dateCreated": "2026-03-05T15:57:06Z", - "license": { - "@id": "https://opensource.org/license/mit" - }, - "commit_id": "Unknown", - "assessedSoftware": { - "@type": "schema:SoftwareApplication", - "name": "Unknown", - "softwareVersion": "Before-Remove-TMSS", - "url": "https://git.astron.nl/ro/lofar/", - "commit_id": "Unknown" - }, - "checks": [ - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/metadatafile" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Compares the version found in the metadata file with the latest repository release tag.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#P001", - "output": "true", - "evidence": "P001 detected: pom.xml version '2.22.0-SNAPSHOT' does not match release version 'Before-Remove-TMSS'", - "suggestion": "Ensure the version in your metadata matches the latest official release. Keeping these synchronized avoids confusion for users and improves reproducibility.", - "checkId": "8036a85b58a90340eee71682b743c6e47e8455f37d47414278e12121c86c3715" - }, - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/license" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Searches for common template placeholders (e.g., , ) within the LICENSE file.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#P002", - "output": "true", - "evidence": "P002 detected: LICENSE file contains unreplaced template placeholders", - "suggestion": "Update the copyright section with accurate names, organizations, and the current year. Personalizing this section ensures clarity and legal accuracy.", - "checkId": "8454f0d79563a0ecd16bd6e439d5a76d282cc8071ca936f496d2c8fa8073ec6f" - }, - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/metadatafile" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Checks if the codeRepository field points to a project homepage rather than the actual source code repository.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#P009", - "output": "true", - "evidence": "P009 detected: setup.py codeRepository points to homepage instead of repository: http://www.transientskp.org/", - "suggestion": "You need to update the codeRepository field to point directly to your repository's source code instead of a homepage. Accurate links improve traceability and user access.", - "checkId": "30d666820d2e730be4ba5c7261cb2d893bb81f3c4b20044c517a891d78af0000" - }, - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/metadatafile" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Analyzes software requirements in metadata to see if they lack explicit version constraints.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#W001", - "output": "true", - "evidence": "W001 detected: pom.xml contains software requirements without versions: django, psycopg2-binary, djangorestframework, django-polymorphic, django-rest-polymorphic, requests, gunicorn, fabric, invocations, beautifultable, blessings, celery, django-filter, coreapi, matplotlib", - "suggestion": "Add version numbers to your dependencies. This provides stability for users and allows reproducibility across different environments.", - "checkId": "5a28a7a1d78554975164d60957f31f2601eaf60dfa9ebe9b2835e585853f075e" - }, - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/codemeta" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Detects if multiple distinct licenses are found in the repository but only a single license is declared in codemeta.json.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#W003", - "output": "true", - "evidence": "W003 detected: Repository has multiple licenses but codemeta.json only lists one. Found in: https:///ro/lofar/-/blob/master/LICENSE", - "suggestion": "Make sure you are using the correct licenses. This avoids confusion about terms of use and ensures full transparency.", - "checkId": "efe9bf2946d3bba4dc5eae8f3f425cfe06a05172953e95284eb8db3c18792811" - } - ] -} \ No newline at end of file diff --git a/assets/existing_metacheck_analysis/example_pitfall_2.jsonld b/assets/existing_metacheck_analysis/example_pitfall_2.jsonld deleted file mode 100644 index 173c529..0000000 --- a/assets/existing_metacheck_analysis/example_pitfall_2.jsonld +++ /dev/null @@ -1,112 +0,0 @@ -{ - "@context": "[IN PROCESS]", - "@type": "SoftwareQualityAssessment", - "name": "Quality Assessment for Unknown", - "description": "This is a container to extract Gravitational Wave (GW) data from the datalake using Rucio and feed 1 second GW frames to the GW pipelines.", - "creator": { - "@type": "schema:Person", - "name": "Anonymous", - "email": "example@email.com" - }, - "dateCreated": "2026-03-05T15:55:32Z", - "license": { - "@id": "https://opensource.org/license/mit" - }, - "commit_id": "Unknown", - "assessedSoftware": { - "@type": "schema:SoftwareApplication", - "name": "Unknown", - "softwareVersion": "Unknown", - "url": "https://git.ligo.org/rhys.poulton/escape-datalake-shared-volume-writer/", - "schema:identifier": { - "@id": "https://doi.org/10.5281/zenodo.5742053" - }, - "commit_id": "Unknown" - }, - "checks": [ - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/license" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Searches for common template placeholders (e.g., , ) within the LICENSE file.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#P002", - "output": "true", - "evidence": "P002 detected: LICENSE file contains unreplaced template placeholders", - "suggestion": "Update the copyright section with accurate names, organizations, and the current year. Personalizing this section ensures clarity and legal accuracy.", - "checkId": "8454f0d79563a0ecd16bd6e439d5a76d282cc8071ca936f496d2c8fa8073ec6f" - }, - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/codemeta" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Checks the identifier field in codemeta.json to see if it uses a bare DOI string instead of a full HTTPS URL.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#P014", - "output": "true", - "evidence": "P014 detected: codemeta.json Identifier uses bare DOI instead of full URL: '10.5281/zenodo.5742053'", - "suggestion": "You should include the full DOI URL form in your metadata (e.g., https://doi.org/XX.XXXX/zenodo.XXXX)", - "checkId": "5332af69762b78de21b8b3f64a63a6fc781cc390029bc18fab5adbd4e1565953" - }, - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/codemeta" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Detects if multiple distinct licenses are found in the repository but only a single license is declared in codemeta.json.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#W003", - "output": "true", - "evidence": "W003 detected: Repository has multiple licenses but codemeta.json only lists one. Found in: https:///rhys.poulton/escape-datalake-shared-volume-writer/-/blob/main/LICENSE", - "suggestion": "Make sure you are using the correct licenses. This avoids confusion about terms of use and ensures full transparency.", - "checkId": "3dbc3727e714a3623407acc90516150321adddd8b06d7552db68a1356e88dbc8" - }, - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/codemeta" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Checks programming language declarations in codemeta.json to see if they lack specific version numbers.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#W004", - "output": "true", - "evidence": "W004 detected: codemeta.json Programming languages without versions: Python, bash", - "suggestion": "Include version numbers for each programming language used. Defining these helps ensure reproducibility and compatibility across systems.", - "checkId": "79c379cb7ed9ae82341ac78424c44cb61bbafca03f7142d9da9c69e1017b964d" - } - ] -} \ No newline at end of file diff --git a/assets/existing_metacheck_analysis/example_pitfall_3.jsonld b/assets/existing_metacheck_analysis/example_pitfall_3.jsonld deleted file mode 100644 index 1b552a6..0000000 --- a/assets/existing_metacheck_analysis/example_pitfall_3.jsonld +++ /dev/null @@ -1,109 +0,0 @@ -{ - "@context": "[IN PROCESS]", - "@type": "SoftwareQualityAssessment", - "name": "Quality Assessment for AMIGA-IAA/hcg-16", - "description": "HCG-16 Project", - "creator": { - "@type": "schema:Person", - "name": "Anonymous", - "email": "example@email.com" - }, - "dateCreated": "2026-03-05T15:57:03Z", - "license": { - "@id": "https://opensource.org/license/mit" - }, - "commit_id": "3e46b026f96b30c4b7d69546720bdb9debb07f99", - "assessedSoftware": { - "@type": "schema:SoftwareApplication", - "name": "AMIGA-IAA/hcg-16", - "softwareVersion": "v1.2.3", - "url": "https://github.com/AMIGA-IAA/hcg-16", - "commit_id": "3e46b026f96b30c4b7d69546720bdb9debb07f99" - }, - "checks": [ - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/metadatafile" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Compares the version found in the metadata file with the latest repository release tag.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#P001", - "output": "true", - "evidence": "P001 detected: codemeta.json version '1.2.1' does not match release version '1.2.3'", - "suggestion": "Ensure the version in your metadata matches the latest official release. Keeping these synchronized avoids confusion for users and improves reproducibility.", - "checkId": "36bc1bbaee0838ae68e47514b4ad7ef566f44f66ee3e3175a1168277ac4326d3" - }, - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/metadatafile" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Analyzes software requirements in metadata to see if they lack explicit version constraints.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#W001", - "output": "true", - "evidence": "W001 detected: codemeta.json contains software requirements without versions: https://github.com/AMIGA-IAA/hcg-16/blob/master/conda-linux-64.lock", - "suggestion": "Add version numbers to your dependencies. This provides stability for users and allows reproducibility across different environments.", - "checkId": "c7bc3040b51b441ad9224cabe8a9f65cf6473a0ad2ddfa26386894fd3a526d5e" - }, - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/codemeta" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Compares the dateModified field against the last updated date of the actual repository.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#W002", - "output": "true", - "evidence": "W002 detected: codemeta.json dateModified '2021-09-28T00:00:00' is outdated compared to repository date '2023-06-14T17:26:23'", - "suggestion": "The data in the metadata file should be updated to be aligned with the date of the latest release. Automating this synchronization as part of your release process is highly recommended.", - "checkId": "9899b42e1ee09c93bf269e81574db71daca18a345e1f9f5bd43f6022fcf39fba" - }, - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/codemeta" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Checks programming language declarations in codemeta.json to see if they lack specific version numbers.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#W004", - "output": "true", - "evidence": "W004 detected: codemeta.json Programming languages without versions: Python", - "suggestion": "Include version numbers for each programming language used. Defining these helps ensure reproducibility and compatibility across systems.", - "checkId": "9afd141924d0a42df9c1b53a6552790dccf6bd40886d704cb8699191578eb846" - } - ] -} \ No newline at end of file diff --git a/assets/existing_metacheck_analysis/example_pitfall_4.jsonld b/assets/existing_metacheck_analysis/example_pitfall_4.jsonld deleted file mode 100644 index ac8b4e9..0000000 --- a/assets/existing_metacheck_analysis/example_pitfall_4.jsonld +++ /dev/null @@ -1,70 +0,0 @@ -{ - "@context": "[IN PROCESS]", - "@type": "SoftwareQualityAssessment", - "name": "Quality Assessment for FairRootGroup/FairMQ", - "description": "C++ Message Queuing Library and Framework", - "creator": { - "@type": "schema:Person", - "name": "Anonymous", - "email": "example@email.com" - }, - "dateCreated": "2026-03-05T15:55:22Z", - "license": { - "@id": "https://opensource.org/license/mit" - }, - "commit_id": "fa64faf3f755e9b00e0af738826718bf5117c8ea", - "assessedSoftware": { - "@type": "schema:SoftwareApplication", - "name": "FairRootGroup/FairMQ", - "softwareVersion": "v1.10.1", - "url": "https://github.com/FairRootGroup/FairMQ", - "schema:identifier": { - "@id": "https://doi.org/10.5281/zenodo.1689985" - }, - "commit_id": "fa64faf3f755e9b00e0af738826718bf5117c8ea" - }, - "checks": [ - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/metadatafile" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Compares the version found in the metadata file with the latest repository release tag.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#P001", - "output": "true", - "evidence": "P001 detected: codemeta.json version 'master' does not match release version '1.10.1'", - "suggestion": "Ensure the version in your metadata matches the latest official release. Keeping these synchronized avoids confusion for users and improves reproducibility.", - "checkId": "e41e9300f64f91efe5164dbcf3013229f358f8f920e5861c10c81aecfebac2d0" - }, - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/metadatafile" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Checks if the License property in the metadata file points to a local file path instead of an SPDX license identifier.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#P006", - "output": "true", - "evidence": "P006 detected: codemeta.json License points to local file instead of license name: './COPYRIGHT'", - "suggestion": "You need to replace local file paths with recognized SPDX license identifiers, such as MIT or GPL-3.0-only in URL form. This ensures your license can be correctly detected by automated tools.", - "checkId": "406196bfdc138505c4aea1a00c2ac38e5b9972b95562427db5f53abbedbf5232" - } - ] -} \ No newline at end of file diff --git a/assets/existing_metacheck_analysis/example_pitfall_5.jsonld b/assets/existing_metacheck_analysis/example_pitfall_5.jsonld deleted file mode 100644 index 9410f60..0000000 --- a/assets/existing_metacheck_analysis/example_pitfall_5.jsonld +++ /dev/null @@ -1,70 +0,0 @@ -{ - "@context": "[IN PROCESS]", - "@type": "SoftwareQualityAssessment", - "name": "Quality Assessment for IndexedConv/IndexedConv", - "description": "Code for the indexed convolution", - "creator": { - "@type": "schema:Person", - "name": "Anonymous", - "email": "example@email.com" - }, - "dateCreated": "2026-03-05T15:57:10Z", - "license": { - "@id": "https://opensource.org/license/mit" - }, - "commit_id": "62d46bc283a911a7e38c79195f7cd25428dba112", - "assessedSoftware": { - "@type": "schema:SoftwareApplication", - "name": "IndexedConv/IndexedConv", - "softwareVersion": "v1.3.2", - "url": "https://github.com/IndexedConv/IndexedConv", - "schema:identifier": { - "@id": "https://doi.org/10.5281/zenodo.2542651" - }, - "commit_id": "62d46bc283a911a7e38c79195f7cd25428dba112" - }, - "checks": [ - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/codemeta" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Compares the dateModified field against the last updated date of the actual repository.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#W002", - "output": "true", - "evidence": "W002 detected: codemeta.json dateModified '2024-04-05T00:00:00' is outdated compared to repository date '2026-03-04T14:06:26'", - "suggestion": "The data in the metadata file should be updated to be aligned with the date of the latest release. Automating this synchronization as part of your release process is highly recommended.", - "checkId": "194f9735a1916b672922ad2a73d6f142b30bce64a7d84a3be62be82e3e53f3ae" - }, - { - "@type": "CheckResult", - "assessesIndicator": { - "@id": "https://w3id.org/example/metacheck/i/indicators/codemeta" - }, - "checkingSoftware": { - "@type": "schema:SoftwareApplication", - "name": "metacheck", - "@id": "https://w3id.org/example/metacheck/tools/", - "softwareVersion": "0.2.0" - }, - "process": "Checks programming language declarations in codemeta.json to see if they lack specific version numbers.", - "status": { - "@id": "schema:CompletedActionStatus" - }, - "pitfall": "https://w3id.org/rsmetacheck/catalog/#W004", - "output": "true", - "evidence": "W004 detected: codemeta.json Programming languages without versions: Python 3.8", - "suggestion": "Include version numbers for each programming language used. Defining these helps ensure reproducibility and compatibility across systems.", - "checkId": "6d43d42bb77a7d22a68e8fcf8f3f36a16b5b60bd1d4c4ed638c92db88a273f2b" - } - ] -} \ No newline at end of file diff --git a/assets/ossr_list_url.json b/assets/ossr_list_url.json index 2377f3b..22b5ae3 100644 --- a/assets/ossr_list_url.json +++ b/assets/ossr_list_url.json @@ -1,42 +1,46 @@ { - "repositories": [ - "https://github.com/atlas-outreach-data-tools/atlas-outreach-cpp-framework-13tev", - "https://git.astron.nl/astron-sdc/escape-wp5/esap-deployment", - "https://git.astron.nl/ro/lofar", - "https://git.ligo.org/rhys.poulton/escape-datalake-shared-volume-writer", - "https://github.com/AMIGA-IAA/hcg-16", - "https://github.com/FairRootGroup/FairMQ/", - "https://github.com/IndexedConv/IndexedConv", - "https://github.com/R3BRootGroup/R3BRoot", - "https://github.com/aardk/jupyter-casa", - "https://github.com/cds-astro/aladin-lite", - "https://github.com/cds-astro/cds-moc-rust", - "https://github.com/cds-astro/mocpy", - "https://github.com/cds-astro/tutorials", - "https://github.com/cosimoNigro/agnpy", - "https://github.com/ctlearn-project/ctlearn", - "https://github.com/explore-platform/g-tomo", - "https://github.com/gammapy/gammapy", - "https://github.com/javierrico/gLike", - "https://gitlab.com/escape-ossr/eossr", - "https://gitlab.com/ska-telescope/sdc/sdc1-solution", - "https://gitlab.desy.de/jannisnecker/timewise_sup", - "https://gitlab.in2p3.fr/CTA-LAPP/HiPeRTA", - "https://gitlab.in2p3.fr/CTA-LAPP/RTA/HiPeRTA", - "https://gitlab.in2p3.fr/christopher.eckner/mlfermilatdwarfs/", - "https://gitlab.in2p3.fr/escape2020/wp3/eossr", - "https://gitlab.in2p3.fr/escape2020/wp3/ossr-curation", - "https://gitlab.in2p3.fr/escape2020/wp3/template_project_escape", - "https://gitlab.in2p3.fr/escape2020/wp3/zenodoci", - "https://gitlab.in2p3.fr/gammalearn/gammalearn", - "https://gitlab.in2p3.fr/gammalearn/publications/2025-stereograph" - ], + "version": "1.0.0", + "analysis": { + "repositories": [ + "https://github.com/atlas-outreach-data-tools/atlas-outreach-cpp-framework-13tev", + "https://git.astron.nl/astron-sdc/escape-wp5/esap-deployment", + "https://git.astron.nl/ro/lofar", + "https://git.ligo.org/rhys.poulton/escape-datalake-shared-volume-writer", + "https://github.com/AMIGA-IAA/hcg-16", + "https://github.com/FairRootGroup/FairMQ/", + "https://github.com/IndexedConv/IndexedConv", + "https://github.com/R3BRootGroup/R3BRoot", + "https://github.com/aardk/jupyter-casa", + "https://github.com/cds-astro/aladin-lite", + "https://github.com/cds-astro/cds-moc-rust", + "https://github.com/cds-astro/mocpy", + "https://github.com/cds-astro/tutorials", + "https://github.com/cosimoNigro/agnpy", + "https://github.com/ctlearn-project/ctlearn", + "https://github.com/explore-platform/g-tomo", + "https://github.com/gammapy/gammapy", + "https://github.com/javierrico/gLike", + "https://gitlab.com/escape-ossr/eossr", + "https://gitlab.com/ska-telescope/sdc/sdc1-solution", + "https://gitlab.desy.de/jannisnecker/timewise_sup", + "https://gitlab.in2p3.fr/CTA-LAPP/HiPeRTA", + "https://gitlab.in2p3.fr/CTA-LAPP/RTA/HiPeRTA", + "https://gitlab.in2p3.fr/christopher.eckner/mlfermilatdwarfs/", + "https://gitlab.in2p3.fr/escape2020/wp3/eossr", + "https://gitlab.in2p3.fr/escape2020/wp3/ossr-curation", + "https://gitlab.in2p3.fr/escape2020/wp3/template_project_escape", + "https://gitlab.in2p3.fr/escape2020/wp3/zenodoci", + "https://gitlab.in2p3.fr/gammalearn/gammalearn", + "https://gitlab.in2p3.fr/gammalearn/publications/2025-stereograph" + ] + }, + "issues": { - "custom_message": "Your repository was analyzed as part of the ESCAPE OSSR metadata quality initiative. Several metadata issues were identified and could be addressed.", + "custom_issue_message": "Your repository was analyzed as part of the ESCAPE OSSR metadata quality initiative. Several metadata issues were identified and could be addressed.", "opt_outs": [] }, "outputs": { - "root_dir": "outputs", + "output_root_dir": "outputs", "run_name": "ossr", "snapshot_tag_format": "%Y%m%d" } diff --git a/coverage.svg b/coverage.svg index d79e240..dd7bbd6 100644 --- a/coverage.svg +++ b/coverage.svg @@ -1,21 +1 @@ - - - - - - - - - - - - - - - - coverage - coverage - 65% - 65% - - +coverage: 70.61%coverage70.61% \ No newline at end of file diff --git a/interrogate_badge.svg b/interrogate_badge.svg index 13bcd6a..2643362 100644 --- a/interrogate_badge.svg +++ b/interrogate_badge.svg @@ -1,5 +1,5 @@ - interrogate: 99.1% + interrogate: 95.9% @@ -12,8 +12,8 @@ interrogate interrogate - 99.1% - 99.1% + 95.9% + 95.9% diff --git a/pyproject.toml b/pyproject.toml index ebbec41..eea69d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ dependencies = [ "python-dotenv>=1.0.0", "requests>=2.32.5", "setuptools[test]<83.0.0", + "pydantic>=2.13.4", ] keywords = ["codemeta", "metadata", "bot", "quality software"] @@ -43,10 +44,10 @@ dev = [ "ty>=0.0.12", ] test = [ - "coverage-badge", "pytest", "pytest-cov", "setuptools<83.0.0", + "genbadge[coverage]>=1.0.0", ] docs = [ "furo>=2025.12.19", diff --git a/src/sw_metadata_bot/analysis_runtime.py b/src/sw_metadata_bot/analysis_runtime.py index 85e7e9a..5f4e1da 100644 --- a/src/sw_metadata_bot/analysis_runtime.py +++ b/src/sw_metadata_bot/analysis_runtime.py @@ -10,7 +10,7 @@ from . import __version__, constants, history, incremental, pitfalls, utils from .check_parsing import extract_check_ids from .codemeta_runtime import evaluate_and_persist_codemeta_status, load_codemeta_status -from .config_utils import detect_platform, normalize_repo_url, sanitize_repo_name +from .config.config_utils import detect_platform, normalize_repo_url, sanitize_repo_name from .reporting import ( RecordAnalysis, RecordLifecycle, @@ -524,6 +524,7 @@ def build_analysis_run_report( run_root: Path, analysis_summary_file: Path, previous_report: Path | None, + input_config_file: Path | None = None, ) -> dict[str, object]: """Build run-level report payload from analysis decision records.""" return { @@ -532,6 +533,7 @@ def build_analysis_run_report( run_root=run_root, analysis_summary_file=analysis_summary_file, previous_report=previous_report, + input_config_file=input_config_file, ), "counters": build_analysis_counters(records), "records": records, diff --git a/src/sw_metadata_bot/config/__init__.py b/src/sw_metadata_bot/config/__init__.py new file mode 100644 index 0000000..21cd8dd --- /dev/null +++ b/src/sw_metadata_bot/config/__init__.py @@ -0,0 +1 @@ +"""Configuration schemas and utilities for the bot.""" diff --git a/src/sw_metadata_bot/config/config_utils.py b/src/sw_metadata_bot/config/config_utils.py new file mode 100644 index 0000000..496aff3 --- /dev/null +++ b/src/sw_metadata_bot/config/config_utils.py @@ -0,0 +1,99 @@ +"""Helpers for the unified configuration file.""" + +from pathlib import Path + +import click + +from sw_metadata_bot.config.schemas import BotConfig + +from .. import constants + + +def normalize_repo_url(url: str) -> str: + """Normalize repository URLs for matching and persistence.""" + return url.strip().rstrip("/") + + +def detect_platform(url: str) -> str | None: + """Detect publishing platform from repository URL. + + Returns ``"github"`` for GitHub URLs, ``"gitlab"`` for any GitLab URL, + or ``None`` when the URL does not match a known platform. + """ + lowered = url.lower() + if "github.com" in lowered: + return "github" + if "gitlab" in lowered: + return "gitlab" + return None + + +def sanitize_repo_name(repo_url: str) -> str: + """Sanitize repository URL to a safe folder name format. + + Uses a generic URL-safe transformation so non-standard URLs still map to + deterministic folder names. + + Args: + repo_url: Repository URL or identifier string + + Returns: + Sanitized folder name (lowercase, underscores only) + """ + import re + + normalized = normalize_repo_url(repo_url) + no_scheme = re.sub(r"^[a-zA-Z][a-zA-Z0-9+.-]*://", "", normalized) + no_git_suffix = re.sub(r"\.git$", "", no_scheme, flags=re.IGNORECASE) + sanitized = re.sub(r"[./-]", "_", no_git_suffix) + sanitized = re.sub(r"[^a-zA-Z0-9_]", "_", sanitized) + sanitized = re.sub(r"_+", "_", sanitized).strip("_").lower() + + if not sanitized: + raise click.ClickException(f"Unable to sanitize repository URL: {repo_url}") + + return sanitized + + +def _find_project_root(config_path: Path) -> Path: + """Return the nearest ancestor that looks like the project root. + This expects the config file to be located somewhere within the project directory structure, and looks for common project root markers (e.g., .git, pyproject.toml) in the config file's parent directories. If no markers are found, it defaults to the current working directory.""" + resolved_config_path = config_path.resolve() + for candidate in (resolved_config_path.parent, *resolved_config_path.parents): + if any( + (candidate / marker).exists() for marker in constants.PROJECT_ROOT_MARKERS + ): + return candidate + return Path.cwd().resolve() + + +def resolve_output_root(output_root_dir: str, config_path: Path) -> Path: + """Return the configured output root, resolving relative paths from project root.""" + root_path = Path(output_root_dir) + if not root_path.is_absolute(): + root_path = _find_project_root(config_path) / root_path + return root_path + + +def resolve_run_name(run_name: str | None, config_path: Path) -> str: + """Return the configured run name or a sensible default. + If run_name is provided in the config, it is used directly. Otherwise, the default is derived from the config file name (without extension). + """ + if run_name is not None: + if not isinstance(run_name, str) or not run_name.strip(): + raise click.ClickException( + "Invalid config: 'outputs.run_name' must be a non-empty string" + ) + return run_name + + return config_path.stem + + +def append_opt_out_to_config(config_file: Path, repo_url: str, explicit: bool) -> None: + """Helper to append a repository URL to the config's opt-out list from file.""" + normalized_url = normalize_repo_url(repo_url) + + config = BotConfig.from_json(config_file) + is_new_repo_added = config.add_opt_out_repository(normalized_url) + if is_new_repo_added: + config.to_json(config_file, explicit=explicit) diff --git a/src/sw_metadata_bot/config/schemas.py b/src/sw_metadata_bot/config/schemas.py new file mode 100644 index 0000000..9617bdb --- /dev/null +++ b/src/sw_metadata_bot/config/schemas.py @@ -0,0 +1,168 @@ +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional + +from pydantic import BaseModel, Field, field_validator, model_validator + +from ..constants import DEFAULT_OUTPUT_ROOT, DEFAULT_SNAPSHOT_TAG_FORMAT + + +class AnalysisConfig(BaseModel): + """Configuration fields relevant to analysis and reporting. + repositories is a list of repository URLs that the bot will analyze. Each URL should be in a standard format (e.g., https://github.com/user/repo.git). + generate_codemeta_if_missing is a boolean flag that indicates whether the bot should attempt to generate a codemeta.json file for repositories that are missing one. If set to True, the bot will use available metadata and heuristics to create a codemeta.json file, which can help improve the quality of the analysis and reporting. If set to False, the bot will skip repositories that do not have a codemeta.json file, potentially resulting in less comprehensive analysis. + """ + + repositories: list[str] + generate_codemeta_if_missing: Optional[bool] = True + + @field_validator("repositories", mode="after") + @classmethod + def validate_repositories(cls, v): + if not v: + raise ValueError("Repositories list cannot be empty") + return v + + +class IssueConfig(BaseModel): + """Configuration fields relevant to issue publishing. + custom_message allows users to specify a custom message template for issues, which can include placeholders for dynamic content such as repository name, check results, etc. If not provided, a default message will be used. + opt_outs is a list of repository URLs that should be excluded from issue creation, even if they are included in the main repositories list. This allows users to selectively opt out of issue creation + """ + + custom_issue_message: Optional[str] = None + opt_outs: Optional[list[str]] = Field(default_factory=list) + + +class OutputConfig(BaseModel): + """Configuration fields relevant to output generation. + output_root_dir specifies the root directory where the bot will save its output files, such as analysis reports and generated codemeta.json files. If not provided, it defaults to "outputs"."" + run_name is an optional identifier for the current run, which can be used to create a subdirectory within the output root directory. This allows users to organize outputs from different runs separately. If not provided, outputs will be saved directly under the root directory. + snapshot_tag_format is an optional string that defines the format for snapshot tags used in output filenames. This can include placeholders for dynamic content such as timestamps or repository names. If not provided, it defaults to a standard format defined in constants.py. + """ + + output_root_dir: Optional[str] = DEFAULT_OUTPUT_ROOT + run_name: Optional[str] = None + snapshot_tag_format: Optional[str] = DEFAULT_SNAPSHOT_TAG_FORMAT + + @field_validator("snapshot_tag_format", mode="after") + @classmethod + def validate_snapshot_tag_format(cls, v): + """Check that snapshot_tag_format is a valid string format handled by strftime.""" + if v is None: + return v + if not isinstance(v, str) or not v.strip(): + raise ValueError("snapshot_tag_format must be a non-empty string or null") + # Test that the format string can be used with strftime + try: + from datetime import datetime + + datetime.now().strftime(v) + except Exception as e: + raise ValueError(f"Invalid snapshot_tag_format: {e}") + return v + + +class BotConfig(BaseModel): + """Top-level configuration model for the bot.""" + + version: str = "1.0.0" + analysis: AnalysisConfig + # Optional sections in configuration + issues: IssueConfig = Field(default_factory=IssueConfig) + outputs: OutputConfig = Field(default_factory=OutputConfig) + + @model_validator(mode="after") + def validate_opt_outs(self) -> "BotConfig": + """Validate that opt-out repository URLs parts of the repositories list. + If any opt-out URL is not in the repositories list, remove it from the config and log a warning.""" + valid_opt_outs = [] + if not self.issues.opt_outs: + return self + for url in self.issues.opt_outs: + if url not in self.analysis.repositories: + print( + f"Warning: Opt-out URL '{url}' is not in the repositories list and will be ignored." + ) + else: + valid_opt_outs.append(url) + self.issues.opt_outs = valid_opt_outs + + return self + + @classmethod + def from_json(cls, path: Path) -> "BotConfig": + """Load configuration from a JSON file and validate it against the schema. + The extra="forbid" option in model validation ensures that any unexpected fields in the config will raise a validation error, helping catch typos and misconfigurations early. + """ + with path.open() as f: + data = json.load(f) + # extra forbid ensures that any unexpected fields in the config will raise a validation error, helping catch typos and misconfigurations early. + return cls.model_validate(data, extra="forbid") + + def to_json( + self, + path: Path, + explicit: bool = False, + ) -> None: + """Save configuration to a JSON file. + If explicit is True, only fields that were explicitly set (not default values) will be included in the output JSON. This can help reduce clutter and make the config easier to read by omitting fields that are using default values. + """ + data = self.model_dump(exclude_unset=not explicit) + with path.open("w") as f: + json.dump(data, f, indent=4) + + def get_repositories(self) -> list[str]: + """Return the list of repositories to analyze, excluding any opt-outs.""" + return self.analysis.repositories + + def get_issue_opt_outs(self) -> list[str]: + """Return the list of repositories that are opted out of issue creation.""" + if self.issues.opt_outs is None: + return [] + return self.issues.opt_outs + + def get_generate_codemeta_if_missing(self) -> bool: + """Return whether to generate codemeta.json if missing, defaulting to True.""" + return self.analysis.generate_codemeta_if_missing + + def get_custom_issue_message(self) -> Optional[str]: + """Return the custom issue message template, or None if not set.""" + return self.issues.custom_issue_message + + def get_output_root_dir(self) -> str: + """Return the configured output root directory.""" + return self.outputs.output_root_dir or DEFAULT_OUTPUT_ROOT + + def get_snapshot_tag_format(self) -> str: + """Return the configured snapshot tag format.""" + return self.outputs.snapshot_tag_format or DEFAULT_SNAPSHOT_TAG_FORMAT + + def get_run_name(self) -> str: + """Return the configured run name, or empty string if not set.""" + return self.outputs.run_name or "" + + def add_opt_out_repository(self, repo_url: str) -> bool: + """Add a repository URL to the opt-out list for issue creation. Returns True if the URL was added, False if it was already in the opt-out list.""" + if repo_url not in self.analysis.repositories: + print( + f"Warning: Cannot add '{repo_url}' to opt-out list because it is not in the repositories list." + ) + return False + # create empty list if opt_outs is None (should not happen due to default_factory, but linters may not recognize it) + if self.issues.opt_outs is None: + self.issues.opt_outs = [] + if repo_url in self.issues.opt_outs: + print(f"Repository '{repo_url}' is already in the opt-out list.") + return False + self.issues.opt_outs.append(repo_url) + return True + + def resolve_snapshot_tag(self, explicit_snapshot_tag: Optional[str] = None) -> str: + """Resolve the snapshot tag to use for output files. + If an explicit snapshot tag is provided, it takes precedence. Otherwise, the snapshot tag is generated based on the current timestamp and the configured format.""" + snapshot_tag_format = self.get_snapshot_tag_format() + if explicit_snapshot_tag is not None: + return explicit_snapshot_tag + return datetime.now(timezone.utc).strftime(snapshot_tag_format) diff --git a/src/sw_metadata_bot/config_utils.py b/src/sw_metadata_bot/config_utils.py deleted file mode 100644 index b0c065a..0000000 --- a/src/sw_metadata_bot/config_utils.py +++ /dev/null @@ -1,262 +0,0 @@ -"""Helpers for the unified configuration file.""" - -import json -from datetime import datetime, timezone -from pathlib import Path - -import click - -from . import constants - -DEFAULT_OUTPUT_ROOT = Path("outputs") -DEFAULT_SNAPSHOT_TAG_FORMAT = "%Y%m%d" -PROJECT_ROOT_MARKERS = ("pyproject.toml", ".git") - - -def normalize_repo_url(url: str) -> str: - """Normalize repository URLs for matching and persistence.""" - return url.strip().rstrip("/") - - -def detect_platform(url: str) -> str | None: - """Detect publishing platform from repository URL. - - Returns ``"github"`` for GitHub URLs, ``"gitlab"`` for any GitLab URL, - or ``None`` when the URL does not match a known platform. - """ - lowered = url.lower() - if "github.com" in lowered: - return "github" - if "gitlab" in lowered: - return "gitlab" - return None - - -def load_config(config_path: Path) -> dict: - """Load and validate a unified configuration file.""" - with open(config_path, encoding="utf-8") as f: - data = json.load(f) - - if not isinstance(data, dict): - raise click.ClickException( - f"Invalid format in {config_path}: top-level JSON value must be an object" - ) - - repositories = data.get("repositories") - if not isinstance(repositories, list): - raise click.ClickException( - f"Invalid format in {config_path}: 'repositories' must be a list" - ) - - return data - - -def get_repositories(config: dict) -> list[str]: - """Return normalized repositories preserving order and uniqueness.""" - repositories = config.get("repositories", []) - if not isinstance(repositories, list): - raise click.ClickException("Invalid config: 'repositories' must be a list") - - seen: set[str] = set() - ordered: list[str] = [] - for item in repositories: - if not isinstance(item, str): - continue - normalized = normalize_repo_url(item) - if normalized in seen: - continue - seen.add(normalized) - ordered.append(normalized) - return ordered - - -def get_custom_message(config: dict) -> str | None: - """Return the configured issue custom message if present.""" - issues = config.get("issues", {}) - if not isinstance(issues, dict): - raise click.ClickException("Invalid config: 'issues' must be an object") - - custom_message = issues.get("custom_message") - if custom_message is None: - return None - if not isinstance(custom_message, str): - raise click.ClickException( - "Invalid config: 'issues.custom_message' must be a string" - ) - return custom_message - - -def get_opt_out_repositories(config: dict) -> set[str]: - """Return normalized repository URLs configured as inline opt-outs.""" - issues = config.get("issues", {}) - if not isinstance(issues, dict): - raise click.ClickException("Invalid config: 'issues' must be an object") - - opt_outs = issues.get("opt_outs", []) - if not isinstance(opt_outs, list): - raise click.ClickException("Invalid config: 'issues.opt_outs' must be a list") - - return {normalize_repo_url(url) for url in opt_outs if isinstance(url, str)} - - -def get_generate_codemeta_if_missing(config: dict) -> bool: - """Return whether codemeta suggestions should be generated when missing.""" - issues = config.get("issues", {}) - if not isinstance(issues, dict): - raise click.ClickException("Invalid config: 'issues' must be an object") - - value = issues.get("generate_codemeta_if_missing", True) - if not isinstance(value, bool): - raise click.ClickException( - "Invalid config: 'issues.generate_codemeta_if_missing' must be a boolean" - ) - return value - - -def append_opt_out_repository(config_path: Path, repo_url: str) -> bool: - """Persist a repository to the inline opt-outs list when not already present.""" - data = load_config(config_path) - issues = data.setdefault("issues", {}) - if not isinstance(issues, dict): - raise click.ClickException("Invalid config: 'issues' must be an object") - - opt_outs = issues.setdefault("opt_outs", []) - if not isinstance(opt_outs, list): - raise click.ClickException("Invalid config: 'issues.opt_outs' must be a list") - - normalized_repo = normalize_repo_url(repo_url) - normalized_existing = { - normalize_repo_url(url) for url in opt_outs if isinstance(url, str) - } - if normalized_repo in normalized_existing: - return False - - opt_outs.append(normalized_repo) - with open(config_path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2) - return True - - -def _find_project_root(config_path: Path) -> Path: - """Return the nearest ancestor that looks like the project root.""" - resolved_config_path = config_path.resolve() - for candidate in (resolved_config_path.parent, *resolved_config_path.parents): - if any((candidate / marker).exists() for marker in PROJECT_ROOT_MARKERS): - return candidate - return Path.cwd().resolve() - - -def resolve_output_root(config: dict, config_path: Path) -> Path: - """Return the configured output root, resolving relative paths from project root.""" - outputs = config.get("outputs", {}) - if not isinstance(outputs, dict): - raise click.ClickException("Invalid config: 'outputs' must be an object") - - root_dir = outputs.get("root_dir", str(DEFAULT_OUTPUT_ROOT)) - if not isinstance(root_dir, str) or not root_dir.strip(): - raise click.ClickException( - "Invalid config: 'outputs.root_dir' must be a non-empty string" - ) - - root_path = Path(root_dir) - if not root_path.is_absolute(): - root_path = _find_project_root(config_path) / root_path - return root_path - - -def resolve_run_name(config: dict, config_path: Path) -> str: - """Return the configured run name or a sensible default.""" - outputs = config.get("outputs", {}) - if not isinstance(outputs, dict): - raise click.ClickException("Invalid config: 'outputs' must be an object") - - run_name = outputs.get("run_name") - if run_name is not None: - if not isinstance(run_name, str) or not run_name.strip(): - raise click.ClickException( - "Invalid config: 'outputs.run_name' must be a non-empty string" - ) - return run_name - - return config_path.stem - - -def resolve_snapshot_tag( - config: dict, - explicit_snapshot_tag: str | None, -) -> str | None: - """Resolve the snapshot tag from CLI override or config defaults.""" - if explicit_snapshot_tag is not None: - return explicit_snapshot_tag - - outputs = config.get("outputs", {}) - if not isinstance(outputs, dict): - raise click.ClickException("Invalid config: 'outputs' must be an object") - - snapshot_tag_format = outputs.get( - "snapshot_tag_format", DEFAULT_SNAPSHOT_TAG_FORMAT - ) - if snapshot_tag_format is None: - return None - if not isinstance(snapshot_tag_format, str) or not snapshot_tag_format.strip(): - raise click.ClickException( - "Invalid config: 'outputs.snapshot_tag_format' must be a string or null" - ) - - return datetime.now(timezone.utc).strftime(snapshot_tag_format) - - -def sanitize_repo_name(repo_url: str) -> str: - """Sanitize repository URL to a safe folder name format. - - Uses a generic URL-safe transformation so non-standard URLs still map to - deterministic folder names. - - Args: - repo_url: Repository URL or identifier string - - Returns: - Sanitized folder name (lowercase, underscores only) - """ - import re - - normalized = normalize_repo_url(repo_url) - no_scheme = re.sub(r"^[a-zA-Z][a-zA-Z0-9+.-]*://", "", normalized) - no_git_suffix = re.sub(r"\.git$", "", no_scheme, flags=re.IGNORECASE) - sanitized = re.sub(r"[./-]", "_", no_git_suffix) - sanitized = re.sub(r"[^a-zA-Z0-9_]", "_", sanitized) - sanitized = re.sub(r"_+", "_", sanitized).strip("_").lower() - - if not sanitized: - raise click.ClickException(f"Unable to sanitize repository URL: {repo_url}") - - return sanitized - - -def copy_config_to_analysis_root(config_path: Path, analysis_root: Path) -> None: - """Copy the configuration file to the analysis root directory. - - Args: - config_path: Path to the input configuration file - analysis_root: Root analysis directory where config will be copied - - Raises: - IOError: If copying fails - """ - config_path = config_path.resolve() - analysis_root = analysis_root.resolve() - - if not config_path.exists(): - raise click.ClickException(f"Config file not found: {config_path}") - - # Ensure analysis root exists - analysis_root.mkdir(parents=True, exist_ok=True) - - # Copy config to config.json in analysis root - dest_path = analysis_root / constants.FILENAME_CONFIG_SNAPSHOT - - with open(config_path, "r", encoding="utf-8") as src: - content = json.load(src) - - with open(dest_path, "w", encoding="utf-8") as dst: - json.dump(content, dst, indent=2) diff --git a/src/sw_metadata_bot/constants.py b/src/sw_metadata_bot/constants.py index bbcf622..5b57ec4 100644 --- a/src/sw_metadata_bot/constants.py +++ b/src/sw_metadata_bot/constants.py @@ -5,6 +5,21 @@ duplication and makes it easy to find and update values consistently. """ +# ============================================================================= +# Configuration Defaults +# ============================================================================= +# Default values for configuration fields, used when not specified in config.json. + +DEFAULT_OUTPUT_ROOT = "outputs" +DEFAULT_SNAPSHOT_TAG_FORMAT = "%Y%m%d" +PROJECT_ROOT_MARKERS = ("pyproject.toml", ".git") + +CONFIG_SECTION_ISSUES = "issues" +CONFIG_KEY_CUSTOM_MESSAGE = "custom_message" +CONFIG_KEY_GENERATE_CODEMETA_IF_MISSING = "generate_codemeta_if_missing" +CONFIG_KEY_OPT_OUTS = "opt_outs" +CONFIG_SECTION_OUTPUTS = "outputs" + # ============================================================================= # Issue Publishing Action Names # ============================================================================= diff --git a/src/sw_metadata_bot/history.py b/src/sw_metadata_bot/history.py index 7546f29..c959beb 100644 --- a/src/sw_metadata_bot/history.py +++ b/src/sw_metadata_bot/history.py @@ -4,7 +4,7 @@ from pathlib import Path from . import utils -from .config_utils import normalize_repo_url +from .config.config_utils import normalize_repo_url def _read_report_records(report_path: Path | None) -> list[dict]: diff --git a/src/sw_metadata_bot/main.py b/src/sw_metadata_bot/main.py index 7734467..0d68cff 100644 --- a/src/sw_metadata_bot/main.py +++ b/src/sw_metadata_bot/main.py @@ -3,7 +3,7 @@ import click from .pipeline import run_analysis_command -from .publish import publish_command +from .publish import publish_command, simulate_publish_command from .verify_tokens import verify_tokens_command @@ -21,6 +21,7 @@ def cli(): cli.add_command(verify_tokens_command, name="verify-tokens") cli.add_command(run_analysis_command, name="run-analysis") cli.add_command(publish_command, name="publish") +cli.add_command(simulate_publish_command, name="simulate-publish") def main(): diff --git a/src/sw_metadata_bot/pipeline.py b/src/sw_metadata_bot/pipeline.py index aee134b..d4a6bbd 100644 --- a/src/sw_metadata_bot/pipeline.py +++ b/src/sw_metadata_bot/pipeline.py @@ -7,19 +7,10 @@ import click +from sw_metadata_bot.config.schemas import BotConfig + from . import __version__, analysis_runtime, commit_lookup, constants -from .config_utils import ( - copy_config_to_analysis_root, - get_custom_message, - get_generate_codemeta_if_missing, - get_opt_out_repositories, - get_repositories, - load_config, - resolve_output_root, - resolve_run_name, - resolve_snapshot_tag, - sanitize_repo_name, -) +from .config.config_utils import sanitize_repo_name from .reporting import RecordAnalysis, RecordLifecycle, build_record_entry SNAPSHOT_TAG_PATTERN = re.compile(r"^(\d{8})(?:_(\d+))?$") @@ -137,16 +128,21 @@ def run_pipeline( When force_analysis is True, the pipeline will bypass artifact reuse for unchanged repositories and treat them as if the repository was updated. """ - config = load_config(config_file) - repositories = get_repositories(config) - custom_message = get_custom_message(config) - generate_codemeta_if_missing = get_generate_codemeta_if_missing(config) - opt_out_repos = get_opt_out_repositories(config) - output_root = resolve_output_root(config, config_file) - run_folder_name = resolve_run_name(config, config_file) - requested_snapshot_tag = resolve_snapshot_tag(config, snapshot_tag) + # Ensure the provided config path is absolute and resolvable so we can + # persist a resolvable `input_config_file` in run metadata. + config_file = config_file.resolve() + config = BotConfig.from_json(config_file) + + repositories = config.get_repositories() + custom_message = config.get_custom_issue_message() + generate_codemeta_if_missing = config.get_generate_codemeta_if_missing() + opt_out_repos = config.get_issue_opt_outs() + output_root = Path(config.get_output_root_dir()) + run_folder_name = config.get_run_name() + requested_snapshot_tag = config.resolve_snapshot_tag(snapshot_tag) run_root = output_root / run_folder_name + run_root.mkdir(parents=True, exist_ok=True) resolved_snapshot_tag = _resolve_unique_snapshot_tag( run_root=run_root, snapshot_tag=requested_snapshot_tag, @@ -155,10 +151,10 @@ def run_pipeline( analysis_root = ( run_root / resolved_snapshot_tag if resolved_snapshot_tag else run_root ) - analysis_output_file = analysis_root / constants.FILENAME_ANALYSIS_RESULTS - - copy_config_to_analysis_root(config_file, analysis_root) analysis_root.mkdir(parents=True, exist_ok=True) + config_analysis_path = analysis_root / constants.FILENAME_CONFIG_SNAPSHOT + analysis_report_path = analysis_root / constants.FILENAME_ANALYSIS_RESULTS + config.to_json(config_analysis_path, explicit=True) resolved_previous_report = previous_report if resolved_previous_report is None: @@ -258,7 +254,7 @@ def run_pipeline( record, dry_run=dry_run, run_root=run_root, - analysis_summary_file=analysis_output_file, + analysis_summary_file=analysis_report_path, previous_report=resolved_previous_report, ) run_records.append(record) @@ -272,15 +268,16 @@ def run_pipeline( "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), "summary": {"evaluated_repositories": evaluated_repositories}, } - with open(analysis_output_file, "w", encoding="utf-8") as f: + with open(analysis_report_path, "w", encoding="utf-8") as f: json.dump(analysis_summary, f, indent=2) run_report = analysis_runtime.build_analysis_run_report( run_records, dry_run=dry_run, run_root=run_root, - analysis_summary_file=analysis_output_file, + analysis_summary_file=analysis_report_path, previous_report=resolved_previous_report, + input_config_file=config_file, ) run_report_file = analysis_root / constants.FILENAME_RUN_REPORT with open(run_report_file, "w", encoding="utf-8") as f: diff --git a/src/sw_metadata_bot/publish.py b/src/sw_metadata_bot/publish.py index 46939ac..22cd8dc 100644 --- a/src/sw_metadata_bot/publish.py +++ b/src/sw_metadata_bot/publish.py @@ -8,17 +8,43 @@ import click from . import constants, github_api, gitlab_api, pitfalls, utils -from .config_utils import ( +from .config.config_utils import ( + append_opt_out_to_config, detect_platform, - get_custom_message, - load_config, sanitize_repo_name, ) +from .config.schemas import BotConfig from .reporting import build_counters, write_report_file MAX_PUBLISH_RETRY_ATTEMPTS = 3 +class FakeIssueClient: + """Issue client used only for local publish simulation.""" + + def __init__(self, comments_for=None): + self._comments_for = comments_for or (lambda url: []) + self.created: list[tuple[str, str, str]] = [] + self.commented: list[str] = [] + self.closed: list[str] = [] + + def create_issue(self, repo_url: str, title: str, body: str) -> str: + self.created.append((repo_url, title, body)) + return f"{repo_url}/issues/99" + + def get_issue(self, issue_url: str) -> dict[str, object]: + return {"state": "open"} + + def get_issue_comments(self, issue_url: str) -> list[str]: + return self._comments_for(issue_url) + + def add_issue_comment(self, issue_url: str, body: str) -> None: + self.commented.append(issue_url) + + def close_issue(self, issue_url: str) -> None: + self.closed.append(issue_url) + + def _is_unsubscribe_comment(comment: str) -> bool: """Return True when a comment is exactly the unsubscribe keyword.""" return comment.strip().lower() == "unsubscribe" @@ -145,7 +171,8 @@ def _load_publish_body(analysis_root: Path, repo_url: str) -> str: config_file = analysis_root / constants.FILENAME_CONFIG_SNAPSHOT custom_message = None if config_file.exists(): - custom_message = get_custom_message(load_config(config_file)) + config = BotConfig.from_json(config_file) + custom_message = config.get_custom_issue_message() report = pitfalls.format_report(repo_url, data) return pitfalls.create_issue_body(report, custom_message) @@ -198,7 +225,12 @@ def _write_per_repo_report( ) -def publish_analysis(analysis_root: Path, retry_failed: bool = False) -> None: +def publish_analysis( + analysis_root: Path, + retry_failed: bool = False, + github_client: github_api.GitHubAPI | None = None, + gitlab_client: gitlab_api.GitLabAPI | None = None, +) -> None: """Publish issues from an existing analysis snapshot without re-running analysis.""" run_report_file = analysis_root / constants.FILENAME_RUN_REPORT try: @@ -219,6 +251,7 @@ def publish_analysis(analysis_root: Path, retry_failed: bool = False) -> None: run_metadata = {} analysis_summary_value = run_metadata.get("analysis_summary_file") previous_report_value = run_metadata.get("previous_report_source") + input_config_value = run_metadata.get("input_config_file") analysis_summary_file = ( Path(analysis_summary_value) if isinstance(analysis_summary_value, str) @@ -227,6 +260,9 @@ def publish_analysis(analysis_root: Path, retry_failed: bool = False) -> None: previous_report = ( Path(previous_report_value) if isinstance(previous_report_value, str) else None ) + input_config_file = ( + Path(input_config_value) if isinstance(input_config_value, str) else None + ) records = run_report.get("records") if isinstance(run_report, dict) else None if not isinstance(records, list): @@ -234,21 +270,21 @@ def publish_analysis(analysis_root: Path, retry_failed: bool = False) -> None: f"Invalid run_report.json format in {run_report_file}: records must be a list" ) - github_client: github_api.GitHubAPI | None = None - gitlab_client: gitlab_api.GitLabAPI | None = None + github_client_instance = github_client + gitlab_client_instance = gitlab_client def issue_client_for_platform(platform: str): """Return lazily initialized issue client for the requested platform.""" - nonlocal github_client, gitlab_client + nonlocal github_client_instance, gitlab_client_instance if platform == "github": - if github_client is None: - github_client = github_api.GitHubAPI(dry_run=False) - return github_client + if github_client_instance is None: + github_client_instance = github_api.GitHubAPI(dry_run=False) + return github_client_instance if platform in {"gitlab", "gitlab.com"}: - if gitlab_client is None: - gitlab_client = gitlab_api.GitLabAPI(dry_run=False) - return gitlab_client + if gitlab_client_instance is None: + gitlab_client_instance = gitlab_api.GitLabAPI(dry_run=False) + return gitlab_client_instance raise click.ClickException(f"Unsupported platform for publish: {platform}") @@ -265,6 +301,45 @@ def issue_client_for_platform(platform: str): updated_records.append(record) continue + action = str(record.get("action", "")) + platform = _detect_platform_for_publish(repo_url, record) + issue_url = _issue_url_for_publish(record) + + if action == constants.ACTION_SKIPPED and issue_url: + issue_client = issue_client_for_platform(platform) + comments = issue_client.get_issue_comments(issue_url) + unsubscribe_detected = any( + _is_unsubscribe_comment(comment) for comment in comments + ) + record["unsubscribe_detected"] = unsubscribe_detected + if unsubscribe_detected: + config_file = analysis_root / constants.FILENAME_CONFIG_SNAPSHOT + if config_file.exists(): + append_opt_out_to_config(config_file, repo_url, explicit=False) + + if input_config_file is not None: + original_input_path = input_config_file + if not original_input_path.is_absolute(): + original_input_path = analysis_root.parent / original_input_path + if original_input_path.exists(): + append_opt_out_to_config( + original_input_path, repo_url, explicit=False + ) + + record["action"] = constants.ACTION_SKIPPED + record["reason_code"] = constants.REASON_CODE_UNSUBSCRIBE + record["dry_run"] = False + record["issue_persistence"] = "none" + record.pop("simulated_issue_url", None) + updated_records.append(record) + _write_per_repo_report( + analysis_root, + record, + analysis_summary_file, + previous_report, + ) + continue + if ( record.get("dry_run") is False and record.get("action") != constants.ACTION_FAILED @@ -273,7 +348,6 @@ def issue_client_for_platform(platform: str): updated_records.append(record) continue - action = str(record.get("action", "")) if action == constants.ACTION_FAILED: if not retry_failed: skipped_failed_retry += 1 @@ -312,6 +386,23 @@ def issue_client_for_platform(platform: str): _is_unsubscribe_comment(comment) for comment in comments ) if unsubscribe_detected: + # update config of analysis snapshot when present + config_file = analysis_root / constants.FILENAME_CONFIG_SNAPSHOT + if config_file.exists(): + append_opt_out_to_config(config_file, repo_url, explicit=False) + + # also update the original input config file when available + input_config_value = run_metadata.get("input_config_file") + if isinstance(input_config_value, str): + input_config_path = Path(input_config_value) + if not input_config_path.is_absolute(): + input_config_path = analysis_root.parent / input_config_path + if input_config_path.exists(): + append_opt_out_to_config( + input_config_path, repo_url, explicit=False + ) + + # skip publish record["action"] = constants.ACTION_SKIPPED record["reason_code"] = constants.REASON_CODE_UNSUBSCRIBE record["unsubscribe_detected"] = True @@ -441,6 +532,7 @@ def issue_client_for_platform(platform: str): run_root=analysis_root.parent, analysis_summary_file=analysis_summary_file, previous_report=previous_report, + input_config_file=input_config_file, ) run_metadata_candidate = run_report.get("run_metadata") if isinstance(run_metadata_candidate, dict): @@ -474,3 +566,48 @@ def issue_client_for_platform(platform: str): def publish_command(analysis_root: Path, retry_failed: bool) -> None: """Publish issues using precomputed decisions from an analysis snapshot.""" publish_analysis(analysis_root, retry_failed=retry_failed) + + +@click.command() +@click.option( + "--analysis-root", + type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path), + required=True, + help="Existing analysis snapshot folder containing run_report.json.", +) +@click.option( + "--retry-failed", + is_flag=True, + default=False, + help="Retry records previously marked as failed when they are eligible for retry.", +) +@click.option( + "--unsubscribe", + is_flag=True, + default=False, + help="Simulate an unsubscribe comment on all issue comment checks.", +) +@click.option( + "--fake-comment", + multiple=True, + help="Fake issue comment text returned for all issue URLs. Can be repeated.", +) +def simulate_publish_command( + analysis_root: Path, + retry_failed: bool, + unsubscribe: bool, + fake_comment: tuple[str, ...], +) -> None: + """Simulate publish using a local fake issue client without external API access.""" + fake_comments = [] + if unsubscribe: + fake_comments.append("unsubscribe") + fake_comments.extend(fake_comment) + + fake_client = FakeIssueClient(comments_for=lambda url: list(fake_comments)) + publish_analysis( + analysis_root, + retry_failed=retry_failed, + github_client=fake_client, + gitlab_client=fake_client, + ) diff --git a/src/sw_metadata_bot/reporting.py b/src/sw_metadata_bot/reporting.py index 75811be..4d22e0a 100644 --- a/src/sw_metadata_bot/reporting.py +++ b/src/sw_metadata_bot/reporting.py @@ -46,14 +46,20 @@ class RecordLifecycle: def relative_to_run_root(path: Path | None, run_root: Path) -> str | None: """Return a run-root-relative path string. - Accepts both absolute and already-relative input paths. Absolute paths must - be inside run_root; otherwise Path.relative_to raises ValueError. + Accepts both absolute and relative input paths. + Relative paths are resolved against the current working directory before + comparing against run_root. """ if path is None: return None - if path.is_absolute(): + + if not path.is_absolute(): + path = path.resolve() + + try: return str(path.relative_to(run_root)) - return str(path) + except ValueError: + return str(path) def build_counters(records: list[dict[str, object]]) -> dict[str, int]: @@ -81,6 +87,7 @@ def build_run_metadata( run_root: Path, analysis_summary_file: Path | None, previous_report: Path | None, + input_config_file: Path | None = None, ) -> dict[str, object]: """Build run metadata with normalized relative paths.""" return { @@ -88,6 +95,7 @@ def build_run_metadata( "dry_run": dry_run, "analysis_summary_file": relative_to_run_root(analysis_summary_file, run_root), "previous_report_source": relative_to_run_root(previous_report, run_root), + "input_config_file": relative_to_run_root(input_config_file, run_root), } @@ -157,17 +165,25 @@ def write_report_file( run_root: Path, analysis_summary_file: Path | None, previous_report: Path | None, + input_config_file: Path | None = None, ) -> dict[str, object]: """Write a report payload to disk and return the payload.""" + normalized_records: list[dict[str, object]] = [] + for record in records: + if isinstance(record, dict): + record.setdefault("unsubscribe_detected", False) + normalized_records.append(record) + payload = { "run_metadata": build_run_metadata( dry_run=dry_run, run_root=run_root, analysis_summary_file=analysis_summary_file, previous_report=previous_report, + input_config_file=input_config_file, ), - "counters": build_counters(records), - "records": records, + "counters": build_counters(normalized_records), + "records": normalized_records, } report_file.parent.mkdir(parents=True, exist_ok=True) with open(report_file, "w", encoding="utf-8") as f: diff --git a/tests/test_config_schemas.py b/tests/test_config_schemas.py new file mode 100644 index 0000000..b7cf68c --- /dev/null +++ b/tests/test_config_schemas.py @@ -0,0 +1,274 @@ +"""Tests for the BotConfig schema and related configuration utilities.""" + +import json + +from pydantic import ValidationError + +from sw_metadata_bot.config.schemas import BotConfig + +CONFIG_DATA = { + "analysis": { + "repositories": [ + "https://github.com/SoftwareUnderstanding/sw-metadata-bot", + "https://github.com/example/repo3", + ] + }, + "issues": { + "custom_issue_message": "This is a custom issue message.", + "opt_outs": [], + }, + "outputs": { + "output_root_dir": "custom_outputs", + "run_name": "test_run", + "snapshot_tag_format": "custom_snapshot_%Y%m%d", + }, +} + +CONFIG_DATA_MINIMAL = { + "analysis": { + "repositories": ["https://github.com/SoftwareUnderstanding/sw-metadata-bot"] + }, +} + +CONFIG_DATA_INVALID = { + "analysis": {"repositories": []}, + "issues": { + "custom_issue_message": "This is a custom issue message.", + "opt_outs": ["https://github.com/SoftwareUnderstanding/sw-metadata-bot"], + }, +} + +CONFIG_DATA_INVALID_OPT_OUT = { + "analysis": { + "repositories": ["https://github.com/SoftwareUnderstanding/sw-metadata-bot"] + }, + "issues": { + "custom_issue_message": "This is a custom issue message.", + "opt_outs": ["https://github/other_repo/not_in_list"], + }, +} + + +def test_bot_config_schema_from_json(tmp_path): + """Test that loading a config from JSON works and that all fields are correctly parsed.""" + config_file = tmp_path / "config.json" + json.dump(CONFIG_DATA, config_file.open("w"), indent=4) + + config = BotConfig.from_json(config_file) + + assert config.analysis.repositories == CONFIG_DATA["analysis"]["repositories"] + assert ( + config.issues.custom_issue_message + == CONFIG_DATA["issues"]["custom_issue_message"] + ) + assert config.issues.opt_outs == CONFIG_DATA["issues"]["opt_outs"] + assert config.outputs.output_root_dir == CONFIG_DATA["outputs"]["output_root_dir"] + assert config.outputs.run_name == CONFIG_DATA["outputs"]["run_name"] + assert ( + config.outputs.snapshot_tag_format + == CONFIG_DATA["outputs"]["snapshot_tag_format"] + ) + + +def test_bot_config_schema_from_json_minimal(tmp_path): + """Test that loading a minimal config with only required fields works and that optional fields get default values.""" + config_file = tmp_path / "config_minimal.json" + json.dump(CONFIG_DATA_MINIMAL, config_file.open("w"), indent=4) + + config = BotConfig.from_json(config_file) + + assert ( + config.analysis.repositories == CONFIG_DATA_MINIMAL["analysis"]["repositories"] + ) + assert config.issues.custom_issue_message is None + assert config.issues.opt_outs == [] + assert config.outputs.output_root_dir == "outputs" + assert config.outputs.run_name is None + assert config.outputs.snapshot_tag_format == "%Y%m%d" + + +def test_bot_config_schema_from_json_invalid_repository_list_empty(tmp_path): + """Test that if the repositories list is empty, a validation error is raised.""" + config_file = tmp_path / "config_invalid.json" + json.dump(CONFIG_DATA_INVALID, config_file.open("w"), indent=4) + + try: + _ = BotConfig.from_json(config_file) + assert False, "Expected validation error for invalid config" + except Exception as e: + assert "Repositories list" in str(e) + + +def test_bot_config_schema_from_json_invalid_opt_out(tmp_path): + """Test that if an opt-out URL is not in the repositories list, it is removed from the config and a warning is printed.""" + config_file = tmp_path / "config_invalid_opt_out.json" + json.dump(CONFIG_DATA_INVALID_OPT_OUT, config_file.open("w"), indent=4) + + config = BotConfig.from_json(config_file) + + # The invalid opt-out should be removed from the config and a warning printed + assert config.issues.opt_outs == [] + + +def test_bot_config_schema_from_json_invalid_field(tmp_path): + """Test that if the config JSON contains an unexpected field, a validation error is raised.""" + invalid_config_data = CONFIG_DATA.copy() + invalid_config_data["unexpected_field"] = "unexpected_value" + config_file = tmp_path / "config_invalid_field.json" + json.dump(invalid_config_data, config_file.open("w"), indent=4) + + try: + _ = BotConfig.from_json(config_file) + assert False, "Expected validation error for unexpected field in config" + except Exception as e: + assert "unexpected_field" in str(e) + + +def test_bot_config_schema_from_json_invalid_type_field(tmp_path): + """Test that if the config JSON contains a field with an invalid type, a validation error is raised.""" + import copy + + """Test that if the config JSON contains an unexpected field, a validation error is raised.""" + invalid_config_data = copy.deepcopy(CONFIG_DATA) + invalid_config_data["analysis"]["repositories"] = "not_a_list" + config_file = tmp_path / "config_invalid_type_field.json" + json.dump(invalid_config_data, config_file.open("w"), indent=4) + + try: + _ = BotConfig.from_json(config_file) + assert False, "Expected validation error for unexpected field in config" + except ValidationError as e: + assert "list" in str(e) + + +# export tests + + +def test_bot_config_export_to_json(tmp_path): + """Test that exporting config to JSON and reloading it produces the same config data (round-trip test).""" + + # load config from dict + config = BotConfig.model_validate(CONFIG_DATA) + + config_file = tmp_path / "config_export.json" + config.to_json(config_file, explicit=False) + + # load exported config and compare to original + with config_file.open() as f: + exported_data = json.load(f) + assert exported_data == CONFIG_DATA + + +def test_bot_config_export_to_json_explicit(tmp_path): + """Test that exporting config with explicit=True includes default values for optional fields.""" + + # load config from dict + config = BotConfig.model_validate(CONFIG_DATA_MINIMAL) + + config_file = tmp_path / "config_export_explicit.json" + config.to_json(config_file, explicit=True) + + # load exported config and check that all fields are present (explicit=True should include default values) + with config_file.open() as f: + exported_data = json.load(f) + # check that repositories list is correct + assert ( + exported_data["analysis"]["repositories"] + == CONFIG_DATA_MINIMAL["analysis"]["repositories"] + ) + # check that default values are included for optional fields + assert "custom_issue_message" in exported_data["issues"] + assert "opt_outs" in exported_data["issues"] + assert exported_data["issues"]["custom_issue_message"] is None + assert exported_data["issues"]["opt_outs"] == [] + assert "output_root_dir" in exported_data["outputs"] + assert "run_name" in exported_data["outputs"] + assert "snapshot_tag_format" in exported_data["outputs"] + assert exported_data["outputs"]["output_root_dir"] == "outputs" + assert exported_data["outputs"]["run_name"] is None + assert exported_data["outputs"]["snapshot_tag_format"] == "%Y%m%d" + + +## getters + + +def test_bot_config_getters(tmp_path): + """Test that the getter methods on BotConfig return the expected values.""" + config = BotConfig.model_validate(CONFIG_DATA) + + assert config.get_repositories() == CONFIG_DATA["analysis"]["repositories"] + assert ( + config.get_custom_issue_message() + == CONFIG_DATA["issues"]["custom_issue_message"] + ) + assert config.get_issue_opt_outs() == CONFIG_DATA["issues"]["opt_outs"] + assert config.get_output_root_dir() == CONFIG_DATA["outputs"]["output_root_dir"] + assert config.get_run_name() == CONFIG_DATA["outputs"]["run_name"] + assert ( + config.get_snapshot_tag_format() + == CONFIG_DATA["outputs"]["snapshot_tag_format"] + ) + + +def test_bot_config_add_opt_out_repository(tmp_path): + """Test that adding an opt-out repository works and that it is reflected in the config.""" + config = BotConfig.model_validate(CONFIG_DATA) + + new_opt_out = "https://github.com/example/repo3" + result = config.add_opt_out_repository(new_opt_out) + assert result is True + assert new_opt_out in config.get_issue_opt_outs() + + +def test_bot_config_add_opt_out_repository_invalid(tmp_path): + """Test that adding an opt-out repository that is not in the repositories list does not work.""" + config = BotConfig.model_validate(CONFIG_DATA) + + invalid_opt_out = "https://github.com/example/repo4" + result = config.add_opt_out_repository(invalid_opt_out) + assert result is False + assert invalid_opt_out not in config.get_issue_opt_outs() + + +def test_bot_config_add_opt_out_repository_export_and_duplicate(tmp_path): + """Double test: + 1/ that after adding an opt-out repository, exporting the config to JSON reflects the change. + 2/ that if we add the same opt-out repository again, it does not create duplicates in the config. + """ + config = BotConfig.model_validate(CONFIG_DATA) + + new_opt_out = "https://github.com/example/repo3" + config.add_opt_out_repository(new_opt_out) + + config_file = tmp_path / "config_export_opt_out.json" + config.to_json(config_file, explicit=False) + + # load exported config and compare to original + new_config = BotConfig.from_json(config_file) + assert new_config.get_issue_opt_outs() == [new_opt_out] + + # also check that if we add this repo again it does not create duplicates + result = new_config.add_opt_out_repository(new_opt_out) + assert result is False + assert new_config.get_issue_opt_outs() == [new_opt_out] + + +def test_resolve_resolve_snapshot_tag_empty(): + """Test that the resolve_snapshot_tag method returns the expected snapshot tag based on the configured format and timestamp.""" + from datetime import datetime + + config = BotConfig.model_validate(CONFIG_DATA) + actual_timestamp = datetime.now().strftime("%Y%m%d") + snapshot_tag = config.resolve_snapshot_tag() + assert snapshot_tag.startswith("custom_snapshot_") + assert snapshot_tag == f"custom_snapshot_{actual_timestamp}" + + +def test_resolve_resolve_snapshot_tag_explicit(): + """Test that if an explicit snapshot tag is provided, it is returned directly.""" + + config = BotConfig.model_validate(CONFIG_DATA) + + explicit_tag = "explicit_snapshot_tag" + snapshot_tag = config.resolve_snapshot_tag(explicit_snapshot_tag=explicit_tag) + assert snapshot_tag == explicit_tag diff --git a/tests/test_config_utils.py b/tests/test_config_utils.py index 8fb356f..cb6b9c2 100644 --- a/tests/test_config_utils.py +++ b/tests/test_config_utils.py @@ -1,6 +1,10 @@ -"""Tests for platform detection helper.""" +"""Tests for configuration and platform handling.""" -from sw_metadata_bot.config_utils import detect_platform +from sw_metadata_bot.config.config_utils import ( + detect_platform, + normalize_repo_url, + sanitize_repo_name, +) def test_detect_platform_github(): @@ -21,3 +25,39 @@ def test_detect_platform_self_hosted_gitlab(): def test_detect_platform_unsupported(): """Return None for URLs that do not match a known platform.""" assert detect_platform("https://example.org/org/repo") is None + + +def test_sanitize_repo_name(): + """Sanitize repository URLs to safe folder names.""" + assert sanitize_repo_name("https://github.com/org/repo") == "github_com_org_repo" + + +def test_sanitize_repo_name_with_git_suffix(): + """Remove .git suffix when sanitizing repository URLs.""" + assert ( + sanitize_repo_name("https://github.com/org/repo.git") == "github_com_org_repo" + ) + + +def test_normalize_repo_url(): + """Normalize repository URLs by stripping whitespace and trailing slashes.""" + assert ( + normalize_repo_url("https://github.com/org/repo/") + == "https://github.com/org/repo" + ) + assert ( + normalize_repo_url(" https://github.com/org/repo ") + == "https://github.com/org/repo" + ) + + +# def test_resolve_output_root(): +# """Resolve output root directory from config or default.""" +# assert resolve_output_root(None) == "outputs" +# assert resolve_output_root("custom_outputs") == "custom_outputs" + + +# def test_resolve_run_name(): +# """Resolve run name from config or default.""" +# assert resolve_run_name(None) is None +# assert resolve_run_name("custom_run") == "custom_run" diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index a3c5e98..f197c38 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -1,23 +1,26 @@ """Tests for pipeline module.""" import json +import os from pathlib import Path from click.testing import CliRunner from sw_metadata_bot import analysis_runtime, commit_lookup, pipeline from sw_metadata_bot import publish as publish_module +from sw_metadata_bot.config.schemas import BotConfig # --------------------------------------------------------------------------- # is_previous_issue_open # --------------------------------------------------------------------------- +FAKE_REPO_URL = "https://github.com/example/repo" def test_is_previous_issue_open_false_when_action_closed(): """Return False when the previous record action is closed, regardless of issue_url.""" record = { "action": "closed", - "issue_url": "https://github.com/example/repo/issues/1", + "issue_url": FAKE_REPO_URL + "/issues/1", "issue_persistence": "posted", } assert analysis_runtime.is_previous_issue_open(record) is False @@ -27,7 +30,7 @@ def test_is_previous_issue_open_false_when_previous_issue_state_closed(): """Return False when previous_issue_state is explicitly closed.""" record = { "action": "closed", - "issue_url": "https://github.com/example/repo/issues/1", + "issue_url": FAKE_REPO_URL + "/issues/1", "issue_persistence": "posted", "previous_issue_state": "closed", } @@ -38,7 +41,7 @@ def test_is_previous_issue_open_true_for_posted_open_issue(): """Return True when an issue was posted and no closing signal exists.""" record = { "action": "created", - "issue_url": "https://github.com/example/repo/issues/2", + "issue_url": FAKE_REPO_URL + "/issues/2", "issue_persistence": "posted", } assert analysis_runtime.is_previous_issue_open(record) is True @@ -48,26 +51,28 @@ def test_is_previous_issue_open_false_for_simulated_issue(): """Return False for simulated (dry-run) issues that were never posted.""" record = { "action": "simulated_created", - "issue_url": "https://github.com/example/repo/issues/3", + "issue_url": FAKE_REPO_URL + "/issues/3", "issue_persistence": "simulated", } assert analysis_runtime.is_previous_issue_open(record) is False -def _write_config(tmp_path, **overrides): - """Write a minimal config and return its path.""" - config = { - "repositories": ["https://github.com/example/repo"], - "issues": {"custom_message": None, "opt_outs": []}, +def _write_config(tmp_path, **overrides) -> Path: + """Write a minimal config and return its path. + The config is populated with default values and can be overridden by passing fields as keyword arguments.""" + config_data = { + "analysis": {"repositories": [FAKE_REPO_URL]}, + "issues": {"custom_issue_message": None, "opt_outs": []}, "outputs": { - "root_dir": str(tmp_path / "outputs"), + "output_root_dir": str(tmp_path / "outputs"), "run_name": "batch-a", "snapshot_tag_format": "%Y%m%d", }, } - config.update(overrides) + config_data.update(overrides) config_path = tmp_path / "config.json" - config_path.write_text(json.dumps(config)) + config = BotConfig.model_validate(config_data) + config.to_json(config_path) return config_path @@ -77,7 +82,7 @@ def test_resolve_per_repo_paths_uses_analysis_root(tmp_path): paths = analysis_runtime.resolve_per_repo_paths( analysis_root=analysis_root, - repo_url="https://github.com/example/repo", + repo_url=FAKE_REPO_URL, ) repo_root = analysis_root / "github_com_example_repo" @@ -129,44 +134,12 @@ def test_standardize_metacheck_outputs_recovers_swapped_somef_and_codemeta(tmp_p assert not (repo_folder / "output_1.json").exists() -def test_resolve_output_root_relative_uses_project_root(tmp_path): - """Resolve relative output root from project root rather than config directory.""" - (tmp_path / "pyproject.toml").write_text("[project]\nname = 'example'\n") - config_dir = tmp_path / "assets" - config_dir.mkdir() - config_path = config_dir / "config.json" - config = { - "repositories": [], - "outputs": {"root_dir": "assets"}, - } - config_path.write_text(json.dumps(config)) - - resolved_output_root = pipeline.resolve_output_root(config, config_path) - - assert resolved_output_root == tmp_path / "assets" - - -def test_resolve_output_root_keeps_absolute_path(tmp_path): - """Keep absolute output root unchanged.""" - config_path = tmp_path / "config.json" - absolute_output_root = tmp_path / "custom-output" - config = { - "repositories": [], - "outputs": {"root_dir": str(absolute_output_root)}, - } - config_path.write_text(json.dumps(config)) - - resolved_output_root = pipeline.resolve_output_root(config, config_path) - - assert resolved_output_root == absolute_output_root - - def test_create_analysis_record_reads_rsmetacheck_version_from_checking_software( tmp_path, ): """Read RSMetacheck version from checkingSoftware.softwareVersion.""" expected_version = "0.3.0" - repo_url = "https://github.com/example/repo" + repo_url = FAKE_REPO_URL repo_folder = tmp_path / "github_com_example_repo" repo_folder.mkdir(parents=True) @@ -204,7 +177,7 @@ def test_create_analysis_record_reads_rsmetacheck_version_from_checking_software def test_create_analysis_record_creates_codemeta_issue_without_findings(tmp_path): """Create codemeta-only issue when no pitfalls/warnings are reported.""" - repo_url = "https://github.com/example/repo" + repo_url = FAKE_REPO_URL repo_folder = tmp_path / "github_com_example_repo" repo_folder.mkdir(parents=True) @@ -289,18 +262,18 @@ def fake_run_rsmetacheck(**kwargs): monkeypatch.setattr(analysis_runtime, "run_rsmetacheck", fake_run_rsmetacheck) output_root = tmp_path / "outputs" - config = _write_config( + config_path = _write_config( tmp_path, - repositories=["https://github.com/example/repo"], + analysis={"repositories": [FAKE_REPO_URL]}, outputs={ - "root_dir": str(output_root), + "output_root_dir": str(output_root), "run_name": "batch-a", "snapshot_tag_format": None, }, ) pipeline.run_pipeline( - config_file=config, + config_file=config_path, dry_run=False, snapshot_tag="202603", previous_report=None, @@ -336,28 +309,129 @@ def fake_run_rsmetacheck(**kwargs): monkeypatch.setattr(analysis_runtime, "run_rsmetacheck", fake_run_rsmetacheck) output_root = tmp_path / "outputs" - config = _write_config( + output_root.mkdir(parents=True) + config_path = _write_config( tmp_path, outputs={ - "root_dir": str(output_root), + "output_root_dir": str(output_root), "run_name": "batch-a", "snapshot_tag_format": None, }, ) pipeline.run_pipeline( - config_file=config, + config_file=config_path, dry_run=True, snapshot_tag=None, previous_report=None, ) - - run_report_path = output_root / "batch-a" / "run_report.json" + config = BotConfig.from_json(config_path) + snapshot_tag = config.resolve_snapshot_tag() + run_report_path = output_root / "batch-a" / snapshot_tag / "run_report.json" assert run_report_path.exists() + run_report = json.loads(run_report_path.read_text()) assert run_report["run_metadata"]["dry_run"] is True +def test_run_pipeline_persists_input_config_file_in_run_metadata(monkeypatch, tmp_path): + """Persist the input config file path in run report metadata.""" + + def fake_run_rsmetacheck(**kwargs): + """Accept rsmetacheck invocation without side effects.""" + return None + + monkeypatch.setattr(analysis_runtime, "run_rsmetacheck", fake_run_rsmetacheck) + + output_root = tmp_path / "outputs" + snapshot_tag = "202603" + config_path = _write_config( + tmp_path, + analysis={"repositories": ["https://github.com/example/repo"]}, + outputs={ + "output_root_dir": str(output_root), + "run_name": "batch-a", + "snapshot_tag_format": "%Y%m%d", + }, + ) + + pipeline.run_pipeline( + config_file=config_path, + dry_run=True, + snapshot_tag=snapshot_tag, + previous_report=None, + ) + analysis_root = output_root / "batch-a" / snapshot_tag + run_report_path = analysis_root / "run_report.json" + assert run_report_path.exists() + run_report = json.loads(run_report_path.read_text()) + assert run_report["run_metadata"]["input_config_file"] == str(config_path) + + # check if config file has been created in analysis root with expected content + config_file_path = analysis_root / "config.json" + assert config_file_path.exists(), "Config file should be copied to analysis root" + with open(config_file_path, "r") as f: + config_content = json.load(f) + + # check that config content matches the given inputs and has default keys populated + assert config_content["analysis"]["repositories"] == [ + "https://github.com/example/repo" + ] + assert config_content["issues"]["custom_issue_message"] is None + assert config_content["issues"]["opt_outs"] == [] + assert config_content["outputs"]["output_root_dir"] == str(output_root) + assert config_content["outputs"]["run_name"] == "batch-a" + assert config_content["outputs"]["snapshot_tag_format"] == "%Y%m%d" + assert config_content["outputs"]["snapshot_tag_format"] == "%Y%m%d" + + # default content is popolated (don't check default values) + assert config_content["analysis"]["generate_codemeta_if_missing"] is not None + assert config_content["version"] is not None + + +def test_run_pipeline_persists_relative_input_config_file_in_run_metadata( + monkeypatch, tmp_path +): + """Persist a relative input config file path as an absolute path in run report metadata.""" + + def fake_run_rsmetacheck(**kwargs): + return None + + monkeypatch.setattr(analysis_runtime, "run_rsmetacheck", fake_run_rsmetacheck) + + output_root = tmp_path / "outputs" + config_path = _write_config( + tmp_path, + analysis={"repositories": ["https://github.com/example/repo"]}, + outputs={ + "output_root_dir": str(output_root), + "run_name": "batch-a", + "snapshot_tag_format": "202603", + }, + ) + + cwd = Path.cwd() + try: + # Run from a different current working directory to simulate CLI usage. + Path(tmp_path / "cwd").mkdir() + os.chdir(tmp_path / "cwd") + relative_config = Path(os.path.relpath(config_path, start=Path.cwd())) + + pipeline.run_pipeline( + config_file=relative_config, + dry_run=True, + snapshot_tag="202603", + previous_report=None, + ) + finally: + os.chdir(cwd) + + run_report_path = output_root / "batch-a" / "202603" / "run_report.json" + assert run_report_path.exists() + run_report = json.loads(run_report_path.read_text()) + assert run_report["run_metadata"]["input_config_file"] == str(config_path) + + def test_run_analysis_command_forwards_to_run_pipeline(monkeypatch, tmp_path): """run-analysis CLI wrapper forwards arguments and enforces dry-run mode.""" captured: dict[str, object] = {} @@ -368,10 +442,10 @@ def fake_run_pipeline(**kwargs): monkeypatch.setattr(pipeline, "run_pipeline", fake_run_pipeline) - config = _write_config( + config_path = _write_config( tmp_path, outputs={ - "root_dir": str(tmp_path / "results"), + "output_root_dir": str(tmp_path / "results"), "run_name": "custom-run", "snapshot_tag_format": None, }, @@ -382,21 +456,21 @@ def fake_run_pipeline(**kwargs): pipeline.run_analysis_command, [ "--config-file", - str(config), + str(config_path), "--snapshot-tag", "2026-03", "--previous-report", - str(config), + str(config_path), "--force-analysis", ], ) assert result.exit_code == 0 assert captured == { - "config_file": config, + "config_file": config_path, "dry_run": True, "snapshot_tag": "2026-03", - "previous_report": config, + "previous_report": config_path, "force_analysis": True, } @@ -464,10 +538,10 @@ def fake_run_rsmetacheck(**kwargs): monkeypatch.setattr(analysis_runtime, "run_rsmetacheck", fake_run_rsmetacheck) output_root = tmp_path / "outputs" - config = _write_config( + config_path = _write_config( tmp_path, outputs={ - "root_dir": str(output_root), + "output_root_dir": str(output_root), "run_name": "batch-a", "snapshot_tag_format": None, }, @@ -478,7 +552,7 @@ def fake_run_rsmetacheck(**kwargs): (previous_snapshot / "run_report.json").write_text("{}") pipeline.run_pipeline( - config_file=config, + config_file=config_path, dry_run=False, snapshot_tag="20260311", previous_report=None, @@ -503,10 +577,10 @@ def fake_run_rsmetacheck(**kwargs): monkeypatch.setattr(analysis_runtime, "run_rsmetacheck", fake_run_rsmetacheck) output_root = tmp_path / "outputs" - config = _write_config( + config_path = _write_config( tmp_path, outputs={ - "root_dir": str(output_root), + "output_root_dir": str(output_root), "run_name": "batch-a", "snapshot_tag_format": None, }, @@ -515,7 +589,7 @@ def fake_run_rsmetacheck(**kwargs): (output_root / "batch-a" / "X").mkdir(parents=True) pipeline.run_pipeline( - config_file=config, + config_file=config_path, dry_run=False, snapshot_tag="X", previous_report=None, @@ -639,9 +713,9 @@ def fake_run_rsmetacheck(**kwargs): output_root = tmp_path / "outputs" config = _write_config( tmp_path, - repositories=["https://github.com/example/repo"], + analysis={"repositories": ["https://github.com/example/repo"]}, outputs={ - "root_dir": str(output_root), + "output_root_dir": str(output_root), "run_name": "batch-a", "snapshot_tag_format": None, }, @@ -740,12 +814,14 @@ def fake_get_head(repo_url: str) -> str | None: output_root = tmp_path / "outputs" config = _write_config( tmp_path, - repositories=[ - "https://github.com/example/old-repo", - "https://github.com/example/new-repo", - ], + analysis={ + "repositories": [ + "https://github.com/example/old-repo", + "https://github.com/example/new-repo", + ] + }, outputs={ - "root_dir": str(output_root), + "output_root_dir": str(output_root), "run_name": "batch-a", "snapshot_tag_format": None, }, @@ -838,7 +914,7 @@ def fake_run_rsmetacheck(**kwargs): config = _write_config( tmp_path, outputs={ - "root_dir": str(output_root), + "output_root_dir": str(output_root), "run_name": "batch-a", "snapshot_tag_format": "%Y%m%d", }, @@ -851,9 +927,8 @@ def fake_run_rsmetacheck(**kwargs): previous_report=None, ) - expected_snapshot = pipeline.resolve_snapshot_tag( - pipeline.load_config(config), None - ) + config = BotConfig.from_json(config) + expected_snapshot = config.resolve_snapshot_tag() somef_output = calls["rsmetacheck"]["somef_output"] assert "/batch-a/" in somef_output assert somef_output.endswith(f"/{expected_snapshot}/github_com_example_repo") @@ -877,9 +952,9 @@ def fake_run_rsmetacheck(**kwargs): output_root = tmp_path / "outputs" config = _write_config( tmp_path, - repositories=["https://github.com/example/repo"], + analysis={"repositories": ["https://github.com/example/repo"]}, outputs={ - "root_dir": str(output_root), + "output_root_dir": str(output_root), "run_name": "batch-a", "snapshot_tag_format": None, }, @@ -954,9 +1029,9 @@ def fake_run_rsmetacheck(**kwargs): output_root = tmp_path / "outputs" config = _write_config( tmp_path, - repositories=["https://github.com/example/repo"], + analysis={"repositories": ["https://github.com/example/repo"]}, outputs={ - "root_dir": str(output_root), + "output_root_dir": str(output_root), "run_name": "batch-a", "snapshot_tag_format": None, }, @@ -1037,9 +1112,9 @@ def fake_run_rsmetacheck(**kwargs): output_root = tmp_path / "outputs" config = _write_config( tmp_path, - repositories=["https://github.com/example/repo"], + analysis={"repositories": ["https://github.com/example/repo"]}, outputs={ - "root_dir": str(output_root), + "output_root_dir": str(output_root), "run_name": "batch-a", "snapshot_tag_format": None, }, diff --git a/tests/test_pitfalls.py b/tests/test_pitfalls.py index 6d438ec..a20463e 100644 --- a/tests/test_pitfalls.py +++ b/tests/test_pitfalls.py @@ -81,6 +81,30 @@ def test_get_pitfalls_list(sample_data): assert pitfalls[1]["pitfall"].endswith("#P002") +def test_load_pitfalls_from_example_analysis(): + """Load generated RSMetacheck pitfalls from assets/example_analysis/pitfalls.""" + base_path = ( + Path(__file__).resolve().parents[1] / "assets" / "example_analysis" / "pitfalls" + ) + if not base_path.exists(): + pytest.skip( + "Generated example analysis files are not available in assets/example_analysis/pitfalls" + ) + + jsonld_files = sorted(base_path.rglob("*.jsonld")) + assert jsonld_files, f"No JSON-LD files found in {base_path}" + + # Validate at least one generated file can be loaded and has the expected shape. + data = load_pitfalls(jsonld_files[0]) + assert isinstance(data, dict) + assert get_repository_url(data), "Generated file missing assessedSoftware.url" + assert get_rsmetacheck_version(data), "Generated file missing RSMetacheck version" + + # Sanity-check the top-level payload a bit more. + assert "checks" in data + assert isinstance(data["checks"], list) + + def test_get_warnings_list(sample_data): """Test filtering warnings from checks.""" warnings = get_warnings_list(sample_data) @@ -127,35 +151,6 @@ def test_get_pitfalls_list_empty(): assert pitfalls == [] -@pytest.mark.parametrize( - ("filename", "expected_pitfalls", "expected_warnings"), - [ - ("example_pitfall_1.jsonld", {"P001", "P002", "P009"}, {"W001", "W003"}), - ("example_pitfall_2.jsonld", {"P002", "P014"}, {"W003", "W004"}), - ("example_pitfall_3.jsonld", {"P001"}, {"W001", "W002", "W004"}), - ("example_pitfall_4.jsonld", {"P001", "P006"}, set()), - ("example_pitfall_5.jsonld", set(), {"W002", "W004"}), - ], -) -def test_existing_metacheck_analysis_jsonld_files( - filename, expected_pitfalls, expected_warnings -): - """Test parsing of existing metacheck analysis JSON-LD files.""" - base_path = Path(__file__).resolve().parents[1] - data = load_pitfalls( - base_path / "assets" / "existing_metacheck_analysis" / filename - ) - - pitfalls = get_pitfalls_list(data) - warnings = get_warnings_list(data) - - pitfall_codes = {item["pitfall"].split("#")[-1] for item in pitfalls} - warning_codes = {item["pitfall"].split("#")[-1] for item in warnings} - - assert pitfall_codes == expected_pitfalls - assert warning_codes == expected_warnings - - def test_format_report(sample_data): """Test report formatting.""" report = format_report("https://github.com/example/repo", sample_data) diff --git a/tests/test_publish.py b/tests/test_publish.py index 908ae7e..1c068dc 100644 --- a/tests/test_publish.py +++ b/tests/test_publish.py @@ -2,10 +2,12 @@ import json from datetime import datetime, timedelta, timezone +from pathlib import Path from click.testing import CliRunner from sw_metadata_bot import publish as publish_module +from sw_metadata_bot.config.schemas import BotConfig from sw_metadata_bot.publish import publish_command # --------------------------------------------------------------------------- @@ -70,13 +72,23 @@ def _write_run_report(snapshot_dir, records, run_metadata=None): def _write_issue_report(snapshot_dir, repo_url, body="Issue body text"): """Write a per-repo issue_report.md so publish can find the body.""" - from sw_metadata_bot.config_utils import sanitize_repo_name + from sw_metadata_bot.config.config_utils import sanitize_repo_name repo_folder = snapshot_dir / sanitize_repo_name(repo_url) repo_folder.mkdir(parents=True, exist_ok=True) (repo_folder / "issue_report.md").write_text(body) +def _create_minimal_config_with_repos(config_path: Path, repo_url: str): + config_data = { + "analysis": { + "repositories": [repo_url], + }, + "issues": {"opt_outs": []}, + } + json.dump(config_data, config_path.open("w"), indent=4) + + # --------------------------------------------------------------------------- # simulated_created → created # --------------------------------------------------------------------------- @@ -351,6 +363,182 @@ def test_publish_unsubscribe_detected_during_publish(tmp_path, monkeypatch): assert record["reason_code"] == "unsubscribe" +def test_publish_unsubscribe_persists_opt_out_to_input_config(tmp_path, monkeypatch): + """publish appends opt-out to both snapshot config and original config when unsubscribe is detected.""" + + snapshot_dir = tmp_path / "snapshot" + snapshot_dir.mkdir() + repo_url = "https://github.com/example/repo" + issue_url = f"{repo_url}/issues/3" + original_config_path = tmp_path / "config.json" + snapshot_config_path = snapshot_dir / "config.json" + _create_minimal_config_with_repos(original_config_path, repo_url) + _create_minimal_config_with_repos(snapshot_config_path, repo_url) + + _write_run_report( + snapshot_dir, + records=[ + { + "repo_url": repo_url, + "action": "updated_by_comment", + "platform": "github", + "issue_url": issue_url, + "dry_run": True, + "issue_persistence": "simulated", + } + ], + run_metadata={"input_config_file": str(original_config_path)}, + ) + _write_issue_report(snapshot_dir, repo_url) + + fake = _FakeIssueClient(comments_for=lambda url: ["unsubscribe"]) + _patch_clients(monkeypatch, fake) + + runner = CliRunner() + result = runner.invoke(publish_command, ["--analysis-root", str(snapshot_dir)]) + + assert result.exit_code == 0, result.output + assert not fake.commented + + snapshot_config = BotConfig.from_json(snapshot_config_path) + original_config = BotConfig.from_json(original_config_path) + + assert snapshot_config.issues.opt_outs == [repo_url] + assert original_config.issues.opt_outs == [repo_url] + + +def test_publish_preserves_input_config_file_in_run_report(tmp_path, monkeypatch): + """publish preserves run_metadata.input_config_file when rewriting the report.""" + snapshot_dir = tmp_path / "snapshot" + snapshot_dir.mkdir() + repo_url = "https://github.com/example/repo" + issue_url = f"{repo_url}/issues/3" + original_config_path = tmp_path / "config.json" + _create_minimal_config_with_repos(original_config_path, repo_url) + + _write_run_report( + snapshot_dir, + records=[ + { + "repo_url": repo_url, + "action": "updated_by_comment", + "platform": "github", + "issue_url": issue_url, + "dry_run": True, + "issue_persistence": "simulated", + } + ], + run_metadata={"input_config_file": str(original_config_path)}, + ) + _write_issue_report(snapshot_dir, repo_url) + + fake = _FakeIssueClient(comments_for=lambda url: ["unsubscribe"]) + _patch_clients(monkeypatch, fake) + + runner = CliRunner() + result = runner.invoke(publish_command, ["--analysis-root", str(snapshot_dir)]) + + assert result.exit_code == 0, result.output + report = json.loads((snapshot_dir / "run_report.json").read_text()) + assert report["run_metadata"]["input_config_file"] is not None + assert ( + Path(report["run_metadata"]["input_config_file"]).name + == original_config_path.name + ) + + +def test_publish_detects_unsubscribe_on_skipped_previous_issue_url( + tmp_path, monkeypatch +): + """publish detects unsubscribe on skipped records that carry previous_issue_url.""" + snapshot_dir = tmp_path / "snapshot" + snapshot_dir.mkdir() + repo_url = "https://github.com/example/repo" + issue_url = f"{repo_url}/issues/3" + original_config_path = tmp_path / "config.json" + snapshot_config_path = snapshot_dir / "config.json" + _create_minimal_config_with_repos(original_config_path, repo_url) + _create_minimal_config_with_repos(snapshot_config_path, repo_url) + + _write_run_report( + snapshot_dir, + records=[ + { + "repo_url": repo_url, + "action": "skipped", + "platform": "github", + "reason_code": "repo_not_updated", + "previous_issue_url": issue_url, + "dry_run": False, + "issue_persistence": "none", + } + ], + run_metadata={"input_config_file": str(original_config_path)}, + ) + _write_issue_report(snapshot_dir, repo_url) + + fake = _FakeIssueClient(comments_for=lambda url: ["unsubscribe"]) + _patch_clients(monkeypatch, fake) + + runner = CliRunner() + result = runner.invoke(publish_command, ["--analysis-root", str(snapshot_dir)]) + + assert result.exit_code == 0, result.output + + report = json.loads((snapshot_dir / "run_report.json").read_text()) + record = report["records"][0] + assert record["action"] == "skipped" + assert record["reason_code"] == "unsubscribe" + assert record["unsubscribe_detected"] is True + + snapshot_config = BotConfig.from_json(snapshot_config_path) + original_config = BotConfig.from_json(original_config_path) + assert snapshot_config.issues.opt_outs == [repo_url] + assert original_config.issues.opt_outs == [repo_url] + + +def test_simulate_publish_command_updates_opt_out_with_fake_unsubscribe(tmp_path): + """simulate-publish can use a fake unsubscribe comment and update config files.""" + snapshot_dir = tmp_path / "snapshot" + snapshot_dir.mkdir() + repo_url = "https://github.com/example/repo" + issue_url = f"{repo_url}/issues/3" + original_config_path = tmp_path / "config.json" + snapshot_config_path = snapshot_dir / "config.json" + _create_minimal_config_with_repos(original_config_path, repo_url) + _create_minimal_config_with_repos(snapshot_config_path, repo_url) + + _write_run_report( + snapshot_dir, + records=[ + { + "repo_url": repo_url, + "action": "updated_by_comment", + "platform": "github", + "issue_url": issue_url, + "dry_run": True, + "issue_persistence": "simulated", + } + ], + run_metadata={"input_config_file": str(original_config_path)}, + ) + _write_issue_report(snapshot_dir, repo_url) + + runner = CliRunner() + result = runner.invoke( + publish_module.simulate_publish_command, + ["--analysis-root", str(snapshot_dir), "--unsubscribe"], + ) + + assert result.exit_code == 0, result.output + + snapshot_config = BotConfig.from_json(snapshot_config_path) + original_config = BotConfig.from_json(original_config_path) + + assert snapshot_config.issues.opt_outs == [repo_url] + assert original_config.issues.opt_outs == [repo_url] + + def test_publish_api_error_marks_record_as_failed(tmp_path, monkeypatch): """publish catches API errors and records them as failed with error message.""" snapshot_dir = tmp_path / "snapshot" diff --git a/uv.lock b/uv.lock index 7b40e31..3297bdc 100644 --- a/uv.lock +++ b/uv.lock @@ -36,6 +36,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, ] +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + [[package]] name = "anyascii" version = "0.3.3" @@ -424,19 +433,6 @@ toml = [ { name = "tomli", marker = "python_full_version <= '3.11'" }, ] -[[package]] -name = "coverage-badge" -version = "1.1.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "coverage" }, - { name = "setuptools" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/be/8f/e92b0a010c76b0da82709838b3f3ae9aec638d0c44dbfb1186a5751f5d2e/coverage_badge-1.1.2.tar.gz", hash = "sha256:fe7ed58a3b72dad85a553b64a99e963dea3847dcd0b8ddd2b38a00333618642c", size = 6335, upload-time = "2024-08-02T23:34:08.58Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/90/3d/5642a1a06191b2e1e0f87a2e824e6d3eb7c32c589a68ed4d1dcbd3324d63/coverage_badge-1.1.2-py2.py3-none-any.whl", hash = "sha256:d8413ce51c91043a1692b943616b450868cbeeb0ea6a0c54a32f8318c9c96ff7", size = 6493, upload-time = "2024-08-02T23:34:07.063Z" }, -] - [[package]] name = "cryptography" version = "48.0.0" @@ -483,6 +479,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, ] +[[package]] +name = "defusedxml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, +] + [[package]] name = "distlib" version = "0.4.0" @@ -651,6 +656,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f4/b2/50e9b292b5cac13e9e81272c7171301abc753a60460d21505b606e15cf21/furo-2025.12.19-py3-none-any.whl", hash = "sha256:bb0ead5309f9500130665a26bee87693c41ce4dbdff864dbfb6b0dae4673d24f", size = 339262, upload-time = "2025-12-19T17:34:38.905Z" }, ] +[[package]] +name = "genbadge" +version = "1.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "pillow" }, + { name = "requests" }, + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/88/08/686a720bd9f407a2b689c50a94e53b2d26f6ddc6f921ae45ec15c401ee67/genbadge-1.1.3.tar.gz", hash = "sha256:2292ea9cc20af4463dfde952c6b15544fdab9d6e50945f63a42cc400c521fa74", size = 138264, upload-time = "2025-11-24T14:55:01.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/cc/e67b1fe7a9d76a316e9149855a953c37c463caf1e351b1a0abf7f2fb9e38/genbadge-1.1.3-py2.py3-none-any.whl", hash = "sha256:6e4316c171c6f0f84becae4eb116258340bdc054458632abc622d36b8040655e", size = 101262, upload-time = "2025-11-24T14:54:59.925Z" }, +] + +[package.optional-dependencies] +coverage = [ + { name = "defusedxml" }, +] + [[package]] name = "idna" version = "3.13" @@ -1618,6 +1643,78 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, ] +[[package]] +name = "pydantic" +version = "2.13.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/18/a5/b60d21ac674192f8ab0ba4e9fd860690f9b4a6e51ca5df118733b487d8d6/pydantic-2.13.4.tar.gz", hash = "sha256:c40756b57adaa8b1efeeced5c196f3f3b7c435f90e84ea7f443901bec8099ef6", size = 844775, upload-time = "2026-05-06T13:43:05.343Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/7b/122376b1fd3c62c1ed9dc80c931ace4844b3c55407b6fb2d199377c9736f/pydantic-2.13.4-py3-none-any.whl", hash = "sha256:45a282cde31d808236fd7ea9d919b128653c8b38b393d1c4ab335c62924d9aba", size = 472262, upload-time = "2026-05-06T13:43:02.641Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.46.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/56/921726b776ace8d8f5db44c4ef961006580d91dc52b803c489fafd1aa249/pydantic_core-2.46.4.tar.gz", hash = "sha256:62f875393d7f270851f20523dd2e29f082bcc82292d66db2b64ea71f64b6e1c1", size = 471464, upload-time = "2026-05-06T13:37:06.98Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/fa/6d7708d2cfc1a832acb6aeb0cd16e801902df8a0f583bb3b4b527fde022e/pydantic_core-2.46.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:0e96592440881c74a213e5ad528e2b24d3d4f940de2766bed9010ab1d9e51594", size = 2111872, upload-time = "2026-05-06T13:40:27.596Z" }, + { url = "https://files.pythonhosted.org/packages/ae/6f/aa064a3e74b5745afbdf250594f38e7ead05e2d651bcb35994b9417a0d4d/pydantic_core-2.46.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0d65b8c354be7fb5f720c3caa8bc940bc2d20ce749c8e06135f07f8ed95dd7c", size = 1948255, upload-time = "2026-05-06T13:39:12.574Z" }, + { url = "https://files.pythonhosted.org/packages/43/3a/41114a9f7569b84b4d84e7a018c57c56347dac30c0d4a872946ec4e36c46/pydantic_core-2.46.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bfb192b3f4b9e8a89b6277b6ce787564f62cfd272055f6e685726b111dc7826", size = 1972827, upload-time = "2026-05-06T13:38:19.841Z" }, + { url = "https://files.pythonhosted.org/packages/ef/25/1ab42e8048fe551934d9884e8d64daa7e990ad386f310a15981aeb6a5b08/pydantic_core-2.46.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9037063db01f09b09e237c282b6792bd4da634b5402c4e7f0c61effed7701a04", size = 2041051, upload-time = "2026-05-06T13:38:10.447Z" }, + { url = "https://files.pythonhosted.org/packages/94/c2/1a934597ddf08da410385b3b7aae91956a5a76c635effef456074fad7e88/pydantic_core-2.46.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc010ab034c8c7452522748bf937df58020d256ccae0874463d1f4d01758af8e", size = 2221314, upload-time = "2026-05-06T13:40:13.089Z" }, + { url = "https://files.pythonhosted.org/packages/02/6d/9e8ad178c9c4df27ad3c8f25d1fe2a7ab0d2ba0559fad4aee5d3d1f16771/pydantic_core-2.46.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c5dac79fa1614d1e06ca695109c6105923bd9c7d1d6c918d4e637b7e6b32fd3", size = 2285146, upload-time = "2026-05-06T13:38:59.224Z" }, + { url = "https://files.pythonhosted.org/packages/80/50/540cd3aeefc041beb111125c4bff779831a2111fc6b15a9138cda277d32c/pydantic_core-2.46.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9fa868638bf362d3d138ea55829cefb3d5f4b0d7f142234382a15e2485dbec4", size = 2089685, upload-time = "2026-05-06T13:38:17.762Z" }, + { url = "https://files.pythonhosted.org/packages/6b/a4/b440ad35f05f6a38f89fa0f149accb3f0e02be94ca5e15f3c449a61b4bc9/pydantic_core-2.46.4-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:17299feefe090f2caa5b8e37222bb5f663e4935a8bfa6931d4102e5df1a9f398", size = 2115420, upload-time = "2026-05-06T13:37:58.195Z" }, + { url = "https://files.pythonhosted.org/packages/99/61/de4f55db8dfd57bfdfa9a12ec90fe1b57c4f41062f7ca86f08586b3e0ac0/pydantic_core-2.46.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4c63ebc82684aa89d9a3bcbd13d515b3be44250dc68dd3bd81526c1cb31286c3", size = 2165122, upload-time = "2026-05-06T13:37:01.167Z" }, + { url = "https://files.pythonhosted.org/packages/f7/52/7c529d7bdb2d1068bd52f51fe32572c8301f9a4febf1948f10639f1436f5/pydantic_core-2.46.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:aaa2a54443eff1950ba5ddc6b6ccda0d9c84a364276a62f969bdf2a390650848", size = 2182573, upload-time = "2026-05-06T13:38:45.04Z" }, + { url = "https://files.pythonhosted.org/packages/37/b3/7c40325848ba78247f2812dcf9c7274e38cd801820ca6dd9fe63bcfb0eb4/pydantic_core-2.46.4-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:18e5ceec2ab67e6d5f1a9085e5a24c9c4e2ac4545730bfe668680bca05e555f3", size = 2317139, upload-time = "2026-05-06T13:37:15.539Z" }, + { url = "https://files.pythonhosted.org/packages/d9/37/f913f81a657c865b75da6c0dbed79876073c2a43b5bd9edbe8da785e4d49/pydantic_core-2.46.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a0f62d0a58f4e7da165457e995725421e0064f2255d8eccebc49f41bbc23b109", size = 2360433, upload-time = "2026-05-06T13:37:30.099Z" }, + { url = "https://files.pythonhosted.org/packages/c4/67/6acaa1be2567f9256b056d8477158cac7240813956ce86e49deae8e173b4/pydantic_core-2.46.4-cp311-cp311-win32.whl", hash = "sha256:041bde0a48fd37cf71cab1c9d56d3e8625a3793fef1f7dd232b3ff37e978ecda", size = 1985513, upload-time = "2026-05-06T13:38:15.669Z" }, + { url = "https://files.pythonhosted.org/packages/aa/e6/c505f83dfeda9a2e5c995cfd872949e4d05e12f7feb3dca72f633daefa94/pydantic_core-2.46.4-cp311-cp311-win_amd64.whl", hash = "sha256:6f2eeda33a839975441c86a4119e1383c50b47faf0cbb5176985565c6bb02c33", size = 2071114, upload-time = "2026-05-06T13:40:35.416Z" }, + { url = "https://files.pythonhosted.org/packages/0f/da/7a263a96d965d9d0df5e8de8a475f33495451117035b09acb110288c381f/pydantic_core-2.46.4-cp311-cp311-win_arm64.whl", hash = "sha256:14f4c5d6db102bd796a627bbb3a17b4cf4574b9ae861d8b7c9a9661c6dd3362d", size = 2044298, upload-time = "2026-05-06T13:38:29.754Z" }, + { url = "https://files.pythonhosted.org/packages/ce/8c/af022f0af448d7747c5154288d46b5f2bc5f17366eaa0e23e9aa04d59f3b/pydantic_core-2.46.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3245406455a5d98187ec35530fd772b1d799b26667980872c8d4614991e2c4a2", size = 2106158, upload-time = "2026-05-06T13:38:57.215Z" }, + { url = "https://files.pythonhosted.org/packages/19/95/6195171e385007300f0f5574592e467c568becce2d937a0b6804f218bc49/pydantic_core-2.46.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:962ccbab7b642487b1d8b7df90ef677e03134cf1fd8880bf698649b22a69371f", size = 1951724, upload-time = "2026-05-06T13:37:02.697Z" }, + { url = "https://files.pythonhosted.org/packages/8e/bc/f47d1ff9cbb1620e1b5b697eef06010035735f07820180e74178226b27b3/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8233f2947cf85404441fd7e0085f53b10c93e0ee78611099b5c7237e36aacbf7", size = 1975742, upload-time = "2026-05-06T13:37:09.448Z" }, + { url = "https://files.pythonhosted.org/packages/5b/11/9b9a5b0306345664a2da6410877af6e8082481b5884b3ddd78d47c6013ce/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a233125ac121aa3ffba9a2b59edfc4a985a76092dc8279586ab4b71390875e7", size = 2052418, upload-time = "2026-05-06T13:37:38.234Z" }, + { url = "https://files.pythonhosted.org/packages/f1/b7/a65fec226f5d78fc39f4a13c4cc0c768c22b113438f60c14adc9d2865038/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b712b53160b79a5850310b912a5ef8e57e56947c8ad690c227f5c9d7e561712", size = 2232274, upload-time = "2026-05-06T13:38:27.753Z" }, + { url = "https://files.pythonhosted.org/packages/68/f0/92039db98b907ef49269a8271f67db9cb78ae2fc68062ef7e4e77adb5f61/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9401557acd873c3a7f3eb9383edef8ac4968f9510e340f4808d427e75667e7b4", size = 2309940, upload-time = "2026-05-06T13:38:05.353Z" }, + { url = "https://files.pythonhosted.org/packages/5f/97/2aab507d3d00ca626e8e57c1eac6a79e4e5fbcc63eb99733ff55d1717f65/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:926c9541b14b12b1681dca8a0b75feb510b06c6341b70a8e500c2fdcff837cce", size = 2094516, upload-time = "2026-05-06T13:39:10.577Z" }, + { url = "https://files.pythonhosted.org/packages/22/37/a8aca44d40d737dde2bc05b3c6c07dff0de07ce6f82e9f3167aeaf4d5dea/pydantic_core-2.46.4-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:56cb4851bcaf3d117eddcef4fe66afd750a50274b0da8e22be256d10e5611987", size = 2136854, upload-time = "2026-05-06T13:40:22.59Z" }, + { url = "https://files.pythonhosted.org/packages/24/99/fcef1b79238c06a8cbec70819ac722ba76e02bc8ada9b0fd66eba40da01b/pydantic_core-2.46.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c68fcd102d71ea85c5b2dfac3f4f8476eff42a9e078fd5faefff6d145063536b", size = 2180306, upload-time = "2026-05-06T13:40:10.666Z" }, + { url = "https://files.pythonhosted.org/packages/ae/6c/fc44000918855b42779d007ae63b0532794739027b2f417321cddbc44f6a/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b2f69dec1725e79a012d920df1707de5caf7ed5e08f3be4435e25803efc47458", size = 2190044, upload-time = "2026-05-06T13:40:43.231Z" }, + { url = "https://files.pythonhosted.org/packages/6b/65/d9cadc9f1920d7a127ad2edba16c1db7916e59719285cd6c94600b0080ba/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:8d0820e8192167f80d88d64038e609c31452eeca865b4e1d9950a27a4609b00b", size = 2329133, upload-time = "2026-05-06T13:39:57.365Z" }, + { url = "https://files.pythonhosted.org/packages/d0/cf/c873d91679f3a30bcf5e7ac280ce5573483e72295307685120d0d5ad3416/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fbdb89b3e1c94a30cc5edfce477c6e6a5dc4d8f84665b455c27582f211a1c72c", size = 2374464, upload-time = "2026-05-06T13:38:06.976Z" }, + { url = "https://files.pythonhosted.org/packages/47/bd/6f2fc8188f31bf10590f1e98e7b306336161fac930a8c514cd7bd828c7dc/pydantic_core-2.46.4-cp312-cp312-win32.whl", hash = "sha256:9aa768456404a8bf48a4406685ac2bec8e72b62c69313734fa3b73cf33b3a894", size = 1974823, upload-time = "2026-05-06T13:40:47.985Z" }, + { url = "https://files.pythonhosted.org/packages/40/8c/985c1d41ea1107c2534abd9870e4ed5c8e7669b5c308297835c001e7a1c4/pydantic_core-2.46.4-cp312-cp312-win_amd64.whl", hash = "sha256:e9c26f834c65f5752f3f06cb08cb86a913ceb7274d0db6e267808a708b46bc89", size = 2072919, upload-time = "2026-05-06T13:39:21.153Z" }, + { url = "https://files.pythonhosted.org/packages/c4/ba/f463d006e0c47373ca7ec5e1a261c59dc01ef4d62b2657af925fb0deee3a/pydantic_core-2.46.4-cp312-cp312-win_arm64.whl", hash = "sha256:4fc73cb559bdb54b1134a706a2802a4cddd27a0633f5abb7e53056268751ac6a", size = 2027604, upload-time = "2026-05-06T13:39:03.753Z" }, + { url = "https://files.pythonhosted.org/packages/ee/a4/73995fd4ebbb46ba0ee51e6fa049b8f02c40daebb762208feda8a6b7894d/pydantic_core-2.46.4-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:14d4edf427bdcf950a8a02d7cb44a08614388dd6e1bdcbf4f67504fa7887da9c", size = 2111589, upload-time = "2026-05-06T13:37:10.817Z" }, + { url = "https://files.pythonhosted.org/packages/fb/7f/f37d3a5e8bfcc2e403f5c57a730f2d815693fb42119e8ea48b3789335af1/pydantic_core-2.46.4-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:0ce40cd7b21210e99342afafbd4d0f76d784eb5b1d60f3bdc566be4983c6c73b", size = 1944552, upload-time = "2026-05-06T13:36:56.717Z" }, + { url = "https://files.pythonhosted.org/packages/15/3c/d7eb777b3ff43e8433a4efb39a17aa8fd98a4ee8561a24a67ef5db07b2d6/pydantic_core-2.46.4-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90884113d8b48f760e9587002789ddd741e76ab9f89518cd1e43b1f1a52ec44b", size = 1982984, upload-time = "2026-05-06T13:39:06.207Z" }, + { url = "https://files.pythonhosted.org/packages/63/87/70b9f40170a81afd55ca26c9b2acb25c20d64bcfbf888fafecb3ba077d4c/pydantic_core-2.46.4-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66ce7632c22d837c95301830e111ad0128a32b8207533b60896a96c4915192ea", size = 2138417, upload-time = "2026-05-06T13:39:45.476Z" }, + { url = "https://files.pythonhosted.org/packages/9d/1d/8987ad40f65ae1432753072f214fb5c74fe47ffbd0698bb9cbbb585664f8/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:1d8ba486450b14f3b1d63bc521d410ec7565e52f887b9fb671791886436a42f7", size = 2095527, upload-time = "2026-05-06T13:39:52.283Z" }, + { url = "https://files.pythonhosted.org/packages/64/d3/84c282a7eee1d3ac4c0377546ef5a1ea436ce26840d9ac3b7ed54a377507/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:3009f12e4e90b7f88b4f9adb1b0c4a3d58fe7820f3238c190047209d148026df", size = 1936024, upload-time = "2026-05-06T13:40:15.671Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ca/eac61596cdeb4d7e174d3dc0bd8a6238f14f75f97a24e7b7db4c7e7340a0/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad785e92e6dc634c21555edc8bd6b64957ab844541bcb96a1366c202951ae526", size = 1990696, upload-time = "2026-05-06T13:38:34.717Z" }, + { url = "https://files.pythonhosted.org/packages/fa/c3/7c8b240552251faf6b3a957db200fcfbbcec36763c050428b601e0c9b83b/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00c603d540afdd6b80eb39f078f33ebd46211f02f33e34a32d9f053bba711de0", size = 2147590, upload-time = "2026-05-06T13:39:29.883Z" }, + { url = "https://files.pythonhosted.org/packages/11/cb/428de0385b6c8d44b716feba566abfacfbd23ee3c4439faa789a1456242f/pydantic_core-2.46.4-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:0c563b08bca408dc7f65f700633d8442fffb2421fc47b8101377e9fd65051ff0", size = 2112782, upload-time = "2026-05-06T13:37:04.016Z" }, + { url = "https://files.pythonhosted.org/packages/0b/b5/6a17bdadd0fc1f170adfd05a20d37c832f52b117b4d9131da1f41bb097ce/pydantic_core-2.46.4-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:db06ffe51636ffe9ca531fe9023dd64bdd794be8754cb5df57c5498ae5b518a7", size = 1952146, upload-time = "2026-05-06T13:39:43.092Z" }, + { url = "https://files.pythonhosted.org/packages/2a/dc/03734d80e362cd43ef65428e9de77c730ce7f2f11c60d2b1e1b39f0fbf99/pydantic_core-2.46.4-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:133878133d271ade3d41d1bfb2a45ec38dbdbda40bc065921c6b04e4630127e2", size = 2134492, upload-time = "2026-05-06T13:36:58.124Z" }, + { url = "https://files.pythonhosted.org/packages/de/df/5e5ffc085ed07cc22d298134d3d911c63e91f6a0eb91fe646750a3209910/pydantic_core-2.46.4-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9bc519fbf2b7578398853d815009ae5e4d4603d12f4e3f91da8c06852d3da3e9", size = 2156604, upload-time = "2026-05-06T13:37:49.88Z" }, + { url = "https://files.pythonhosted.org/packages/81/44/6e112a4253e56f5705467cbab7ab5e91ee7398ba3d56d358635958893d3e/pydantic_core-2.46.4-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c7a7bd4e39e8e4c12c39cd480356842b6a8a06e41b23a55a5e3e191718838ddf", size = 2183828, upload-time = "2026-05-06T13:37:43.053Z" }, + { url = "https://files.pythonhosted.org/packages/ac/ad/5565071e937d8e752842ac241463944c9eb14c87e2d269f2658a5bd05e98/pydantic_core-2.46.4-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:d396ec2b979760aaf3218e76c24e65bd0aca24983298653b3a9d7a45f9e47b30", size = 2310000, upload-time = "2026-05-06T13:37:56.694Z" }, + { url = "https://files.pythonhosted.org/packages/4f/c3/66883a5cec183e7fba4d024b4cbbe61851a63750ef606b0afecc46d1f2bf/pydantic_core-2.46.4-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:86e1a4418c6cd97d60c95c71164158eaf7324fae7b0923264016baa993eba6fc", size = 2361286, upload-time = "2026-05-06T13:40:05.667Z" }, + { url = "https://files.pythonhosted.org/packages/4b/2d/69abac8f838090bbecd5df894befb2c2619e7996a98ddb949db9f3b93225/pydantic_core-2.46.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:d51026d73fcfd93610abc7b27789c26b313920fcfb20e27462d74a7f8b06e983", size = 2193071, upload-time = "2026-05-06T13:38:08.682Z" }, +] + [[package]] name = "pygments" version = "2.20.0" @@ -4960,6 +5057,7 @@ version = "0.4.3" source = { editable = "." } dependencies = [ { name = "click" }, + { name = "pydantic" }, { name = "python-dotenv" }, { name = "requests" }, { name = "rsmetacheck" }, @@ -4980,7 +5078,7 @@ docs = [ { name = "sphinxcontrib-mermaid" }, ] test = [ - { name = "coverage-badge" }, + { name = "genbadge", extra = ["coverage"] }, { name = "pytest" }, { name = "pytest-cov" }, { name = "setuptools" }, @@ -4990,9 +5088,10 @@ test = [ requires-dist = [ { name = "bandit", marker = "extra == 'dev'", specifier = ">=1.9.3" }, { name = "click", specifier = ">=8.3.1" }, - { name = "coverage-badge", marker = "extra == 'test'" }, { name = "furo", marker = "extra == 'docs'", specifier = ">=2025.12.19" }, + { name = "genbadge", extras = ["coverage"], marker = "extra == 'test'", specifier = ">=1.0.0" }, { name = "interrogate", marker = "extra == 'dev'", specifier = ">=1.7.0" }, + { name = "pydantic", specifier = ">=2.13.4" }, { name = "pytest", marker = "extra == 'test'" }, { name = "pytest-cov", marker = "extra == 'test'" }, { name = "python-dotenv", specifier = ">=1.0.0" }, @@ -5223,6 +5322,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + [[package]] name = "tzdata" version = "2026.2"