From 6d8116ba52583ec1cba43bd351da262f8a419123 Mon Sep 17 00:00:00 2001 From: Carl Flottmann Date: Mon, 9 Mar 2026 14:11:45 +1000 Subject: [PATCH 01/13] chore: update inline imports to just look for process spawning, file writes, and environment modification to reduce fps (#1322) The builtins, sys, and os modules now are more fine-grained in their inline import obfuscation matching, looking only for file writes, process spawning, and environment modification. Signed-off-by: Carl Flottmann --- .../pypi_malware_rules/obfuscation.yaml | 80 ++++++++++++++++++- .../obfuscation/excessive_spacing.py | 4 +- .../obfuscation/expected_results.json | 65 ++++++++++----- .../obfuscation/inline_imports.py | 19 ++++- 4 files changed, 139 insertions(+), 29 deletions(-) diff --git a/src/macaron/resources/pypi_malware_rules/obfuscation.yaml b/src/macaron/resources/pypi_malware_rules/obfuscation.yaml index 81b2f08f8..c8283fafa 100644 --- a/src/macaron/resources/pypi_malware_rules/obfuscation.yaml +++ b/src/macaron/resources/pypi_malware_rules/obfuscation.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. rules: @@ -11,10 +11,82 @@ rules: severity: ERROR pattern-either: - pattern: __import__('base64') - - pattern: __import__('builtins') - pattern: __import__('subprocess') - - pattern: __import__('sys') - - pattern: __import__('os') + # process execution obfuscation using inline builtins import + - pattern: __import__('builtins').eval(...) + - pattern: __import__('builtins').exec(...) + # writing to a file obfuscation using inline builtins import + - patterns: + - pattern: __import__('builtins').open(..., $MODE, ...) + - pattern-not: __import__('builtins').open(..., 'r', ...) + - pattern-not: __import__('builtins').open(..., 'rb', ...) 
+ - pattern-not: __import__('builtins').open(..., 'rt', ...) + - pattern-not: __import__('builtins').open(..., 'br', ...) + - pattern-not: __import__('builtins').open(..., 'tr', ...) + - patterns: + - pattern: __import__('builtins').open(..., mode=$MODE, ...) + - pattern-not: __import__('builtins').open(..., mode='r', ...) + - pattern-not: __import__('builtins').open(..., mode='rb', ...) + - pattern-not: __import__('builtins').open(..., mode='rt', ...) + - pattern-not: __import__('builtins').open(..., mode='br', ...) + - pattern-not: __import__('builtins').open(..., mode='tr', ...) + - pattern: __import__('sys').setrecursionlimit(...) + - pattern: __import__('sys').remote_exec(...) + # process execution obfuscation using inline os import + - pattern: __import__('os').execl(...) + - pattern: __import__('os').execle(...) + - pattern: __import__('os').execlp(...) + - pattern: __import__('os').execlpe(...) + - pattern: __import__('os').execv(...) + - pattern: __import__('os').execve(...) + - pattern: __import__('os').execvp(...) + - pattern: __import__('os').execvpe(...) + - pattern: __import__('os').popen(...) + - pattern: __import__('os').posix_spawn(...) + - pattern: __import__('os').posix_spawnp(...) + - pattern: __import__('os').spawnl(...) + - pattern: __import__('os').spawnle(...) + - pattern: __import__('os').spawnlp(...) + - pattern: __import__('os').spawnlpe(...) + - pattern: __import__('os').spawnv(...) + - pattern: __import__('os').spawnve(...) + - pattern: __import__('os').spawnvp(...) + - pattern: __import__('os').spawnvpe(...) + - pattern: __import__('os').system(...) + # environment modification obfuscation using inline import + - pattern: __import__('os').putenv(...) + - pattern: __import__('os').unsetenv(...) + - pattern: __import__('os').environ[...] = ... + - pattern: __import__('os').environb[...] = ... + - pattern: del __import__('os').environ[...] + - pattern: del __import__('os').environb[...] + - pattern: __import__('os').environ.update(...) 
+ - pattern: __import__('os').environb.update(...) + - pattern: __import__('os').environ.pop(...) + - pattern: __import__('os').environb.pop(...) + - pattern: __import__('os').environ.clear() + - pattern: __import__('os').environb.clear() + # writing to a file obfuscation using inline os import + - pattern: __import__('os').write(...) + - patterns: + - pattern: __import__('os').fdopen(..., $MODE, ...) + - pattern-not: __import__('os').fdopen(..., 'r', ...) + - pattern-not: __import__('os').fdopen(..., 'rb', ...) + - pattern-not: __import__('os').fdopen(..., 'rt', ...) + - pattern-not: __import__('os').fdopen(..., 'br', ...) + - pattern-not: __import__('os').fdopen(..., 'tr', ...) + - patterns: + - pattern: __import__('os').fdopen(..., mode=$MODE, ...) + - pattern-not: __import__('os').fdopen(..., mode='r', ...) + - pattern-not: __import__('os').fdopen(..., mode='rb', ...) + - pattern-not: __import__('os').fdopen(..., mode='rt', ...) + - pattern-not: __import__('os').fdopen(..., mode='br', ...) + - pattern-not: __import__('os').fdopen(..., mode='tr', ...) + - patterns: + - pattern: __import__('os').open(..., $FLAGS, ...) + - metavariable-regex: + metavariable: $FLAGS + regex: .*O_(WRONLY|RDWR|APPEND|CREAT|TRUNC).* - pattern: __import__('zlib') - pattern: __import__('marshal') # python will evaluate a hex/oct string diff --git a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/excessive_spacing.py b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/excessive_spacing.py index 4f9a77616..d1eadfa1b 100644 --- a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/excessive_spacing.py +++ b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/excessive_spacing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ @@ -21,6 +21,6 @@ def test_function(): sys.exit() # excessive spacing obfuscation. The second line here will trigger two detections, which is expected since it matches both patterns. - print("hello"); __import__('os') + print("hello"); __import__('sys') print("hi") ; __import__('base64') print("things") ;__import__('zlib') diff --git a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/expected_results.json b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/expected_results.json index 5fb7c3965..4d49c84b2 100644 --- a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/expected_results.json +++ b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/expected_results.json @@ -43,11 +43,6 @@ "start": 53, "end": 53 }, - { - "file": "obfuscation/excessive_spacing.py", - "start": 24, - "end": 24 - }, { "file": "obfuscation/excessive_spacing.py", "start": 25, @@ -63,11 +58,6 @@ "start": 23, "end": 23 }, - { - "file": "obfuscation/inline_imports.py", - "start": 24, - "end": 24 - }, { "file": "obfuscation/inline_imports.py", "start": 25, @@ -85,23 +75,58 @@ }, { "file": "obfuscation/inline_imports.py", - "start": 28, - "end": 28 + "start": 30, + "end": 30 }, { "file": "obfuscation/inline_imports.py", - "start": 29, - "end": 29 + "start": 35, + "end": 35 }, { "file": "obfuscation/inline_imports.py", - "start": 31, - "end": 31 + "start": 36, + "end": 36 }, { "file": "obfuscation/inline_imports.py", - "start": 32, - "end": 32 + "start": 37, + "end": 37 + }, + { + "file": "obfuscation/inline_imports.py", + "start": 38, + "end": 38 + }, + { + "file": "obfuscation/inline_imports.py", + "start": 39, + "end": 39 + }, + { + "file": "obfuscation/inline_imports.py", + "start": 40, + "end": 40 + }, + { + "file": "obfuscation/inline_imports.py", + "start": 41, + "end": 41 + }, + { + "file": 
"obfuscation/inline_imports.py", + "start": 42, + "end": 42 + }, + { + "file": "obfuscation/inline_imports.py", + "start": 44, + "end": 44 + }, + { + "file": "obfuscation/inline_imports.py", + "start": 45, + "end": 45 }, { "file": "obfuscation/obfuscation_tools.py", @@ -135,8 +160,8 @@ }, { "file": "obfuscation/inline_imports.py", - "start": 27, - "end": 27 + "start": 32, + "end": 32 } ] }, diff --git a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/inline_imports.py b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/inline_imports.py index 4e37c7c02..73e8ac30b 100644 --- a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/inline_imports.py +++ b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/inline_imports.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
""" @@ -21,10 +21,23 @@ def test_function(): sys.exit() __import__('base64') - __import__('builtins') + __import__('builtins') # should not be detected + __import__('builtins').eval("print('hello')") + __import__('builtins').exec("print('hello')") + __import__('builtins').open("written.txt", "w") + __import__('builtins').open("README.md", "r") # should not be detected + _ = open("README.md").read() if __import__("os").path.exists("README.md") else "" # should not be detected __import__('subprocess') __import__('sys') - print("Hello world!") ;__import__('os') + print("Hello world!") ;__import__('sys') + __import__('os').getcwd() # should not be detected + __import__('os').path.join("docs", "README.md") # should not be detected + __import__('os').putenv("CRITICAL_ENV", "1") + __import__('os').environ["CRITICAL_ENV"] = "1" + del __import__('os').environ["CRITICAL_ENV"] + __import__('os').open("written.txt", __import__('os').O_WRONLY | __import__('os').O_CREAT) + __import__('os').write(1, b"hello") + __import__('os').fdopen(1, "w") __import__('zlib') __import__('marshal') # these both just import builtins From 8d0ccbb57ef68067a90d7674a351150fda9579b8 Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Wed, 25 Mar 2026 17:59:18 +1000 Subject: [PATCH 02/13] chore: update dependencies to address CVEs (#1338) This PR updates the pre-commit hooks, adjusts the code to pass the updated hooks, and temporarily adds GHSA-5239-wwwm-4pmq to the ignore list since a patch is not available. 
Signed-off-by: behnazh-w --- .github/workflows/_release-notifications.yaml | 50 ------------------ .github/workflows/release.yaml | 18 +------ .pre-commit-config.yaml | 24 ++++----- Makefile | 3 +- scripts/release_scripts/run_macaron.sh | 3 +- src/macaron/__main__.py | 6 +-- src/macaron/artifact/maven.py | 3 +- .../common_spec/maven_spec.py | 1 - src/macaron/config/global_config.py | 3 +- src/macaron/database/database_manager.py | 3 +- src/macaron/database/table_definitions.py | 3 +- src/macaron/json_tools.py | 3 +- .../metadata/typosquatting_presence.py | 3 +- .../metadata/unchanged_release.py | 3 +- src/macaron/output_reporter/results.py | 5 +- src/macaron/parsers/pomparser.py | 3 +- .../provenance/provenance_extractor.py | 3 +- src/macaron/provenance/provenance_finder.py | 3 +- src/macaron/provenance/provenance_verifier.py | 1 + src/macaron/repo_finder/commit_finder.py | 3 +- .../repo_finder/repo_finder_deps_dev.py | 1 + src/macaron/repo_finder/repo_finder_enums.py | 3 +- src/macaron/repo_finder/repo_finder_java.py | 3 +- src/macaron/repo_finder/repo_finder_npm.py | 3 +- src/macaron/repo_finder/repo_finder_pypi.py | 3 +- src/macaron/repo_finder/repo_utils.py | 3 +- src/macaron/repo_finder/repo_validator.py | 3 +- src/macaron/repo_verifier/repo_verifier.py | 3 +- .../repo_verifier/repo_verifier_base.py | 3 +- .../repo_verifier/repo_verifier_gradle.py | 3 +- .../repo_verifier/repo_verifier_maven.py | 3 +- .../slsa_analyzer/checks/build_tool_check.py | 3 +- .../slsa_analyzer/checks/check_result.py | 3 +- .../checks/provenance_commit_check.py | 3 +- .../checks/provenance_repo_check.py | 3 +- .../checks/provenance_verified_check.py | 3 +- src/macaron/slsa_analyzer/checks/vcs_check.py | 3 +- .../slsa_analyzer/git_service/github.py | 3 +- src/macaron/slsa_analyzer/git_url.py | 1 - .../maven_central_registry.py | 3 +- .../package_registry/npm_registry.py | 3 +- .../package_registry/pypi_registry.py | 1 + .../specs/inferred_provenance.py | 3 +- 
.../specs/pypi_certificate_predicate.py | 3 +- .../test_gradle_cli_command.py | 3 +- .../test_maven_cli_parser.py | 3 +- .../common_spec/compare_default_buildspec.py | 3 +- tests/conftest.py | 3 +- .../cyclonedx/test_cyclonedx.py | 3 +- .../pypi/test_anomalous_version.py | 3 +- .../pypi/test_closer_release_join_date.py | 1 + .../pypi/test_empty_project_link_analyzer.py | 3 +- .../malware_analyzer/pypi/test_fake_email.py | 3 +- .../pypi/test_one_release_analyzer.py | 3 +- .../pypi/test_pypi_sourcecode_analyzer.py | 3 +- .../pypi/test_similar_projects.py | 3 +- .../pypi/test_suspicious_setup.py | 3 +- .../pypi/test_typosquatting_presence.py | 3 +- .../pypi/test_unchanged_release.py | 3 +- .../pypi/test_wheel_absence.py | 3 +- .../output_reporter/test_jinja_extensions.py | 3 +- tests/policy_engine/compare_policy_reports.py | 3 +- tests/provenance/test_provenance_extractor.py | 51 +++++++------------ tests/provenance/test_provenance_finder.py | 3 +- tests/repo_finder/test_commit_finder.py | 3 +- tests/repo_finder/test_repo_finder.py | 3 +- .../repo_finder/test_repo_finder_deps_dev.py | 3 +- tests/repo_finder/test_report_schema.py | 1 + tests/repo_verifier/test_repo_verifier.py | 3 +- .../test_provenance_repo_commit_checks.py | 3 +- .../ci_service/test_base_ci_service.py | 3 +- .../slsa_analyzer/git_service/test_github.py | 3 +- .../test_jfrog_maven_registry.py | 8 ++- .../provenance/test_witness_provenance.py | 32 ++++-------- tests/slsa_analyzer/test_git_url.py | 8 ++- tests/test_util.py | 3 +- tests/vsa/test_vsa.py | 3 +- 77 files changed, 168 insertions(+), 224 deletions(-) delete mode 100644 .github/workflows/_release-notifications.yaml diff --git a/.github/workflows/_release-notifications.yaml b/.github/workflows/_release-notifications.yaml deleted file mode 100644 index 246f6bc2c..000000000 --- a/.github/workflows/_release-notifications.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. 
-# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -# Send a Slack release notification. Instructions to set up Slack to receive -# messages can be found here: https://github.com/slackapi/slack-github-action#setup-2 - -name: Release Notifications -on: - workflow_call: - inputs: - repo_name: - required: true - type: string - release_tag: - required: true - type: string - release_url: - required: true - type: string - secrets: - SLACK_WEBHOOK_URL: - required: true - -# Grant no permissions to this workflow. -permissions: {} - -jobs: - slack: - name: Slack release notification - runs-on: ubuntu-latest - steps: - - - name: Notify via Slack - run: | - curl --header "Content-Type: application/json; charset=UTF-8" --request POST --data "$SLACK_WEBHOOK_MSG" "$SLACK_WEBHOOK_URL" - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} - SLACK_WEBHOOK_MSG: | - { - "text": "${{ inputs.repo_name }} published a new release ${{ inputs.release_tag }}", - "blocks": [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "*${{ inputs.repo_name }}* published a new release <${{ inputs.release_url }}|${{ inputs.release_tag }}>" - } - } - ] - } diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index e63867579..da967efaf 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. # We run checks on pushing to the specified branches. @@ -333,19 +333,3 @@ jobs: # https://github.com/actions/runner/issues/2394. 
artifact-name: artifact-ubuntu-latest-python-3.11 artifact-sha256: ${{ needs.build.outputs.artifacts-sha256 }} - - # Send out release notifications after the Release was published on GitHub. - # Uncomment the `if` to disable sending release notifications. - notifications: - if: ${{ false }} - needs: [release] - name: Send Release notifications - uses: ./.github/workflows/_release-notifications.yaml - permissions: - contents: read - with: - repo_name: ${{ github.event.repository.name }} - release_tag: ${{ needs.release.outputs.release-tag }} - release_url: ${{ needs.release.outputs.release-url }} - secrets: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6166236b5..4d0c14627 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,7 +17,7 @@ repos: # Commitizen enforces semantic and conventional commit messages. - repo: https://github.com/commitizen-tools/commitizen - rev: v4.8.3 + rev: v4.13.9 hooks: - id: commitizen name: Check conventional commit message @@ -25,7 +25,7 @@ repos: # Sort imports. - repo: https://github.com/pycqa/isort - rev: 6.0.1 + rev: 8.0.1 hooks: - id: isort name: Sort import statements @@ -34,14 +34,14 @@ repos: # Add Black code formatters. - repo: https://github.com/ambv/black - rev: 25.1.0 + rev: 26.3.1 hooks: - id: black name: Format code - args: [--config, pyproject.toml] + args: [--config, pyproject.toml, --target-version, py311] exclude: ^tests/malware_analyzer/pypi/resources/sourcecode_samples.* - repo: https://github.com/asottile/blacken-docs - rev: 1.19.1 + rev: 1.20.0 hooks: - id: blacken-docs name: Format code in docstrings @@ -50,7 +50,7 @@ repos: # Upgrade and rewrite Python idioms. - repo: https://github.com/asottile/pyupgrade - rev: v3.20.0 + rev: v3.21.2 hooks: - id: pyupgrade name: Upgrade code idioms @@ -72,13 +72,13 @@ repos: # Check GitHub Actions workflow files. 
- repo: https://github.com/Mateusz-Grzelinski/actionlint-py - rev: v1.7.7.23 + rev: v1.7.11.24 hooks: - id: actionlint # Check shell scripts with shellcheck. - repo: https://github.com/shellcheck-py/shellcheck-py - rev: v0.10.0.1 + rev: v0.11.0.1 hooks: - id: shellcheck exclude: ^tests/ @@ -110,7 +110,7 @@ repos: # Check for potential security issues. - repo: https://github.com/PyCQA/bandit - rev: 1.8.6 + rev: 1.9.4 hooks: - id: bandit name: Check for security issues @@ -123,7 +123,7 @@ repos: # Enable a whole bunch of useful helper hooks, too. # See https://pre-commit.com/hooks.html for more hooks. - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: check-ast - id: check-case-conflict @@ -164,7 +164,7 @@ repos: # Check and prettify the configuration files. - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks - rev: v2.15.0 + rev: v2.16.0 hooks: - id: pretty-format-ini args: [--autofix] @@ -224,7 +224,7 @@ repos: # A linter for Golang - repo: https://github.com/golangci/golangci-lint - rev: v2.8.0 + rev: v2.11.4 hooks: - id: golangci-lint diff --git a/Makefile b/Makefile index 1209a07b3..13394ddb3 100644 --- a/Makefile +++ b/Makefile @@ -299,12 +299,13 @@ requirements.txt: pyproject.toml # editable mode (like the one in development here) because they may not have # a PyPI entry; also print out CVE description and potential fixes if audit # found an issue. +# Remove GHSA-5239-wwwm-4pmq from the ignore list when it is patched. .PHONY: audit audit: if ! $$(python -c "import pip_audit" &> /dev/null); then \ echo "No package pip_audit installed, upgrade your environment!" && exit 1; \ fi; - python -m pip_audit --skip-editable --desc on --fix --dry-run + python -m pip_audit --skip-editable --desc on --fix --dry-run --ignore-vuln GHSA-5239-wwwm-4pmq # Run some or all checks over the package code base. 
.PHONY: check check-code check-bandit check-flake8 check-lint check-mypy check-go check-actionlint diff --git a/scripts/release_scripts/run_macaron.sh b/scripts/release_scripts/run_macaron.sh index 306ae1bf7..503b414b6 100755 --- a/scripts/release_scripts/run_macaron.sh +++ b/scripts/release_scripts/run_macaron.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. # This script runs the Macaron Docker image. @@ -238,6 +238,7 @@ function mount_dir_rw_allow_create() { # Note: This function is currently unused but retained to avoid using `_mount_dir` # if not necessary, which may have unintended side effects. # shellcheck disable=SC2317 +# shellcheck disable=SC2329 function mount_dir_rw_forbid_create() { arg_name=$1 dir_on_host=$2 diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index addb0f881..034fc0525 100644 --- a/src/macaron/__main__.py +++ b/src/macaron/__main__.py @@ -54,11 +54,9 @@ def analyze_slsa_levels_single(analyzer_single_args: argparse.Namespace) -> None if not (analyzer_single_args.repo_path or analyzer_single_args.package_url): # We don't mention --config-path as a possible option in this log message as it going to be move soon. # See: https://github.com/oracle/macaron/issues/417 - logger.error( - """Analysis target missing. Please provide a package url (PURL) and/or repo path. + logger.error("""Analysis target missing. Please provide a package url (PURL) and/or repo path. Examples of a PURL can be seen at https://github.com/package-url/purl-spec: - pkg:github/micronaut-projects/micronaut-core.""" - ) + pkg:github/micronaut-projects/micronaut-core.""") sys.exit(os.EX_USAGE) # Set provenance expectation path. 
diff --git a/src/macaron/artifact/maven.py b/src/macaron/artifact/maven.py index 8b9b0721c..b39036ea4 100644 --- a/src/macaron/artifact/maven.py +++ b/src/macaron/artifact/maven.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module declares types and utilities for Maven artifacts.""" + import re from collections.abc import Sequence diff --git a/src/macaron/build_spec_generator/common_spec/maven_spec.py b/src/macaron/build_spec_generator/common_spec/maven_spec.py index de0b4c5df..6082b5a91 100644 --- a/src/macaron/build_spec_generator/common_spec/maven_spec.py +++ b/src/macaron/build_spec_generator/common_spec/maven_spec.py @@ -3,7 +3,6 @@ """This module includes build specification and helper classes for Maven packages.""" - import logging from packageurl import PackageURL diff --git a/src/macaron/config/global_config.py b/src/macaron/config/global_config.py index 78bedc34b..5bbf275d3 100644 --- a/src/macaron/config/global_config.py +++ b/src/macaron/config/global_config.py @@ -1,7 +1,8 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the GlobalConfig class to be used globally.""" + import logging import os from dataclasses import dataclass, field diff --git a/src/macaron/database/database_manager.py b/src/macaron/database/database_manager.py index 78e4395df..2d7e85482 100644 --- a/src/macaron/database/database_manager.py +++ b/src/macaron/database/database_manager.py @@ -1,7 +1,8 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This DatabaseManager module handles the sqlite database connection.""" + import collections.abc import functools import logging diff --git a/src/macaron/database/table_definitions.py b/src/macaron/database/table_definitions.py index 6414555c2..a3e53f5d7 100644 --- a/src/macaron/database/table_definitions.py +++ b/src/macaron/database/table_definitions.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ @@ -10,6 +10,7 @@ For table associated with a check see the check module. """ + import logging import string from datetime import datetime diff --git a/src/macaron/json_tools.py b/src/macaron/json_tools.py index a69b0eaa8..df8126074 100644 --- a/src/macaron/json_tools.py +++ b/src/macaron/json_tools.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module provides utility functions for JSON data.""" + import logging from collections.abc import Sequence from typing import TypeVar diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py index 87658f714..810d7523b 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Analyzer checks if there is typosquatting presence in the package name.""" + import logging import os diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py index 278f3eeb5..053709cd2 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Heuristics analyzer to check unchanged content in multiple releases.""" + import logging from collections import Counter diff --git a/src/macaron/output_reporter/results.py b/src/macaron/output_reporter/results.py index 2af4fc269..f2e86dcba 100644 --- a/src/macaron/output_reporter/results.py +++ b/src/macaron/output_reporter/results.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains classes that represent the result of the Macaron analysis.""" @@ -162,7 +162,8 @@ def get_dep_summary(self) -> DepSummary: analyzed_deps=0, unique_dep_repos=0, checks_summary=[ - {"check_id": check_id, "num_deps_pass": 0} for check_id in registry.get_all_checks_mapping() + {"check_id": check_id, "num_deps_pass": 0} # nosec B105 + for check_id in registry.get_all_checks_mapping() ], dep_status=[dep.get_summary() for dep in self.dependencies], ) diff --git a/src/macaron/parsers/pomparser.py b/src/macaron/parsers/pomparser.py index 2597e1938..fe1f90bee 100644 --- a/src/macaron/parsers/pomparser.py +++ b/src/macaron/parsers/pomparser.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the parser for POM files.""" + import logging from xml.etree.ElementTree import Element # nosec B405 diff --git a/src/macaron/provenance/provenance_extractor.py b/src/macaron/provenance/provenance_extractor.py index 4366ab299..b4003b0d0 100644 --- a/src/macaron/provenance/provenance_extractor.py +++ b/src/macaron/provenance/provenance_extractor.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains methods for extracting repository and commit metadata from provenance files.""" + import logging import urllib.parse from abc import ABC, abstractmethod diff --git a/src/macaron/provenance/provenance_finder.py b/src/macaron/provenance/provenance_finder.py index 0c1385d0f..e841fd397 100644 --- a/src/macaron/provenance/provenance_finder.py +++ b/src/macaron/provenance/provenance_finder.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains methods for finding provenance files.""" + import json import logging import os diff --git a/src/macaron/provenance/provenance_verifier.py b/src/macaron/provenance/provenance_verifier.py index 72b457ca0..2ab200b0b 100644 --- a/src/macaron/provenance/provenance_verifier.py +++ b/src/macaron/provenance/provenance_verifier.py @@ -2,6 +2,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains methods for verifying provenance files.""" + import glob import hashlib import logging diff --git a/src/macaron/repo_finder/commit_finder.py b/src/macaron/repo_finder/commit_finder.py index 25f24272b..b7f306e03 100644 --- a/src/macaron/repo_finder/commit_finder.py +++ b/src/macaron/repo_finder/commit_finder.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the logic for matching PackageURL versions to repository commits via the tags they contain.""" + import logging import re from datetime import datetime diff --git a/src/macaron/repo_finder/repo_finder_deps_dev.py b/src/macaron/repo_finder/repo_finder_deps_dev.py index e3f92cc4c..e47111081 100644 --- a/src/macaron/repo_finder/repo_finder_deps_dev.py +++ b/src/macaron/repo_finder/repo_finder_deps_dev.py @@ -2,6 +2,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the PythonRepoFinderDD class to be used for finding repositories using deps.dev.""" + import json import logging import urllib.parse diff --git a/src/macaron/repo_finder/repo_finder_enums.py b/src/macaron/repo_finder/repo_finder_enums.py index 02e98e262..f1a256053 100644 --- a/src/macaron/repo_finder/repo_finder_enums.py +++ b/src/macaron/repo_finder/repo_finder_enums.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains Enums used to represent the outcome of Repo Finder or Commit Finder executions.""" + from enum import Enum diff --git a/src/macaron/repo_finder/repo_finder_java.py b/src/macaron/repo_finder/repo_finder_java.py index 9b2e111cf..16889603d 100644 --- a/src/macaron/repo_finder/repo_finder_java.py +++ b/src/macaron/repo_finder/repo_finder_java.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the JavaRepoFinder class to be used for finding Java repositories.""" + import logging import re import urllib.parse diff --git a/src/macaron/repo_finder/repo_finder_npm.py b/src/macaron/repo_finder/repo_finder_npm.py index 4cd337dce..20b4c4bd6 100644 --- a/src/macaron/repo_finder/repo_finder_npm.py +++ b/src/macaron/repo_finder/repo_finder_npm.py @@ -1,7 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the logic for finding repositories of NPM projects.""" + import logging from packageurl import PackageURL diff --git a/src/macaron/repo_finder/repo_finder_pypi.py b/src/macaron/repo_finder/repo_finder_pypi.py index 2941c71cf..0f2860d72 100644 --- a/src/macaron/repo_finder/repo_finder_pypi.py +++ b/src/macaron/repo_finder/repo_finder_pypi.py @@ -1,7 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the logic for finding repositories of PyPI projects.""" + import logging from packageurl import PackageURL diff --git a/src/macaron/repo_finder/repo_utils.py b/src/macaron/repo_finder/repo_utils.py index 56d48b42a..92fc243d5 100644 --- a/src/macaron/repo_finder/repo_utils.py +++ b/src/macaron/repo_finder/repo_utils.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the utility functions for repo and commit finder operations.""" + import json import logging import os diff --git a/src/macaron/repo_finder/repo_validator.py b/src/macaron/repo_finder/repo_validator.py index 4e2e7d639..acaf5fec9 100644 --- a/src/macaron/repo_finder/repo_validator.py +++ b/src/macaron/repo_finder/repo_validator.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module exists to validate URLs in terms of their use as a repository that can be analyzed.""" + import urllib.parse from collections.abc import Iterable diff --git a/src/macaron/repo_verifier/repo_verifier.py b/src/macaron/repo_verifier/repo_verifier.py index a99538fb7..7d7b9aab5 100644 --- a/src/macaron/repo_verifier/repo_verifier.py +++ b/src/macaron/repo_verifier/repo_verifier.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains code to verify whether a reported repository can be linked back to the artifact.""" + import logging from macaron.repo_verifier.repo_verifier_base import ( diff --git a/src/macaron/repo_verifier/repo_verifier_base.py b/src/macaron/repo_verifier/repo_verifier_base.py index b056eac86..dffa61141 100644 --- a/src/macaron/repo_verifier/repo_verifier_base.py +++ b/src/macaron/repo_verifier/repo_verifier_base.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the base class and core data models for repository verification.""" + import abc import logging from dataclasses import dataclass diff --git a/src/macaron/repo_verifier/repo_verifier_gradle.py b/src/macaron/repo_verifier/repo_verifier_gradle.py index 6b4960958..c6f863d62 100644 --- a/src/macaron/repo_verifier/repo_verifier_gradle.py +++ b/src/macaron/repo_verifier/repo_verifier_gradle.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains code to verify whether a Gradle-based repository can be linked back to the artifact.""" + import logging from pathlib import Path diff --git a/src/macaron/repo_verifier/repo_verifier_maven.py b/src/macaron/repo_verifier/repo_verifier_maven.py index f09d9ad3b..fc3a30b2d 100644 --- a/src/macaron/repo_verifier/repo_verifier_maven.py +++ b/src/macaron/repo_verifier/repo_verifier_maven.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains code to verify whether a reported Maven-based repository can be linked back to the artifact.""" + import logging from urllib.parse import urlparse diff --git a/src/macaron/slsa_analyzer/checks/build_tool_check.py b/src/macaron/slsa_analyzer/checks/build_tool_check.py index 8432b014e..68790f279 100644 --- a/src/macaron/slsa_analyzer/checks/build_tool_check.py +++ b/src/macaron/slsa_analyzer/checks/build_tool_check.py @@ -1,9 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the implementation of the build tool detection check.""" - import logging from sqlalchemy import ForeignKey, String diff --git a/src/macaron/slsa_analyzer/checks/check_result.py b/src/macaron/slsa_analyzer/checks/check_result.py index f9d5c1ad0..12a4af0a7 100644 --- a/src/macaron/slsa_analyzer/checks/check_result.py +++ b/src/macaron/slsa_analyzer/checks/check_result.py @@ -1,7 +1,8 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the CheckResult class for storing the result of a check.""" + import json from dataclasses import dataclass from enum import Enum diff --git a/src/macaron/slsa_analyzer/checks/provenance_commit_check.py b/src/macaron/slsa_analyzer/checks/provenance_commit_check.py index b2b5d7297..7e271ffea 100644 --- a/src/macaron/slsa_analyzer/checks/provenance_commit_check.py +++ b/src/macaron/slsa_analyzer/checks/provenance_commit_check.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module adds a check that determines whether the repository URL came from provenance.""" + import logging from sqlalchemy import ForeignKey, String diff --git a/src/macaron/slsa_analyzer/checks/provenance_repo_check.py b/src/macaron/slsa_analyzer/checks/provenance_repo_check.py index 1f35fef39..e1260d76c 100644 --- a/src/macaron/slsa_analyzer/checks/provenance_repo_check.py +++ b/src/macaron/slsa_analyzer/checks/provenance_repo_check.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module adds a check that determines whether the repository URL came from provenance.""" + import logging from sqlalchemy import ForeignKey, String diff --git a/src/macaron/slsa_analyzer/checks/provenance_verified_check.py b/src/macaron/slsa_analyzer/checks/provenance_verified_check.py index 65f028ec0..46ac145e7 100644 --- a/src/macaron/slsa_analyzer/checks/provenance_verified_check.py +++ b/src/macaron/slsa_analyzer/checks/provenance_verified_check.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module adds a Check that checks whether the provenance is verified.""" + import logging from sqlalchemy import ForeignKey, String diff --git a/src/macaron/slsa_analyzer/checks/vcs_check.py b/src/macaron/slsa_analyzer/checks/vcs_check.py index ec70731e2..259838477 100644 --- a/src/macaron/slsa_analyzer/checks/vcs_check.py +++ b/src/macaron/slsa_analyzer/checks/vcs_check.py @@ -1,9 +1,8 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the implementation of the VCS check.""" - import logging from sqlalchemy import ForeignKey, String diff --git a/src/macaron/slsa_analyzer/git_service/github.py b/src/macaron/slsa_analyzer/git_service/github.py index d5e1c8548..48922562b 100644 --- a/src/macaron/slsa_analyzer/git_service/github.py +++ b/src/macaron/slsa_analyzer/git_service/github.py @@ -1,7 +1,8 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the spec for the GitHub service.""" + import logging from pydriller.git import Git diff --git a/src/macaron/slsa_analyzer/git_url.py b/src/macaron/slsa_analyzer/git_url.py index 6fa019991..8c3bf25b0 100644 --- a/src/macaron/slsa_analyzer/git_url.py +++ b/src/macaron/slsa_analyzer/git_url.py @@ -3,7 +3,6 @@ """This module provides methods to perform generic actions on Git URLS.""" - import logging import os import re diff --git a/src/macaron/slsa_analyzer/package_registry/maven_central_registry.py b/src/macaron/slsa_analyzer/package_registry/maven_central_registry.py index 010cb20cf..957193229 100644 --- a/src/macaron/slsa_analyzer/package_registry/maven_central_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/maven_central_registry.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """The module provides abstractions for the Maven Central package registry.""" + import hashlib import logging import urllib.parse diff --git a/src/macaron/slsa_analyzer/package_registry/npm_registry.py b/src/macaron/slsa_analyzer/package_registry/npm_registry.py index 7d33f3986..5ff8f9709 100644 --- a/src/macaron/slsa_analyzer/package_registry/npm_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/npm_registry.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""The module provides abstractions for the npm package registry.""" + from __future__ import annotations import json diff --git a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py index 432d14aa7..579a13d5b 100644 --- a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py @@ -2,6 +2,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """The module provides abstractions for the pypi package registry.""" + from __future__ import annotations import bisect diff --git a/src/macaron/slsa_analyzer/specs/inferred_provenance.py b/src/macaron/slsa_analyzer/specs/inferred_provenance.py index ee23b021f..ca04b77bc 100644 --- a/src/macaron/slsa_analyzer/specs/inferred_provenance.py +++ b/src/macaron/slsa_analyzer/specs/inferred_provenance.py @@ -1,9 +1,8 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the inferred SLSA provenance spec.""" - from macaron.slsa_analyzer.provenance.intoto import v01 diff --git a/src/macaron/slsa_analyzer/specs/pypi_certificate_predicate.py b/src/macaron/slsa_analyzer/specs/pypi_certificate_predicate.py index 2ae7cfb6e..aef18c128 100644 --- a/src/macaron/slsa_analyzer/specs/pypi_certificate_predicate.py +++ b/src/macaron/slsa_analyzer/specs/pypi_certificate_predicate.py @@ -1,7 +1,8 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the spec for predicates derived from a PyPI attestation certificate.""" + from dataclasses import dataclass diff --git a/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py b/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py index 7363fe991..e837ab299 100644 --- a/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py +++ b/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py @@ -1,9 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains tests for the gradle_cli_command module.""" - import pytest from macaron.build_spec_generator.cli_command_parser.gradle_cli_command import GradleCLIOptions diff --git a/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py b/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py index 6b4611bd8..03d3644ba 100644 --- a/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py +++ b/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py @@ -1,9 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the tests for maven cli parser.""" - import pytest from macaron.build_spec_generator.cli_command_parser.maven_cli_parser import ( diff --git a/tests/build_spec_generator/common_spec/compare_default_buildspec.py b/tests/build_spec_generator/common_spec/compare_default_buildspec.py index 51fd9ea1c..d949f1377 100644 --- a/tests/build_spec_generator/common_spec/compare_default_buildspec.py +++ b/tests/build_spec_generator/common_spec/compare_default_buildspec.py @@ -1,9 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Script to compare a generated default buildspec.""" - import argparse import json import logging diff --git a/tests/conftest.py b/tests/conftest.py index 413de3498..6290dd8f5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Fixtures for tests.""" + import os import urllib.parse from pathlib import Path diff --git a/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py b/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py index 8f44f409b..7452552c3 100644 --- a/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py +++ b/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module tests the CycloneDX helper functions.""" + from pathlib import Path import pytest diff --git a/tests/malware_analyzer/pypi/test_anomalous_version.py b/tests/malware_analyzer/pypi/test_anomalous_version.py index ef1d141d2..45e533738 100644 --- a/tests/malware_analyzer/pypi/test_anomalous_version.py +++ b/tests/malware_analyzer/pypi/test_anomalous_version.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for heuristic detecting anomalous version numbers""" + from unittest.mock import MagicMock import pytest diff --git a/tests/malware_analyzer/pypi/test_closer_release_join_date.py b/tests/malware_analyzer/pypi/test_closer_release_join_date.py index 5eb131300..ed1232bbc 100644 --- a/tests/malware_analyzer/pypi/test_closer_release_join_date.py +++ b/tests/malware_analyzer/pypi/test_closer_release_join_date.py @@ -2,6 +2,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for closer release join date heuristic.""" + from datetime import datetime from unittest.mock import MagicMock diff --git a/tests/malware_analyzer/pypi/test_empty_project_link_analyzer.py b/tests/malware_analyzer/pypi/test_empty_project_link_analyzer.py index 5dad60add..ecb774da8 100644 --- a/tests/malware_analyzer/pypi/test_empty_project_link_analyzer.py +++ b/tests/malware_analyzer/pypi/test_empty_project_link_analyzer.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""Tests for heuristic detecting malicious metadata from PyPI""" + from unittest.mock import MagicMock import pytest diff --git a/tests/malware_analyzer/pypi/test_fake_email.py b/tests/malware_analyzer/pypi/test_fake_email.py index 56e81e035..d7e33a4f0 100644 --- a/tests/malware_analyzer/pypi/test_fake_email.py +++ b/tests/malware_analyzer/pypi/test_fake_email.py @@ -1,9 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for the FakeEmailAnalyzer heuristic.""" - import os from pathlib import Path from unittest.mock import MagicMock diff --git a/tests/malware_analyzer/pypi/test_one_release_analyzer.py b/tests/malware_analyzer/pypi/test_one_release_analyzer.py index 60ad244ab..78ce0fbf9 100644 --- a/tests/malware_analyzer/pypi/test_one_release_analyzer.py +++ b/tests/malware_analyzer/pypi/test_one_release_analyzer.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for heuristic detecting malicious metadata from PyPI""" + from unittest.mock import MagicMock import pytest diff --git a/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py b/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py index c18369bb4..11aa3c6f8 100644 --- a/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py +++ b/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests detecting malicious patterns in PyPI package sourcecode.""" + import json import os from unittest.mock import MagicMock, patch diff --git a/tests/malware_analyzer/pypi/test_similar_projects.py b/tests/malware_analyzer/pypi/test_similar_projects.py index ed61a204e..2cf13d4d6 100644 --- a/tests/malware_analyzer/pypi/test_similar_projects.py +++ b/tests/malware_analyzer/pypi/test_similar_projects.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for the SimilarProjectAnalyzer heuristic.""" + # pylint: disable=redefined-outer-name import os diff --git a/tests/malware_analyzer/pypi/test_suspicious_setup.py b/tests/malware_analyzer/pypi/test_suspicious_setup.py index ec9af0f0f..649c4d67d 100644 --- a/tests/malware_analyzer/pypi/test_suspicious_setup.py +++ b/tests/malware_analyzer/pypi/test_suspicious_setup.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for suspicious setup.py heuristic.""" + from unittest.mock import MagicMock, patch import pytest diff --git a/tests/malware_analyzer/pypi/test_typosquatting_presence.py b/tests/malware_analyzer/pypi/test_typosquatting_presence.py index 2d23233a2..468cd8c8c 100644 --- a/tests/malware_analyzer/pypi/test_typosquatting_presence.py +++ b/tests/malware_analyzer/pypi/test_typosquatting_presence.py @@ -1,9 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for the TyposquattingPresenceAnalyzer heuristic.""" - import os from pathlib import Path from unittest.mock import MagicMock diff --git a/tests/malware_analyzer/pypi/test_unchanged_release.py b/tests/malware_analyzer/pypi/test_unchanged_release.py index f1162aaea..0a04c4292 100644 --- a/tests/malware_analyzer/pypi/test_unchanged_release.py +++ b/tests/malware_analyzer/pypi/test_unchanged_release.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for heuristic detecting malicious metadata from PyPI""" + from unittest.mock import MagicMock from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult diff --git a/tests/malware_analyzer/pypi/test_wheel_absence.py b/tests/malware_analyzer/pypi/test_wheel_absence.py index c00feb2cb..37716d3cc 100644 --- a/tests/malware_analyzer/pypi/test_wheel_absence.py +++ b/tests/malware_analyzer/pypi/test_wheel_absence.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""Tests for heuristic detecting wheel (.whl) file absence from PyPI packages""" + from unittest.mock import MagicMock, patch import pytest diff --git a/tests/output_reporter/test_jinja_extensions.py b/tests/output_reporter/test_jinja_extensions.py index 8baaa528f..1e0e0d46f 100644 --- a/tests/output_reporter/test_jinja_extensions.py +++ b/tests/output_reporter/test_jinja_extensions.py @@ -1,9 +1,8 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains tests for the Jinja2 filter and test extensions.""" - from hypothesis import given from hypothesis import strategies as st diff --git a/tests/policy_engine/compare_policy_reports.py b/tests/policy_engine/compare_policy_reports.py index 88e7d0cc9..e02cf09df 100644 --- a/tests/policy_engine/compare_policy_reports.py +++ b/tests/policy_engine/compare_policy_reports.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This script checks the policy engine report against expected results.""" + import json import logging import sys diff --git a/tests/provenance/test_provenance_extractor.py b/tests/provenance/test_provenance_extractor.py index 2f1581200..aa207d952 100644 --- a/tests/provenance/test_provenance_extractor.py +++ b/tests/provenance/test_provenance_extractor.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module tests the provenance extractor on valid example provenances.""" + import json import pytest @@ -19,8 +20,7 @@ @pytest.fixture(name="slsa_v1_gcb_1_provenance") def slsa_v1_gcb_1_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using build type gcb and sourceToBuild.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v1", "subject": [], @@ -42,15 +42,13 @@ def slsa_v1_gcb_1_provenance_() -> dict[str, JsonType]: } } } - """ - ) + """) @pytest.fixture(name="slsa_v1_gcb_2_provenance") def slsa_v1_gcb_2_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using build type gcb and configSource.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v1", "subject": [], @@ -74,15 +72,13 @@ def slsa_v1_gcb_2_provenance_() -> dict[str, JsonType]: } } } - """ - ) + """) @pytest.fixture(name="slsa_v1_github_provenance") def slsa_v1_github_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using build type GitHub.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v1", "subject": [], @@ -109,15 +105,13 @@ def slsa_v1_github_provenance_() -> dict[str, JsonType]: } } } - """ - ) + """) @pytest.fixture(name="slsa_v1_oci_provenance") def slsa_v1_oci_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using the OCI build type.""" - payload = _load_and_validate_json( - """ + payload = _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v1", "predicateType": "https://slsa.dev/provenance/v1", @@ -136,8 +130,7 @@ def slsa_v1_oci_provenance_() -> dict[str, JsonType]: } } } - """ - ) + """) # The build type is modified here to avoid issues with excessive line length. 
_json_modify( payload, @@ -150,8 +143,7 @@ def slsa_v1_oci_provenance_() -> dict[str, JsonType]: @pytest.fixture(name="slsa_v02_provenance") def slsa_v02_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v02 provenance.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v0.1", "subject": [], @@ -167,15 +159,13 @@ def slsa_v02_provenance_() -> dict[str, JsonType]: } } } - """ - ) + """) @pytest.fixture(name="slsa_v01_provenance") def slsa_v01_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v01 provenance.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v0.1", "subject": [], @@ -197,15 +187,13 @@ def slsa_v01_provenance_() -> dict[str, JsonType]: ] } } - """ - ) + """) @pytest.fixture(name="witness_gitlab_provenance") def witness_gitlab_provenance_() -> dict[str, JsonType]: """Return a Witness v0.1 provenance with a GitLab attestation.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v0.1", "subject": [], @@ -228,15 +216,13 @@ def witness_gitlab_provenance_() -> dict[str, JsonType]: ] } } - """ - ) + """) @pytest.fixture(name="witness_github_provenance") def witness_github_provenance_() -> dict[str, JsonType]: """Return a Witness v0.1 provenance with a GitHub attestation.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v0.1", "subject": [], @@ -259,8 +245,7 @@ def witness_github_provenance_() -> dict[str, JsonType]: ] } } - """ - ) + """) @pytest.fixture(name="target_repository") diff --git a/tests/provenance/test_provenance_finder.py b/tests/provenance/test_provenance_finder.py index 774d2ff9e..3e2389873 100644 --- a/tests/provenance/test_provenance_finder.py +++ b/tests/provenance/test_provenance_finder.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle 
and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the provenance finder.""" + import os import shutil import tempfile diff --git a/tests/repo_finder/test_commit_finder.py b/tests/repo_finder/test_commit_finder.py index 3fdefcb36..a505b3df2 100644 --- a/tests/repo_finder/test_commit_finder.py +++ b/tests/repo_finder/test_commit_finder.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the commit finder.""" + import logging import os import re diff --git a/tests/repo_finder/test_repo_finder.py b/tests/repo_finder/test_repo_finder.py index 8471af70b..25a917b3b 100644 --- a/tests/repo_finder/test_repo_finder.py +++ b/tests/repo_finder/test_repo_finder.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the repo finder.""" + import os from pathlib import Path diff --git a/tests/repo_finder/test_repo_finder_deps_dev.py b/tests/repo_finder/test_repo_finder_deps_dev.py index 1de5fae25..10cb1a5e5 100644 --- a/tests/repo_finder/test_repo_finder_deps_dev.py +++ b/tests/repo_finder/test_repo_finder_deps_dev.py @@ -1,7 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the deps.dev repo finder.""" + import pytest from packageurl import PackageURL from pytest_httpserver import HTTPServer diff --git a/tests/repo_finder/test_report_schema.py b/tests/repo_finder/test_report_schema.py index f3fbbbde3..2f851d557 100644 --- a/tests/repo_finder/test_report_schema.py +++ b/tests/repo_finder/test_report_schema.py @@ -2,6 +2,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the report schema of the repo finder.""" + import json import os from typing import Any diff --git a/tests/repo_verifier/test_repo_verifier.py b/tests/repo_verifier/test_repo_verifier.py index 0c01a8bff..74f3cfe63 100644 --- a/tests/repo_verifier/test_repo_verifier.py +++ b/tests/repo_verifier/test_repo_verifier.py @@ -1,7 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the repo verifier.""" + from pathlib import Path import pytest diff --git a/tests/slsa_analyzer/checks/test_provenance_repo_commit_checks.py b/tests/slsa_analyzer/checks/test_provenance_repo_commit_checks.py index fa65d2002..d1dfbdaa4 100644 --- a/tests/slsa_analyzer/checks/test_provenance_repo_commit_checks.py +++ b/tests/slsa_analyzer/checks/test_provenance_repo_commit_checks.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains tests for the provenance available check.""" + from pathlib import Path from typing import TypeVar diff --git a/tests/slsa_analyzer/ci_service/test_base_ci_service.py b/tests/slsa_analyzer/ci_service/test_base_ci_service.py index 510d8cf01..71bfeac0e 100644 --- a/tests/slsa_analyzer/ci_service/test_base_ci_service.py +++ b/tests/slsa_analyzer/ci_service/test_base_ci_service.py @@ -1,9 +1,8 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the base CI service.""" - from pathlib import Path import pytest diff --git a/tests/slsa_analyzer/git_service/test_github.py b/tests/slsa_analyzer/git_service/test_github.py index e01a415b7..604b0a50c 100644 --- a/tests/slsa_analyzer/git_service/test_github.py +++ b/tests/slsa_analyzer/git_service/test_github.py @@ -1,11 +1,10 @@ -# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ This module tests the GitHub git service. """ - from macaron.slsa_analyzer.git_service import GitHub from ...macaron_testcase import MacaronTestCase diff --git a/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py b/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py index de9609dfe..85d115a07 100644 --- a/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py +++ b/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for the ``JFrogMavenRegistry`` class.""" @@ -260,8 +260,7 @@ def test_extract_folder_names_from_folder_info_payload( ("args", "expected_file_names"), [ pytest.param( - { - "folder_info_payload": """ + {"folder_info_payload": """ { "children": [ { @@ -274,8 +273,7 @@ def test_extract_folder_names_from_folder_info_payload( } ] } - """ - }, + """}, ["child2"], id="Payload with both files and folders", ), diff --git a/tests/slsa_analyzer/provenance/test_witness_provenance.py b/tests/slsa_analyzer/provenance/test_witness_provenance.py index 576787aed..576bb4005 100644 --- a/tests/slsa_analyzer/provenance/test_witness_provenance.py +++ b/tests/slsa_analyzer/provenance/test_witness_provenance.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""Tests for witness provenance.""" @@ -68,21 +68,17 @@ def test_load_witness_predicate_types( ("payload_json", "predicate_types", "expected_result"), [ pytest.param( - json.loads( - """ + json.loads(""" { "predicateType": "https://witness.testifysec.com/attestation-collection/v0.1" } - """ - ), + """), {"https://witness.testifysec.com/attestation-collection/v0.1"}, True, id="Valid predicateType", ), pytest.param( - json.loads( - """ + json.loads(""" { "predicateType": "https://witness.net/attestation-collection/v0.1" } - """ - ), + """), {"https://witness.testifysec.com/attestation-collection/v0.1"}, False, id="Invalid predicateType", @@ -103,8 +99,7 @@ def test_is_witness_provenance_payload( ("payload_json", "expected_subjects"), [ pytest.param( - json.loads( - """ + json.loads(""" { "subject": [ { @@ -121,8 +116,7 @@ def test_is_witness_provenance_payload( } ] } - """ - ), + """), [ { "name": "https://witness.dev/attestations/product/v0.1/file:target/jackson-annotations-2.9.9.jar", @@ -140,8 +134,7 @@ def test_is_witness_provenance_payload( id="Valid payload", ), pytest.param( - json.loads( - """ + json.loads(""" { "subject": [ { @@ -158,8 +151,7 @@ def test_is_witness_provenance_payload( } ] } - """ - ), + """), [ { "name": "https://witness.dev/attestations/product/v0.1/file:target/jackson-annotations-2.9.9.jar", @@ -171,8 +163,7 @@ def test_is_witness_provenance_payload( id="Missing sha256", ), pytest.param( - json.loads( - """ + json.loads(""" { "subject": [ { @@ -189,8 +180,7 @@ def test_is_witness_provenance_payload( } ] } -""" - ), +"""), [ { "name": "https://witness.dev/attestations/product/v0.1/file:target/jackson-annotations-2.9.9.jar", diff --git a/tests/slsa_analyzer/test_git_url.py b/tests/slsa_analyzer/test_git_url.py index 006a92608..f84bbbdfa 100644 --- a/tests/slsa_analyzer/test_git_url.py +++ b/tests/slsa_analyzer/test_git_url.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the generic actions on Git repositories.""" @@ -220,12 +220,10 @@ def test_get_remote_vcs_url_with_user_defined_allowed_hostnames(tmp_path: Path) user_config_path = os.path.join(tmp_path, "config.ini") with open(user_config_path, "w", encoding="utf-8") as user_config_file: - user_config_file.write( - """ + user_config_file.write(""" [git_service.gitlab.self_hosted] hostname = internal.gitlab.org - """ - ) + """) # We don't have to worry about modifying the ``defaults`` object causing test # pollution here, since we reload the ``defaults`` object before every test with the # ``setup_test`` fixture. diff --git a/tests/test_util.py b/tests/test_util.py index 168d0a880..fa68b6123 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,9 +1,10 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ This module test the Util methods """ + from collections.abc import Callable from unittest import TestCase from unittest.mock import call, patch diff --git a/tests/vsa/test_vsa.py b/tests/vsa/test_vsa.py index dbe8b768c..04d0732c1 100644 --- a/tests/vsa/test_vsa.py +++ b/tests/vsa/test_vsa.py @@ -1,9 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""Tests for VSA generation.""" - import pytest from macaron.vsa.vsa import get_common_purl_from_artifact_purls, get_components_passing_policy From 09cf40b4db99c0eaeae7542dbbe0f5ce5d01a068 Mon Sep 17 00:00:00 2001 From: Abhinav Pradeep Date: Fri, 27 Mar 2026 10:22:19 +1000 Subject: [PATCH 03/13] feat: adjusted max_download_size to 30MB (#1337) Signed-off-by: Abhinav Pradeep --- src/macaron/config/defaults.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index 96b702b8e..5a0068e2f 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. [requests] @@ -11,7 +11,7 @@ error_retries = 5 # The default timeout in seconds for downloading assets. timeout = 120 # This is the acceptable maximum size (in bytes) to download an asset. -max_download_size = 10000000 +max_download_size = 30000000 # This is the database to store Macaron's results. 
[database] From 7db6d9b05e8b0bc70f22aaadd667f926107af17f Mon Sep 17 00:00:00 2001 From: Abhinav Pradeep Date: Fri, 27 Mar 2026 13:10:56 +1000 Subject: [PATCH 04/13] fix: improve has_binary flag condition for Python buildspec generation (#1333) Signed-off-by: Abhinav Pradeep --- .../common_spec/pypi_spec.py | 2 +- .../dockerfile/pypi_dockerfile_output.py | 14 +-- .../package_registry/pypi_registry.py | 13 +-- .../expected_default.buildspec | 39 ++++++++ .../expected_dockerfile.buildspec | 91 +++++++++++++++++++ .../cases/pypi_pytesseract/test.yaml | 45 +++++++++ 6 files changed, 188 insertions(+), 16 deletions(-) create mode 100644 tests/integration/cases/pypi_pytesseract/expected_default.buildspec create mode 100644 tests/integration/cases/pypi_pytesseract/expected_dockerfile.buildspec create mode 100644 tests/integration/cases/pypi_pytesseract/test.yaml diff --git a/src/macaron/build_spec_generator/common_spec/pypi_spec.py b/src/macaron/build_spec_generator/common_spec/pypi_spec.py index ee67578c9..e7ce40ee9 100644 --- a/src/macaron/build_spec_generator/common_spec/pypi_spec.py +++ b/src/macaron/build_spec_generator/common_spec/pypi_spec.py @@ -132,7 +132,7 @@ def resolve_fields(self, purl: PackageURL) -> None: logger.debug("From package JSON inferred Python constraints: %s", python_version_set) - self.data["has_binaries"] = not pypi_package_json.has_pure_wheel() + self.data["has_binaries"] = pypi_package_json.has_non_pure_wheel() if self.data["has_binaries"]: logger.debug("Can not find a pure wheel") diff --git a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py index 67d1c6308..62954e312 100644 --- a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py +++ b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py @@ -69,14 +69,14 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str: "else python -m build --wheel -n; fi" ) - wheel_url: 
str = "" + # Initialized empty so that the validation script can exit gracefully in the case we find no upstream wheel wheel_name: str = "" - - wheel_urls = buildspec["upstream_artifacts"]["wheels"] - # We currently only look for the pure wheel, if it exists - if wheel_urls: - wheel_url = list(wheel_urls)[0] - wheel_name = wheel_url.rsplit("/", 1)[-1] + wheel_url: str = "" + if "wheels" in buildspec["upstream_artifacts"]: + wheel_urls = buildspec["upstream_artifacts"]["wheels"] + if wheel_urls: + wheel_url = wheel_urls[0] + wheel_name = wheel_url.rsplit("/", 1)[-1] else: logger.debug("We could not find an upstream artifact, and therefore we cannot run validation") diff --git a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py index 579a13d5b..6741fd208 100644 --- a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py @@ -965,13 +965,13 @@ def download_wheel(self) -> bool: logger.debug(error) return False - def has_pure_wheel(self) -> bool: - """Check whether the PURL has a pure wheel from its package json. + def has_non_pure_wheel(self) -> bool: + """Check whether the PURL has any non-pure wheel from its package json. Returns ------- bool - Whether the PURL has a pure wheel or not. + Whether the PURL has any non-pure wheel or not. """ if self.component_version: urls = json_extract(self.package_json, ["releases", self.component_version], list) @@ -982,16 +982,13 @@ def has_pure_wheel(self) -> bool: return False for distribution in urls: file_name: str = distribution.get("filename") or "" - # Parse out and check none and any - # Catch exceptions try: _, _, _, tags = parse_wheel_filename(file_name) - # Check if none and any are in the tags (i.e. 
the wheel is pure) - if all(tag.abi == "none" and tag.platform == "any" for tag in tags): + # A wheel is non-pure if any tag is not abi=none and platform=any + if any(tag.abi != "none" or tag.platform != "any" for tag in tags): return True except InvalidWheelFilename: logger.debug("Could not parse wheel name.") - return False return False @contextmanager diff --git a/tests/integration/cases/pypi_pytesseract/expected_default.buildspec b/tests/integration/cases/pypi_pytesseract/expected_default.buildspec new file mode 100644 index 000000000..9daecb465 --- /dev/null +++ b/tests/integration/cases/pypi_pytesseract/expected_default.buildspec @@ -0,0 +1,39 @@ +{ + "macaron_version": "0.20.0", + "group_id": null, + "artifact_id": "pytesseract", + "version": "0.3.8", + "git_repo": "https://github.com/madmaze/pytesseract", + "git_tag": "805d3959496232edf2f0feb41af750d5702d85b7", + "newline": "lf", + "language_version": [ + ">=3.7" + ], + "ecosystem": "pypi", + "purl": "pkg:pypi/pytesseract@0.3.8", + "language": "python", + "build_tools": [ + "pip" + ], + "build_commands": [ + [ + "python", + "-m", + "build", + "--wheel", + "-n" + ] + ], + "has_binaries": false, + "build_requires": { + "setuptools": "==67.7.2" + }, + "build_backends": [ + "setuptools.build_meta" + ], + "upstream_artifacts": { + "sdist": [ + "https://files.pythonhosted.org/packages/a3/c9/d6e8903482bd6fb994c32722831d15842dd8b614f94ad9ca735807252671/pytesseract-0.3.8.tar.gz" + ] + } +} diff --git a/tests/integration/cases/pypi_pytesseract/expected_dockerfile.buildspec b/tests/integration/cases/pypi_pytesseract/expected_dockerfile.buildspec new file mode 100644 index 000000000..af9562fd4 --- /dev/null +++ b/tests/integration/cases/pypi_pytesseract/expected_dockerfile.buildspec @@ -0,0 +1,91 @@ + +#syntax=docker/dockerfile:1.10 +FROM oraclelinux:9 + +# Install core tools +RUN dnf -y install which wget tar unzip git + +# Install compiler and make +RUN dnf -y install gcc make + +# Download and unzip interpreter +RUN 
</validate + [ -n "" ] || { echo "No upstream artifact to validate against."; exit 1; } + # Capture artifacts generated + WHEELS=(/src/dist/*.whl) + # Ensure we only have one artifact + [ ${#WHEELS[@]} -eq 1 ] || { echo "Unexpected artifacts produced!"; exit 1; } + # BUILT_WHEEL is the artifact we built + BUILT_WHEEL=${WHEELS[0]} + # Ensure the artifact produced is not the literal returned by the glob + [ -e $BUILT_WHEEL ] || { echo "No wheels found!"; exit 1; } + # Download the wheel + wget -q + # Compare wheel names + [ $(basename $BUILT_WHEEL) == "" ] || { echo "Wheel name does not match!"; exit 1; } + # Compare file tree + (unzip -Z1 $BUILT_WHEEL | grep -v '\.dist-info' | sort) > built.tree + (unzip -Z1 "" | grep -v '\.dist-info' | sort ) > pypi_artifact.tree + diff -u built.tree pypi_artifact.tree || { echo "File trees do not match!"; exit 1; } + echo "Success!" +EOF + +ENTRYPOINT ["/bin/bash","/validate"] diff --git a/tests/integration/cases/pypi_pytesseract/test.yaml b/tests/integration/cases/pypi_pytesseract/test.yaml new file mode 100644 index 000000000..7407ae002 --- /dev/null +++ b/tests/integration/cases/pypi_pytesseract/test.yaml @@ -0,0 +1,45 @@ +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Integration test to ensure that has_binaries is not a false positive. + +tags: +- macaron-python-package +- tutorial + +steps: +- name: Run macaron analyze + kind: analyze + options: + command_args: + - -purl + - pkg:pypi/pytesseract@0.3.8 +- name: Generate the buildspec + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:pypi/pytesseract@0.3.8 + - --output-format + - default-buildspec +- name: Compare Buildspec. 
+ kind: compare + options: + kind: default_build_spec + result: output/buildspec/pypi/pytesseract/macaron.buildspec + expected: expected_default.buildspec +- name: Generate the buildspec + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:pypi/pytesseract@0.3.8 + - --output-format + - dockerfile +- name: Compare Dockerfile + kind: compare + options: + kind: dockerfile_build_spec + result: output/buildspec/pypi/pytesseract/dockerfile.buildspec + expected: expected_dockerfile.buildspec From 15bc2ab3665f0742a70c55fb0dde67bac709b379 Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Fri, 27 Mar 2026 14:18:02 +1000 Subject: [PATCH 05/13] ci: run Macaron as a GitHub Action to check workflows (#1327) This PR adds a new GitHub Actions workflow that runs Oracle Macaron to scan and validate our GitHub Actions workflows for supply chain security issues whenever workflow files change. It also fixes a bug in sending the query to osv.dev and adds relevant tests. Signed-off-by: behnazh-w --- .github/workflows/macaron-analysis.yaml | 63 +++++++++++++++++++ .github/workflows/test_macaron_action.yaml | 2 +- docs/source/pages/macaron_action.rst | 17 ++--- .../detect_vulnerable_github_actions.rst | 2 + .../github_actions_vulnerability_check.py | 9 ++- .../slsa_analyzer/package_registry/osv_dev.py | 7 +-- .../oracle_coherence-js-client/policy.dl | 17 +++++ .../oracle_coherence-js-client/test.yaml | 23 +++++++ .../package_registry/test_osv_dev.py | 44 ++++++++++++- 9 files changed, 167 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/macaron-analysis.yaml create mode 100644 tests/integration/cases/oracle_coherence-js-client/policy.dl create mode 100644 tests/integration/cases/oracle_coherence-js-client/test.yaml diff --git a/.github/workflows/macaron-analysis.yaml b/.github/workflows/macaron-analysis.yaml new file mode 100644 index 000000000..0970d54b5 --- /dev/null +++ b/.github/workflows/macaron-analysis.yaml @@ -0,0 +1,63 @@ +# Copyright (c) 
2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +# Run Macaron's policies and generate Verification Summary Attestation reports. +# See https://github.com/oracle/macaron + +name: Run Macaron to check supply chain security issues +on: + push: + branches: + - main + paths: + - .github/workflows/** + pull_request: + paths: + - .github/workflows/** + schedule: + - cron: 20 15 * * 3 +permissions: + contents: read + +jobs: + run_macaron: + runs-on: ubuntu-latest + + steps: + + - name: Check out repository + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 0 + persist-credentials: false + + # Check the GitHub Actions workflows in the repository for vulnerabilities. + # Note: adjust the policy_purl to refer to your repository URL. + - name: Run Macaron action + id: run_macaron + continue-on-error: true + uses: oracle/macaron@fda4dda04aa7228fcaba162804891806cf5a1375 # v0.22.0 + with: + repo_path: ./ + policy_file: check-github-actions + policy_purl: pkg:github.com/oracle/macaron@.* + + - name: Upload Macaron reports + if: ${{ always() }} + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: macaron-reports + path: | + output/reports/github_com/oracle/macaron/macaron.html + output/reports/github_com/oracle/macaron/macaron.json + output/macaron.db + if-no-files-found: warn + retention-days: 90 + + - name: Check Verification Summary Attestation check passes + if: ${{ always() }} + run: | + if [ ! -f output/vsa.intoto.jsonl ]; then + echo "The check-github-actions policy failed, therefore VSA was not generated at output/vsa.intoto.jsonl. Check the uploaded reports." 
+            exit 1
+          fi
diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml
index 930863d30..42845951f 100644
--- a/.github/workflows/test_macaron_action.yaml
+++ b/.github/workflows/test_macaron_action.yaml
@@ -21,7 +21,7 @@ jobs:
     name: Analyzing and comparing different versions of an artifact
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+    - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

    - name: Run Macaron (analyze arrow@1.3.0)
      uses: ./
diff --git a/docs/source/pages/macaron_action.rst b/docs/source/pages/macaron_action.rst
index dc8ebb477..55ce483ae 100644
--- a/docs/source/pages/macaron_action.rst
+++ b/docs/source/pages/macaron_action.rst
@@ -9,7 +9,7 @@ This document describes the composite GitHub Action defined in ``action.yaml`` a
 Quick usage
 -----------

-When using this action you can reference the action in your workflow. Example:
+When you use this action, you can reference it directly in your workflow. For a real-world example, check out our `workflow <https://github.com/oracle/macaron/blob/main/.github/workflows/macaron-analysis.yaml>`_ (we use it for dogfooding), or follow the example below to understand how it works:

 .. code-block:: yaml

@@ -19,13 +19,14 @@ When using this action you can reference the action in your workflow. Example:
     steps:
       - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
       - name: Run Macaron Security Analysis Action
-        uses: oracle/macaron@v0.22.0
+        uses: oracle/macaron@fda4dda04aa7228fcaba162804891806cf5a1375 # v0.22.0
         with:
           repo_path: 'https://github.com/example/project'
           policy_file: check-github-actions
           policy_purl: 'pkg:github.com/example/project'
           output_dir: 'macaron-output'
-          upload_attestation: true
+
+If you upload the results like in this `workflow <https://github.com/oracle/macaron/blob/main/.github/workflows/macaron-analysis.yaml>`_, check this :ref:`documentation <detect-vuln-gh-actions-results>` to see how to read and understand them.

 Example: policy verification only
 ----------------------------------
@@ -37,11 +38,10 @@ directory containing ``macaron.db``:

 .. 
code-block:: yaml - name: Verify policy - uses: oracle/macaron@v0.22.0 + uses: oracle/macaron@fda4dda04aa7228fcaba162804891806cf5a1375 # v0.22.0 with: policy_file: policy.dl output_dir: macaron-output - upload_attestation: true Inputs ------ @@ -103,7 +103,9 @@ options. Key inputs are listed below (see ``action.yaml`` for the full list): - ``output`` * - ``upload_attestation`` - When ``true``, the action will attempt to upload a generated - verification attestation (VSA) after policy verification. + verification attestation (VSA) after policy verification. The attestation will be available + under the ``Actions/management`` tab. This feature requires ``id-token: write`` and + ``attestations: write`` Job permissions in the GitHub Actions workflow. - ``false`` * - ``subject_path`` - Path to the artifact serving as the subject of the attestation. @@ -129,7 +131,8 @@ The composite action exposes the following outputs (set by the - Path to the generated VSA (Verification Summary Attestation) in `in-toto `_ JSONL format. If no VSA was produced during verification, the action emits the string ``"VSA Not Generated."`` - instead of a path. + instead of a path. The attestation will be available + under the ``Actions/management`` tab. Default Policies ---------------- diff --git a/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst b/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst index 953523a82..aa3c8c6aa 100644 --- a/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst +++ b/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst @@ -126,6 +126,8 @@ Run the ``verify-policy`` command to verify that the check passes: ./run_macaron.sh verify-policy --database ./output/macaron.db --file ./check_github_actions_vuln.dl +.. 
_detect-vuln-gh-actions-results: + ****************** Review the Results ****************** diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py index 48c6d445e..3b350091c 100644 --- a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py +++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the implementation of the GitHub Actions vulnerabilities check.""" @@ -144,7 +144,9 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: # We first send a batch query to see which GitHub Actions are potentially vulnerable. # OSV's querybatch returns minimal results but this allows us to only make subsequent # queries to get vulnerability details when needed. 
- batch_query = [{"name": k, "ecosystem": "GitHub Actions"} for k, _ in external_workflows.items()] + batch_query = [ + {"package": {"name": k, "ecosystem": "GitHub Actions"}} for k, _ in external_workflows.items() if k + ] batch_vulns = [] try: batch_vulns = OSVDevService.get_vulnerabilities_package_name_batch(batch_query) @@ -152,7 +154,8 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: logger.debug(error) result_tables: list[CheckFacts] = [] - for vuln_res in batch_vulns: + for pkg in batch_vulns: + vuln_res = pkg["package"] vulns: list = [] workflow_name = vuln_res["name"] try: diff --git a/src/macaron/slsa_analyzer/package_registry/osv_dev.py b/src/macaron/slsa_analyzer/package_registry/osv_dev.py index 9a5c96c13..b5955ffa5 100644 --- a/src/macaron/slsa_analyzer/package_registry/osv_dev.py +++ b/src/macaron/slsa_analyzer/package_registry/osv_dev.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains implementation of osv.dev service.""" @@ -102,10 +102,7 @@ def get_vulnerabilities_package_name_batch(packages: list) -> list: APIAccessError If there is an issue with querying the OSV API or if the results do not match the expected size. """ - query_data: dict[str, list] = {"queries": []} - - for pkg in packages: - query_data["queries"].append({"package": {"ecosystem": pkg["ecosystem"], "name": pkg["name"]}}) + query_data: dict[str, list] = {"queries": packages} # The results returned by OSV reports the vulnerabilities, preserving the order. 
osv_res = OSVDevService.call_osv_querybatch_api(query_data, len(packages)) diff --git a/tests/integration/cases/oracle_coherence-js-client/policy.dl b/tests/integration/cases/oracle_coherence-js-client/policy.dl new file mode 100644 index 000000000..5b814eb39 --- /dev/null +++ b/tests/integration/cases/oracle_coherence-js-client/policy.dl @@ -0,0 +1,17 @@ +/* Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy("check-github-actions-vulnerabilities", component_id, "Check GitHub Actions vulnerabilities for coherence-js-client") :- + check_failed(component_id, "mcn_githubactions_vulnerabilities_1"), + github_actions_vulnerabilities_check( + _, + "[\"https://osv.dev/vulnerability/GHSA-69fq-xp46-6x23\", \"https://osv.dev/vulnerability/GHSA-9p44-j4g5-cfx5\"]", + "aquasecurity/trivy-action", + "0.32.0", + "https://github.com/oracle/coherence-js-client/blob/39166341bc31f75b663ff439dae36170fb3e99a9/.github/workflows/trivy-scan.yml" + ). + +apply_policy_to("check-github-actions-vulnerabilities", component_id) :- + is_component(component_id, "pkg:github.com/oracle/coherence-js-client@39166341bc31f75b663ff439dae36170fb3e99a9"). diff --git a/tests/integration/cases/oracle_coherence-js-client/test.yaml b/tests/integration/cases/oracle_coherence-js-client/test.yaml new file mode 100644 index 000000000..43a28b268 --- /dev/null +++ b/tests/integration/cases/oracle_coherence-js-client/test.yaml @@ -0,0 +1,23 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Analyzing oracle/coherence-js-client at commit 39166341bc31f75b663ff439dae36170fb3e99a9 + and verifying that the GitHub Actions vulnerabilities check fails. 
+ +tags: +- macaron-python-package + +steps: +- name: Run macaron analyze + kind: analyze + options: + command_args: + - -rp + - https://github.com/oracle/coherence-js-client + - -d + - 39166341bc31f75b663ff439dae36170fb3e99a9 +- name: Run macaron verify-policy to verify that the GitHub Actions vulnerabilities check fails. + kind: verify + options: + policy: policy.dl diff --git a/tests/slsa_analyzer/package_registry/test_osv_dev.py b/tests/slsa_analyzer/package_registry/test_osv_dev.py index b96773f15..6856818ae 100644 --- a/tests/slsa_analyzer/package_registry/test_osv_dev.py +++ b/tests/slsa_analyzer/package_registry/test_osv_dev.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for the osv.dev service.""" @@ -222,3 +222,45 @@ def test_is_affected_version_ranges(vuln: dict, workflow: str, version: str, exp OSVDevService.is_version_affected(vuln=vuln, pkg_name=workflow, pkg_version=version, ecosystem="GitHub Actions") == expected ) + + +@pytest.mark.parametrize( + ("packages", "osv_batch_response", "expected"), + [ + pytest.param( + [{"package": {"ecosystem": "GitHub Actions", "name": "aquasecurity/trivy-action"}}], + { + "results": [ + { + "vulns": [ + {"id": "GHSA-69fq-xp46-6x23", "modified": "2026-03-24T18:02:32.837793Z"}, + {"id": "GHSA-9p44-j4g5-cfx5", "modified": "2026-02-22T23:23:29.929429Z"}, + ] + } + ] + }, + [{"package": {"ecosystem": "GitHub Actions", "name": "aquasecurity/trivy-action"}}], + id="Single vulnerable package", + ), + pytest.param( + [{"package": {"ecosystem": "GitHub Actions", "name": ""}}], + {"results": [{}]}, + [], + id="Empty package name", + ), + ], +) +def test_get_vulnerabilities_package_name_batch( + monkeypatch: pytest.MonkeyPatch, packages: list, osv_batch_response: dict[str, list], 
expected: list +) -> None: + """Test filtering vulnerable packages from OSV batch query results.""" + + def mock_call_osv_querybatch_api(query_data: dict, expected_size: int | None = None) -> list: + assert query_data == {"queries": packages} + assert query_data["queries"][0]["package"]["name"] == packages[0]["package"]["name"] + assert expected_size == len(packages) + return osv_batch_response["results"] + + monkeypatch.setattr(OSVDevService, "call_osv_querybatch_api", staticmethod(mock_call_osv_querybatch_api)) + + assert OSVDevService.get_vulnerabilities_package_name_batch(packages) == expected From f7634e5a9be4b71691425c123c6b735d1569a792 Mon Sep 17 00:00:00 2001 From: Abhinav Pradeep Date: Fri, 27 Mar 2026 16:00:54 +1000 Subject: [PATCH 06/13] feat: change dockerfile generation for Python rebuild to always default to standard build command (#1336) Signed-off-by: Abhinav Pradeep --- .../dockerfile/pypi_dockerfile_output.py | 30 ++++++------------- .../test_pypi_dockerfile_output.ambr | 2 +- .../expected_dockerfile.buildspec | 2 +- .../expected_dockerfile.buildspec | 2 +- .../expected_dockerfile.buildspec | 2 +- .../pypi_toga/expected_dockerfile.buildspec | 2 +- 6 files changed, 14 insertions(+), 26 deletions(-) diff --git a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py index 62954e312..7a3cf9539 100644 --- a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py +++ b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py @@ -46,27 +46,13 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str: except InvalidVersion as error: logger.debug("Ran into issue converting %s to a version: %s", language_version, error) raise GenerateBuildSpecError("Derived interpreter version could not be parsed") from error - if not buildspec["build_tools"]: - raise GenerateBuildSpecError("Cannot generate dockerfile when build tool is unknown") - if not 
buildspec["build_commands"]: - raise GenerateBuildSpecError("Cannot generate dockerfile when build command is unknown") - backend_install_commands: str = " && ".join(build_backend_commands(buildspec)) - build_tool_install: str = "" - if ( - buildspec["build_tools"][0] != "pip" - and buildspec["build_tools"][0] != "conda" - and buildspec["build_tools"][0] != "flit" - ): - build_tool_install = f"pip install {buildspec['build_tools'][0]} && " - elif buildspec["build_tools"][0] == "flit": - build_tool_install = ( - f"pip install {buildspec['build_tools'][0]} && if test -f \"flit.ini\"; then python -m flit.tomlify; fi && " - ) - - modern_build_command = build_tool_install + " ".join(x for x in buildspec["build_commands"][0]) + + backend_install_commands = " && ".join(build_backend_commands(buildspec)) + + modern_build_command = "python -m build --wheel -n" + legacy_build_command = ( - 'if test -f "setup.py"; then pip install wheel && python setup.py bdist_wheel; ' - "else python -m build --wheel -n; fi" + 'if test -f "setup.py"; then python setup.py bdist_wheel; else python -m build --wheel -n; fi' ) # Initialized empty so that the validation script can exit gracefully in the case we find no upstream wheel @@ -138,7 +124,9 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str: EOF # Run the build - RUN source /deps/bin/activate && {modern_build_command if version in SpecifierSet(">=3.6") else legacy_build_command} + RUN source /deps/bin/activate && /deps/bin/pip install wheel && {modern_build_command + if version in SpecifierSet(">=3.6") + else legacy_build_command} # Validate script RUN cat <<'EOF' >/validate diff --git a/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr b/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr index 8ff65b0da..8b94d8833 100644 --- a/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr +++ 
b/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr @@ -67,7 +67,7 @@ EOF # Run the build - RUN source /deps/bin/activate && python -m build + RUN source /deps/bin/activate && /deps/bin/pip install wheel && python -m build --wheel -n # Validate script RUN cat <<'EOF' >/validate diff --git a/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec b/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec index 9fbfdddd3..3eb549766 100644 --- a/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec +++ b/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec @@ -64,7 +64,7 @@ RUN </validate diff --git a/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec b/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec index e6596fc1b..6a1614371 100644 --- a/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec +++ b/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec @@ -64,7 +64,7 @@ RUN </validate diff --git a/tests/integration/cases/pypi_pytesseract/expected_dockerfile.buildspec b/tests/integration/cases/pypi_pytesseract/expected_dockerfile.buildspec index af9562fd4..fb840efae 100644 --- a/tests/integration/cases/pypi_pytesseract/expected_dockerfile.buildspec +++ b/tests/integration/cases/pypi_pytesseract/expected_dockerfile.buildspec @@ -64,7 +64,7 @@ RUN </validate diff --git a/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec b/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec index a8918d0ce..8618316e9 100644 --- a/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec +++ b/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec @@ -64,7 +64,7 @@ RUN </validate From 87bd0ffc3b2b994532bc29c6a532df0e31353f57 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 27 Mar 2026 17:41:15 +1000 Subject: 
[PATCH 07/13] chore(deps): bump actions/upload-artifact from 5.0.0 to 7.0.0 (#1318) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/_build.yaml | 2 +- .github/workflows/macaron-analysis.yaml | 2 +- .github/workflows/scorecards-analysis.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_build.yaml b/.github/workflows/_build.yaml index 9949d0a0e..5f7255bb0 100644 --- a/.github/workflows/_build.yaml +++ b/.github/workflows/_build.yaml @@ -129,7 +129,7 @@ jobs: # Currently reusable workflows do not support setting strategy property from the caller workflow. - name: Upload the package artifact for debugging and release if: matrix.os == env.ARTIFACT_OS && matrix.python == env.ARTIFACT_PYTHON - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: artifact-${{ matrix.os }}-python-${{ matrix.python }} path: dist diff --git a/.github/workflows/macaron-analysis.yaml b/.github/workflows/macaron-analysis.yaml index 0970d54b5..aca12d881 100644 --- a/.github/workflows/macaron-analysis.yaml +++ b/.github/workflows/macaron-analysis.yaml @@ -44,7 +44,7 @@ jobs: - name: Upload Macaron reports if: ${{ always() }} - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: macaron-reports path: | diff --git a/.github/workflows/scorecards-analysis.yaml b/.github/workflows/scorecards-analysis.yaml index 0162de5d7..544046506 100644 --- a/.github/workflows/scorecards-analysis.yaml +++ b/.github/workflows/scorecards-analysis.yaml @@ -49,7 +49,7 @@ jobs: # Upload the results as artifacts (optional). 
- name: Upload artifact - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: SARIF file path: results.sarif From 5f01e8202340f225c65bdeb4969e60fd721fdc3a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 27 Mar 2026 18:57:57 +1000 Subject: [PATCH 08/13] chore(deps): bump actions/download-artifact from 6.0.0 to 8.0.1 (#1330) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/_build_docker.yaml | 2 +- .github/workflows/_deploy-github-pages.yaml | 2 +- .github/workflows/release.yaml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/_build_docker.yaml b/.github/workflows/_build_docker.yaml index 06f836280..4df9824aa 100644 --- a/.github/workflows/_build_docker.yaml +++ b/.github/workflows/_build_docker.yaml @@ -38,7 +38,7 @@ jobs: python-version: '3.11' - name: Download artifact - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ inputs.artifact-name }} path: dist diff --git a/.github/workflows/_deploy-github-pages.yaml b/.github/workflows/_deploy-github-pages.yaml index bc56e33ee..ebbba1386 100644 --- a/.github/workflows/_deploy-github-pages.yaml +++ b/.github/workflows/_deploy-github-pages.yaml @@ -35,7 +35,7 @@ jobs: fetch-depth: 0 - name: Download artifact - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ inputs.artifact-name }} path: dist diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index da967efaf..ad6c04965 100644 --- a/.github/workflows/release.yaml +++ 
b/.github/workflows/release.yaml @@ -131,7 +131,7 @@ jobs: } >> "$GITHUB_OUTPUT" - name: Download artifact - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ env.ARTIFACT_NAME }} path: dist @@ -310,7 +310,7 @@ jobs: # fetch-depth: 0 # - name: Download provenance - # uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + # uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 # with: # name: ${{ needs.provenance.outputs.provenance-name }} From 9247e6dddd01d26e3dbcb5dfda92b6ca096909dd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 29 Mar 2026 08:21:09 +1000 Subject: [PATCH 09/13] chore(deps): bump actions/setup-python from 5.4.0 to 6.2.0 (#1284) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/_build.yaml | 2 +- .github/workflows/_build_docker.yaml | 2 +- .github/workflows/codeql-analysis.yaml | 2 +- .github/workflows/pr-conventional-commits.yaml | 2 +- .github/workflows/release.yaml | 4 ++-- .github/workflows/test_macaron_action.yaml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/_build.yaml b/.github/workflows/_build.yaml index 5f7255bb0..1a1c887b4 100644 --- a/.github/workflows/_build.yaml +++ b/.github/workflows/_build.yaml @@ -57,7 +57,7 @@ jobs: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ matrix.python }} diff --git a/.github/workflows/_build_docker.yaml b/.github/workflows/_build_docker.yaml index 4df9824aa..8c5ea7dba 100644 --- a/.github/workflows/_build_docker.yaml +++ b/.github/workflows/_build_docker.yaml 
@@ -33,7 +33,7 @@ jobs: # The Docker integration tests require Python 3.11. - name: Set up Python - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.11' diff --git a/.github/workflows/codeql-analysis.yaml b/.github/workflows/codeql-analysis.yaml index f72b51262..20e969518 100644 --- a/.github/workflows/codeql-analysis.yaml +++ b/.github/workflows/codeql-analysis.yaml @@ -38,7 +38,7 @@ jobs: uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Set up Python - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.11' diff --git a/.github/workflows/pr-conventional-commits.yaml b/.github/workflows/pr-conventional-commits.yaml index 3ca285fdd..b42ac0fb9 100644 --- a/.github/workflows/pr-conventional-commits.yaml +++ b/.github/workflows/pr-conventional-commits.yaml @@ -30,7 +30,7 @@ jobs: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.11' diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index ad6c04965..14a0857d2 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -45,7 +45,7 @@ jobs: token: ${{ secrets.REPO_ACCESS_TOKEN }} - name: Set up Python - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.11' @@ -215,7 +215,7 @@ jobs: rm -f "$CHECKSUMS" - name: Set up Python - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: 
python-version: '3.11' diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index 42845951f..f560c027f 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -59,7 +59,7 @@ jobs: output_dir: macaron_output/detect_malicious_package - name: Setup Python for analyzed venv - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: 3.11.14 From 016ef836efba651fa75ba72d665996bf5d12c283 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 29 Mar 2026 20:03:31 +1000 Subject: [PATCH 10/13] chore(deps): bump actions/setup-java from 4.4.0 to 5.2.0 (#1286) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/_build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_build.yaml b/.github/workflows/_build.yaml index 1a1c887b4..649d95b05 100644 --- a/.github/workflows/_build.yaml +++ b/.github/workflows/_build.yaml @@ -63,7 +63,7 @@ jobs: # Install Java. 
- name: Set up JDK - uses: actions/setup-java@b36c23c0d998641eff861008f374ee103c25ac73 # v4.4.0 + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 with: distribution: oracle java-version: '17' From 011e21aa3394a6c692d1a9017c16dc50420b3646 Mon Sep 17 00:00:00 2001 From: Nicholas Allen Date: Mon, 30 Mar 2026 13:04:02 +1000 Subject: [PATCH 11/13] fix: allow parsing of github expressions containing non-breaking-space characters, and allow dataflow analysis to fail (#1340) Signed-off-by: Nicholas Allen --- .../dataflow_analysis/analysis.py | 73 +++++++++++-------- .../code_analyzer/dataflow_analysis/github.py | 8 +- .../dataflow_analysis/github_expr.py | 13 ++-- .../github_actions/github_actions_ci.py | 8 +- 4 files changed, 61 insertions(+), 41 deletions(-) diff --git a/src/macaron/code_analyzer/dataflow_analysis/analysis.py b/src/macaron/code_analyzer/dataflow_analysis/analysis.py index 6f7c3f35f..1fed33070 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/analysis.py +++ b/src/macaron/code_analyzer/dataflow_analysis/analysis.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Entry points to perform and use the dataflow analysis.""" @@ -30,19 +30,22 @@ def analyse_github_workflow_file(workflow_path: str, repo_path: str | None, dump core.Node Graph representation of workflow and analysis results. 
""" - workflow = actionparser.parse(workflow_path) + try: + workflow = actionparser.parse(workflow_path) - analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) + analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) - core.reset_debug_sequence_number() - raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_path) - core.increment_debug_sequence_number() + core.reset_debug_sequence_number() + raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_path) + core.increment_debug_sequence_number() - raw_workflow_node.analyse() + raw_workflow_node.analyse() - if dump_debug: - with open("analysis." + workflow_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f: - printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True) + if dump_debug: + with open("analysis." + workflow_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f: + printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True) + except Exception as e: + raise CallGraphError("Failed to analyze github workflow '" + workflow_path + "'") from e return raw_workflow_node @@ -68,17 +71,20 @@ def analyse_github_workflow( core.Node Graph representation of workflow and analysis results. 
""" - analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) + try: + analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) - core.reset_debug_sequence_number() - raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_source_path) - core.increment_debug_sequence_number() + core.reset_debug_sequence_number() + raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_source_path) + core.increment_debug_sequence_number() - raw_workflow_node.analyse() + raw_workflow_node.analyse() - if dump_debug: - with open("analysis." + workflow_source_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f: - printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True) + if dump_debug: + with open("analysis." + workflow_source_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f: + printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True) + except Exception as e: + raise CallGraphError("Failed to analyze github workflow '" + workflow_source_path + "'") from e return raw_workflow_node @@ -104,19 +110,24 @@ def analyse_bash_script( core.Node Graph representation of Bash script and analysis results. """ - analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) - bash_context = core.OwningContextRef(bash.BashScriptContext.create_in_isolation(analysis_context, source_path)) - core.reset_debug_sequence_number() - bash_node = bash.RawBashScriptNode(facts.StringLiteral(bash_content), bash_context) - core.increment_debug_sequence_number() - - bash_node.analyse() - - if dump_debug: - with open( - "analysis." + source_path.replace("/", "_") + "." 
+ str(hash(bash_content)) + ".dot", "w", encoding="utf-8" - ) as f: - printing.print_as_dot_graph(bash_node, f, include_properties=True, include_states=True) + try: + analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) + bash_context = core.OwningContextRef(bash.BashScriptContext.create_in_isolation(analysis_context, source_path)) + core.reset_debug_sequence_number() + bash_node = bash.RawBashScriptNode(facts.StringLiteral(bash_content), bash_context) + core.increment_debug_sequence_number() + + bash_node.analyse() + + if dump_debug: + with open( + "analysis." + source_path.replace("/", "_") + "." + str(hash(bash_content)) + ".dot", + "w", + encoding="utf-8", + ) as f: + printing.print_as_dot_graph(bash_node, f, include_properties=True, include_states=True) + except Exception as e: + raise CallGraphError("Failed to analyze bash script '" + source_path + "'") from e return bash_node diff --git a/src/macaron/code_analyzer/dataflow_analysis/github.py b/src/macaron/code_analyzer/dataflow_analysis/github.py index 222f55fb1..6231c0ea6 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/github.py +++ b/src/macaron/code_analyzer/dataflow_analysis/github.py @@ -363,12 +363,16 @@ def create( for need in needs: actual_need = GitHubActionsWorkflowNode._find_job_id_case_insensitive(jobs, need) if actual_need is None: - raise CallGraphError("needs refers to invalid job") + raise CallGraphError( + "needs refers to invalid job '" + need + "', jobs are " + str(list(jobs.keys())) + ) edges.append(actual_need) elif isinstance(needs, str): actual_need = GitHubActionsWorkflowNode._find_job_id_case_insensitive(jobs, needs) if actual_need is None: - raise CallGraphError("needs refers to invalid job") + raise CallGraphError( + "needs refers to invalid job '" + needs + "', jobs are " + str(list(jobs.keys())) + ) edges.append(actual_need) dependency_graph[job_id] = edges diff --git a/src/macaron/code_analyzer/dataflow_analysis/github_expr.py 
b/src/macaron/code_analyzer/dataflow_analysis/github_expr.py index 8961750a4..9ecb39426 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/github_expr.py +++ b/src/macaron/code_analyzer/dataflow_analysis/github_expr.py @@ -1,13 +1,14 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Parser for GitHub Actions expression language.""" from typing import cast -from lark import Lark, Token, Tree +from lark import Lark, Token, Tree, UnexpectedInput from macaron.code_analyzer.dataflow_analysis import facts +from macaron.errors import CallGraphError # Parser for GitHub Actions expression language grammar. github_expr_parser = Lark( @@ -67,7 +68,7 @@ function_call: identifier "(" _expr ("," _expr)* ")" %import common.SIGNED_NUMBER - %import common.WS + %import unicode.WS %import common.LETTER %import common.DIGIT %import common._STRING_INNER @@ -111,8 +112,10 @@ def extract_value_from_expr_string(s: str, var_scope: facts.Scope | None) -> fac values.append(facts.StringLiteral(cur_str)) cur_expr_end = s.find("}}", cur_expr_begin) cur_expr = s[cur_expr_begin + 3 : cur_expr_end] - parse_tree = github_expr_parser.parse(cur_expr) - + try: + parse_tree = github_expr_parser.parse(cur_expr) + except UnexpectedInput as e: + raise CallGraphError("Failed to parse github expression '" + cur_expr + "' in string '" + s + "'") from e node = parse_tree.children[0] var_str = extract_expr_variable_name(node) diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py index b24dc5963..4700e5e85 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py @@ -1,4 +1,4 
@@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module analyzes GitHub Actions CI.""" @@ -8,13 +8,14 @@ import glob import logging import os +import traceback from datetime import datetime, timedelta, timezone from macaron.code_analyzer.dataflow_analysis.analysis import analyse_github_workflow_file from macaron.code_analyzer.dataflow_analysis.core import Node, NodeForest from macaron.config.defaults import defaults from macaron.config.global_config import global_config -from macaron.errors import GitHubActionsValueError, ParseError +from macaron.errors import CallGraphError, GitHubActionsValueError, ParseError from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService from macaron.slsa_analyzer.git_service.api_client import GhAPIClient, get_default_gh_client from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService @@ -593,8 +594,9 @@ def build_call_graph(self, repo_path: str, macaron_path: str = "") -> NodeForest try: workflow_node = analyse_github_workflow_file(workflow_path, repo_path) - except ParseError: + except (ParseError, CallGraphError): logger.debug("Skip adding workflow at %s to the callgraph.", workflow_path) + logger.debug("Reason: %s", traceback.format_exc()) continue nodes.append(workflow_node) return NodeForest(nodes) From d902a14deca375f33e86cf937b2729cc8899be0b Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Tue, 31 Mar 2026 11:38:12 +1000 Subject: [PATCH 12/13] feat!: add more inputs to Macaron Action and improve GitHub Action analysis (#1339) This PR enhances the Macaron GitHub Action by updating its input/output interface and introducing new analysis to detect injection risks and other security issues in GitHub Actions workflows. 
It also improves result visibility by showing findings directly in the Action run summary. Signed-off-by: behnazh-w --- .github/workflows/_build_docker.yaml | 29 +- .github/workflows/macaron-analysis.yaml | 22 +- .github/workflows/test_macaron_action.yaml | 220 +++- action.yaml | 107 +- docs/source/pages/macaron_action.rst | 42 +- golang/cmd/bashparser/bashparser.go | 15 +- golang/internal/bashparser/bashparser.go | 108 +- golang/internal/bashparser/bashparser_test.go | 48 + pyproject.toml | 4 + scripts/actions/run_macaron_analysis.sh | 28 +- scripts/actions/setup_macaron.sh | 13 +- scripts/actions/write_job_summary.py | 545 ++++++++++ scripts/actions/write_job_summary.sh | 18 + .../code_analyzer/dataflow_analysis/bash.py | 38 +- .../gha_security_analysis/detect_injection.py | 969 ++++++++++++++++++ .../gha_security_analysis/recommendation.py | 220 ++++ src/macaron/parsers/bashparser.py | 70 +- .../policies/sql/check-github-actions.sql | 26 + .../sql/malware-detection-dependencies.sql | 19 + .../policies/sql/malware-detection.sql | 19 + .../github_actions_vulnerability_check.py | 298 ++++-- .../github_actions/github_actions_ci.py | 22 +- .../slsa_analyzer/git_service/api_client.py | 25 +- .../test_gha_security_analysis.ambr | 30 + .../workflow_files/injection_pattern_1.yaml | 75 ++ .../test_gha_security_analysis.py | 156 +++ .../test_recommendation.py | 69 ++ .../oracle_coherence-js-client/policy.dl | 10 +- .../policy_repo_url.dl | 2 +- .../cases/org_apache_logging_log4j/test.yaml | 2 + .../output_reporter/test_write_job_summary.py | 104 ++ tests/parsers/bashparser/test_bashparser.py | 14 +- .../provenance/attest-macaron-supply-chain.dl | 16 + 33 files changed, 3228 insertions(+), 155 deletions(-) create mode 100644 scripts/actions/write_job_summary.py create mode 100755 scripts/actions/write_job_summary.sh create mode 100644 src/macaron/code_analyzer/gha_security_analysis/detect_injection.py create mode 100644 
src/macaron/code_analyzer/gha_security_analysis/recommendation.py create mode 100644 src/macaron/resources/policies/sql/check-github-actions.sql create mode 100644 src/macaron/resources/policies/sql/malware-detection-dependencies.sql create mode 100644 src/macaron/resources/policies/sql/malware-detection.sql create mode 100644 tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr create mode 100644 tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml create mode 100644 tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py create mode 100644 tests/code_analyzer/gha_security_analysis/test_recommendation.py create mode 100644 tests/output_reporter/test_write_job_summary.py create mode 100644 tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl diff --git a/.github/workflows/_build_docker.yaml b/.github/workflows/_build_docker.yaml index 8c5ea7dba..d503bfff9 100644 --- a/.github/workflows/_build_docker.yaml +++ b/.github/workflows/_build_docker.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. # This is a reuseable workflow to build and test the Docker image. Note that this workflow does not @@ -63,6 +63,22 @@ jobs: IMAGE_NAME: ghcr.io/oracle/macaron run: make build-docker + # Export the built image so downstream jobs/workflows can load and reuse + # the exact same image without pushing to a registry. + - name: Export test Docker image + run: docker save ghcr.io/oracle/macaron:test --output /tmp/macaron-test-image.tar + + # Upload the image tarball for the reusable action test workflow. 
+ - name: Upload test Docker image artifact + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: macaron-test-image + path: /tmp/macaron-test-image.tar + if-no-files-found: error + retention-days: 1 + + # Install helper tooling used by integration test utilities that validate + # the built Docker image behavior. - name: Install dependencies for integration test utility run: make setup-integration-test-utility-for-docker @@ -74,3 +90,14 @@ jobs: DOCKER_PULL: never GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: make integration-test-docker + + test-macaron-action: + # Reuse the action test workflow against the exact Docker image built above. + # The image is transferred via artifact to avoid pushing to a registry. + needs: [build-docker] + permissions: + contents: read + uses: ./.github/workflows/test_macaron_action.yaml + with: + docker_image_artifact_name: macaron-test-image + macaron_image_tag: test diff --git a/.github/workflows/macaron-analysis.yaml b/.github/workflows/macaron-analysis.yaml index aca12d881..d0da663d1 100644 --- a/.github/workflows/macaron-analysis.yaml +++ b/.github/workflows/macaron-analysis.yaml @@ -35,29 +35,9 @@ jobs: # Note: adjust the policy_purl to refer to your repository URL. - name: Run Macaron action id: run_macaron - continue-on-error: true uses: oracle/macaron@fda4dda04aa7228fcaba162804891806cf5a1375 # v0.22.0 with: repo_path: ./ policy_file: check-github-actions policy_purl: pkg:github.com/oracle/macaron@.* - - - name: Upload Macaron reports - if: ${{ always() }} - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 - with: - name: macaron-reports - path: | - output/reports/github_com/oracle/macaron/macaron.html - output/reports/github_com/oracle/macaron/macaron.json - output/macaron.db - if-no-files-found: warn - retention-days: 90 - - - name: Check Verification Summary Attestation check passes - if: ${{ always() }} - run: | - if [ ! 
-f output/vsa.intoto.jsonl ]; then - echo "The check-github-actions policy failed, therefore VSA was not generated at output/vsa.intoto.jsonl. Check the uploaded reports." - exit 1 - fi + reports_retention_days: 90 diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index f560c027f..4c72dffda 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -1,62 +1,106 @@ # Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. -name: Test Macaron Action (tutorials) +name: Test Macaron Action on: - push: - paths: - - action.yaml - pull_request: - paths: - - action.yaml - workflow_dispatch: + workflow_call: + # Optional overrides used by reusable callers (for example _build_docker.yaml). + # Defaults target the test image artifact produced by our Docker build workflow. + inputs: + docker_image_artifact_name: + required: false + type: string + default: macaron-test-image + macaron_image_tag: + required: false + type: string + default: test permissions: - id-token: write - attestations: write + contents: read jobs: tutorial-commit-finder: name: Analyzing and comparing different versions of an artifact runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze arrow@1.3.0) uses: ./ with: package_url: 
pkg:pypi/arrow@1.3.0 output_dir: macaron_output/commit_finder + upload_reports: 'false' - name: Run Macaron (analyze arrow@0.15.0) uses: ./ with: package_url: pkg:pypi/arrow@0.15.0 output_dir: macaron_output/commit_finder + upload_reports: 'false' - name: Run Macaron (verify policy - has-hosted-build) + id: verify_has_hosted_build + # This verification is expected to fail for this tutorial scenario. + continue-on-error: true uses: ./ with: policy_file: ./tests/tutorial_resources/commit_finder/has-hosted-build.dl output_dir: macaron_output/commit_finder + upload_reports: 'false' + - name: Assert expected failure (has-hosted-build) + if: ${{ always() }} + run: | + # Keep this workflow green only when the verify step actually fails. + if [ "${{ steps.verify_has_hosted_build.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." + exit 1 + fi tutorial-detect-malicious-package: name: Detecting malicious packages runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze django@5.0.6 without dependencies) uses: ./ with: package_url: pkg:pypi/django@5.0.6 output_dir: macaron_output/detect_malicious_package + upload_reports: 'false' - name: Run Macaron (verify policy - check-django) uses: ./ with: policy_file: ./tests/tutorial_resources/detect_malicious_package/check-django.dl output_dir: macaron_output/detect_malicious_package + upload_reports: 'false' - name: Setup Python for analyzed venv uses: 
actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 @@ -83,101 +127,159 @@ jobs: output_dir: macaron_output/detect_malicious_package deps_depth: '1' python_venv: /tmp/.django_venv + upload_reports: 'false' - name: Run Macaron (verify policy - check-dependencies) uses: ./ with: policy_file: ./tests/tutorial_resources/detect_malicious_package/check-dependencies.dl output_dir: macaron_output/detect_malicious_package + upload_reports: 'false' tutorial-detect-vulnerable-actions: name: How to detect vulnerable GitHub Actions runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze repo - apache/logging-log4j2) uses: ./ with: repo_path: https://github.com/apache/logging-log4j2 output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'false' + write_job_summary: 'false' - name: Run Macaron (verify policy - github_actions_vulns for repo) + id: verify_github_actions_vulns_repo_tutorial uses: ./ with: policy_file: ./tests/tutorial_resources/detect_vulnerable_github_actions/check_github_actions_vuln_repo.dl output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'true' + reports_artifact_name: macaron-reports-vulnerable-actions-repo + write_job_summary: 'true' - name: Run Macaron (analyze purl - log4j-core example) uses: ./ with: package_url: pkg:maven/org.apache.logging.log4j/log4j-core@2.25.3 output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 
'false' + write_job_summary: 'false' - name: Run Macaron (verify policy - github_actions_vulns for purl) + id: verify_github_actions_vulns_purl_tutorial uses: ./ with: policy_file: ./tests/tutorial_resources/detect_vulnerable_github_actions/check_github_actions_vuln_purl.dl output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'true' + reports_artifact_name: macaron-reports-vulnerable-actions-purl + write_job_summary: 'true' tutorial-provenance: name: Provenance discovery, extraction, and verification runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze semver 7.7.2) uses: ./ with: package_url: pkg:npm/semver@7.7.2 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - semver) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_semver.dl output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (analyze toga 0.5.1 - PyPI provenance) uses: ./ with: package_url: pkg:pypi/toga@0.5.1 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - toga PyPI) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_toga.dl output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (analyze toga 0.4.8 - GitHub attestation) uses: ./ with: package_url: pkg:pypi/toga@0.4.8 output_dir: 
macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - toga GitHub) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_toga.dl output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (analyze urllib3 2.0.0a1 - GitHub attestation) uses: ./ with: package_url: pkg:pypi/urllib3@2.0.0a1 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - urllib3) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_urllib3.dl output_dir: macaron_output/provenance + upload_reports: 'false' tutorial-detect-malicious-java-dep: name: Detecting Java dependencies manually uploaded to Maven Central runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze example-maven-app with SBOM) uses: ./ with: @@ -186,24 +288,39 @@ jobs: output_dir: macaron_output/detect_malicious_java_dep sbom_path: ./tests/tutorial_resources/detect_malicious_java_dep/example-sbom.json deps_depth: '1' + upload_reports: 'false' - name: Run Macaron (verify policy - detect-malicious-upload) uses: ./ with: policy_file: ./tests/tutorial_resources/detect_malicious_java_dep/example-maven-app.dl output_dir: macaron_output/detect_malicious_java_dep + upload_reports: 'false' tutorial-exclude-include-checks: name: Exclude and include checks in Macaron runs-on: ubuntu-latest + env: + 
MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze micronaut-core with default checks) uses: ./ with: package_url: pkg:maven/io.micronaut/micronaut-core@4.3.10 output_dir: macaron_output/exclude_include_checks/normal + upload_reports: 'false' - name: Run Macaron (analyze micronaut-core excluding witness check via defaults.ini) uses: ./ @@ -211,3 +328,86 @@ jobs: package_url: pkg:maven/io.micronaut/micronaut-core@4.3.10 defaults_path: ./tests/tutorial_resources/exclude_include_checks/defaults_exclude_witness.ini output_dir: macaron_output/exclude_include_checks/excluded + upload_reports: 'false' + + test-detect-vulnerable-actions: + name: How to detect vulnerable GitHub Actions + runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar + + - name: Run Macaron (analyze github_actions_vulns for https://github.com/oracle/coherence-js-client) + id: verify_github_actions_vulns_repo_test + # This integration target is intentionally 
vulnerable; failure is expected. + continue-on-error: true + uses: ./ + with: + repo_path: https://github.com/oracle/coherence-js-client + digest: 39166341bc31f75b663ff439dae36170fb3e99a9 + policy_file: check-github-actions + policy_purl: pkg:github.com/oracle/coherence-js-client@.* + output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'true' + reports_artifact_name: macaron-vulnerable-actions-fail-diagnosis + write_job_summary: 'true' + - name: Assert expected failure (github_actions_vulns for repo test) + if: ${{ always() }} + run: | + # Explicitly assert failure so regressions are visible in CI results. + if [ "${{ steps.verify_github_actions_vulns_repo_test.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." + exit 1 + fi + + test-detect-potential-injection: + name: How to detect potential script injection in GitHub Actions + runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar + + - name: Run Macaron (analyze github_actions_vulns for https://github.com/oracle/graalpython) + id: verify_github_actions_vulns_repo_test + # This integration target is intentionally vulnerable; failure is expected. 
+ continue-on-error: true + uses: ./ + with: + repo_path: https://github.com/oracle/graalpython + digest: f5f7e67823a699213ab06c86440da94ead672467 + policy_file: check-github-actions + policy_purl: pkg:github.com/oracle/graalpython@.* + output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'false' + reports_artifact_name: macaron-injection-actions-fail-diagnosis + write_job_summary: 'true' + - name: Assert expected failure (github_actions_vulns for repo test) + if: ${{ always() }} + run: | + # Explicitly assert failure so regressions are visible in CI results. + if [ "${{ steps.verify_github_actions_vulns_repo_test.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." + exit 1 + fi diff --git a/action.yaml b/action.yaml index 418f37705..10e5dc924 100644 --- a/action.yaml +++ b/action.yaml @@ -43,6 +43,18 @@ inputs: output_dir: description: The output destination path for Macaron. default: output + upload_reports: + description: Upload Macaron reports as a workflow artifact. + default: 'true' + reports_artifact_name: + description: Name of the uploaded reports artifact. + default: macaron-reports + reports_retention_days: + description: Retention period in days for uploaded reports. + default: '90' + write_job_summary: + description: Write a human-friendly summary to the workflow run page. + default: 'true' upload_attestation: description: 'Upload the generated VSA report. default : false' default: false @@ -51,12 +63,24 @@ inputs: default: ${{ github.workspace }} outputs: + html_report_path: + description: Path to the generated HTML analysis report (if available). + value: ${{ steps.run-macaron-analysis.outputs.html_report_path }} + report_dir: + description: Directory containing HTML/JSON reports. + value: ${{ steps.collect-reports.outputs.report_dir }} + db_path: + description: Path to the generated Macaron SQLite database. 
+ value: ${{ steps.collect-reports.outputs.db_path }} policy_report: description: Paths to the Macaron analysis report - value: ${{ steps.run-macaron-policy-verification.outputs.policy_report }} + value: ${{ steps.collect-reports.outputs.policy_report }} vsa_report: description: Verification Summary Attestation - value: ${{ steps.run-macaron-policy-verification.outputs.vsa_report }} + value: ${{ steps.collect-reports.outputs.vsa_report }} + vsa_generated: + description: Whether VSA was generated. + value: ${{ steps.collect-reports.outputs.vsa_generated }} runs: using: composite @@ -103,10 +127,85 @@ runs: POLICY_FILE: ${{ inputs.policy_file }} POLICY_PURL: ${{ inputs.policy_purl }} + - name: Collect report paths + id: collect-reports + if: ${{ always() }} + run: | + OUTPUT_DIR="${OUTPUT_DIR:-output}" + POLICY_REPORT="${OUTPUT_DIR}/policy_report.json" + VSA_REPORT="${OUTPUT_DIR}/vsa.intoto.jsonl" + DB_PATH="${OUTPUT_DIR}/macaron.db" + REPORT_DIR="${OUTPUT_DIR}/reports" + + if [ -f "${VSA_REPORT}" ]; then + VSA_VALUE="${VSA_REPORT}" + VSA_GENERATED=true + else + VSA_VALUE="VSA Not Generated." 
+ VSA_GENERATED=false + fi + + echo "report_dir=${REPORT_DIR}" >> "${GITHUB_OUTPUT}" + echo "db_path=${DB_PATH}" >> "${GITHUB_OUTPUT}" + echo "policy_report=${POLICY_REPORT}" >> "${GITHUB_OUTPUT}" + echo "vsa_report=${VSA_VALUE}" >> "${GITHUB_OUTPUT}" + echo "vsa_generated=${VSA_GENERATED}" >> "${GITHUB_OUTPUT}" + + { + echo "reports_path<> "${GITHUB_OUTPUT}" + shell: bash + env: + OUTPUT_DIR: ${{ inputs.output_dir }} + REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} + + - name: Upload Macaron Reports + id: upload-macaron-reports + if: ${{ always() && inputs.upload_reports == 'true' }} + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: ${{ inputs.reports_artifact_name }} + path: ${{ steps.collect-reports.outputs.reports_path }} + if-no-files-found: warn + retention-days: ${{ inputs.reports_retention_days }} + + - name: Summarize Macaron Results + if: ${{ always() && inputs.write_job_summary == 'true' }} + run: | + bash "$GITHUB_ACTION_PATH/scripts/actions/write_job_summary.sh" + shell: bash + env: + OUTPUT_DIR: ${{ inputs.output_dir }} + VSA_GENERATED: ${{ steps.collect-reports.outputs.vsa_generated }} + POLICY_REPORT: ${{ steps.collect-reports.outputs.policy_report }} + POLICY_FILE: ${{ inputs.policy_file }} + HTML_REPORT_PATH: ${{ steps.run-macaron-analysis.outputs.html_report_path }} + UPLOAD_REPORTS: ${{ inputs.upload_reports }} + REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} + REPORTS_ARTIFACT_URL: ${{ steps.upload-macaron-reports.outputs.artifact-url }} + + - name: Enforce VSA generation + if: ${{ always() && inputs.policy_file != '' }} + run: | + if [ "${VSA_GENERATED}" != "true" ]; then + echo "Policy verification failed. VSA was not generated at ${OUTPUT_DIR}/vsa.intoto.jsonl. Check uploaded reports." 
+ exit 1 + fi + shell: bash + env: + OUTPUT_DIR: ${{ inputs.output_dir }} + VSA_GENERATED: ${{ steps.collect-reports.outputs.vsa_generated }} + POLICY_FILE: ${{ inputs.policy_file }} + - name: Upload Attestation - if: ${{ inputs.upload_attestation == 'true' && steps.run-macaron-policy-verification.outputs.vsa_report != 'VSA Not Generated.' }} + if: ${{ inputs.upload_attestation == 'true' && steps.collect-reports.outputs.vsa_generated == 'true' }} uses: actions/attest@daf44fb950173508f38bd2406030372c1d1162b1 #3.0.0 with: subject-path: ${{ inputs.subject_path }} predicate-type: https://slsa.dev/verification_summary/v1 - predicate-path: ${{ steps.run-macaron-policy-verification.outputs.vsa_report }} + predicate-path: ${{ steps.collect-reports.outputs.vsa_report }} diff --git a/docs/source/pages/macaron_action.rst b/docs/source/pages/macaron_action.rst index 55ce483ae..8deb62842 100644 --- a/docs/source/pages/macaron_action.rst +++ b/docs/source/pages/macaron_action.rst @@ -24,16 +24,18 @@ When you use this action, you can reference it directly in your workflow. For a repo_path: 'https://github.com/example/project' policy_file: check-github-actions policy_purl: 'pkg:github.com/example/project' - output_dir: 'macaron-output' + reports_retention_days: 90 -If you upload the results like in this `workflow `_ check this :ref:`documentation ` to see how to read and understand them. +By default, the action posts a human-friendly results summary to the GitHub Actions run page (job summary). If you upload the results like in this `workflow `_, check this :ref:`documentation ` to see how to read and understand them. Example: policy verification only ---------------------------------- To run only the policy verification step (when you already have an output -database), call the action with ``policy_file`` and set ``output_dir`` to the -directory containing ``macaron.db``: +database), call the action with ``policy_file``. 
If the previous analysis step +used the default output path, you can omit ``output_dir`` here. If you set a +custom ``output_dir`` in the previous step, use the same value here so policy +verification reads the matching ``macaron.db``. .. code-block:: yaml @@ -41,7 +43,6 @@ directory containing ``macaron.db``: uses: oracle/macaron@fda4dda04aa7228fcaba162804891806cf5a1375 # v0.22.0 with: policy_file: policy.dl - output_dir: macaron-output Inputs ------ @@ -101,6 +102,18 @@ options. Key inputs are listed below (see ``action.yaml`` for the full list): * - ``output_dir`` - Directory where Macaron writes results (database, reports, artifacts). - ``output`` + * - ``upload_reports`` + - When ``true``, upload generated Macaron reports as a workflow artifact. + - ``true`` + * - ``reports_artifact_name`` + - Name of the uploaded reports artifact. + - ``macaron-reports`` + * - ``reports_retention_days`` + - Retention period in days for uploaded reports artifacts. + - ``90`` + * - ``write_job_summary`` + - When ``true``, write a human-friendly summary to the workflow run page. + - ``true`` * - ``upload_attestation`` - When ``true``, the action will attempt to upload a generated verification attestation (VSA) after policy verification. The attestation will be available @@ -114,8 +127,9 @@ options. Key inputs are listed below (see ``action.yaml`` for the full list): Outputs ------- -The composite action exposes the following outputs (set by the -``run_macaron_policy_verification.sh`` script when applicable): +The composite action exposes the following outputs (set by the action steps, +primarily ``Collect report paths``, with some values populated only when +analysis/policy verification generated them): .. list-table:: :header-rows: 1 @@ -123,6 +137,12 @@ The composite action exposes the following outputs (set by the * - Output - Description + * - ``html_report_path`` + - Path to the generated HTML analysis report (when available). 
+ * - ``report_dir`` + - Directory containing generated HTML/JSON reports. + * - ``db_path`` + - Path to the generated Macaron SQLite database (typically ``/macaron.db``). * - ``policy_report`` - Path to the generated policy report JSON file produced by ``macaron verify-policy``. This file contains the policy evaluation @@ -133,6 +153,8 @@ The composite action exposes the following outputs (set by the during verification, the action emits the string ``"VSA Not Generated."`` instead of a path. The attestation will be available under the ``Actions/management`` tab. + * - ``vsa_generated`` + - ``true`` when a VSA was generated; otherwise ``false``. Default Policies ---------------- @@ -172,7 +194,7 @@ How the action works which assembles the ``macaron analyze`` command from the inputs and runs it. Results are written into ``output_dir``. -3. ``Run Macaron Policy Verification``: if a policy file or PURL is supplied, +3. ``Run Macaron Policy Verification``: if ``policy_file`` is supplied, the corresponding script runs ``macaron verify-policy`` against the - analysis database and writes ``policy_report`` and ``vsa_report`` to - ``$GITHUB_OUTPUT`` when produced. + analysis database (using ``policy_purl`` when provided) and writes + policy-related outputs when produced. diff --git a/golang/cmd/bashparser/bashparser.go b/golang/cmd/bashparser/bashparser.go index 50cc6fec2..530bed89e 100644 --- a/golang/cmd/bashparser/bashparser.go +++ b/golang/cmd/bashparser/bashparser.go @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ package main @@ -30,13 +30,18 @@ func main() { input := flag.String("input", "", "The bash script content to be parsed. 
Input is prioritized over file option.") out_path := flag.String("output", "", "The output file path to store the JSON content.") raw := flag.Bool("raw", false, "Return raw parse-tree") + rawGHAMap := flag.Bool("raw-gha-map", false, "Return raw parse-tree with GitHub expression mapping") flag.Parse() var json_content string var parse_err error if len(*input) > 0 { // Read the bash script from command line argument. - json_content, parse_err = bashparser.Parse(*input, *raw) + if *rawGHAMap { + json_content, parse_err = bashparser.ParseRawWithGitHubExprMap(*input) + } else { + json_content, parse_err = bashparser.Parse(*input, *raw) + } } else if len(*file_path) <= 0 { fmt.Fprintln(os.Stderr, "Missing bash script input or file path.") flag.PrintDefaults() @@ -48,7 +53,11 @@ func main() { fmt.Fprintln(os.Stderr, read_err.Error()) os.Exit(1) } - json_content, parse_err = bashparser.Parse(string(data), *raw) + if *rawGHAMap { + json_content, parse_err = bashparser.ParseRawWithGitHubExprMap(string(data)) + } else { + json_content, parse_err = bashparser.Parse(string(data), *raw) + } } if parse_err != nil { diff --git a/golang/internal/bashparser/bashparser.go b/golang/internal/bashparser/bashparser.go index b88e43a6e..fdfc63c2f 100644 --- a/golang/internal/bashparser/bashparser.go +++ b/golang/internal/bashparser/bashparser.go @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ // Package bashparser parses the bash scripts and provides parsed objects in JSON. @@ -7,6 +7,7 @@ package bashparser import ( "bytes" "encoding/json" + "fmt" "regexp" "strings" @@ -19,22 +20,63 @@ type CMDResult struct { Commands [][]string `json:"commands"` } -// ParseCommands parses the bash script to find bash commands. 
-// It returns the parsed commands in JSON format. -func ParseCommands(data string) (string, error) { +// RawWithGHAMapResult is used to export the raw bash AST with a GitHub-expression mapping. +type RawWithGHAMapResult struct { + AST any `json:"ast"` + GHAExprMap map[string]string `json:"gha_expr_map"` +} + +func preprocessGitHubActionsExpr(data string) (string, error) { // Replace GitHub Actions's expressions with ``$MACARON_UNKNOWN``` variable because the bash parser // doesn't recognize such expressions. For example: ``${{ foo }}`` will be replaced by ``$MACARON_UNKNOWN``. // Note that we don't use greedy matching, so if we have `${{ ${{ foo }} }}`, it will not be replaced by // `$MACARON_UNKNOWN`. // See: https://docs.github.com/en/actions/learn-github-actions/expressions. - var re, reg_error = regexp.Compile(`\$\{\{.*?\}\}`) + re, reg_error := regexp.Compile(`\$\{\{.*?\}\}`) if reg_error != nil { return "", reg_error } - // We replace the GH Actions variables with "$MACARON_UNKNOWN". - data = string(re.ReplaceAll([]byte(data), []byte("$$MACARON_UNKNOWN"))) - data_str := strings.NewReader(data) + return string(re.ReplaceAll([]byte(data), []byte("$$MACARON_UNKNOWN"))), nil +} + +func preprocessGitHubActionsExprWithMap(data string) (string, map[string]string, error) { + // Replace GitHub Actions expressions with unique bash-safe placeholders and return + // a mapping from placeholder variable names to the original expression body. + // + // Example: + // input: echo "${{ github.head_ref }}" + // output: echo "$MACARON_GHA_0001", {"MACARON_GHA_0001": "github.head_ref"} + // + // This preserves expression identity for downstream analysis while keeping the + // transformed script parseable by the bash parser. 
+ re, reg_error := regexp.Compile(`\$\{\{.*?\}\}`) + if reg_error != nil { + return "", nil, reg_error + } + + index := 0 + ghaMap := make(map[string]string) + processed := re.ReplaceAllStringFunc(data, func(match string) string { + index += 1 + key := fmt.Sprintf("MACARON_GHA_%04d", index) + expr := strings.TrimSpace(strings.TrimSuffix(strings.TrimPrefix(match, "${{"), "}}")) + ghaMap[key] = expr + return "$" + key + }) + + return processed, ghaMap, nil +} + +// ParseCommands parses the bash script to find bash commands. +// It returns the parsed commands in JSON format. +func ParseCommands(data string) (string, error) { + processed, preprocessErr := preprocessGitHubActionsExpr(data) + if preprocessErr != nil { + return "", preprocessErr + } + + data_str := strings.NewReader(processed) data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") if parse_err != nil { return "", parse_err @@ -71,19 +113,12 @@ func ParseCommands(data string) (string, error) { } func ParseRaw(data string) (string, error) { - // Replace GitHub Actions's expressions with ``$MACARON_UNKNOWN``` variable because the bash parser - // doesn't recognize such expressions. For example: ``${{ foo }}`` will be replaced by ``$MACARON_UNKNOWN``. - // Note that we don't use greedy matching, so if we have `${{ ${{ foo }} }}`, it will not be replaced by - // `$MACARON_UNKNOWN`. - // See: https://docs.github.com/en/actions/learn-github-actions/expressions. - var re, reg_error = regexp.Compile(`\$\{\{.*?\}\}`) - if reg_error != nil { - return "", reg_error + processed, preprocessErr := preprocessGitHubActionsExpr(data) + if preprocessErr != nil { + return "", preprocessErr } - // We replace the GH Actions variables with "$MACARON_UNKNOWN". 
- data = string(re.ReplaceAll([]byte(data), []byte("$$MACARON_UNKNOWN"))) - data_str := strings.NewReader(data) + data_str := strings.NewReader(processed) data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") if parse_err != nil { return "", parse_err @@ -98,6 +133,41 @@ func ParseRaw(data string) (string, error) { return b.String(), nil } +// ParseRawWithGitHubExprMap parses raw bash AST and returns it with a GitHub-expression placeholder mapping. +func ParseRawWithGitHubExprMap(data string) (string, error) { + processed, ghaMap, preprocessErr := preprocessGitHubActionsExprWithMap(data) + if preprocessErr != nil { + return "", preprocessErr + } + + data_str := strings.NewReader(processed) + data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") + if parse_err != nil { + return "", parse_err + } + + b := new(strings.Builder) + encode_err := typedjson.Encode(b, data_parsed) + if encode_err != nil { + return "", encode_err + } + + var astObj any + if unmarshalErr := json.Unmarshal([]byte(b.String()), &astObj); unmarshalErr != nil { + return "", unmarshalErr + } + + result := RawWithGHAMapResult{ + AST: astObj, + GHAExprMap: ghaMap, + } + resultBytes, marshalErr := json.MarshalIndent(result, "", " ") + if marshalErr != nil { + return "", marshalErr + } + return string(resultBytes), nil +} + func Parse(data string, raw bool) (string, error) { if raw { return ParseRaw(data) diff --git a/golang/internal/bashparser/bashparser_test.go b/golang/internal/bashparser/bashparser_test.go index 3825f459b..abf02055a 100644 --- a/golang/internal/bashparser/bashparser_test.go +++ b/golang/internal/bashparser/bashparser_test.go @@ -6,6 +6,7 @@ package bashparser import ( "encoding/json" "os" + "strings" "testing" ) @@ -32,3 +33,50 @@ func Test_parse_valid_bash_script(t *testing.T) { t.Errorf("Cannot unmarshal the returned JSON content from parsing %s: %v.", json_content, err) } } + +func Test_parse_raw_with_gha_expr_map(t *testing.T) { + input := `echo "${{ 
github.head_ref }}" && echo "${{ needs.prepare.outputs.fullVersion }}"` + json_content, parse_err := ParseRawWithGitHubExprMap(input) + if parse_err != nil || json_content == "" { + t.Fatalf("expected successful parse with mapping, got error: %v", parse_err) + } + + var result map[string]any + if err := json.Unmarshal([]byte(json_content), &result); err != nil { + t.Fatalf("cannot unmarshal parser output: %v", err) + } + + ast, astOK := result["ast"] + if !astOK || ast == nil { + t.Fatalf("expected non-empty ast field") + } + + mapRaw, mapOK := result["gha_expr_map"] + if !mapOK { + t.Fatalf("expected gha_expr_map field") + } + ghaMap, ok := mapRaw.(map[string]any) + if !ok { + t.Fatalf("expected gha_expr_map to be an object") + } + if len(ghaMap) != 2 { + t.Fatalf("expected 2 mapped expressions, got %d", len(ghaMap)) + } +} + +func Test_preprocess_github_actions_expr_with_map_replaces_with_single_dollar_var(t *testing.T) { + input := `echo "${{ github.head_ref }}"` + processed, ghaMap, err := preprocessGitHubActionsExprWithMap(input) + if err != nil { + t.Fatalf("unexpected preprocess error: %v", err) + } + if strings.Contains(processed, "$$MACARON_GHA_") { + t.Fatalf("expected single-dollar placeholder, got %q", processed) + } + if !strings.Contains(processed, "$MACARON_GHA_0001") { + t.Fatalf("expected placeholder var in processed script, got %q", processed) + } + if ghaMap["MACARON_GHA_0001"] != "github.head_ref" { + t.Fatalf("unexpected gha mapping: %#v", ghaMap) + } +} diff --git a/pyproject.toml b/pyproject.toml index ede72bdb5..67794b851 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -224,6 +224,9 @@ load-plugins = [ "pylint.extensions.set_membership", "pylint.extensions.typing", ] +# Disable unsubscriptable-object because Pylint has false positives and this check +# overlaps with mypy's checks. 
Enable the check when the related issue is resolved: +# https://github.com/pylint-dev/pylint/issues/9549 disable = [ "fixme", "line-too-long", # Replaced by Flake8 Bugbear B950 check. @@ -242,6 +245,7 @@ disable = [ "too-many-return-statements", "too-many-statements", "duplicate-code", + "unsubscriptable-object", ] [tool.pylint.MISCELLANEOUS] diff --git a/scripts/actions/run_macaron_analysis.sh b/scripts/actions/run_macaron_analysis.sh index 34305479c..ccde3e646 100644 --- a/scripts/actions/run_macaron_analysis.sh +++ b/scripts/actions/run_macaron_analysis.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. set -euo pipefail @@ -54,4 +54,28 @@ if [ -n "${PROVENANCE_EXPECTATION:-}" ]; then fi echo "Executing: $CMD" -eval "$CMD" + +output_file="$(mktemp)" +set +e +eval "$CMD" 2>&1 | tee "$output_file" +# Capture analyze command's exit code from the pipeline (index 0), then restore fail-fast mode. +status=${PIPESTATUS[0]} +set -e + +if [ "${status}" -ne 0 ]; then + rm -f "$output_file" + exit "${status}" +fi + +if [ -n "${GITHUB_OUTPUT:-}" ]; then + html_report_path="$( + sed -n 's/^[[:space:]]*HTML[[:space:]]\+Report[[:space:]]\+//p' "$output_file" \ + | sed 's/[[:space:]]*$//' \ + | tail -n 1 + )" + if [ -n "$html_report_path" ]; then + echo "html_report_path=${html_report_path}" >> "$GITHUB_OUTPUT" + fi +fi + +rm -f "$output_file" diff --git a/scripts/actions/setup_macaron.sh b/scripts/actions/setup_macaron.sh index a002bb534..cd519253c 100644 --- a/scripts/actions/setup_macaron.sh +++ b/scripts/actions/setup_macaron.sh @@ -1,12 +1,23 @@ #!/usr/bin/env bash -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. 
All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. set -euo pipefail MACARON_DIR="${RUNNER_TEMP:-/tmp}/macaron" mkdir -p "$MACARON_DIR" +# If a test image tag is explicitly provided (for reusable workflow testing), +# use the local run script from this checkout and preserve the provided tag. +if [ -n "${MACARON_IMAGE_TAG:-}" ]; then + SCRIPT_NAME="run_macaron.sh" + cp "$GITHUB_ACTION_PATH/scripts/release_scripts/run_macaron.sh" "$MACARON_DIR/$SCRIPT_NAME" + chmod +x "$MACARON_DIR/$SCRIPT_NAME" + echo "MACARON=$MACARON_DIR/$SCRIPT_NAME" >> "$GITHUB_ENV" + echo "MACARON_IMAGE_TAG=${MACARON_IMAGE_TAG}" >> "$GITHUB_ENV" + exit 0 +fi + ACTION_DIR="${RUNNER_TEMP:-/tmp}/macaron-action" rm -rf "$ACTION_DIR" mkdir -p "$ACTION_DIR" diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py new file mode 100644 index 000000000..bec011787 --- /dev/null +++ b/scripts/actions/write_job_summary.py @@ -0,0 +1,545 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""Generate GitHub Actions job summary content for Macaron action runs.""" + +from __future__ import annotations + +import json +import os +import re +import sqlite3 +from pathlib import Path +from urllib.parse import urlsplit + +CHECK_RESULT_DEFAULT_COLUMNS = [ + "component_id", + "check_id", + "passed", +] + + +def _env(name: str, default: str = "") -> str: + return os.environ.get(name, default) + + +def _append_line(summary_path: Path, line: str = "") -> None: + with summary_path.open("a", encoding="utf-8") as summary: + summary.write(f"{line}\n") + + +def _resolve_policy_source(policy_input: str) -> tuple[Path | None, str]: + """Resolve a policy input to either a local file or a predefined policy template path.""" + if not policy_input: + return None, "" + + candidate = Path(policy_input) + if candidate.is_file(): + return candidate, "file" + + action_path = _env("GITHUB_ACTION_PATH", "") + if action_path: + template_path = Path( + os.path.join( + action_path, + "src", + "macaron", + "resources", + "policies", + "datalog", + f"{policy_input}.dl.template", + ) + ) + if template_path.is_file(): + return template_path, "predefined" + + return None, "unresolved" + + +def _resolve_existing_policy_sql(policy_name: str) -> Path | None: + """Resolve SQL diagnostics query for a predefined policy name.""" + action_path = _env("GITHUB_ACTION_PATH", "") + if not action_path: + return None + sql_path = Path(os.path.join(action_path, "src", "macaron", "resources", "policies", "sql", f"{policy_name}.sql")) + return sql_path if sql_path.is_file() else None + + +def _write_header( + summary_path: Path, + db_path: Path, + policy_report: str, + policy_file: str, + html_report: str, + policy_provided: bool, +) -> None: + upload_reports = _env("UPLOAD_REPORTS", "true").lower() == "true" + output_dir = _env("OUTPUT_DIR", "output") + reports_artifact_name = _env("REPORTS_ARTIFACT_NAME", "macaron-reports") + run_url = ( + f"{_env('GITHUB_SERVER_URL', 'https://github.com')}/" + 
f"{_env('GITHUB_REPOSITORY')}/actions/runs/{_env('GITHUB_RUN_ID')}" + ) + reports_artifact_url = _env("REPORTS_ARTIFACT_URL", run_url) + vsa_generated = _env("VSA_GENERATED", "").lower() + if vsa_generated in {"true", "false"}: + policy_succeeded = vsa_generated == "true" + else: + vsa_path = _env("VSA_PATH", f"{output_dir}/vsa.intoto.jsonl") + policy_succeeded = bool(vsa_path) and Path(vsa_path).is_file() + + _append_line(summary_path, "## Macaron Analysis Results") + _append_line(summary_path) + if upload_reports: + _append_line(summary_path, "Download reports from this artifact link:") + _append_line(summary_path, f"- [`{reports_artifact_name}`]({reports_artifact_url})") + _append_line(summary_path) + _append_line(summary_path, "Generated files:") + if html_report: + _append_line(summary_path, f"- HTML report: `{html_report}`") + _append_line(summary_path, f"- Database: `{db_path}`") + if policy_provided: + _append_line(summary_path, f"- Policy report: `{policy_report}`") + _append_line(summary_path) + + if policy_provided: + _append_line(summary_path, "Policy:") + if policy_file: + _append_line(summary_path, f"- Policy file: `{policy_file}`") + if policy_succeeded: + _append_line(summary_path, "- Policy status: :white_check_mark: Policy verification succeeded.") + else: + _append_line(summary_path, "- Policy status: :x: Policy verification failed.") + else: + _append_line(summary_path, "Policy:") + _append_line(summary_path, "- No policy was provided.") + _append_line(summary_path) + + +def _parse_policy_checks(policy_file: Path) -> tuple[list[str], list[str]]: + policy_text = policy_file.read_text(encoding="utf-8") + check_relations = sorted(set(re.findall(r"\b(check_[A-Za-z0-9_]+)\s*\(", policy_text))) + policy_check_ids = sorted(set(re.findall(r'"(mcn_[a-zA-Z0-9_]+)"', policy_text))) + return check_relations, policy_check_ids + + +def _resolve_existing_table(conn: sqlite3.Connection, table_name: str) -> str | None: + """Resolve a logical table name to an 
existing SQLite table name.""" + candidates = [table_name] + if not table_name.startswith("_"): + candidates.append(f"_{table_name}") + + cur = conn.cursor() + for candidate in candidates: + cur.execute("SELECT 1 FROM sqlite_master WHERE type IN ('table', 'view') AND name = ? LIMIT 1", (candidate,)) + if cur.fetchone(): + return candidate + return None + + +def _get_existing_columns(conn: sqlite3.Connection, table_name: str) -> list[str]: + cur = conn.cursor() + cur.execute(f"PRAGMA table_info({table_name})") + return [row[1] for row in cur.fetchall()] + + +def _query_selected_columns( + conn: sqlite3.Connection, + table_name: str, + desired_columns: list[str], + where_clause: str = "", + params: tuple[object, ...] = (), +) -> tuple[list[str], list[tuple]]: + available = _get_existing_columns(conn, table_name) + selected = [c for c in desired_columns if c in available] + if not selected: + return [], [] + + sql = f"SELECT {', '.join(selected)} FROM {table_name}" + if where_clause: + sql = f"{sql} WHERE {where_clause}" + sql = f"{sql} ORDER BY 1" + cur = conn.cursor() + cur.execute(sql, params) + return selected, cur.fetchall() + + +def _query_sql(conn: sqlite3.Connection, sql_query: str) -> tuple[list[str], list[tuple]]: + # Python's sqlite cursor.execute() can fail when the SQL begins with line comments. + # Strip leading SQL line comments while preserving the query body. 
+ sanitized_lines = [] + for line in sql_query.splitlines(): + if line.lstrip().startswith("--"): + continue + sanitized_lines.append(line) + sanitized_query = "\n".join(sanitized_lines).strip() + if not sanitized_query: + return [], [] + + cur = conn.cursor() + cur.execute(sanitized_query) + rows = cur.fetchall() + columns = [col[0] for col in (cur.description or [])] + return columns, rows + + +def _write_markdown_table(summary_path: Path, columns: list[str], rows: list[tuple]) -> bool: + if not columns or not rows: + return False + + _append_line(summary_path, f"| {' | '.join(columns)} |") + _append_line(summary_path, f"|{'|'.join(['---'] * len(columns))}|") + for row in rows: + values = [_format_table_cell(value) for value in row] + _append_line(summary_path, f"| {' | '.join(values)} |") + return True + + +def _format_table_cell(value: object) -> str: + text = str(value) + parsed_list = _parse_list_cell(text) + if parsed_list is not None: + items = [_format_list_item(item) for item in parsed_list] + return "
".join(f"- {item}" for item in items) if items else "`[]`" + + if text.startswith(("http://", "https://")): + parsed = urlsplit(text) + segments = [part for part in parsed.path.split("/") if part] + label = segments[-1] if segments else parsed.netloc + return f"[`{label}`]({text})" + return f"`{_sanitize_for_markdown_table_code(text)}`" + + +def _parse_list_cell(text: str) -> list[object] | None: + stripped = text.strip() + if not (stripped.startswith("[") and stripped.endswith("]")): + return None + try: + loaded = json.loads(stripped) + except json.JSONDecodeError: + return None + return loaded if isinstance(loaded, list) else None + + +def _format_list_item(value: object) -> str: + text = str(value) + if text.startswith(("http://", "https://")): + parsed = urlsplit(text) + segments = [part for part in parsed.path.split("/") if part] + label = segments[-1] if segments else parsed.netloc + return f"[`{label}`]({text})" + return f"`{_sanitize_for_markdown_table_code(text)}`" + + +def _sanitize_for_markdown_table_code(text: str) -> str: + """Sanitize inline-code content for markdown table cells.""" + return text.replace("`", "'").replace("|", "\\|").replace("\n", " ") + + +def _priority_label(priority: object) -> str: + """Map numeric priority to a concise severity-like label.""" + try: + value = int(priority) + except (TypeError, ValueError): + return str(priority) + + if value >= 90: + return "critical" + if value >= 70: + return "high" + if value >= 50: + return "medium" + return "low" + + +def _gha_group_label(group: str) -> str: + # finding_group is the top-level section key; finding_type is rendered per-row as the subtype. 
+ if group == "third_party_action_risk": + return "Third-party action risks" + if group == "workflow_security_issue": + return "Workflow security issues" + return group + + +def _extract_finding_summary(message: object) -> str: + """Extract a compact summary from a finding message.""" + text = str(message).strip() + if not text: + return "" + + # Expected format: "Summary: ... Details: ... Recommendation: ..." + match = re.search(r"Summary:\s*(.*?)(?:\s+Details:\s*|\s+Recommendation:\s*|$)", text, flags=re.IGNORECASE) + if match: + return match.group(1).strip() + + return text + + +def write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], rows: list[tuple]) -> bool: + """Write compact GitHub Actions vulnerability diagnostics to the job summary. + + Parameters + ---------- + summary_path : Path + Path to the GitHub job summary markdown file. + columns : list[str] + Ordered column names from the SQL diagnostics query result. + rows : list[tuple] + Row values matching ``columns`` order. + + Returns + ------- + bool + ``True`` if content was rendered; ``False`` when inputs are empty. + """ + if not columns or not rows: + return False + + col_index = {name: idx for idx, name in enumerate(columns)} + required = [ + "finding_priority", + "finding_type", + "action_name", + "action_ref", + "vulnerable_workflow", + ] + if any(name not in col_index for name in required): + return _write_markdown_table(summary_path, columns, rows) + + sorted_rows = sorted( + rows, + key=lambda row: ( + int(row[col_index["finding_priority"]]) if str(row[col_index["finding_priority"]]).isdigit() else 0 + ), + reverse=True, + ) + display_rows = sorted_rows[:10] + group_idx = col_index.get("finding_group") + + _append_line( + summary_path, + "_Showing top 10 findings by priority. 
Expand details below for full diagnostics._", + ) + preferred_groups = ["workflow_security_issue", "third_party_action_risk"] + groups_in_rows: list[str] = [] + if group_idx is not None: + discovered_groups = [str(row[group_idx]) for row in sorted_rows] + groups_in_rows.extend([group for group in preferred_groups if group in discovered_groups]) + groups_in_rows.extend([group for group in discovered_groups if group not in groups_in_rows]) + else: + groups_in_rows = ["all_findings"] + + for group in groups_in_rows: + if group_idx is None: + group_rows = display_rows + title = "Findings" + else: + group_rows = [row for row in sorted_rows if str(row[group_idx]) == group][:10] + if not group_rows: + continue + title = _gha_group_label(group) + _append_line(summary_path) + _append_line(summary_path, f"#### {title}") + _append_line(summary_path) + if group == "workflow_security_issue": + _append_line(summary_path, "| priority | type | summary | workflow |") + _append_line(summary_path, "|---|---|---|---|") + else: + _append_line(summary_path, "| priority | type | action | version | workflow |") + _append_line(summary_path, "|---|---|---|---|---|") + for row in group_rows: + priority_raw = row[col_index["finding_priority"]] + priority = f"`{_priority_label(priority_raw)} ({priority_raw})`" + finding_type = _format_table_cell(row[col_index["finding_type"]]) + finding_summary = _format_table_cell( + _extract_finding_summary(row[col_index["finding_message"]]) if "finding_message" in col_index else "" + ) + action_name = _format_table_cell(row[col_index["action_name"]]) + action_version = _format_table_cell(row[col_index["action_ref"]]) + workflow = _format_table_cell(row[col_index["vulnerable_workflow"]]) + if group == "workflow_security_issue": + _append_line( + summary_path, + f"| {priority} | {finding_type} | {finding_summary} | {workflow} |", + ) + else: + _append_line( + summary_path, + f"| {priority} | {finding_type} | {action_name} | {action_version} | {workflow} |", + 
    _append_line(summary_path)
    _append_line(summary_path, "<details>")
    _append_line(summary_path, "<summary>Detailed findings</summary>")
    _append_line(summary_path)
            _append_line(summary_path)
            row_counter += 1
    _append_line(summary_path, "</details>")
    return True
def _write_existing_policy_failure_diagnostics(
    summary_path: Path, db_path: Path, policy_name: str, policy_file: Path
) -> None:
    """Write failure diagnostics for a predefined (shipped) policy to the job summary.

    Parameters
    ----------
    summary_path : Path
        Path to the GitHub job summary markdown file.
    db_path : Path
        Path to the Macaron SQLite database.
    policy_name : str
        Name of the predefined policy; used to locate its SQL diagnostics query.
    policy_file : Path
        Path to the resolved policy template file.
    """
    check_relations, policy_check_ids = _parse_policy_checks(policy_file)
    has_details = False

    _append_line(summary_path)
    _append_line(summary_path, f"### Policy Failure Diagnostics ({policy_name})")
    _write_policy_check_lists(summary_path, policy_check_ids)
    if check_relations or policy_check_ids:
        has_details = True

    sql_path = _resolve_existing_policy_sql(policy_name)
    if sql_path:
        sql_query = sql_path.read_text(encoding="utf-8")
        with sqlite3.connect(db_path) as conn:
            cols, rows = _query_sql(conn, sql_query)
            if cols and rows:
                _append_line(summary_path)
                # Fixed heading text: the original f-string had no placeholders (lint F541).
                _append_line(summary_path, "#### Results")
                if policy_name == "check-github-actions":
                    # GHA diagnostics get a compact, grouped rendering instead of a raw table.
                    rendered = write_compact_gha_vuln_diagnostics(summary_path, cols, rows)
                else:
                    rendered = _write_markdown_table(summary_path, cols, rows)
                if rendered:
                    has_details = True

    if not has_details:
        _append_line(summary_path, "- Additional check-level details are unavailable for this failure.")
#!/usr/bin/env bash

# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

set -euo pipefail

# Resolve defaults for every input consumed by write_job_summary.py and EXPORT
# them: plain shell assignments are not inherited by child processes, so
# without `export` the defaults computed here would be silently dropped and
# the Python script would fall back to its own defaults.
export OUTPUT_DIR="${OUTPUT_DIR:-output}"
export DB_PATH="${DB_PATH:-${OUTPUT_DIR}/macaron.db}"
export POLICY_REPORT="${POLICY_REPORT:-${OUTPUT_DIR}/policy_report.json}"
export POLICY_FILE="${POLICY_FILE:-}"
export HTML_REPORT_PATH="${HTML_REPORT_PATH:-}"
export VSA_PATH="${VSA_PATH:-${OUTPUT_DIR}/vsa.intoto.jsonl}"
export UPLOAD_REPORTS="${UPLOAD_REPORTS:-true}"
export REPORTS_ARTIFACT_NAME="${REPORTS_ARTIFACT_NAME:-macaron-reports}"
# Link back to the current workflow run when no artifact URL was provided.
RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
export REPORTS_ARTIFACT_URL="${REPORTS_ARTIFACT_URL:-${RUN_URL}}"

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
python "${SCRIPT_DIR}/write_job_summary.py"
    def with_gha_expr_map(self, gha_expr_map: dict[str, str]) -> BashScriptContext:
        """Return a modified bash script context with GitHub-expression placeholder mappings.

        Parameters
        ----------
        gha_expr_map : dict[str, str]
            Mapping from parser placeholder variable names to original GitHub expression bodies.

        Returns
        -------
        BashScriptContext
            A context copy with updated GitHub-expression mapping metadata.
        """
        # Every other field is carried over unchanged; only the mapping metadata differs.
        # The mapping is stored as a tuple of sorted items so the stored value is
        # immutable and its ordering is deterministic regardless of dict insertion order.
        return BashScriptContext(
            self.outer_context,
            self.filesystem,
            self.env,
            self.func_decls,
            self.stdin_scope,
            self.stdin_loc,
            self.stdout_scope,
            self.stdout_loc,
            self.source_filepath,
            tuple(sorted(gha_expr_map.items())),
        )
# Expressions that expand to attacker-controllable data on pull-request events.
UNTRUSTED_PR_REFS = {
    "${{ github.event.pull_request.head.ref }}",
    "${{ github.head_ref }}",
    "${{ github.event.pull_request.head.sha }}",
    "${{ github.event.pull_request.head.repo.full_name }}",
}

# Finding priorities, highest first; used to order findings in reports.
PRIORITY_CRITICAL = 100
PRIORITY_HIGH = 80
PRIORITY_MEDIUM = 60
PRIORITY_LOW = 40
PRIORITY_MIN = 20


class PrioritizedIssue(TypedDict):
    """A workflow security finding with priority metadata."""

    issue: str
    priority: int


class WorkflowFinding(TypedDict):
    """Workflow-level security findings."""

    workflow_name: str
    issues: list[PrioritizedIssue]


def detect_github_actions_security_issues(nodes: NodeForest) -> list[WorkflowFinding]:
    """Detect security issues across GitHub Actions workflow nodes.

    Parameters
    ----------
    nodes : NodeForest
        Parsed workflow node forest used for traversing GitHub Actions workflow callgraphs.

    Returns
    -------
    list[WorkflowFinding]
        A list of workflow-level findings. Each item contains:
        - ``workflow_name``: workflow file path.
        - ``issues``: list of detected security issue messages with priorities.
    """
    all_findings: list[WorkflowFinding] = []
    for root in nodes.root_nodes:
        for node in traverse_bfs(root):
            if not isinstance(node, GitHubActionsWorkflowNode):
                continue
            workflow_finding = analyze_workflow(node, nodes=nodes)
            if workflow_finding:
                all_findings.append(workflow_finding)
    return all_findings
+ """ + findings: list[PrioritizedIssue] = [] + on_keys = _extract_on_keys(workflow_node.definition) + seen_jobs: set[str] = set() + workflow_permissions_defined = "permissions" in workflow_node.definition + has_job_without_permissions = False + + for node in core.traverse_bfs(workflow_node): + if isinstance(node, GitHubActionsWorkflowNode): + _append_workflow_level_findings(findings, on_keys, node.definition) + continue + + if isinstance(node, GitHubActionsNormalJobNode): + if node.job_id in seen_jobs: + continue + seen_jobs.add(node.job_id) + if "permissions" not in node.definition: + has_job_without_permissions = True + _append_job_level_findings(findings, node) + continue + + if isinstance(node, GitHubActionsActionStepNode): + _append_action_step_findings(findings, node, on_keys) + continue + + if isinstance(node, GitHubActionsRunStepNode): + _append_run_step_findings(findings, node, nodes) + continue + + if not workflow_permissions_defined and has_job_without_permissions: + _add_finding( + findings, + ( + "missing-permissions: No explicit workflow permissions defined, and one or more jobs also omit " + "permissions; defaults may be overly broad." + ), + PRIORITY_MEDIUM, + ) + + if "pull_request_target" in on_keys and _has_privileged_trigger_risk_combo(findings): + _add_finding( + findings, + ( + "privileged-trigger: Workflow uses `pull_request_target` with additional risky patterns; " + "treat this workflow as high risk and harden immediately." 
+ ), + PRIORITY_HIGH, + ) + + if findings: + findings_sorted = sorted(findings, key=lambda finding: (-finding["priority"], finding["issue"])) + return { + "workflow_name": os.path.relpath(workflow_node.context.ref.source_filepath, os.getcwd()), + "issues": findings_sorted, + } + + return None + + +def _extract_on_keys(workflow: Workflow) -> set[str]: + """Extract the set of event names from a workflow ``on`` section.""" + on_section = workflow.get("on") + if isinstance(on_section, dict): + return set(on_section.keys()) + if isinstance(on_section, list): + return set(on_section) + return {on_section} + + +def _append_workflow_level_findings(findings: list[PrioritizedIssue], on_keys: set[str], workflow: Workflow) -> None: + """Append workflow-level hardening findings.""" + if "permissions" not in workflow: + return + + permissions = workflow["permissions"] + if isinstance(permissions, str) and permissions.lower() == "write-all": + _add_finding(findings, "overbroad-permissions: Workflow uses `permissions: write-all`.", PRIORITY_HIGH) + if isinstance(permissions, dict) and "pull_request_target" in on_keys: + for scope, level in permissions.items(): + if isinstance(level, str) and "write" in level.lower(): + _add_finding( + findings, + f"overbroad-permissions: PR-triggered workflow requests `{scope}: {level}`.", + PRIORITY_HIGH, + ) + + +def _append_job_level_findings(findings: list[PrioritizedIssue], job_node: GitHubActionsNormalJobNode) -> None: + """Append findings derived from a single job node.""" + runs_on = job_node.definition.get("runs-on") + if runs_on and "self-hosted" in str(runs_on): + _add_finding( + findings, + f"self-hosted-runner: Job `{job_node.job_id}` runs on self-hosted runners; " + "ensure isolation and never run untrusted PR code there.", + PRIORITY_MEDIUM, + ) + + +def _append_action_step_findings( + findings: list[PrioritizedIssue], + action_node: GitHubActionsActionStepNode, + on_keys: set[str], +) -> None: + """Append findings derived from an 
action step node.""" + uses_name = action_node.uses_name + uses_version = action_node.uses_version + if uses_name and not uses_name.startswith("./") and uses_version and not is_commit_hash(uses_version): + step_line = _extract_action_step_line(action_node) + line_marker = f"[step-line={step_line}] " if step_line else "" + _add_finding( + findings, + f"unpinned-third-party-action: {line_marker}{uses_name}@{uses_version}", + PRIORITY_MIN, + ) + + if uses_name == "actions/checkout": + ref = _literal_value(action_node.with_parameters.get("ref")) + if ref in UNTRUSTED_PR_REFS and "pull_request" in on_keys: + _add_finding( + findings, + f"untrusted-fork-code: A checkout step uses untrusted fork code (`ref: {ref}`) on PR event.", + PRIORITY_CRITICAL, + ) + + if "pull_request_target" in on_keys and ref in UNTRUSTED_PR_REFS: + _add_finding( + findings, + f"pr-target-untrusted-checkout: Workflow uses pull_request_target and checks out PR-controlled ref `{ref}`.", + PRIORITY_CRITICAL, + ) + + +def _append_run_step_findings( + findings: list[PrioritizedIssue], run_step_node: GitHubActionsRunStepNode, nodes: NodeForest +) -> None: + """Append findings derived from a run step node.""" + # Traversing a run-step subgraph can reach semantically identical command nodes through + # multiple CFG/AST paths (for example nested/compound command structures). Track emitted + # injection findings by stable metadata to avoid duplicate reports for the same command line. + seen_injection_keys: set[tuple[int | None, str, str, str]] = set() + for node in core.traverse_bfs(run_step_node): + # Command-level injection checks rely on parsed call argument parts from single-command nodes. + if isinstance(node, bash.BashSingleCommandNode): + _append_injection_findings(findings, node, nodes, seen_injection_keys) + continue + + # Remote script execution risk is structural: downloader output piped into an executor. 
+ if isinstance(node, bash.BashPipeNode): + _append_remote_script_exec_findings(findings, node, run_step_node, nodes) + + +def _append_remote_script_exec_findings( + findings: list[PrioritizedIssue], + pipe_node: bash.BashPipeNode, + run_step_node: GitHubActionsRunStepNode, + nodes: NodeForest, +) -> None: + """Append remote-script-exec findings discovered from parsed bash pipe nodes.""" + if not _is_remote_script_exec_pipe(pipe_node): + return + + # Map the pipe's script-relative line to workflow source line so summary links jump to YAML. + script_line = pipe_node.definition["Pos"]["Line"] + workflow_line = _map_script_line_to_workflow_line(run_step_node, script_line) + if workflow_line is None: + workflow_line = _extract_run_step_line(run_step_node) + job_node = get_containing_github_job(pipe_node, nodes.parents) + issue_payload = { + "step_line": workflow_line, + "script_line": script_line, + "job": job_node.job_id if job_node else "", + "step": _extract_step_name(run_step_node), + "command": _extract_command_text(run_step_node, script_line), + } + _add_finding( + findings, + f"remote-script-exec: {json.dumps(issue_payload)}", + PRIORITY_HIGH, + ) + + +def _is_remote_script_exec_pipe(pipe_node: bash.BashPipeNode) -> bool: + """Return whether a pipe node matches downloader-to-executor behavior.""" + lhs_words = _extract_statement_words(pipe_node.lhs) + rhs_words = _extract_statement_words(pipe_node.rhs) + if not lhs_words or not rhs_words: + return False + + downloader_cmd = lhs_words[0] + if downloader_cmd not in {"curl", "wget"}: + return False + + return _is_executor_invocation(rhs_words) + + +def _extract_statement_words(statement_node: bash.BashStatementNode) -> list[str]: + """Extract normalized literal command words from a Bash statement when available.""" + cmd = statement_node.definition.get("Cmd") + if not is_call_expr(cmd): + return [] + return _extract_call_words(cmd) + + +def _extract_call_words(call_expr: CallExpr) -> list[str]: + """Extract literal 
word values from a call expression.""" + args = call_expr["Args"] + words: list[str] = [] + for arg in args: + parts = arg["Parts"] + word = "".join(part.get("Value", "") for part in parts if is_lit(part)).strip() + if not word: + return [] + words.append(word) + if not words: + return [] + + normalized = [os.path.basename(word).lower() if idx == 0 else word for idx, word in enumerate(words)] + return normalized + + +def _is_executor_invocation(words: list[str]) -> bool: + """Return whether extracted words represent shell/archive execution.""" + if not words: + return False + direct_executors = {"bash", "sh", "tar"} + wrapper_cmds = {"sudo", "env", "command"} + + command = words[0] + if command in direct_executors: + return True + if command in wrapper_cmds and len(words) > 1: + wrapped = os.path.basename(words[1]).lower() + return wrapped in direct_executors + return False + + +def _append_injection_findings( + findings: list[PrioritizedIssue], + bash_node: bash.BashSingleCommandNode, + nodes: NodeForest, + seen_injection_keys: set[tuple[int | None, str, str, str]] | None = None, +) -> None: + """Append potential injection findings discovered from parsed bash command nodes.""" + if not is_call_expr(bash_node.definition.get("Cmd")): + return + + call_exp = cast(CallExpr, bash_node.definition["Cmd"]) + for arg in call_exp.get("Args", []): + parts = arg.get("Parts") + step_node = get_containing_github_step(bash_node, nodes.parents) + script_line = _extract_script_line_from_parts(parts) + expanded_refs = _extract_expanded_github_refs(bash_node, step_node, script_line, parts) + if _arg_has_attacker_controlled_github_ref(parts) or _has_attacker_controlled_expanded_ref(expanded_refs): + job_node = get_containing_github_job(bash_node, nodes.parents) + workflow_line = _map_script_line_to_workflow_line(step_node, script_line) + if workflow_line is None: + workflow_line = _extract_run_step_line(step_node) + job_name = job_node.job_id if job_node else "" + step_name = 
_extract_step_name(step_node) + command_text = _extract_command_text(step_node, script_line) + dedupe_key = (workflow_line, job_name, step_name, command_text) + if seen_injection_keys is not None: + # Prevent duplicate findings when the same risky command is visited via + # different traversal paths in the run-step subgraph. + if dedupe_key in seen_injection_keys: + continue + seen_injection_keys.add(dedupe_key) + issue_payload = { + "step_line": workflow_line, + "script_line": script_line, + "job": job_name, + "step": step_name, + "command": command_text, + "expanded_refs": expanded_refs, + "parts": arg.get("Parts"), + } + _add_finding(findings, f"potential-injection: {json.dumps(issue_payload)}", PRIORITY_CRITICAL) + + +def _arg_has_attacker_controlled_github_ref(parts: object) -> bool: + """Return whether argument parts contain attacker-controlled GitHub context expansion. + + Parameters + ---------- + parts : object + Parsed argument ``Parts`` payload from the Bash call expression. + + Returns + ------- + bool + ``True`` when an attacker-controlled GitHub context reference is detected. + """ + if not isinstance(parts, list): + return False + + expansion = False + pr_head_ref = False + for part in parts: + if is_param_exp(part) and part.get("Param", {}).get("Value") == "github": + expansion = True + if is_lit(part) and part.get("Value") in { + ".event.pull_request.head.ref", + ".head_ref", + ".event.issue.body", + ".event.comment.body", + }: + pr_head_ref = True + if expansion and pr_head_ref: + return True + return False + + +def _has_attacker_controlled_expanded_ref(refs: list[str]) -> bool: + """Return whether extracted refs include attacker-controlled GitHub context values. + + Parameters + ---------- + refs : list[str] + Extracted GitHub expression references. + + Returns + ------- + bool + ``True`` if a known attacker-controlled ref is present. 
+ """ + attacker_controlled = { + "github.event.pull_request.head.ref", + "github.head_ref", + "github.event.issue.body", + "github.event.comment.body", + } + return any(ref in attacker_controlled for ref in refs) + + +def _extract_expanded_github_refs( + bash_node: bash.BashSingleCommandNode, + step_node: GitHubActionsRunStepNode | None, + script_line: int | None, + parts: object, +) -> list[str]: + """Extract normalized expanded GitHub refs from mapping with a line-text fallback. + + Parameters + ---------- + bash_node : bash.BashSingleCommandNode + The Bash command node used to resolve parser placeholder mappings. + step_node : GitHubActionsRunStepNode | None + The containing run step node, used for fallback extraction from raw run script text. + script_line : int | None + 1-based line number within the inlined run script for line-targeted fallback extraction. + parts : object + Parsed argument ``Parts`` payload from the Bash call expression. + + Returns + ------- + list[str] + Ordered list of normalized GitHub expression references. + """ + refs: list[str] = [] + placeholder_map = dict(bash_node.context.ref.gha_expr_map_items) + if isinstance(parts, list): + for part in parts: + if not is_param_exp(part): + continue + placeholder = part.get("Param", {}).get("Value") + if isinstance(placeholder, str): + mapped = placeholder_map.get(placeholder) + if mapped: + refs.extend(_extract_github_refs_from_expression(mapped)) + if refs: + return _deduplicate_preserve_order(refs) + + if step_node is None: + return [] + # Fallback: some complex shell constructs (for example command substitution in compound + # test/boolean commands) may not expose mapped placeholders on the current arg parts. + # In those cases, recover refs directly from the original run-script line text. 
+ run_script = step_node.definition["run"] + script_lines = run_script.splitlines() + if script_line is not None and 1 <= script_line <= len(script_lines): + line_text = script_lines[script_line - 1] + else: + line_text = run_script + + matches = re.findall(r"\$\{\{\s*(.*?)\s*\}\}", line_text) + fallback_refs: list[str] = [] + for expr in matches: + fallback_refs.extend(_extract_github_refs_from_expression(expr)) + return _deduplicate_preserve_order(fallback_refs) + + +def _extract_github_refs_from_expression(expression: str) -> list[str]: + """Extract github-context reference paths from a GitHub Actions expression body. + + Parameters + ---------- + expression : str + Expression text inside ``${{ ... }}``. + + Returns + ------- + list[str] + Matched GitHub reference paths (for example ``github.head_ref``). + """ + return re.findall(r"github(?:\.[A-Za-z0-9_-]+)+", expression) + + +def _deduplicate_preserve_order(values: list[str]) -> list[str]: + """Deduplicate string values while preserving insertion order. + + Parameters + ---------- + values : list[str] + Input values that may contain duplicates. + + Returns + ------- + list[str] + Values in original order with duplicates removed. 
+ """ + seen: set[str] = set() + result: list[str] = [] + for value in values: + if value in seen: + continue + seen.add(value) + result.append(value) + return result + + +def _extract_step_name(step_node: GitHubActionsRunStepNode | None) -> str: + """Extract a display name for a workflow run step.""" + if step_node is None: + return "" + step_name = step_node.definition.get("name") + if isinstance(step_name, str): + return step_name + step_id = step_node.definition.get("id") + if isinstance(step_id, str): + return step_id + return "" + + +def _extract_command_text(step_node: GitHubActionsRunStepNode | None, script_line: int | None) -> str: + """Extract a compact command snippet from the run script for display in diagnostics.""" + if step_node is None: + return "" + + run_script = step_node.definition["run"] + script_lines = run_script.splitlines() + if script_line and 1 <= script_line <= len(script_lines): + return script_lines[script_line - 1].strip() + + for line in script_lines: + if line.strip(): + return line.strip() + return "" + + +def _extract_run_step_line(step_node: GitHubActionsRunStepNode | None) -> int | None: + """Extract a 1-based workflow line number for a run step when metadata is available.""" + if step_node is None: + return None + + definition = step_node.definition + line_container = getattr(definition, "lc", None) + if line_container is None: + return _infer_run_step_line_from_source(step_node) + + line = getattr(line_container, "line", None) + if isinstance(line, int) and line >= 0: + # ruamel stores line numbers as 0-based. 
+ return line + 1 + + return _infer_run_step_line_from_source(step_node) + + +def _extract_action_step_line(step_node: GitHubActionsActionStepNode | None) -> int | None: + """Extract a 1-based workflow line number for an action step when metadata is available.""" + if step_node is None: + return None + + definition = step_node.definition + line_container = getattr(definition, "lc", None) + if line_container is None: + return _infer_action_step_line_from_source(step_node) + + line = getattr(line_container, "line", None) + if isinstance(line, int) and line >= 0: + # ruamel stores line numbers as 0-based. + return line + 1 + + return _infer_action_step_line_from_source(step_node) + + +def _infer_action_step_line_from_source(step_node: GitHubActionsActionStepNode) -> int | None: + """Infer an action-step line by matching the ``uses`` value in the workflow source.""" + workflow_path = step_node.context.ref.job_context.ref.workflow_context.ref.source_filepath + if not workflow_path or not os.path.isfile(workflow_path): + return None + + uses_name = step_node.uses_name + uses_version = step_node.uses_version + if not uses_name or not uses_version: + return None + + target_uses = f"{uses_name}@{uses_version}" + step_name = step_node.definition.get("name") + step_id = step_node.definition.get("id") + step_identifier = step_name if isinstance(step_name, str) else step_id if isinstance(step_id, str) else None + + try: + with open(workflow_path, encoding="utf-8") as workflow_file: + workflow_lines = workflow_file.readlines() + except OSError: + return None + + uses_key_re = re.compile(r"^\s*(?:-\s*)?uses\s*:\s*(.*)$") + candidate_lines: list[int] = [] + for index, line in enumerate(workflow_lines): + match = uses_key_re.match(line) + if not match: + continue + uses_value = match.group(1).strip().strip("\"'") + if uses_value == target_uses: + candidate_lines.append(index + 1) + + if not candidate_lines: + return None + if len(candidate_lines) == 1 or not step_identifier: + 
return candidate_lines[0] + + for candidate_line in candidate_lines: + for lookback_index in range(max(0, candidate_line - 8 - 1), candidate_line - 1): + lookback_line = workflow_lines[lookback_index].strip() + if lookback_line in {f"name: {step_identifier}", f"id: {step_identifier}"}: + return candidate_line + + return candidate_lines[0] + + +def _extract_script_line_from_parts(parts: object) -> int | None: + """Extract the 1-based script line number from parsed shell argument parts.""" + if not isinstance(parts, list): + return None + + for part in parts: + if not isinstance(part, dict): + continue + pos = part.get("Pos") + if not isinstance(pos, dict): + continue + line = pos.get("Line") + if isinstance(line, int) and line > 0: + return line + + return None + + +def _map_script_line_to_workflow_line( + step_node: GitHubActionsRunStepNode | None, script_line: int | None +) -> int | None: + """Map a line number inside a run script to the corresponding workflow source line.""" + if step_node is None or script_line is None or script_line < 1: + return None + + workflow_path = step_node.context.ref.job_context.ref.workflow_context.ref.source_filepath + run_script = step_node.definition.get("run") + if not workflow_path or not isinstance(run_script, str) or not os.path.isfile(workflow_path): + return None + + try: + with open(workflow_path, encoding="utf-8") as workflow_file: + workflow_lines = workflow_file.readlines() + except OSError: + return None + + for block_start, block_lines in _iter_run_blocks(workflow_lines): + if _normalize_multiline_text("\n".join(block_lines)) != _normalize_multiline_text(run_script): + continue + if script_line > len(block_lines): + return None + return block_start + script_line - 1 + + return None + + +def _iter_run_blocks(workflow_lines: list[str]) -> list[tuple[int, list[str]]]: + """Collect run-step script blocks as (1-based start line, content lines).""" + run_key_re = re.compile(r"^(\s*)(?:-\s*)?run\s*:\s*(.*)$") + blocks: 
list[tuple[int, list[str]]] = [] + i = 0 + while i < len(workflow_lines): + line = workflow_lines[i] + match = run_key_re.match(line) + if not match: + i += 1 + continue + + indent = len(match.group(1)) + run_value = match.group(2).rstrip("\n") + + if run_value.strip().startswith(("|", ">")): + block_start = i + 2 + block_buffer: list[str] = [] + j = i + 1 + min_indent: int | None = None + while j < len(workflow_lines): + candidate = workflow_lines[j] + if candidate.strip(): + candidate_indent = len(candidate) - len(candidate.lstrip(" ")) + if candidate_indent <= indent: + break + if min_indent is None or candidate_indent < min_indent: + min_indent = candidate_indent + block_buffer.append(candidate.rstrip("\n")) + j += 1 + + if min_indent is None: + blocks.append((block_start, [])) + else: + dedented = [b[min_indent:] if len(b) >= min_indent else b for b in block_buffer] + blocks.append((block_start, dedented)) + i = j + continue + + inline_value = run_value.strip().strip("\"'") + blocks.append((i + 1, [inline_value])) + i += 1 + + return blocks + + +def _normalize_multiline_text(text: str) -> str: + """Normalize text for robust matching between YAML-extracted and parsed run scripts.""" + return "\n".join(line.rstrip() for line in text.strip("\n").splitlines()) + + +def _infer_run_step_line_from_source(step_node: GitHubActionsRunStepNode) -> int | None: + """Infer a run step line by matching its script against the workflow source file.""" + workflow_path = step_node.context.ref.job_context.ref.workflow_context.ref.source_filepath + if not workflow_path or not os.path.isfile(workflow_path): + return None + + run_script = step_node.definition["run"] + first_script_line = "" + for line in run_script.splitlines(): + stripped = line.strip() + if stripped: + first_script_line = stripped + break + if not first_script_line: + return None + + try: + with open(workflow_path, encoding="utf-8") as workflow_file: + workflow_lines = workflow_file.readlines() + except OSError: + 
return None + + run_key_re = re.compile(r"^\s*(?:-\s*)?run\s*:\s*(.*)$") + for index, line in enumerate(workflow_lines): + match = run_key_re.match(line) + if not match: + continue + + run_value = match.group(1).strip() + if run_value and not run_value.startswith("|") and not run_value.startswith(">"): + inline_value = run_value.strip("\"'") + if first_script_line in inline_value or inline_value in first_script_line: + return index + 1 + continue + + run_indent = len(line) - len(line.lstrip(" ")) + for nested_line in workflow_lines[index + 1 :]: + if not nested_line.strip(): + continue + nested_indent = len(nested_line) - len(nested_line.lstrip(" ")) + if nested_indent <= run_indent: + break + if first_script_line in nested_line.strip(): + return index + 1 + + return None + + +def _has_privileged_trigger_risk_combo(findings: list[PrioritizedIssue]) -> bool: + """Return whether findings contain risky patterns that amplify pull_request_target risk.""" + risky_prefixes = ( + "overbroad-permissions:", + "untrusted-fork-code:", + "remote-script-exec:", + "pr-target-untrusted-checkout:", + "potential-injection:", + "self-hosted-runner:", + ) + return any(any(finding["issue"].startswith(prefix) for prefix in risky_prefixes) for finding in findings) + + +def _literal_value(value: facts.Value | None) -> str: + """Return literal string value from a facts expression when available.""" + if isinstance(value, facts.StringLiteral): + return value.literal + return "" + + +def _add_finding(findings: list[PrioritizedIssue], issue: str, priority: int) -> None: + """Append a finding once and keep the highest priority for duplicate issues. + + Parameters + ---------- + findings : list[PrioritizedIssue] + Mutable finding list for the current workflow. + issue : str + Normalized finding identifier/message. + priority : int + Finding priority score. 
+ """ + for existing in findings: + if existing["issue"] == issue: + existing["priority"] = max(existing["priority"], priority) + return + findings.append({"issue": issue, "priority": priority}) + + +def get_workflow_issue_type(issue: str) -> str: + """Extract a normalized workflow issue subtype from issue text.""" + prefix, _, _ = issue.partition(":") + normalized = prefix.strip().replace("_", "-") + return normalized or "workflow-security-issue" + + +def get_workflow_issue_summary(finding_type: str) -> str: + """Return a concise summary for a workflow issue subtype.""" + finding_summaries = { + "privileged-trigger": "Privileged trigger can expose elevated token scope to untrusted input.", + "missing-permissions": "Workflow omits explicit permissions and may inherit broad defaults.", + "overbroad-permissions": "Workflow requests permissions broader than required.", + "untrusted-fork-code": "Workflow can execute code controlled by an untrusted fork.", + "remote-script-exec": "Workflow downloads and executes remote scripts inline.", + "pr-target-untrusted-checkout": "pull_request_target is combined with checkout of PR-controlled refs.", + "potential-injection": "Unsafe expansion of attacker-controllable GitHub context can enable command injection.", + "self-hosted-runner": "Job uses self-hosted runners, increasing blast radius for untrusted code.", + "workflow-security-issue": "Workflow includes a security issue that requires hardening.", + } + return finding_summaries.get(finding_type, "Workflow security finding detected.") + + +def build_workflow_issue_recommendation(issue: str) -> tuple[str, Recommendation, str]: + """Build normalized workflow issue recommendation metadata.""" + finding_type = get_workflow_issue_type(issue) + summary = get_workflow_issue_summary(finding_type) + recommendation = recommend_for_workflow_issue(issue) + details = _format_issue_details(finding_type, issue) + finding_message = f"Summary: {summary} Details: {details} Recommendation: 
{recommendation.message}" + return finding_type, recommendation, finding_message + + +def _format_issue_details(finding_type: str, issue: str) -> str: + """Format human-readable issue details for job summaries.""" + if finding_type not in {"potential-injection", "remote-script-exec"}: + return issue + + payload = _parse_issue_payload(issue) + if not isinstance(payload, dict): + return issue + + job_name = str(payload.get("job") or "unknown") + step_name = str(payload.get("step") or "unknown") + command_text = str(payload.get("command") or "unknown") + command_text = command_text.replace("`", "'") + refs = payload.get("expanded_refs") + refs_display = "" + if isinstance(refs, list): + refs_clean = [str(ref) for ref in refs if str(ref)] + if refs_clean: + refs_display = f" Expanded refs: `{', '.join(refs_clean)}`" + return f"Job: {job_name} Step: {step_name} Command: `{command_text}`{refs_display}" + + +def _parse_issue_payload(issue: str) -> object | None: + """Parse the serialized issue payload after the finding type prefix.""" + _, _, payload = issue.partition(":") + payload = payload.strip() + if not payload: + return None + + try: + return cast(object, json.loads(payload)) + except json.JSONDecodeError: + return None + + +def build_unpinned_action_recommendation(issue: str, api_client: object) -> tuple[str, str, Recommendation] | None: + """Build normalized recommendation metadata for an unpinned third-party action finding.""" + parsed_issue = parse_unpinned_action_issue(issue) + if not parsed_issue: + return None + + action_name, action_ref = parsed_issue + resolved_sha = resolve_action_ref_to_sha(api_client, action_name, action_ref) + resolved_tag = resolve_action_ref_to_tag(action_name, resolved_sha, action_ref) + recommendation = recommend_for_unpinned_action(action_name, resolved_sha, resolved_tag) + return action_name, action_ref, recommendation + + +def extract_workflow_issue_line(issue: str) -> int | None: + """Extract a 1-based workflow source line 
number from an issue payload. + + Parameters + ---------- + issue : str + Serialized workflow issue string produced by the detector. + + Returns + ------- + int | None + The 1-based line number when available; otherwise ``None``. + """ + step_line_match = re.search(r"\[step-line=(\d+)\]", issue) + if step_line_match: + step_line = int(step_line_match.group(1)) + if step_line > 0: + return step_line + + if not issue.startswith("potential-injection:") and not issue.startswith("remote-script-exec:"): + return None + + _, _, payload = issue.partition(":") + if not payload.strip(): + return None + + parsed_payload = _parse_issue_payload(issue) + if isinstance(parsed_payload, dict): + payload_step_line = parsed_payload.get("step_line") + if isinstance(payload_step_line, int) and payload_step_line > 0: + return payload_step_line + + parts: object | None + if isinstance(parsed_payload, list): + parts = parsed_payload + elif isinstance(parsed_payload, dict): + parts = parsed_payload.get("parts") + else: + parts = None + + if isinstance(parts, list): + for part in parts: + if not isinstance(part, dict): + continue + pos = part.get("Pos") + if not isinstance(pos, dict): + continue + line = pos.get("Line") + if isinstance(line, int) and line > 0: + return line + + match = re.search(r"""["']Line["']:\s*(\d+)""", payload) + if not match: + return None + line = int(match.group(1)) + return line if line > 0 else None diff --git a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py new file mode 100644 index 000000000..332add599 --- /dev/null +++ b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py @@ -0,0 +1,220 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Recommendation helpers for GitHub Actions security findings. 
+
+This module centralizes user-facing remediation guidance for findings generated by
+GitHub Actions security analysis checks.
+"""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+
+from macaron.errors import GitTagError
+from macaron.slsa_analyzer.git_service.api_client import GhAPIClient
+from macaron.slsa_analyzer.git_url import find_highest_git_tag, get_tags_via_git_remote, is_commit_hash
+
+UNPINNED_ACTION_RE = re.compile(
+    r"^(?:unpinned-third-party-action:\s*)?(?:\[step-line=(?P<step_line>\d+)\]\s*)?(?P<action>[^@\s]+)@(?P<version>[^\s]+)$"
+)
+
+
+@dataclass(frozen=True)
+class Recommendation:
+    """Normalized recommendation payload for a finding.
+
+    Attributes
+    ----------
+    message : str
+        Human-readable recommendation text.
+    recommended_ref : str | None
+        Optional pinned reference suggestion, such as ``owner/repo@<sha>``.
+    """
+
+    message: str
+    recommended_ref: str | None = None
+
+
+def recommend_for_unpinned_action(
+    action_name: str, resolved_sha: str | None = None, resolved_tag: str | None = None
+) -> Recommendation:
+    """Create a recommendation for an unpinned third-party action.
+
+    Parameters
+    ----------
+    action_name : str
+        GitHub Action identifier in the form ``owner/repo``.
+    resolved_sha : str | None, optional
+        Resolved commit SHA for the action ref if available.
+    resolved_tag : str | None, optional
+        Tag corresponding to ``resolved_sha`` when available.
+
+    Returns
+    -------
+    Recommendation
+        Recommendation containing pinning guidance and a suggested immutable
+        action reference.
+ """ + if resolved_sha and resolved_tag: + recommended_ref = f"{action_name}@{resolved_sha} # {resolved_tag}" + elif resolved_sha: + recommended_ref = f"{action_name}@{resolved_sha}" + else: + recommended_ref = "Unable to resolve automatically" + return Recommendation( + message="Pin this third-party action to a 40-character commit SHA.", + recommended_ref=recommended_ref, + ) + + +def parse_unpinned_action_issue(issue: str) -> tuple[str, str] | None: + """Parse an unpinned third-party action reference from issue text. + + Parameters + ---------- + issue : str + Raw issue text emitted by workflow security analysis. + + Returns + ------- + tuple[str, str] | None + Parsed ``(action_name, action_version)`` when the issue matches + ``owner/repo@ref`` format for a third-party action. ``None`` otherwise. + """ + match = UNPINNED_ACTION_RE.fullmatch(issue.strip()) + if not match: + return None + action = match.group("action") + version = match.group("version") + if action.startswith("./"): + return None + if "/" not in action: + return None + return action, version + + +def resolve_action_ref_to_sha(api_client: object, action_name: str, action_version: str) -> str | None: + """Resolve an action reference to an immutable commit SHA. + + Parameters + ---------- + api_client : object + API client instance used for GitHub API calls. + action_name : str + GitHub Action identifier in the form ``owner/repo``. + action_version : str + Action ref currently used by the workflow. + + Returns + ------- + str | None + The resolved commit SHA if resolution succeeds; otherwise ``None``. + """ + if not isinstance(api_client, GhAPIClient): + return None + if not action_name or not action_version: + return None + if is_commit_hash(action_version): + # Normalize short SHAs by resolving them through the API. 
+ return ( + action_version + if len(action_version) == 40 + else api_client.get_commit_sha_from_ref(action_name, action_version) + ) + return api_client.get_commit_sha_from_ref(action_name, action_version) + + +def resolve_action_ref_to_tag(action_name: str, resolved_sha: str | None, action_version: str = "") -> str | None: + """Resolve a commit SHA to a corresponding Git tag for an action repository. + + Parameters + ---------- + action_name : str + GitHub Action identifier in the form ``owner/repo``. + resolved_sha : str | None, optional + Resolved commit SHA for the action ref. + action_version : str, optional + Original action version/ref. If this exact ref is a tag on the same + commit, it is preferred. + + Returns + ------- + str | None + The corresponding tag name if found; otherwise ``None``. + """ + if not action_name or not resolved_sha: + return None + tags = get_tags_via_git_remote(f"https://github.com/{action_name}") + if not tags: + return None + + matching_tags = [tag for tag, tag_sha in tags.items() if tag_sha == resolved_sha] + if not matching_tags: + return None + if action_version and action_version in matching_tags: + return action_version + try: + return find_highest_git_tag(set(matching_tags)) + except GitTagError: + return matching_tags[0] + + +def recommend_for_workflow_issue(issue: str) -> Recommendation: + """Map a workflow issue string to a remediation recommendation. + + Parameters + ---------- + issue : str + Issue string emitted by workflow security analysis. + + Returns + ------- + Recommendation + Recommendation message corresponding to the detected issue category. 
+ """ + if issue.startswith("sensitive-trigger:"): + return Recommendation("Add strict event gating (actor allowlist, branch filters, and conditional checks).") + if issue.startswith("privileged-trigger:"): + return Recommendation("Avoid pull_request_target for untrusted code paths; use pull_request where possible.") + if issue.startswith("missing-permissions:"): + return Recommendation("Define explicit least-privilege permissions at workflow or job scope.") + if issue.startswith("overbroad-permissions:"): + return Recommendation("Reduce permissions to read-only scopes unless write access is strictly required.") + if issue.startswith("untrusted-fork-code:"): + return Recommendation("Do not checkout PR head refs in privileged contexts; validate source and actor first.") + if issue.startswith("persist-credentials:"): + return Recommendation("Set persist-credentials: false for checkout unless later git pushes are required.") + if issue.startswith("remote-script-exec:"): + return Recommendation("Avoid curl|bash patterns; pin script digests or vendor reviewed scripts in-repo.") + if issue.startswith("pr-target-untrusted-checkout:"): + return Recommendation("Never combine pull_request_target with checkout of PR-controlled refs.") + if issue.startswith("potential-injection:"): + return Recommendation("Treat GitHub context data as untrusted input; quote/sanitize before shell execution.") + return Recommendation("Review this workflow finding and apply least-privilege hardening controls.") + + +def recommend_for_osv_vulnerability(action_name: str, action_version: str) -> Recommendation: + """Create a recommendation for a vulnerable GitHub Action version. + + Parameters + ---------- + action_name : str + GitHub Action identifier in the form ``owner/repo``. + action_version : str + Action ref currently used by the workflow. + + Returns + ------- + Recommendation + Recommendation encouraging upgrade to a non-vulnerable release and + subsequent pinning to a commit SHA. 
+ """ + return Recommendation( + message=( + f"Upgrade `{action_name}` from `{action_version}` to a non-vulnerable release, " + "then pin the selected version to a commit SHA." + ), + recommended_ref=None, + ) diff --git a/src/macaron/parsers/bashparser.py b/src/macaron/parsers/bashparser.py index ac2ceed68..2b8de426a 100644 --- a/src/macaron/parsers/bashparser.py +++ b/src/macaron/parsers/bashparser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module is a Python wrapper for the compiled bashparser binary. @@ -162,6 +162,74 @@ def parse_raw(bash_content: str, macaron_path: str | None = None) -> File: raise ParseError("Error while loading the parsed bash script.") from error +def parse_raw_with_gha_mapping(bash_content: str, macaron_path: str | None = None) -> tuple[File, dict[str, str]]: + """Parse bash content and return raw AST plus GitHub expression mapping. + + Parameters + ---------- + bash_content : str + Bash script content. + macaron_path : str | None + Macaron's root path (optional). + + Returns + ------- + tuple[bashparser_model.File, dict[str, str]] + A tuple of: + - The parsed raw bash AST. + - Mapping from parser placeholder variable names to original GitHub expression bodies. + + Raises + ------ + ParseError + When parsing fails with errors or output cannot be decoded. 
+ """ + if not macaron_path: + macaron_path = global_config.macaron_path + cmd = [ + os.path.join(macaron_path, "bin", "bashparser"), + "-input", + bash_content, + "-raw-gha-map", + ] + + try: + result = subprocess.run( # nosec B603 + cmd, + capture_output=True, + check=True, + cwd=macaron_path, + timeout=defaults.getint("bashparser", "timeout", fallback=30), + ) + except ( + subprocess.CalledProcessError, + subprocess.TimeoutExpired, + FileNotFoundError, + ) as error: + raise ParseError("Error while parsing bash script.") from error + + try: + if result.returncode != 0: + raise ParseError(f"Bash script parser failed: {result.stderr.decode('utf-8')}") + + payload = cast(dict[str, object], json.loads(result.stdout.decode("utf-8"))) + ast_data = payload.get("ast") + gha_map = payload.get("gha_expr_map") + if not isinstance(ast_data, dict): + raise ParseError("Error while loading the parsed bash script.") + if not isinstance(gha_map, dict): + raise ParseError("Error while loading the parsed bash script.") + gha_map_clean: dict[str, str] = {} + for key, value in gha_map.items(): + if isinstance(key, str) and isinstance(value, str): + gha_map_clean[key] = value + + return cast(File, ast_data), gha_map_clean + + except json.JSONDecodeError as error: + raise ParseError("Error while loading the parsed bash script.") from error + + def parse_expr(bash_expr_content: str, macaron_path: str | None = None) -> list[Word]: """Parse a bash script's content. diff --git a/src/macaron/resources/policies/sql/check-github-actions.sql b/src/macaron/resources/policies/sql/check-github-actions.sql new file mode 100644 index 000000000..09ba2555b --- /dev/null +++ b/src/macaron/resources/policies/sql/check-github-actions.sql @@ -0,0 +1,26 @@ +-- Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +-- Failed check facts for check-github-actions policy template. +SELECT + gha_check.finding_group, + gha_check.finding_priority, + gha_check.finding_type, + gha_check.action_name, + gha_check.action_ref, + gha_check.vuln_urls, + gha_check.finding_message, + gha_check.recommended_ref, + gha_check.sha_pinned, + gha_check.caller_workflow AS vulnerable_workflow, + analysis.analysis_time +FROM github_actions_vulnerabilities_check AS gha_check +JOIN check_facts + ON check_facts.id = gha_check.id +JOIN check_result + ON check_result.id = check_facts.check_result_id +JOIN component + ON check_result.component_id = component.id +JOIN analysis + ON analysis.id = component.analysis_id +WHERE check_result.passed = 0; diff --git a/src/macaron/resources/policies/sql/malware-detection-dependencies.sql b/src/macaron/resources/policies/sql/malware-detection-dependencies.sql new file mode 100644 index 000000000..028b2445c --- /dev/null +++ b/src/macaron/resources/policies/sql/malware-detection-dependencies.sql @@ -0,0 +1,19 @@ +-- Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +-- Failed check facts for malware-detection-dependencies policy template. 
+SELECT + analysis.analysis_time, + component.id component_id, + component.purl component_purl, + detect_malicious_metadata_check.* +FROM detect_malicious_metadata_check +JOIN check_facts + ON check_facts.id = detect_malicious_metadata_check.id +JOIN check_result + ON check_result.id = check_facts.check_result_id +JOIN component + ON check_result.component_id = component.id +JOIN analysis + ON analysis.id = component.analysis_id + AND check_result.passed = 0; diff --git a/src/macaron/resources/policies/sql/malware-detection.sql b/src/macaron/resources/policies/sql/malware-detection.sql new file mode 100644 index 000000000..a6597e89a --- /dev/null +++ b/src/macaron/resources/policies/sql/malware-detection.sql @@ -0,0 +1,19 @@ +-- Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +-- Failed check facts for malware-detection policy template. +SELECT + analysis.analysis_time, + component.id component_id, + component.purl component_purl, + detect_malicious_metadata_check.* +FROM detect_malicious_metadata_check +JOIN check_facts + ON check_facts.id = detect_malicious_metadata_check.id +JOIN check_result + ON check_result.id = check_facts.check_result_id +JOIN component + ON check_result.component_id = component.id +JOIN analysis + ON analysis.id = component.analysis_id + AND check_result.passed = 0; diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py index 3b350091c..4fb2e92ec 100644 --- a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py +++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py @@ -5,8 +5,10 @@ import logging import os +import re +from enum import Enum -from sqlalchemy import ForeignKey, String +from sqlalchemy import Boolean, ForeignKey, Integer, String from sqlalchemy.orm 
import Mapped, mapped_column from macaron.code_analyzer.dataflow_analysis.analysis import get_containing_github_job @@ -15,6 +17,15 @@ GitHubActionsActionStepNode, GitHubActionsReusableWorkflowCallNode, ) +from macaron.code_analyzer.gha_security_analysis.detect_injection import ( + build_unpinned_action_recommendation, + build_workflow_issue_recommendation, + detect_github_actions_security_issues, + extract_workflow_issue_line, +) +from macaron.code_analyzer.gha_security_analysis.recommendation import ( + recommend_for_osv_vulnerability, +) from macaron.database.db_custom_types import DBJsonList from macaron.database.table_definitions import CheckFacts from macaron.errors import APIAccessError @@ -29,6 +40,24 @@ logger: logging.Logger = logging.getLogger(__name__) +class GitHubActionsFindingType(str, Enum): + """Enumeration of finding categories for GitHub Actions vulnerability check facts.""" + + # Note: finding_type is the subtype within a top-level finding_group. + # It intentionally carries more granular detail than finding_group. + KNOWN_VULNERABILITY = "known-vulnerability" + UNPINNED_THIRD_PARTY_ACTION = "unpinned-third-party-action" + + +class GitHubActionsFindingGroup(str, Enum): + """Top-level finding groups for GitHub Actions vulnerability check facts.""" + + # Note: finding_group is the high-level bucket used for reporting sections. + # finding_type refines the exact issue inside one of these groups. + THIRD_PARTY_ACTION_RISK = "third_party_action_risk" + WORKFLOW_SECURITY_ISSUE = "workflow_security_issue" + + class GitHubActionsVulnsFacts(CheckFacts): """The ORM mapping for justifications in the GitHub Actions vulnerabilities check.""" @@ -37,23 +66,57 @@ class GitHubActionsVulnsFacts(CheckFacts): #: The primary key. id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003 - #: The list of vulnerability URLs. 
- vulnerability_urls: Mapped[list[str]] = mapped_column( - DBJsonList, nullable=False, info={"justification": JustificationType.TEXT} + #: The GitHub Action workflow that may have various security issues. + caller_workflow: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.HREF}) + + #: The finding category (subtype). + #: - ``known-vulnerability`` for known vulnerability findings. + #: - ``unpinned-third-party-action`` for third-party actions not pinned to a commit SHA. + #: - workflow issue subtype names (for example ``overbroad-permissions``). + #: This complements ``finding_group`` instead of replacing it. + finding_type: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT}) + + #: The top-level finding group. + #: - ``third_party_action_risk`` for action dependency risks. + #: - ``workflow_security_issue`` for workflow implementation security issues. + #: Use this to group rows in summaries; use ``finding_type`` for specific issue filtering. + finding_group: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT}) + + #: Human-readable finding details. + finding_message: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} ) - #: The GitHub Action Identifier. - github_actions_id: Mapped[str] = mapped_column( - String, nullable=False, info={"justification": JustificationType.TEXT} + #: Priority score for sorting and triaging findings in summary outputs. + finding_priority: Mapped[int] = mapped_column( + Integer, nullable=False, info={"justification": JustificationType.TEXT} ) - #: The GitHub Action version. - github_actions_version: Mapped[str] = mapped_column( - String, nullable=False, info={"justification": JustificationType.TEXT} + #: Recommended immutable action reference, if applicable. 
+ recommended_ref: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} ) - #: The GitHub Action workflow that calls the vulnerable GitHub Action. - caller_workflow: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.HREF}) + #: Third-party action identifier (for ``third_party_action_risk`` findings only). + #: For workflow-security findings, this field is ``None``. + action_name: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} + ) + + #: Third-party action version/ref (for ``third_party_action_risk`` findings only). + #: For workflow-security findings, this field is ``None``. + action_ref: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} + ) + + #: Whether the action reference is pinned to a full commit SHA. + sha_pinned: Mapped[bool | None] = mapped_column(Boolean, nullable=True) + + #: Related vulnerability URLs (for ``third_party_action_risk`` findings only). + #: For workflow-security findings, this field is an empty list. + vuln_urls: Mapped[list[str]] = mapped_column( + DBJsonList, nullable=False, info={"justification": JustificationType.TEXT} + ) __mapper_args__ = { "polymorphic_identity": "_github_actions_vulnerabilities_check", @@ -89,10 +152,71 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: The result of the check. 
""" ci_services = ctx.dynamic_data["ci_services"] - + result_tables: list[CheckFacts] = [] external_workflows: dict[str, list] = {} + for ci_info in ci_services: callgraph = ci_info["callgraph"] + gh_api_client = ci_info["service"].api_client if hasattr(ci_info["service"], "api_client") else None + if workflow_findings := detect_github_actions_security_issues(callgraph): + for finding in workflow_findings: + caller_workflow_link = "" + if gh_api_client: + caller_workflow_link = gh_api_client.get_file_link( + ctx.component.repository.full_name, + ctx.component.repository.commit_sha, + file_path=( + gh_api_client.get_relative_path_of_workflow(os.path.basename(finding["workflow_name"])) + if finding["workflow_name"] + else "" + ), + ) + for prioritized_issue in finding["issues"]: + issue = prioritized_issue["issue"] + issue_priority = int(prioritized_issue["priority"]) + issue_line = extract_workflow_issue_line(issue) + finding_workflow_link = caller_workflow_link + if issue_line and finding_workflow_link: + finding_workflow_link = f"{finding_workflow_link}#L{issue_line}" + if unpinned_action_info := build_unpinned_action_recommendation(issue, gh_api_client): + action_name, action_version, recommendation = unpinned_action_info + finding_type = GitHubActionsFindingType.UNPINNED_THIRD_PARTY_ACTION.value + result_tables.append( + GitHubActionsVulnsFacts( + vuln_urls=[], + finding_type=finding_type, + finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, + action_name=action_name, + action_ref=action_version, + caller_workflow=finding_workflow_link, + sha_pinned=False, + finding_message=( + f"Summary: {_short_description_for_finding_type(finding_type)} " + f"Recommendation: {recommendation.message}" + ), + finding_priority=issue_priority, + recommended_ref=recommendation.recommended_ref, + confidence=Confidence.HIGH, + ) + ) + continue + + finding_type, recommendation, finding_message = build_workflow_issue_recommendation(issue) + result_tables.append( + 
GitHubActionsVulnsFacts( + vuln_urls=[], + finding_type=finding_type, + finding_group=GitHubActionsFindingGroup.WORKFLOW_SECURITY_ISSUE.value, + action_name=None, + action_ref=None, + caller_workflow=finding_workflow_link, + sha_pinned=None, + finding_message=finding_message, + finding_priority=issue_priority, + recommended_ref=recommendation.recommended_ref, + confidence=Confidence.HIGH, + ) + ) for root in callgraph.root_nodes: for callee in traverse_bfs(root): if isinstance(callee, (GitHubActionsReusableWorkflowCallNode, GitHubActionsActionStepNode)): @@ -114,80 +238,88 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: continue caller_path = job.context.ref.workflow_context.ref.source_filepath + caller_path_link = caller_path + if gh_api_client: + caller_path_link = gh_api_client.get_file_link( + ctx.component.repository.full_name, + ctx.component.repository.commit_sha, + file_path=( + gh_api_client.get_relative_path_of_workflow(os.path.basename(caller_path)) + if caller_path + else "" + ), + ) ext_workflow: list = external_workflows.get(workflow_name, []) ext_workflow.append( { "version": workflow_version, - "caller_path": ci_info["service"].api_client.get_file_link( - ctx.component.repository.full_name, - ctx.component.repository.commit_sha, - file_path=( - ci_info["service"].api_client.get_relative_path_of_workflow( - os.path.basename(caller_path) - ) - if caller_path - else "" - ), - ), + "caller_path": caller_path_link, } ) external_workflows[workflow_name] = ext_workflow - # If no external GitHub Actions are found, return passed result. - if not external_workflows: - return CheckResultData( - result_tables=[], - result_type=CheckResultType.PASSED, - ) - - # We first send a batch query to see which GitHub Actions are potentially vulnerable. - # OSV's querybatch returns minimal results but this allows us to only make subsequent - # queries to get vulnerability details when needed. 
- batch_query = [ - {"package": {"name": k, "ecosystem": "GitHub Actions"}} for k, _ in external_workflows.items() if k - ] - batch_vulns = [] - try: - batch_vulns = OSVDevService.get_vulnerabilities_package_name_batch(batch_query) - except APIAccessError as error: - logger.debug(error) + # If no external GitHub Actions are found, no need to check for known vulnerabilities. + if external_workflows: - result_tables: list[CheckFacts] = [] - for pkg in batch_vulns: - vuln_res = pkg["package"] - vulns: list = [] - workflow_name = vuln_res["name"] + # We first send a batch query to see which GitHub Actions are potentially vulnerable. + # OSV's querybatch returns minimal results but this allows us to only make subsequent + # queries to get vulnerability details when needed. + batch_query = [ + {"package": {"name": k, "ecosystem": "GitHub Actions"}} for k, _ in external_workflows.items() if k + ] + batch_vulns = [] try: - vulns = OSVDevService.get_vulnerabilities_package_name(ecosystem="GitHub Actions", name=workflow_name) + batch_vulns = OSVDevService.get_vulnerabilities_package_name_batch(batch_query) except APIAccessError as error: logger.debug(error) - continue - for workflow_inv in external_workflows[workflow_name]: - vuln_mapping = [] - for vuln in vulns: - if v_id := json_extract(vuln, ["id"], str): - try: - if OSVDevService.is_version_affected( - vuln, - workflow_name, - workflow_inv["version"], - "GitHub Actions", - source_repo=f"https://github.com/{workflow_name}", - ): - vuln_mapping.append(f"https://osv.dev/vulnerability/{v_id}") - except APIAccessError as error: - logger.debug(error) - if vuln_mapping: - result_tables.append( - GitHubActionsVulnsFacts( - vulnerability_urls=vuln_mapping, - github_actions_id=workflow_name, - github_actions_version=workflow_inv["version"], - caller_workflow=workflow_inv["caller_path"], - confidence=Confidence.HIGH, - ) + + for vuln_res in batch_vulns: + vulns: list = [] + workflow_name = vuln_res["package"]["name"] + try: + 
vulns = OSVDevService.get_vulnerabilities_package_name( + ecosystem="GitHub Actions", name=workflow_name ) + except APIAccessError as error: + logger.debug(error) + continue + for workflow_inv in external_workflows[workflow_name]: + vuln_mapping = [] + for vuln in vulns: + if v_id := json_extract(vuln, ["id"], str): + try: + if OSVDevService.is_version_affected( + vuln, + workflow_name, + workflow_inv["version"], + "GitHub Actions", + source_repo=f"https://github.com/{workflow_name}", + ): + vuln_mapping.append(f"https://osv.dev/vulnerability/{v_id}") + except APIAccessError as error: + logger.debug(error) + if vuln_mapping: + recommendation = recommend_for_osv_vulnerability(workflow_name, workflow_inv["version"]) + finding_type = GitHubActionsFindingType.KNOWN_VULNERABILITY.value + result_tables.append( + GitHubActionsVulnsFacts( + vuln_urls=vuln_mapping, + finding_type=finding_type, + finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, + action_name=workflow_name, + action_ref=workflow_inv["version"], + caller_workflow=workflow_inv["caller_path"], + sha_pinned=bool(re.fullmatch(r"[0-9a-f]{40}", workflow_inv["version"])), + finding_message=( + f"Summary: {_short_description_for_finding_type(finding_type)} " + f"Recommendation: {recommendation.message}" + ), + finding_priority=100, + recommended_ref=recommendation.recommended_ref, + confidence=Confidence.HIGH, + ) + ) if result_tables: return CheckResultData( @@ -202,3 +334,23 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: registry.register(GitHubActionsVulnsCheck()) + + +def _short_description_for_finding_type(finding_type: str) -> str: + """Return a concise, human-readable summary for a finding subtype.""" + finding_summaries = { + GitHubActionsFindingType.KNOWN_VULNERABILITY.value: "Action version is known to be vulnerable.", + GitHubActionsFindingType.UNPINNED_THIRD_PARTY_ACTION.value: "Third-party action is not pinned to an immutable SHA.", + "sensitive-trigger": "Workflow 
uses a sensitive trigger and needs strict gating.", + "privileged-trigger": "Privileged trigger can expose elevated token scope to untrusted input.", + "missing-permissions": "Workflow omits explicit permissions and may inherit broad defaults.", + "overbroad-permissions": "Workflow requests permissions broader than required.", + "untrusted-fork-code": "Workflow can execute code controlled by an untrusted fork.", + "persist-credentials": "Persisted checkout credentials can leak token access to later steps.", + "remote-script-exec": "Workflow downloads and executes remote scripts inline.", + "pr-target-untrusted-checkout": "pull_request_target is combined with checkout of PR-controlled refs.", + "potential-injection": "Untrusted GitHub context data may flow into shell execution.", + "self-hosted-runner": "Job uses self-hosted runners, increasing blast radius for untrusted code.", + "workflow-security-issue": "Workflow includes a security issue that requires hardening.", + } + return finding_summaries.get(finding_type, "Workflow security finding detected.") diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py index 4700e5e85..d222ee011 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py @@ -581,14 +581,32 @@ def build_call_graph(self, repo_path: str, macaron_path: str = "") -> NodeForest Returns ------- - CallGraph: CallGraph - The call graph built for GitHub Actions. + NodeForest + The root nodes of call graphs built for GitHub Actions workflows. """ if not macaron_path: macaron_path = global_config.macaron_path # Parse GitHub Actions workflows. 
files = self.get_workflows(repo_path) + return self.build_call_graph_for_files(files, repo_path) + + def build_call_graph_for_files(self, files: list[str], repo_path: str) -> NodeForest: + """Build call graphs for a given set of GitHub Actions workflow files. + + Parameters + ---------- + files : list[str] + The list of workflow file paths to analyze. + repo_path : str + The repository path used as the base context for workflow analysis. + + Returns + ------- + NodeForest + A forest containing one root node per successfully parsed workflow. + Workflows that raise ``ParseError`` are skipped. + """ nodes: list[Node] = [] for workflow_path in files: try: diff --git a/src/macaron/slsa_analyzer/git_service/api_client.py b/src/macaron/slsa_analyzer/git_service/api_client.py index 9921c2dc9..f49beda7c 100644 --- a/src/macaron/slsa_analyzer/git_service/api_client.py +++ b/src/macaron/slsa_analyzer/git_service/api_client.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """The module provides API clients for VCS services, such as GitHub.""" @@ -11,6 +11,7 @@ from typing import NamedTuple from macaron.config.defaults import defaults +from macaron.json_tools import json_extract from macaron.slsa_analyzer.asset import AssetLocator from macaron.util import ( construct_query, @@ -388,6 +389,28 @@ def get_commit_data_from_hash(self, full_name: str, commit_hash: str) -> dict: return response_data + def get_commit_sha_from_ref(self, full_name: str, ref: str) -> str | None: + """Resolve a Git reference (tag/branch/sha) to a 40-character commit SHA. + + Parameters + ---------- + full_name : str + The full name of the repository in the format ``owner/name``. + ref : str + The git reference to resolve (e.g. ``v5``, ``main``, ``v1.2.3``). 
+ + Returns + ------- + str | None + The resolved commit SHA, or ``None`` if resolution fails. + """ + if not full_name or not ref: + return None + + response_data = self.get_commit_data_from_hash(full_name, ref) + sha = json_extract(response_data, ["sha"], str) + return sha if sha and len(sha) == 40 else None + def search(self, target: str, query: str) -> dict: """Perform a search using GitHub REST API. diff --git a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr new file mode 100644 index 000000000..984d2d208 --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr @@ -0,0 +1,30 @@ +# serializer version: 1 +# name: test_detect_github_actions_security_issues[injection_pattern_1.yaml] + list([ + dict({ + 'issues': list([ + dict({ + 'issue': 'potential-injection: {"step_line": 75, "script_line": 7, "job": "auto_format", "step": "Commit and push formatting changes", "command": "git push origin HEAD:${{ github.event.pull_request.head.ref }}", "expanded_refs": ["github.event.pull_request.head.ref"], "parts": [{"End": {"Col": 4, "Line": 7, "Offset": 171}, "Pos": {"Col": 1, "Line": 7, "Offset": 168}, "Type": "Lit", "Value": "git", "ValueEnd": {"Col": 4, "Line": 7, "Offset": 171}, "ValuePos": {"Col": 1, "Line": 7, "Offset": 168}}]}', + 'priority': 100, + }), + dict({ + 'issue': 'privileged-trigger: Workflow uses `pull_request_target` with additional risky patterns; treat this workflow as high risk and harden immediately.', + 'priority': 80, + }), + dict({ + 'issue': 'unpinned-third-party-action: [step-line=28] actions/checkout@v5', + 'priority': 20, + }), + dict({ + 'issue': 'unpinned-third-party-action: [step-line=37] poseidon/wait-for-status-checks@v0.6.0', + 'priority': 20, + }), + dict({ + 'issue': 'unpinned-third-party-action: [step-line=48] dtolnay/rust-toolchain@stable', + 'priority': 
20, + }), + ]), + 'workflow_name': 'tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml', + }), + ]) +# --- diff --git a/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml b/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml new file mode 100644 index 000000000..9ef276717 --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml @@ -0,0 +1,75 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +name: PR Auto-format + +# This workflow triggers when a PR is opened/updated +on: + pull_request_target: + types: [opened, synchronize, reopened] + branches: + - main + - release + +jobs: + auto_format: + if: | + !contains(github.event.pull_request.labels.*.name, 'skip:ci') && + !contains(github.event.pull_request.head.sha, '[skip ci]') + permissions: + contents: write + pull-requests: write + checks: read + runs-on: ubuntu-latest + timeout-minutes: 60 + + steps: + - name: Checkout PR branch + uses: actions/checkout@v5 + with: + ref: ${{ github.event.pull_request.head.ref }} + repository: ${{ github.event.pull_request.head.repo.full_name }} + token: ${{ secrets.GITHUB_TOKEN }} + fetch-depth: 0 + + # Wait for all PR check runs to complete + - name: Wait for all checks to complete + uses: poseidon/wait-for-status-checks@v0.6.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + delay: 60 + interval: 30 + timeout: 7200 + + - name: CI completed successfully + run: echo "CI workflow completed successfully - proceeding with auto-format" + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + + - name: Run cargo fmt + run: | + echo "Running cargo fmt --all on PR #${{ github.event.pull_request.number }}" + cargo fmt --all + + - name: Check 
for formatting changes + id: check_changes + run: | + if [ -n "$(git status --porcelain)" ]; then + echo "has_changes=true" >> $GITHUB_OUTPUT + else + echo "has_changes=false" >> $GITHUB_OUTPUT + fi + + - name: Commit and push formatting changes + if: steps.check_changes.outputs.has_changes == 'true' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + git add -u + git commit -m "Auto-format code [skip ci]" + + git push origin HEAD:${{ github.event.pull_request.head.ref }} diff --git a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py new file mode 100644 index 000000000..cf4990a16 --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py @@ -0,0 +1,156 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""Tests for GitHub Actions security analysis detection heuristics.""" + +import os +from pathlib import Path + +import pytest + +from macaron.code_analyzer.gha_security_analysis.detect_injection import ( + PrioritizedIssue, + WorkflowFinding, + _add_finding, + build_workflow_issue_recommendation, + detect_github_actions_security_issues, + extract_workflow_issue_line, +) +from macaron.slsa_analyzer.ci_service.github_actions.github_actions_ci import GitHubActions + +RESOURCES_DIR = Path(__file__).parent.joinpath("resources") + + +@pytest.mark.parametrize( + "workflow_path", + [ + "injection_pattern_1.yaml", + ], +) +def test_detect_github_actions_security_issues( + snapshot: list[WorkflowFinding], workflow_path: str, github_actions_service: GitHubActions +) -> None: + """Test GH Actions workflows injection patterns.""" + callgraph = github_actions_service.build_call_graph_for_files( + [os.path.join(RESOURCES_DIR, "workflow_files", workflow_path)], + repo_path=os.path.join(RESOURCES_DIR, "workflow_files"), + ) + assert detect_github_actions_security_issues(callgraph) == snapshot + + +def test_extract_workflow_issue_line_from_potential_injection() -> None: + """Extract the source line from a potential-injection issue payload.""" + issue = ( + "potential-injection: " + "[{'Type': 'Lit', 'Pos': {'Offset': 269, 'Line': 6, 'Col': 48}, 'Value': 'origin/'}, " + "{'Type': 'ParamExp', 'Pos': {'Offset': 276, 'Line': 6, 'Col': 55}}]" + ) + + assert extract_workflow_issue_line(issue) == 6 + + +def test_extract_workflow_issue_line_prefers_step_line_marker() -> None: + """Extract the workflow line from an explicit step-line marker.""" + issue = ( + "potential-injection: " + "[step-line=14] " + "[{'Type': 'Lit', 'Pos': {'Offset': 269, 'Line': 6, 'Col': 48}, 'Value': 'origin/'}]" + ) + + assert extract_workflow_issue_line(issue) == 14 + + +def test_extract_workflow_issue_line_from_structured_payload() -> None: + """Extract workflow line from structured potential-injection 
payload.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", "parts": []}' + ) + + assert extract_workflow_issue_line(issue) == 62 + + +def test_build_workflow_issue_recommendation_formats_potential_injection_details() -> None: + """Format concise user-facing details for potential-injection findings.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", "parts": []}' + ) + + finding_type, _, finding_message = build_workflow_issue_recommendation(issue) + + assert finding_type == "potential-injection" + assert "Unsafe expansion of attacker-controllable GitHub context can enable command injection." in finding_message + assert "Details: Job: retag Step: Retag Command: `git push origin/${github.head_ref}`" in finding_message + + +def test_build_workflow_issue_recommendation_includes_expanded_refs() -> None: + """Render expanded GitHub refs in potential-injection details when present.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", "expanded_refs": ["github.head_ref"]}' + ) + + _, _, finding_message = build_workflow_issue_recommendation(issue) + + assert "Expanded refs: `github.head_ref`" in finding_message + + +def test_build_workflow_issue_recommendation_includes_refs_from_compound_expression() -> None: + """Render extracted github refs when original expression contains operators.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", ' + '"expanded_refs": ["github.head_ref", "github.ref_name"]}' + ) + + _, _, finding_message = build_workflow_issue_recommendation(issue) + + assert "Expanded refs: `github.head_ref, 
github.ref_name`" in finding_message + + +def test_build_workflow_issue_recommendation_formats_remote_script_exec_details() -> None: + """Format concise user-facing details for remote-script-exec findings.""" + issue = ( + "remote-script-exec: " + '{"step_line": 24, "script_line": 3, "job": "build", "step": "Setup", ' + '"command": "curl -fsSL https://x | bash"}' + ) + + finding_type, _, finding_message = build_workflow_issue_recommendation(issue) + + assert finding_type == "remote-script-exec" + assert "Workflow downloads and executes remote scripts inline." in finding_message + assert "Details: Job: build Step: Setup Command: `curl -fsSL https://x | bash`" in finding_message + + +def test_extract_workflow_issue_line_from_remote_script_exec_payload() -> None: + """Extract workflow line from structured remote-script-exec payload.""" + issue = ( + "remote-script-exec: " + '{"step_line": 24, "script_line": 3, "job": "build", "step": "Setup", ' + '"command": "curl -fsSL https://x | bash"}' + ) + + assert extract_workflow_issue_line(issue) == 24 + + +def test_extract_workflow_issue_line_from_unpinned_action_marker() -> None: + """Extract workflow line from unpinned action issue marker.""" + issue = "unpinned-third-party-action: [step-line=62] actions/checkout@v4.2.2" + + assert extract_workflow_issue_line(issue) == 62 + + +def test_add_finding_deduplicates_and_preserves_highest_priority() -> None: + """Keep one finding entry per issue and retain the highest priority.""" + findings: list[PrioritizedIssue] = [] + _add_finding(findings, "remote-script-exec: {}", 80) + _add_finding(findings, "remote-script-exec: {}", 60) + _add_finding(findings, "remote-script-exec: {}", 100) + + assert findings == [{"issue": "remote-script-exec: {}", "priority": 100}] diff --git a/tests/code_analyzer/gha_security_analysis/test_recommendation.py b/tests/code_analyzer/gha_security_analysis/test_recommendation.py new file mode 100644 index 000000000..13a5217ed --- /dev/null +++ 
b/tests/code_analyzer/gha_security_analysis/test_recommendation.py @@ -0,0 +1,69 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Tests for GitHub Actions security recommendation helpers.""" + +import pytest + +from macaron.code_analyzer.gha_security_analysis.recommendation import ( + parse_unpinned_action_issue, + recommend_for_unpinned_action, + resolve_action_ref_to_tag, +) + + +def test_recommend_for_unpinned_action_with_tag_hint() -> None: + """Return pinned action recommendation with tag hint when SHA and tag are resolved.""" + recommendation = recommend_for_unpinned_action( + "actions/checkout", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "v4.2.2", + ) + + assert recommendation.recommended_ref == "actions/checkout@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa # v4.2.2" + + +def test_recommend_for_unpinned_action_when_sha_not_resolved() -> None: + """Return fallback recommendation text when action SHA cannot be resolved.""" + recommendation = recommend_for_unpinned_action("actions/checkout") + + assert recommendation.recommended_ref == "Unable to resolve automatically" + assert recommendation.message == "Pin this third-party action to a 40-character commit SHA." 
+ + +def test_resolve_action_ref_to_tag_found(monkeypatch: pytest.MonkeyPatch) -> None: + """Resolve the matching tag when a tag points to the resolved action SHA.""" + monkeypatch.setattr( + "macaron.code_analyzer.gha_security_analysis.recommendation.get_tags_via_git_remote", + lambda repo: {"v4.2.2": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"}, + ) + + tag = resolve_action_ref_to_tag("actions/checkout", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", "v4") + + assert tag == "v4.2.2" + + +def test_resolve_action_ref_to_tag_none_when_no_match(monkeypatch: pytest.MonkeyPatch) -> None: + """Return None when no fetched tag points to the resolved action SHA.""" + monkeypatch.setattr( + "macaron.code_analyzer.gha_security_analysis.recommendation.get_tags_via_git_remote", + lambda repo: {"v4.2.2": "dddddddddddddddddddddddddddddddddddddddd"}, + ) + + tag = resolve_action_ref_to_tag("actions/checkout", "cccccccccccccccccccccccccccccccccccccccc", "v4") + + assert tag is None + + +def test_parse_unpinned_action_issue_with_step_line_prefix() -> None: + """Parse unpinned action issues that include finding type and step-line marker.""" + parsed = parse_unpinned_action_issue("unpinned-third-party-action: [step-line=62] actions/checkout@v4.2.2") + + assert parsed == ("actions/checkout", "v4.2.2") + + +def test_parse_unpinned_action_issue_plain_format() -> None: + """Parse legacy unpinned action issues without metadata prefix.""" + parsed = parse_unpinned_action_issue("actions/setup-python@v5.6.0") + + assert parsed == ("actions/setup-python", "v5.6.0") diff --git a/tests/integration/cases/oracle_coherence-js-client/policy.dl b/tests/integration/cases/oracle_coherence-js-client/policy.dl index 5b814eb39..4406970d8 100644 --- a/tests/integration/cases/oracle_coherence-js-client/policy.dl +++ b/tests/integration/cases/oracle_coherence-js-client/policy.dl @@ -7,10 +7,16 @@ Policy("check-github-actions-vulnerabilities", component_id, "Check GitHub Actio check_failed(component_id, 
"mcn_githubactions_vulnerabilities_1"), github_actions_vulnerabilities_check( _, - "[\"https://osv.dev/vulnerability/GHSA-69fq-xp46-6x23\", \"https://osv.dev/vulnerability/GHSA-9p44-j4g5-cfx5\"]", + "https://github.com/oracle/coherence-js-client/blob/39166341bc31f75b663ff439dae36170fb3e99a9/.github/workflows/trivy-scan.yml", + "known-vulnerability", + "third_party_action_risk", + _, + _, + _, "aquasecurity/trivy-action", "0.32.0", - "https://github.com/oracle/coherence-js-client/blob/39166341bc31f75b663ff439dae36170fb3e99a9/.github/workflows/trivy-scan.yml" + _, + "[\"https://osv.dev/vulnerability/GHSA-69fq-xp46-6x23\", \"https://osv.dev/vulnerability/GHSA-9p44-j4g5-cfx5\"]" ). apply_policy_to("check-github-actions-vulnerabilities", component_id) :- diff --git a/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl b/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl index 00b141481..8f34d5674 100644 --- a/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl +++ b/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
*/ #include "prelude.dl" diff --git a/tests/integration/cases/org_apache_logging_log4j/test.yaml b/tests/integration/cases/org_apache_logging_log4j/test.yaml index 7871c7a5a..b0820223d 100644 --- a/tests/integration/cases/org_apache_logging_log4j/test.yaml +++ b/tests/integration/cases/org_apache_logging_log4j/test.yaml @@ -27,6 +27,8 @@ steps: command_args: - -rp - https://github.com/apache/logging-log4j2 + - -d + - 028e9fad03ae7bcbf2e49ab8d32d8cfb900f3587 - name: Run macaron verify-policy to verify passed/failed checks kind: verify options: diff --git a/tests/output_reporter/test_write_job_summary.py b/tests/output_reporter/test_write_job_summary.py new file mode 100644 index 000000000..210dc7ac8 --- /dev/null +++ b/tests/output_reporter/test_write_job_summary.py @@ -0,0 +1,104 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Tests for GitHub Actions job summary rendering helpers.""" + +from __future__ import annotations + +import importlib.util +from pathlib import Path +from types import ModuleType + + +def _load_write_job_summary_module() -> ModuleType: + """Load the write_job_summary script as a Python module for testing.""" + script_path = Path(Path(__file__).parents[2], "scripts", "actions", "write_job_summary.py") + spec = importlib.util.spec_from_file_location("write_job_summary", script_path) + if spec is None or spec.loader is None: + raise RuntimeError("Unable to load write_job_summary.py module.") + + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_workflow_security_table_includes_summary_column(tmp_path: Path) -> None: + """Render workflow security findings with the short summary column in compact table output.""" + module = _load_write_job_summary_module() + summary_path = Path(tmp_path, "summary.md") + columns = [ + "finding_group", + 
"finding_priority", + "finding_type", + "action_name", + "action_ref", + "vulnerable_workflow", + "finding_message", + ] + rows = [ + ( + "workflow_security_issue", + 80, + "remote-script-exec", + "https://github.com/org/repo/.github/workflows/build.yml", + "", + "https://github.com/org/repo/.github/workflows/build.yml", + ( + "Summary: Workflow downloads and executes remote scripts inline. " + "Details: remote-script-exec: A step appears to download and pipe to shell (`curl|bash`). " + "Recommendation: Avoid curl|bash patterns." + ), + ), + ] + + rendered = module.write_compact_gha_vuln_diagnostics(summary_path, columns, rows) + output = summary_path.read_text(encoding="utf-8") + + assert rendered is True + assert "| priority | type | summary | workflow |" in output + assert "Workflow downloads and executes remote scripts inline." in output + + +def test_compact_summary_keeps_all_groups_in_detailed_section(tmp_path: Path) -> None: + """Render detailed section with both finding groups even when top priorities are workflow-only.""" + module = _load_write_job_summary_module() + summary_path = Path(tmp_path, "summary.md") + columns = [ + "finding_group", + "finding_priority", + "finding_type", + "action_name", + "action_ref", + "vulnerable_workflow", + "finding_message", + ] + rows = [ + ( + "workflow_security_issue", + 100, + "potential-injection", + "", + "", + "https://github.com/org/repo/.github/workflows/ci.yml", + "Summary: Injection risk. Details: ... Recommendation: ...", + ), + ( + "third_party_action_risk", + 20, + "unpinned-third-party-action", + "actions/checkout", + "v4", + "https://github.com/org/repo/.github/workflows/ci.yml", + "Summary: Unpinned action. 
Recommendation: ...", + ), + ] + + rendered = module.write_compact_gha_vuln_diagnostics(summary_path, columns, rows) + output = summary_path.read_text(encoding="utf-8") + + assert rendered is True + assert "#### Workflow security issues" in output + assert "#### Third-party action risks" in output + assert "**Workflow security issues**" in output + assert "**Third-party action risks**" in output + assert "`actions/checkout@v4`" in output diff --git a/tests/parsers/bashparser/test_bashparser.py b/tests/parsers/bashparser/test_bashparser.py index 97c431034..a489330ac 100644 --- a/tests/parsers/bashparser/test_bashparser.py +++ b/tests/parsers/bashparser/test_bashparser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ @@ -13,7 +13,7 @@ from macaron import MACARON_PATH from macaron.errors import ParseError -from macaron.parsers.bashparser import parse, parse_file +from macaron.parsers.bashparser import parse, parse_file, parse_raw_with_gha_mapping @pytest.mark.parametrize( @@ -46,3 +46,13 @@ def test_bashparser_parse_invalid() -> None: # Parse the bash script file. 
with pytest.raises(ParseError): parse_file(file_path=file_path, macaron_path=MACARON_PATH) + + +def test_bashparser_parse_raw_with_gha_mapping() -> None: + """Test parsing raw bash script with GitHub expression mapping.""" + bash_content = 'echo "${{ github.head_ref }}"\n' + parsed_ast, gha_map = parse_raw_with_gha_mapping(bash_content, MACARON_PATH) + + assert "Stmts" in parsed_ast + assert gha_map + assert "github.head_ref" in gha_map.values() diff --git a/tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl b/tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl new file mode 100644 index 000000000..8bbeba44f --- /dev/null +++ b/tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl @@ -0,0 +1,16 @@ +/* Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy( + "attest-macaron-supply-chain", + component_id, + "Require trusted VCS metadata, and non-hosted repository." +) :- + check_passed(component_id, "mcn_version_control_system_1"), + is_repo(repo_id, "github.com/oracle/macaron", component_id), + not_self_hosted_git(repo_id, _). + +apply_policy_to("attest-macaron-supply-chain", component_id) :- + is_component(component_id, _). From d07edd4b809aba789ee13e2800c8187c00303437 Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Tue, 31 Mar 2026 15:08:28 +1000 Subject: [PATCH 13/13] fix: improve URL validation to avoid unexpected redirects (#1344) This PR improves URL validation to prevent unexpected redirects caused by ambiguous parsing behavior. A new utility function has been introduced to ensure that username and password fields are not implicitly populated by urlparse, which could otherwise lead to incorrect hostname resolution and unintended redirects. 
Signed-off-by: behnazh-w --- .../slsa_analyzer/provenance/loader.py | 12 +--- src/macaron/util.py | 62 ++++++++++++++++++- 2 files changed, 64 insertions(+), 10 deletions(-) diff --git a/src/macaron/slsa_analyzer/provenance/loader.py b/src/macaron/slsa_analyzer/provenance/loader.py index 3e9d9b1b0..0b7b1352b 100644 --- a/src/macaron/slsa_analyzer/provenance/loader.py +++ b/src/macaron/slsa_analyzer/provenance/loader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the loaders for SLSA provenances.""" @@ -9,7 +9,6 @@ import json import logging import zlib -from urllib.parse import urlparse from cryptography import x509 from cryptography.x509 import DuplicateExtension, UnsupportedGeneralNameType @@ -19,7 +18,7 @@ from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, validate_intoto_payload from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError, ValidateInTotoPayloadError from macaron.slsa_analyzer.specs.pypi_certificate_predicate import PyPICertificatePredicate -from macaron.util import send_get_http_raw +from macaron.util import send_get_http_raw, url_is_safe logger: logging.Logger = logging.getLogger(__name__) @@ -43,13 +42,8 @@ def _try_read_url_link_file(file_content: bytes) -> str | None: def _download_url_file_content(url: str, url_link_hostname_allowlist: list[str]) -> bytes: - hostname = urlparse(url).hostname - if hostname is None or hostname == "": + if not url_is_safe(url, allow_list=url_link_hostname_allowlist): raise LoadIntotoAttestationError("Cannot resolve URL link file: invalid URL") - if hostname not in url_link_hostname_allowlist: - raise LoadIntotoAttestationError( - "Cannot resolve URL link file: target hostname '" + hostname + "' is not in 
allowed hostnames." - ) # TODO download size limit? timeout = defaults.getint("downloads", "timeout", fallback=120) diff --git a/src/macaron/util.py b/src/macaron/util.py index 6509e2f67..b6f789493 100644 --- a/src/macaron/util.py +++ b/src/macaron/util.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module includes utilities functions for Macaron.""" @@ -21,6 +21,66 @@ logger: logging.Logger = logging.getLogger(__name__) +def url_is_safe(url: str, allow_list: list[str] | None = None, allow_login: bool = False) -> bool: + r"""Validate that a URL has an acceptable host and login component. + + Parameters + ---------- + url : str + URL string to validate. + allow_list : list[str] | None + Allowed hostnames. When provided, the parsed hostname must be in this list. + If ``None``, any non-empty hostname is accepted. + allow_login : bool, default=False + Whether username/password URL components are permitted. + + Returns + ------- + bool + ``True`` when the URL passes safety checks, otherwise ``False``. 
+ + Examples + -------- + >>> url_is_safe("https://example.com") + True + >>> url_is_safe("https://example.com", allow_list=["example.com"]) + True + >>> url_is_safe("https://example.com", allow_list=["oracle.com"]) + False + >>> url_is_safe("https://user:test@example.com") + False + >>> url_is_safe("https://user:test@example.com", allow_login=True) + True + >>> url_is_safe("not-a-url") + False + >>> url_is_safe("127.0.0.1:6666\\@allowlist.com", ["allowlist.com"]) + False + >>> url_is_safe("https://attacker.com:6666\\@allowlist.com", ["allowlist.com"]) + False + >>> url_is_safe("https://username:attacker.com\\@allowlist.com", ["allowlist.com"]) + False + >>> url_is_safe("https://username:test@allowlist.com", ["allowlist.com"], allow_login = True) + True + """ + try: + parsed_url = urllib.parse.urlparse(url) + except ValueError: + return False + if not allow_login: + if parsed_url.username or parsed_url.password: + logger.debug("Potential attempt to redirect to an invalid URL: hostname %s", parsed_url.hostname) + return False + + hostname = parsed_url.hostname + if hostname is None or hostname == "": + return False + if allow_list and (hostname not in allow_list): + logger.debug("URL %s is not in allowed hostnames.", url) + return False + + return True + + def send_get_http(url: str, headers: dict) -> dict: """Send the GET HTTP request with the given url and headers.