diff --git a/.github/workflows/_build.yaml b/.github/workflows/_build.yaml index 9949d0a0e..649d95b05 100644 --- a/.github/workflows/_build.yaml +++ b/.github/workflows/_build.yaml @@ -57,13 +57,13 @@ jobs: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ matrix.python }} # Install Java. - name: Set up JDK - uses: actions/setup-java@b36c23c0d998641eff861008f374ee103c25ac73 # v4.4.0 + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 with: distribution: oracle java-version: '17' @@ -129,7 +129,7 @@ jobs: # Currently reusable workflows do not support setting strategy property from the caller workflow. - name: Upload the package artifact for debugging and release if: matrix.os == env.ARTIFACT_OS && matrix.python == env.ARTIFACT_PYTHON - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: artifact-${{ matrix.os }}-python-${{ matrix.python }} path: dist diff --git a/.github/workflows/_build_docker.yaml b/.github/workflows/_build_docker.yaml index 06f836280..d503bfff9 100644 --- a/.github/workflows/_build_docker.yaml +++ b/.github/workflows/_build_docker.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. # This is a reuseable workflow to build and test the Docker image. Note that this workflow does not @@ -33,12 +33,12 @@ jobs: # The Docker integration tests require Python 3.11. 
- name: Set up Python - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.11' - name: Download artifact - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ inputs.artifact-name }} path: dist @@ -63,6 +63,22 @@ jobs: IMAGE_NAME: ghcr.io/oracle/macaron run: make build-docker + # Export the built image so downstream jobs/workflows can load and reuse + # the exact same image without pushing to a registry. + - name: Export test Docker image + run: docker save ghcr.io/oracle/macaron:test --output /tmp/macaron-test-image.tar + + # Upload the image tarball for the reusable action test workflow. + - name: Upload test Docker image artifact + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: macaron-test-image + path: /tmp/macaron-test-image.tar + if-no-files-found: error + retention-days: 1 + + # Install helper tooling used by integration test utilities that validate + # the built Docker image behavior. - name: Install dependencies for integration test utility run: make setup-integration-test-utility-for-docker @@ -74,3 +90,14 @@ jobs: DOCKER_PULL: never GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: make integration-test-docker + + test-macaron-action: + # Reuse the action test workflow against the exact Docker image built above. + # The image is transferred via artifact to avoid pushing to a registry. 
+ needs: [build-docker] + permissions: + contents: read + uses: ./.github/workflows/test_macaron_action.yaml + with: + docker_image_artifact_name: macaron-test-image + macaron_image_tag: test diff --git a/.github/workflows/_deploy-github-pages.yaml b/.github/workflows/_deploy-github-pages.yaml index bc56e33ee..ebbba1386 100644 --- a/.github/workflows/_deploy-github-pages.yaml +++ b/.github/workflows/_deploy-github-pages.yaml @@ -35,7 +35,7 @@ jobs: fetch-depth: 0 - name: Download artifact - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ inputs.artifact-name }} path: dist diff --git a/.github/workflows/_release-notifications.yaml b/.github/workflows/_release-notifications.yaml deleted file mode 100644 index 246f6bc2c..000000000 --- a/.github/workflows/_release-notifications.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -# Send a Slack release notification. Instructions to set up Slack to receive -# messages can be found here: https://github.com/slackapi/slack-github-action#setup-2 - -name: Release Notifications -on: - workflow_call: - inputs: - repo_name: - required: true - type: string - release_tag: - required: true - type: string - release_url: - required: true - type: string - secrets: - SLACK_WEBHOOK_URL: - required: true - -# Grant no permissions to this workflow. 
-permissions: {} - -jobs: - slack: - name: Slack release notification - runs-on: ubuntu-latest - steps: - - - name: Notify via Slack - run: | - curl --header "Content-Type: application/json; charset=UTF-8" --request POST --data "$SLACK_WEBHOOK_MSG" "$SLACK_WEBHOOK_URL" - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} - SLACK_WEBHOOK_MSG: | - { - "text": "${{ inputs.repo_name }} published a new release ${{ inputs.release_tag }}", - "blocks": [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "*${{ inputs.repo_name }}* published a new release <${{ inputs.release_url }}|${{ inputs.release_tag }}>" - } - } - ] - } diff --git a/.github/workflows/codeql-analysis.yaml b/.github/workflows/codeql-analysis.yaml index f72b51262..20e969518 100644 --- a/.github/workflows/codeql-analysis.yaml +++ b/.github/workflows/codeql-analysis.yaml @@ -38,7 +38,7 @@ jobs: uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Set up Python - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.11' diff --git a/.github/workflows/macaron-analysis.yaml b/.github/workflows/macaron-analysis.yaml new file mode 100644 index 000000000..d0da663d1 --- /dev/null +++ b/.github/workflows/macaron-analysis.yaml @@ -0,0 +1,43 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +# Run Macaron's policies and generate Verification Summary Attestation reports. 
+# See https://github.com/oracle/macaron + +name: Run Macaron to check supply chain security issues +on: + push: + branches: + - main + paths: + - .github/workflows/** + pull_request: + paths: + - .github/workflows/** + schedule: + - cron: 20 15 * * 3 +permissions: + contents: read + +jobs: + run_macaron: + runs-on: ubuntu-latest + + steps: + + - name: Check out repository + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 0 + persist-credentials: false + + # Check the GitHub Actions workflows in the repository for vulnerabilities. + # Note: adjust the policy_purl to refer to your repository URL. + - name: Run Macaron action + id: run_macaron + uses: oracle/macaron@fda4dda04aa7228fcaba162804891806cf5a1375 # v0.22.0 + with: + repo_path: ./ + policy_file: check-github-actions + policy_purl: pkg:github.com/oracle/macaron@.* + reports_retention_days: 90 diff --git a/.github/workflows/pr-conventional-commits.yaml b/.github/workflows/pr-conventional-commits.yaml index 3ca285fdd..b42ac0fb9 100644 --- a/.github/workflows/pr-conventional-commits.yaml +++ b/.github/workflows/pr-conventional-commits.yaml @@ -30,7 +30,7 @@ jobs: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.11' diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index e63867579..14a0857d2 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. # We run checks on pushing to the specified branches. 
@@ -45,7 +45,7 @@ jobs: token: ${{ secrets.REPO_ACCESS_TOKEN }} - name: Set up Python - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.11' @@ -131,7 +131,7 @@ jobs: } >> "$GITHUB_OUTPUT" - name: Download artifact - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ env.ARTIFACT_NAME }} path: dist @@ -215,7 +215,7 @@ jobs: rm -f "$CHECKSUMS" - name: Set up Python - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.11' @@ -310,7 +310,7 @@ jobs: # fetch-depth: 0 # - name: Download provenance - # uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + # uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 # with: # name: ${{ needs.provenance.outputs.provenance-name }} @@ -333,19 +333,3 @@ jobs: # https://github.com/actions/runner/issues/2394. artifact-name: artifact-ubuntu-latest-python-3.11 artifact-sha256: ${{ needs.build.outputs.artifacts-sha256 }} - - # Send out release notifications after the Release was published on GitHub. - # Uncomment the `if` to disable sending release notifications. 
- notifications: - if: ${{ false }} - needs: [release] - name: Send Release notifications - uses: ./.github/workflows/_release-notifications.yaml - permissions: - contents: read - with: - repo_name: ${{ github.event.repository.name }} - release_tag: ${{ needs.release.outputs.release-tag }} - release_url: ${{ needs.release.outputs.release-url }} - secrets: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} diff --git a/.github/workflows/scorecards-analysis.yaml b/.github/workflows/scorecards-analysis.yaml index 0162de5d7..544046506 100644 --- a/.github/workflows/scorecards-analysis.yaml +++ b/.github/workflows/scorecards-analysis.yaml @@ -49,7 +49,7 @@ jobs: # Upload the results as artifacts (optional). - name: Upload artifact - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: SARIF file path: results.sarif diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index 930863d30..4c72dffda 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -1,65 +1,109 @@ # Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. -name: Test Macaron Action (tutorials) +name: Test Macaron Action on: - push: - paths: - - action.yaml - pull_request: - paths: - - action.yaml - workflow_dispatch: + workflow_call: + # Optional overrides used by reusable callers (for example _build_docker.yaml). + # Defaults target the test image artifact produced by our Docker build workflow. 
+ inputs: + docker_image_artifact_name: + required: false + type: string + default: macaron-test-image + macaron_image_tag: + required: false + type: string + default: test permissions: - id-token: write - attestations: write + contents: read jobs: tutorial-commit-finder: name: Analyzing and comparing different versions of an artifact runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze arrow@1.3.0) uses: ./ with: package_url: pkg:pypi/arrow@1.3.0 output_dir: macaron_output/commit_finder + upload_reports: 'false' - name: Run Macaron (analyze arrow@0.15.0) uses: ./ with: package_url: pkg:pypi/arrow@0.15.0 output_dir: macaron_output/commit_finder + upload_reports: 'false' - name: Run Macaron (verify policy - has-hosted-build) + id: verify_has_hosted_build + # This verification is expected to fail for this tutorial scenario. + continue-on-error: true uses: ./ with: policy_file: ./tests/tutorial_resources/commit_finder/has-hosted-build.dl output_dir: macaron_output/commit_finder + upload_reports: 'false' + - name: Assert expected failure (has-hosted-build) + if: ${{ always() }} + run: | + # Keep this workflow green only when the verify step actually fails. + if [ "${{ steps.verify_has_hosted_build.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." 
+ exit 1 + fi tutorial-detect-malicious-package: name: Detecting malicious packages runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze django@5.0.6 without dependencies) uses: ./ with: package_url: pkg:pypi/django@5.0.6 output_dir: macaron_output/detect_malicious_package + upload_reports: 'false' - name: Run Macaron (verify policy - check-django) uses: ./ with: policy_file: ./tests/tutorial_resources/detect_malicious_package/check-django.dl output_dir: macaron_output/detect_malicious_package + upload_reports: 'false' - name: Setup Python for analyzed venv - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: 3.11.14 @@ -83,101 +127,159 @@ jobs: output_dir: macaron_output/detect_malicious_package deps_depth: '1' python_venv: /tmp/.django_venv + upload_reports: 'false' - name: Run Macaron (verify policy - check-dependencies) uses: ./ with: policy_file: ./tests/tutorial_resources/detect_malicious_package/check-dependencies.dl output_dir: macaron_output/detect_malicious_package + upload_reports: 'false' tutorial-detect-vulnerable-actions: name: How to detect vulnerable GitHub Actions runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker 
image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze repo - apache/logging-log4j2) uses: ./ with: repo_path: https://github.com/apache/logging-log4j2 output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'false' + write_job_summary: 'false' - name: Run Macaron (verify policy - github_actions_vulns for repo) + id: verify_github_actions_vulns_repo_tutorial uses: ./ with: policy_file: ./tests/tutorial_resources/detect_vulnerable_github_actions/check_github_actions_vuln_repo.dl output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'true' + reports_artifact_name: macaron-reports-vulnerable-actions-repo + write_job_summary: 'true' - name: Run Macaron (analyze purl - log4j-core example) uses: ./ with: package_url: pkg:maven/org.apache.logging.log4j/log4j-core@2.25.3 output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'false' + write_job_summary: 'false' - name: Run Macaron (verify policy - github_actions_vulns for purl) + id: verify_github_actions_vulns_purl_tutorial uses: ./ with: policy_file: ./tests/tutorial_resources/detect_vulnerable_github_actions/check_github_actions_vuln_purl.dl output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'true' + reports_artifact_name: macaron-reports-vulnerable-actions-purl + write_job_summary: 'true' tutorial-provenance: name: Provenance discovery, extraction, and verification runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + 
if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze semver 7.7.2) uses: ./ with: package_url: pkg:npm/semver@7.7.2 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - semver) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_semver.dl output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (analyze toga 0.5.1 - PyPI provenance) uses: ./ with: package_url: pkg:pypi/toga@0.5.1 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - toga PyPI) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_toga.dl output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (analyze toga 0.4.8 - GitHub attestation) uses: ./ with: package_url: pkg:pypi/toga@0.4.8 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - toga GitHub) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_toga.dl output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (analyze urllib3 2.0.0a1 - GitHub attestation) uses: ./ with: package_url: pkg:pypi/urllib3@2.0.0a1 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - urllib3) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_urllib3.dl output_dir: macaron_output/provenance + upload_reports: 'false' tutorial-detect-malicious-java-dep: name: Detecting Java dependencies manually uploaded to Maven Central 
runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze example-maven-app with SBOM) uses: ./ with: @@ -186,24 +288,39 @@ jobs: output_dir: macaron_output/detect_malicious_java_dep sbom_path: ./tests/tutorial_resources/detect_malicious_java_dep/example-sbom.json deps_depth: '1' + upload_reports: 'false' - name: Run Macaron (verify policy - detect-malicious-upload) uses: ./ with: policy_file: ./tests/tutorial_resources/detect_malicious_java_dep/example-maven-app.dl output_dir: macaron_output/detect_malicious_java_dep + upload_reports: 'false' tutorial-exclude-include-checks: name: Exclude and include checks in Macaron runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze micronaut-core with default checks) uses: ./ with: package_url: pkg:maven/io.micronaut/micronaut-core@4.3.10 output_dir: macaron_output/exclude_include_checks/normal + upload_reports: 'false' - name: Run Macaron 
(analyze micronaut-core excluding witness check via defaults.ini) uses: ./ @@ -211,3 +328,86 @@ jobs: package_url: pkg:maven/io.micronaut/micronaut-core@4.3.10 defaults_path: ./tests/tutorial_resources/exclude_include_checks/defaults_exclude_witness.ini output_dir: macaron_output/exclude_include_checks/excluded + upload_reports: 'false' + + test-detect-vulnerable-actions: + name: How to detect vulnerable GitHub Actions + runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar + + - name: Run Macaron (analyze github_actions_vulns for https://github.com/oracle/coherence-js-client) + id: verify_github_actions_vulns_repo_test + # This integration target is intentionally vulnerable; failure is expected. + continue-on-error: true + uses: ./ + with: + repo_path: https://github.com/oracle/coherence-js-client + digest: 39166341bc31f75b663ff439dae36170fb3e99a9 + policy_file: check-github-actions + policy_purl: pkg:github.com/oracle/coherence-js-client@.* + output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'true' + reports_artifact_name: macaron-vulnerable-actions-fail-diagnosis + write_job_summary: 'true' + - name: Assert expected failure (github_actions_vulns for repo test) + if: ${{ always() }} + run: | + # Explicitly assert failure so regressions are visible in CI results. + if [ "${{ steps.verify_github_actions_vulns_repo_test.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." 
+ exit 1 + fi + + test-detect-potential-injection: + name: How to detect potential injection in GitHub Actions + runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar + + - name: Run Macaron (analyze github_actions_vulns for https://github.com/oracle/graalpython) + id: verify_github_actions_vulns_repo_test + # This integration target is intentionally vulnerable; failure is expected. + continue-on-error: true + uses: ./ + with: + repo_path: https://github.com/oracle/graalpython + digest: f5f7e67823a699213ab06c86440da94ead672467 + policy_file: check-github-actions + policy_purl: pkg:github.com/oracle/graalpython@.* + output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'false' + reports_artifact_name: macaron-injection-actions-fail-diagnosis + write_job_summary: 'true' + - name: Assert expected failure (github_actions_vulns for injection test) + if: ${{ always() }} + run: | + # Explicitly assert failure so regressions are visible in CI results. + if [ "${{ steps.verify_github_actions_vulns_repo_test.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." + exit 1 + fi diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6166236b5..4d0c14627 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,7 +17,7 @@ repos: # Commitizen enforces semantic and conventional commit messages. 
- repo: https://github.com/commitizen-tools/commitizen - rev: v4.8.3 + rev: v4.13.9 hooks: - id: commitizen name: Check conventional commit message @@ -25,7 +25,7 @@ repos: # Sort imports. - repo: https://github.com/pycqa/isort - rev: 6.0.1 + rev: 8.0.1 hooks: - id: isort name: Sort import statements @@ -34,14 +34,14 @@ repos: # Add Black code formatters. - repo: https://github.com/ambv/black - rev: 25.1.0 + rev: 26.3.1 hooks: - id: black name: Format code - args: [--config, pyproject.toml] + args: [--config, pyproject.toml, --target-version, py311] exclude: ^tests/malware_analyzer/pypi/resources/sourcecode_samples.* - repo: https://github.com/asottile/blacken-docs - rev: 1.19.1 + rev: 1.20.0 hooks: - id: blacken-docs name: Format code in docstrings @@ -50,7 +50,7 @@ repos: # Upgrade and rewrite Python idioms. - repo: https://github.com/asottile/pyupgrade - rev: v3.20.0 + rev: v3.21.2 hooks: - id: pyupgrade name: Upgrade code idioms @@ -72,13 +72,13 @@ repos: # Check GitHub Actions workflow files. - repo: https://github.com/Mateusz-Grzelinski/actionlint-py - rev: v1.7.7.23 + rev: v1.7.11.24 hooks: - id: actionlint # Check shell scripts with shellcheck. - repo: https://github.com/shellcheck-py/shellcheck-py - rev: v0.10.0.1 + rev: v0.11.0.1 hooks: - id: shellcheck exclude: ^tests/ @@ -110,7 +110,7 @@ repos: # Check for potential security issues. - repo: https://github.com/PyCQA/bandit - rev: 1.8.6 + rev: 1.9.4 hooks: - id: bandit name: Check for security issues @@ -123,7 +123,7 @@ repos: # Enable a whole bunch of useful helper hooks, too. # See https://pre-commit.com/hooks.html for more hooks. - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: check-ast - id: check-case-conflict @@ -164,7 +164,7 @@ repos: # Check and prettify the configuration files. 
- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks - rev: v2.15.0 + rev: v2.16.0 hooks: - id: pretty-format-ini args: [--autofix] @@ -224,7 +224,7 @@ repos: # A linter for Golang - repo: https://github.com/golangci/golangci-lint - rev: v2.8.0 + rev: v2.11.4 hooks: - id: golangci-lint diff --git a/Makefile b/Makefile index 1209a07b3..13394ddb3 100644 --- a/Makefile +++ b/Makefile @@ -299,12 +299,13 @@ requirements.txt: pyproject.toml # editable mode (like the one in development here) because they may not have # a PyPI entry; also print out CVE description and potential fixes if audit # found an issue. +# Remove GHSA-5239-wwwm-4pmq from the ignore list when it is patched. .PHONY: audit audit: if ! $$(python -c "import pip_audit" &> /dev/null); then \ echo "No package pip_audit installed, upgrade your environment!" && exit 1; \ fi; - python -m pip_audit --skip-editable --desc on --fix --dry-run + python -m pip_audit --skip-editable --desc on --fix --dry-run --ignore-vuln GHSA-5239-wwwm-4pmq # Run some or all checks over the package code base. .PHONY: check check-code check-bandit check-flake8 check-lint check-mypy check-go check-actionlint diff --git a/action.yaml b/action.yaml index 418f37705..10e5dc924 100644 --- a/action.yaml +++ b/action.yaml @@ -43,6 +43,18 @@ inputs: output_dir: description: The output destination path for Macaron. default: output + upload_reports: + description: Upload Macaron reports as a workflow artifact. + default: 'true' + reports_artifact_name: + description: Name of the uploaded reports artifact. + default: macaron-reports + reports_retention_days: + description: Retention period in days for uploaded reports. + default: '90' + write_job_summary: + description: Write a human-friendly summary to the workflow run page. + default: 'true' upload_attestation: description: 'Upload the generated VSA report. 
default : false' default: false @@ -51,12 +63,24 @@ inputs: default: ${{ github.workspace }} outputs: + html_report_path: + description: Path to the generated HTML analysis report (if available). + value: ${{ steps.run-macaron-analysis.outputs.html_report_path }} + report_dir: + description: Directory containing HTML/JSON reports. + value: ${{ steps.collect-reports.outputs.report_dir }} + db_path: + description: Path to the generated Macaron SQLite database. + value: ${{ steps.collect-reports.outputs.db_path }} policy_report: description: Paths to the Macaron analysis report - value: ${{ steps.run-macaron-policy-verification.outputs.policy_report }} + value: ${{ steps.collect-reports.outputs.policy_report }} vsa_report: description: Verification Summary Attestation - value: ${{ steps.run-macaron-policy-verification.outputs.vsa_report }} + value: ${{ steps.collect-reports.outputs.vsa_report }} + vsa_generated: + description: Whether VSA was generated. + value: ${{ steps.collect-reports.outputs.vsa_generated }} runs: using: composite @@ -103,10 +127,85 @@ runs: POLICY_FILE: ${{ inputs.policy_file }} POLICY_PURL: ${{ inputs.policy_purl }} + - name: Collect report paths + id: collect-reports + if: ${{ always() }} + run: | + OUTPUT_DIR="${OUTPUT_DIR:-output}" + POLICY_REPORT="${OUTPUT_DIR}/policy_report.json" + VSA_REPORT="${OUTPUT_DIR}/vsa.intoto.jsonl" + DB_PATH="${OUTPUT_DIR}/macaron.db" + REPORT_DIR="${OUTPUT_DIR}/reports" + + if [ -f "${VSA_REPORT}" ]; then + VSA_VALUE="${VSA_REPORT}" + VSA_GENERATED=true + else + VSA_VALUE="VSA Not Generated." 
+ VSA_GENERATED=false + fi + + echo "report_dir=${REPORT_DIR}" >> "${GITHUB_OUTPUT}" + echo "db_path=${DB_PATH}" >> "${GITHUB_OUTPUT}" + echo "policy_report=${POLICY_REPORT}" >> "${GITHUB_OUTPUT}" + echo "vsa_report=${VSA_VALUE}" >> "${GITHUB_OUTPUT}" + echo "vsa_generated=${VSA_GENERATED}" >> "${GITHUB_OUTPUT}" + + # NOTE(review): heredoc body reconstructed from garbled text — confirm the exact report path list. + { + echo "reports_path<<EOF" + echo "${REPORT_DIR}" + echo "${DB_PATH}" + echo "${POLICY_REPORT}" + echo "${VSA_REPORT}" + echo "EOF" + } >> "${GITHUB_OUTPUT}" + shell: bash + env: + OUTPUT_DIR: ${{ inputs.output_dir }} + REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} + + - name: Upload Macaron Reports + id: upload-macaron-reports + if: ${{ always() && inputs.upload_reports == 'true' }} + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: ${{ inputs.reports_artifact_name }} + path: ${{ steps.collect-reports.outputs.reports_path }} + if-no-files-found: warn + retention-days: ${{ inputs.reports_retention_days }} + + - name: Summarize Macaron Results + if: ${{ always() && inputs.write_job_summary == 'true' }} + run: | + bash "$GITHUB_ACTION_PATH/scripts/actions/write_job_summary.sh" + shell: bash + env: + OUTPUT_DIR: ${{ inputs.output_dir }} + VSA_GENERATED: ${{ steps.collect-reports.outputs.vsa_generated }} + POLICY_REPORT: ${{ steps.collect-reports.outputs.policy_report }} + POLICY_FILE: ${{ inputs.policy_file }} + HTML_REPORT_PATH: ${{ steps.run-macaron-analysis.outputs.html_report_path }} + UPLOAD_REPORTS: ${{ inputs.upload_reports }} + REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} + REPORTS_ARTIFACT_URL: ${{ steps.upload-macaron-reports.outputs.artifact-url }} + + - name: Enforce VSA generation + if: ${{ always() && inputs.policy_file != '' }} + run: | + if [ "${VSA_GENERATED}" != "true" ]; then + echo "Policy verification failed. VSA was not generated at ${OUTPUT_DIR}/vsa.intoto.jsonl. Check uploaded reports." 
+ exit 1 + fi + shell: bash + env: + OUTPUT_DIR: ${{ inputs.output_dir }} + VSA_GENERATED: ${{ steps.collect-reports.outputs.vsa_generated }} + POLICY_FILE: ${{ inputs.policy_file }} + - name: Upload Attestation - if: ${{ inputs.upload_attestation == 'true' && steps.run-macaron-policy-verification.outputs.vsa_report != 'VSA Not Generated.' }} + if: ${{ inputs.upload_attestation == 'true' && steps.collect-reports.outputs.vsa_generated == 'true' }} uses: actions/attest@daf44fb950173508f38bd2406030372c1d1162b1 #3.0.0 with: subject-path: ${{ inputs.subject_path }} predicate-type: https://slsa.dev/verification_summary/v1 - predicate-path: ${{ steps.run-macaron-policy-verification.outputs.vsa_report }} + predicate-path: ${{ steps.collect-reports.outputs.vsa_report }} diff --git a/docs/source/pages/macaron_action.rst b/docs/source/pages/macaron_action.rst index dc8ebb477..8deb62842 100644 --- a/docs/source/pages/macaron_action.rst +++ b/docs/source/pages/macaron_action.rst @@ -9,7 +9,7 @@ This document describes the composite GitHub Action defined in ``action.yaml`` a Quick usage ----------- -When using this action you can reference the action in your workflow. Example: +When you use this action, you can reference it directly in your workflow. For a real-world example, check out our `workflow `_ (we use it for dogfooding), or follow the example below to understand how it works: .. code-block:: yaml @@ -19,29 +19,30 @@ When using this action you can reference the action in your workflow. 
Example: steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Run Macaron Security Analysis Action - uses: oracle/macaron@v0.22.0 + uses: oracle/macaron@fda4dda04aa7228fcaba162804891806cf5a1375 # v0.22.0 with: repo_path: 'https://github.com/example/project' policy_file: check-github-actions policy_purl: 'pkg:github.com/example/project' - output_dir: 'macaron-output' - upload_attestation: true + reports_retention_days: 90 + +By default, the action posts a human-friendly results summary to the GitHub Actions run page (job summary). If you upload the results like in this `workflow `_, check this :ref:`documentation ` to see how to read and understand them. Example: policy verification only ---------------------------------- To run only the policy verification step (when you already have an output -database), call the action with ``policy_file`` and set ``output_dir`` to the -directory containing ``macaron.db``: +database), call the action with ``policy_file``. If the previous analysis step +used the default output path, you can omit ``output_dir`` here. If you set a +custom ``output_dir`` in the previous step, use the same value here so policy +verification reads the matching ``macaron.db``. .. code-block:: yaml - name: Verify policy - uses: oracle/macaron@v0.22.0 + uses: oracle/macaron@fda4dda04aa7228fcaba162804891806cf5a1375 # v0.22.0 with: policy_file: policy.dl - output_dir: macaron-output - upload_attestation: true Inputs ------ @@ -101,9 +102,23 @@ options. Key inputs are listed below (see ``action.yaml`` for the full list): * - ``output_dir`` - Directory where Macaron writes results (database, reports, artifacts). - ``output`` + * - ``upload_reports`` + - When ``true``, upload generated Macaron reports as a workflow artifact. + - ``true`` + * - ``reports_artifact_name`` + - Name of the uploaded reports artifact. 
+ - ``macaron-reports`` + * - ``reports_retention_days`` + - Retention period in days for uploaded reports artifacts. + - ``90`` + * - ``write_job_summary`` + - When ``true``, write a human-friendly summary to the workflow run page. + - ``true`` * - ``upload_attestation`` - When ``true``, the action will attempt to upload a generated - verification attestation (VSA) after policy verification. + verification attestation (VSA) after policy verification. The attestation will be available + under the ``Actions/management`` tab. This feature requires ``id-token: write`` and + ``attestations: write`` Job permissions in the GitHub Actions workflow. - ``false`` * - ``subject_path`` - Path to the artifact serving as the subject of the attestation. @@ -112,8 +127,9 @@ options. Key inputs are listed below (see ``action.yaml`` for the full list): Outputs ------- -The composite action exposes the following outputs (set by the -``run_macaron_policy_verification.sh`` script when applicable): +The composite action exposes the following outputs (set by the action steps, +primarily ``Collect report paths``, with some values populated only when +analysis/policy verification generated them): .. list-table:: :header-rows: 1 @@ -121,6 +137,12 @@ The composite action exposes the following outputs (set by the * - Output - Description + * - ``html_report_path`` + - Path to the generated HTML analysis report (when available). + * - ``report_dir`` + - Directory containing generated HTML/JSON reports. + * - ``db_path`` + - Path to the generated Macaron SQLite database (typically ``/macaron.db``). * - ``policy_report`` - Path to the generated policy report JSON file produced by ``macaron verify-policy``. This file contains the policy evaluation @@ -129,7 +151,10 @@ The composite action exposes the following outputs (set by the - Path to the generated VSA (Verification Summary Attestation) in `in-toto `_ JSONL format. 
If no VSA was produced during verification, the action emits the string ``"VSA Not Generated."`` - instead of a path. + instead of a path. The attestation will be available + under the ``Actions/management`` tab. + * - ``vsa_generated`` + - ``true`` when a VSA was generated; otherwise ``false``. Default Policies ---------------- @@ -169,7 +194,7 @@ How the action works which assembles the ``macaron analyze`` command from the inputs and runs it. Results are written into ``output_dir``. -3. ``Run Macaron Policy Verification``: if a policy file or PURL is supplied, +3. ``Run Macaron Policy Verification``: if ``policy_file`` is supplied, the corresponding script runs ``macaron verify-policy`` against the - analysis database and writes ``policy_report`` and ``vsa_report`` to - ``$GITHUB_OUTPUT`` when produced. + analysis database (using ``policy_purl`` when provided) and writes + policy-related outputs when produced. diff --git a/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst b/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst index 953523a82..aa3c8c6aa 100644 --- a/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst +++ b/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst @@ -126,6 +126,8 @@ Run the ``verify-policy`` command to verify that the check passes: ./run_macaron.sh verify-policy --database ./output/macaron.db --file ./check_github_actions_vuln.dl +.. _detect-vuln-gh-actions-results: + ****************** Review the Results ****************** diff --git a/golang/cmd/bashparser/bashparser.go b/golang/cmd/bashparser/bashparser.go index 50cc6fec2..530bed89e 100644 --- a/golang/cmd/bashparser/bashparser.go +++ b/golang/cmd/bashparser/bashparser.go @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. 
*/ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ package main @@ -30,13 +30,18 @@ func main() { input := flag.String("input", "", "The bash script content to be parsed. Input is prioritized over file option.") out_path := flag.String("output", "", "The output file path to store the JSON content.") raw := flag.Bool("raw", false, "Return raw parse-tree") + rawGHAMap := flag.Bool("raw-gha-map", false, "Return raw parse-tree with GitHub expression mapping") flag.Parse() var json_content string var parse_err error if len(*input) > 0 { // Read the bash script from command line argument. - json_content, parse_err = bashparser.Parse(*input, *raw) + if *rawGHAMap { + json_content, parse_err = bashparser.ParseRawWithGitHubExprMap(*input) + } else { + json_content, parse_err = bashparser.Parse(*input, *raw) + } } else if len(*file_path) <= 0 { fmt.Fprintln(os.Stderr, "Missing bash script input or file path.") flag.PrintDefaults() @@ -48,7 +53,11 @@ func main() { fmt.Fprintln(os.Stderr, read_err.Error()) os.Exit(1) } - json_content, parse_err = bashparser.Parse(string(data), *raw) + if *rawGHAMap { + json_content, parse_err = bashparser.ParseRawWithGitHubExprMap(string(data)) + } else { + json_content, parse_err = bashparser.Parse(string(data), *raw) + } } if parse_err != nil { diff --git a/golang/internal/bashparser/bashparser.go b/golang/internal/bashparser/bashparser.go index b88e43a6e..fdfc63c2f 100644 --- a/golang/internal/bashparser/bashparser.go +++ b/golang/internal/bashparser/bashparser.go @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ // Package bashparser parses the bash scripts and provides parsed objects in JSON. 
@@ -7,6 +7,7 @@ package bashparser import ( "bytes" "encoding/json" + "fmt" "regexp" "strings" @@ -19,22 +20,63 @@ type CMDResult struct { Commands [][]string `json:"commands"` } -// ParseCommands parses the bash script to find bash commands. -// It returns the parsed commands in JSON format. -func ParseCommands(data string) (string, error) { +// RawWithGHAMapResult is used to export the raw bash AST with a GitHub-expression mapping. +type RawWithGHAMapResult struct { + AST any `json:"ast"` + GHAExprMap map[string]string `json:"gha_expr_map"` +} + +func preprocessGitHubActionsExpr(data string) (string, error) { // Replace GitHub Actions's expressions with ``$MACARON_UNKNOWN``` variable because the bash parser // doesn't recognize such expressions. For example: ``${{ foo }}`` will be replaced by ``$MACARON_UNKNOWN``. // Note that we don't use greedy matching, so if we have `${{ ${{ foo }} }}`, it will not be replaced by // `$MACARON_UNKNOWN`. // See: https://docs.github.com/en/actions/learn-github-actions/expressions. - var re, reg_error = regexp.Compile(`\$\{\{.*?\}\}`) + re, reg_error := regexp.Compile(`\$\{\{.*?\}\}`) if reg_error != nil { return "", reg_error } - // We replace the GH Actions variables with "$MACARON_UNKNOWN". - data = string(re.ReplaceAll([]byte(data), []byte("$$MACARON_UNKNOWN"))) - data_str := strings.NewReader(data) + return string(re.ReplaceAll([]byte(data), []byte("$$MACARON_UNKNOWN"))), nil +} + +func preprocessGitHubActionsExprWithMap(data string) (string, map[string]string, error) { + // Replace GitHub Actions expressions with unique bash-safe placeholders and return + // a mapping from placeholder variable names to the original expression body. + // + // Example: + // input: echo "${{ github.head_ref }}" + // output: echo "$MACARON_GHA_0001", {"MACARON_GHA_0001": "github.head_ref"} + // + // This preserves expression identity for downstream analysis while keeping the + // transformed script parseable by the bash parser. 
+ re, reg_error := regexp.Compile(`\$\{\{.*?\}\}`) + if reg_error != nil { + return "", nil, reg_error + } + + index := 0 + ghaMap := make(map[string]string) + processed := re.ReplaceAllStringFunc(data, func(match string) string { + index += 1 + key := fmt.Sprintf("MACARON_GHA_%04d", index) + expr := strings.TrimSpace(strings.TrimSuffix(strings.TrimPrefix(match, "${{"), "}}")) + ghaMap[key] = expr + return "$" + key + }) + + return processed, ghaMap, nil +} + +// ParseCommands parses the bash script to find bash commands. +// It returns the parsed commands in JSON format. +func ParseCommands(data string) (string, error) { + processed, preprocessErr := preprocessGitHubActionsExpr(data) + if preprocessErr != nil { + return "", preprocessErr + } + + data_str := strings.NewReader(processed) data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") if parse_err != nil { return "", parse_err @@ -71,19 +113,12 @@ func ParseCommands(data string) (string, error) { } func ParseRaw(data string) (string, error) { - // Replace GitHub Actions's expressions with ``$MACARON_UNKNOWN``` variable because the bash parser - // doesn't recognize such expressions. For example: ``${{ foo }}`` will be replaced by ``$MACARON_UNKNOWN``. - // Note that we don't use greedy matching, so if we have `${{ ${{ foo }} }}`, it will not be replaced by - // `$MACARON_UNKNOWN`. - // See: https://docs.github.com/en/actions/learn-github-actions/expressions. - var re, reg_error = regexp.Compile(`\$\{\{.*?\}\}`) - if reg_error != nil { - return "", reg_error + processed, preprocessErr := preprocessGitHubActionsExpr(data) + if preprocessErr != nil { + return "", preprocessErr } - // We replace the GH Actions variables with "$MACARON_UNKNOWN". 
- data = string(re.ReplaceAll([]byte(data), []byte("$$MACARON_UNKNOWN"))) - data_str := strings.NewReader(data) + data_str := strings.NewReader(processed) data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") if parse_err != nil { return "", parse_err @@ -98,6 +133,41 @@ func ParseRaw(data string) (string, error) { return b.String(), nil } +// ParseRawWithGitHubExprMap parses raw bash AST and returns it with a GitHub-expression placeholder mapping. +func ParseRawWithGitHubExprMap(data string) (string, error) { + processed, ghaMap, preprocessErr := preprocessGitHubActionsExprWithMap(data) + if preprocessErr != nil { + return "", preprocessErr + } + + data_str := strings.NewReader(processed) + data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") + if parse_err != nil { + return "", parse_err + } + + b := new(strings.Builder) + encode_err := typedjson.Encode(b, data_parsed) + if encode_err != nil { + return "", encode_err + } + + var astObj any + if unmarshalErr := json.Unmarshal([]byte(b.String()), &astObj); unmarshalErr != nil { + return "", unmarshalErr + } + + result := RawWithGHAMapResult{ + AST: astObj, + GHAExprMap: ghaMap, + } + resultBytes, marshalErr := json.MarshalIndent(result, "", " ") + if marshalErr != nil { + return "", marshalErr + } + return string(resultBytes), nil +} + func Parse(data string, raw bool) (string, error) { if raw { return ParseRaw(data) diff --git a/golang/internal/bashparser/bashparser_test.go b/golang/internal/bashparser/bashparser_test.go index 3825f459b..abf02055a 100644 --- a/golang/internal/bashparser/bashparser_test.go +++ b/golang/internal/bashparser/bashparser_test.go @@ -6,6 +6,7 @@ package bashparser import ( "encoding/json" "os" + "strings" "testing" ) @@ -32,3 +33,50 @@ func Test_parse_valid_bash_script(t *testing.T) { t.Errorf("Cannot unmarshal the returned JSON content from parsing %s: %v.", json_content, err) } } + +func Test_parse_raw_with_gha_expr_map(t *testing.T) { + input := `echo "${{ 
github.head_ref }}" && echo "${{ needs.prepare.outputs.fullVersion }}"` + json_content, parse_err := ParseRawWithGitHubExprMap(input) + if parse_err != nil || json_content == "" { + t.Fatalf("expected successful parse with mapping, got error: %v", parse_err) + } + + var result map[string]any + if err := json.Unmarshal([]byte(json_content), &result); err != nil { + t.Fatalf("cannot unmarshal parser output: %v", err) + } + + ast, astOK := result["ast"] + if !astOK || ast == nil { + t.Fatalf("expected non-empty ast field") + } + + mapRaw, mapOK := result["gha_expr_map"] + if !mapOK { + t.Fatalf("expected gha_expr_map field") + } + ghaMap, ok := mapRaw.(map[string]any) + if !ok { + t.Fatalf("expected gha_expr_map to be an object") + } + if len(ghaMap) != 2 { + t.Fatalf("expected 2 mapped expressions, got %d", len(ghaMap)) + } +} + +func Test_preprocess_github_actions_expr_with_map_replaces_with_single_dollar_var(t *testing.T) { + input := `echo "${{ github.head_ref }}"` + processed, ghaMap, err := preprocessGitHubActionsExprWithMap(input) + if err != nil { + t.Fatalf("unexpected preprocess error: %v", err) + } + if strings.Contains(processed, "$$MACARON_GHA_") { + t.Fatalf("expected single-dollar placeholder, got %q", processed) + } + if !strings.Contains(processed, "$MACARON_GHA_0001") { + t.Fatalf("expected placeholder var in processed script, got %q", processed) + } + if ghaMap["MACARON_GHA_0001"] != "github.head_ref" { + t.Fatalf("unexpected gha mapping: %#v", ghaMap) + } +} diff --git a/pyproject.toml b/pyproject.toml index ede72bdb5..67794b851 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -224,6 +224,9 @@ load-plugins = [ "pylint.extensions.set_membership", "pylint.extensions.typing", ] +# Disable unsubscriptable-object because Pylint has false positives and this check +# overlaps with mypy's checks. 
Enable the check when the related issue is resolved: +# https://github.com/pylint-dev/pylint/issues/9549 disable = [ "fixme", "line-too-long", # Replaced by Flake8 Bugbear B950 check. @@ -242,6 +245,7 @@ disable = [ "too-many-return-statements", "too-many-statements", "duplicate-code", + "unsubscriptable-object", ] [tool.pylint.MISCELLANEOUS] diff --git a/scripts/actions/run_macaron_analysis.sh b/scripts/actions/run_macaron_analysis.sh index 34305479c..ccde3e646 100644 --- a/scripts/actions/run_macaron_analysis.sh +++ b/scripts/actions/run_macaron_analysis.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. set -euo pipefail @@ -54,4 +54,28 @@ if [ -n "${PROVENANCE_EXPECTATION:-}" ]; then fi echo "Executing: $CMD" -eval "$CMD" + +output_file="$(mktemp)" +set +e +eval "$CMD" 2>&1 | tee "$output_file" +# Capture analyze command's exit code from the pipeline (index 0), then restore fail-fast mode. +status=${PIPESTATUS[0]} +set -e + +if [ "${status}" -ne 0 ]; then + rm -f "$output_file" + exit "${status}" +fi + +if [ -n "${GITHUB_OUTPUT:-}" ]; then + html_report_path="$( + sed -n 's/^[[:space:]]*HTML[[:space:]]\+Report[[:space:]]\+//p' "$output_file" \ + | sed 's/[[:space:]]*$//' \ + | tail -n 1 + )" + if [ -n "$html_report_path" ]; then + echo "html_report_path=${html_report_path}" >> "$GITHUB_OUTPUT" + fi +fi + +rm -f "$output_file" diff --git a/scripts/actions/setup_macaron.sh b/scripts/actions/setup_macaron.sh index a002bb534..cd519253c 100644 --- a/scripts/actions/setup_macaron.sh +++ b/scripts/actions/setup_macaron.sh @@ -1,12 +1,23 @@ #!/usr/bin/env bash -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. 
All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. set -euo pipefail MACARON_DIR="${RUNNER_TEMP:-/tmp}/macaron" mkdir -p "$MACARON_DIR" +# If a test image tag is explicitly provided (for reusable workflow testing), +# use the local run script from this checkout and preserve the provided tag. +if [ -n "${MACARON_IMAGE_TAG:-}" ]; then + SCRIPT_NAME="run_macaron.sh" + cp "$GITHUB_ACTION_PATH/scripts/release_scripts/run_macaron.sh" "$MACARON_DIR/$SCRIPT_NAME" + chmod +x "$MACARON_DIR/$SCRIPT_NAME" + echo "MACARON=$MACARON_DIR/$SCRIPT_NAME" >> "$GITHUB_ENV" + echo "MACARON_IMAGE_TAG=${MACARON_IMAGE_TAG}" >> "$GITHUB_ENV" + exit 0 +fi + ACTION_DIR="${RUNNER_TEMP:-/tmp}/macaron-action" rm -rf "$ACTION_DIR" mkdir -p "$ACTION_DIR" diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py new file mode 100644 index 000000000..bec011787 --- /dev/null +++ b/scripts/actions/write_job_summary.py @@ -0,0 +1,545 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""Generate GitHub Actions job summary content for Macaron action runs.""" + +from __future__ import annotations + +import json +import os +import re +import sqlite3 +from pathlib import Path +from urllib.parse import urlsplit + +CHECK_RESULT_DEFAULT_COLUMNS = [ + "component_id", + "check_id", + "passed", +] + + +def _env(name: str, default: str = "") -> str: + return os.environ.get(name, default) + + +def _append_line(summary_path: Path, line: str = "") -> None: + with summary_path.open("a", encoding="utf-8") as summary: + summary.write(f"{line}\n") + + +def _resolve_policy_source(policy_input: str) -> tuple[Path | None, str]: + """Resolve a policy input to either a local file or a predefined policy template path.""" + if not policy_input: + return None, "" + + candidate = Path(policy_input) + if candidate.is_file(): + return candidate, "file" + + action_path = _env("GITHUB_ACTION_PATH", "") + if action_path: + template_path = Path( + os.path.join( + action_path, + "src", + "macaron", + "resources", + "policies", + "datalog", + f"{policy_input}.dl.template", + ) + ) + if template_path.is_file(): + return template_path, "predefined" + + return None, "unresolved" + + +def _resolve_existing_policy_sql(policy_name: str) -> Path | None: + """Resolve SQL diagnostics query for a predefined policy name.""" + action_path = _env("GITHUB_ACTION_PATH", "") + if not action_path: + return None + sql_path = Path(os.path.join(action_path, "src", "macaron", "resources", "policies", "sql", f"{policy_name}.sql")) + return sql_path if sql_path.is_file() else None + + +def _write_header( + summary_path: Path, + db_path: Path, + policy_report: str, + policy_file: str, + html_report: str, + policy_provided: bool, +) -> None: + upload_reports = _env("UPLOAD_REPORTS", "true").lower() == "true" + output_dir = _env("OUTPUT_DIR", "output") + reports_artifact_name = _env("REPORTS_ARTIFACT_NAME", "macaron-reports") + run_url = ( + f"{_env('GITHUB_SERVER_URL', 'https://github.com')}/" + 
f"{_env('GITHUB_REPOSITORY')}/actions/runs/{_env('GITHUB_RUN_ID')}" + ) + reports_artifact_url = _env("REPORTS_ARTIFACT_URL", run_url) + vsa_generated = _env("VSA_GENERATED", "").lower() + if vsa_generated in {"true", "false"}: + policy_succeeded = vsa_generated == "true" + else: + vsa_path = _env("VSA_PATH", f"{output_dir}/vsa.intoto.jsonl") + policy_succeeded = bool(vsa_path) and Path(vsa_path).is_file() + + _append_line(summary_path, "## Macaron Analysis Results") + _append_line(summary_path) + if upload_reports: + _append_line(summary_path, "Download reports from this artifact link:") + _append_line(summary_path, f"- [`{reports_artifact_name}`]({reports_artifact_url})") + _append_line(summary_path) + _append_line(summary_path, "Generated files:") + if html_report: + _append_line(summary_path, f"- HTML report: `{html_report}`") + _append_line(summary_path, f"- Database: `{db_path}`") + if policy_provided: + _append_line(summary_path, f"- Policy report: `{policy_report}`") + _append_line(summary_path) + + if policy_provided: + _append_line(summary_path, "Policy:") + if policy_file: + _append_line(summary_path, f"- Policy file: `{policy_file}`") + if policy_succeeded: + _append_line(summary_path, "- Policy status: :white_check_mark: Policy verification succeeded.") + else: + _append_line(summary_path, "- Policy status: :x: Policy verification failed.") + else: + _append_line(summary_path, "Policy:") + _append_line(summary_path, "- No policy was provided.") + _append_line(summary_path) + + +def _parse_policy_checks(policy_file: Path) -> tuple[list[str], list[str]]: + policy_text = policy_file.read_text(encoding="utf-8") + check_relations = sorted(set(re.findall(r"\b(check_[A-Za-z0-9_]+)\s*\(", policy_text))) + policy_check_ids = sorted(set(re.findall(r'"(mcn_[a-zA-Z0-9_]+)"', policy_text))) + return check_relations, policy_check_ids + + +def _resolve_existing_table(conn: sqlite3.Connection, table_name: str) -> str | None: + """Resolve a logical table name to an 
existing SQLite table name.""" + candidates = [table_name] + if not table_name.startswith("_"): + candidates.append(f"_{table_name}") + + cur = conn.cursor() + for candidate in candidates: + cur.execute("SELECT 1 FROM sqlite_master WHERE type IN ('table', 'view') AND name = ? LIMIT 1", (candidate,)) + if cur.fetchone(): + return candidate + return None + + +def _get_existing_columns(conn: sqlite3.Connection, table_name: str) -> list[str]: + cur = conn.cursor() + cur.execute(f"PRAGMA table_info({table_name})") + return [row[1] for row in cur.fetchall()] + + +def _query_selected_columns( + conn: sqlite3.Connection, + table_name: str, + desired_columns: list[str], + where_clause: str = "", + params: tuple[object, ...] = (), +) -> tuple[list[str], list[tuple]]: + available = _get_existing_columns(conn, table_name) + selected = [c for c in desired_columns if c in available] + if not selected: + return [], [] + + sql = f"SELECT {', '.join(selected)} FROM {table_name}" + if where_clause: + sql = f"{sql} WHERE {where_clause}" + sql = f"{sql} ORDER BY 1" + cur = conn.cursor() + cur.execute(sql, params) + return selected, cur.fetchall() + + +def _query_sql(conn: sqlite3.Connection, sql_query: str) -> tuple[list[str], list[tuple]]: + # Python's sqlite cursor.execute() can fail when the SQL begins with line comments. + # Strip leading SQL line comments while preserving the query body. 
+ sanitized_lines = [] + for line in sql_query.splitlines(): + if line.lstrip().startswith("--"): + continue + sanitized_lines.append(line) + sanitized_query = "\n".join(sanitized_lines).strip() + if not sanitized_query: + return [], [] + + cur = conn.cursor() + cur.execute(sanitized_query) + rows = cur.fetchall() + columns = [col[0] for col in (cur.description or [])] + return columns, rows + + +def _write_markdown_table(summary_path: Path, columns: list[str], rows: list[tuple]) -> bool: + if not columns or not rows: + return False + + _append_line(summary_path, f"| {' | '.join(columns)} |") + _append_line(summary_path, f"|{'|'.join(['---'] * len(columns))}|") + for row in rows: + values = [_format_table_cell(value) for value in row] + _append_line(summary_path, f"| {' | '.join(values)} |") + return True + + +def _format_table_cell(value: object) -> str: + text = str(value) + parsed_list = _parse_list_cell(text) + if parsed_list is not None: + items = [_format_list_item(item) for item in parsed_list] + return "
".join(f"- {item}" for item in items) if items else "`[]`" + + if text.startswith(("http://", "https://")): + parsed = urlsplit(text) + segments = [part for part in parsed.path.split("/") if part] + label = segments[-1] if segments else parsed.netloc + return f"[`{label}`]({text})" + return f"`{_sanitize_for_markdown_table_code(text)}`" + + +def _parse_list_cell(text: str) -> list[object] | None: + stripped = text.strip() + if not (stripped.startswith("[") and stripped.endswith("]")): + return None + try: + loaded = json.loads(stripped) + except json.JSONDecodeError: + return None + return loaded if isinstance(loaded, list) else None + + +def _format_list_item(value: object) -> str: + text = str(value) + if text.startswith(("http://", "https://")): + parsed = urlsplit(text) + segments = [part for part in parsed.path.split("/") if part] + label = segments[-1] if segments else parsed.netloc + return f"[`{label}`]({text})" + return f"`{_sanitize_for_markdown_table_code(text)}`" + + +def _sanitize_for_markdown_table_code(text: str) -> str: + """Sanitize inline-code content for markdown table cells.""" + return text.replace("`", "'").replace("|", "\\|").replace("\n", " ") + + +def _priority_label(priority: object) -> str: + """Map numeric priority to a concise severity-like label.""" + try: + value = int(priority) + except (TypeError, ValueError): + return str(priority) + + if value >= 90: + return "critical" + if value >= 70: + return "high" + if value >= 50: + return "medium" + return "low" + + +def _gha_group_label(group: str) -> str: + # finding_group is the top-level section key; finding_type is rendered per-row as the subtype. 
+ if group == "third_party_action_risk": + return "Third-party action risks" + if group == "workflow_security_issue": + return "Workflow security issues" + return group + + +def _extract_finding_summary(message: object) -> str: + """Extract a compact summary from a finding message.""" + text = str(message).strip() + if not text: + return "" + + # Expected format: "Summary: ... Details: ... Recommendation: ..." + match = re.search(r"Summary:\s*(.*?)(?:\s+Details:\s*|\s+Recommendation:\s*|$)", text, flags=re.IGNORECASE) + if match: + return match.group(1).strip() + + return text + + +def write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], rows: list[tuple]) -> bool: + """Write compact GitHub Actions vulnerability diagnostics to the job summary. + + Parameters + ---------- + summary_path : Path + Path to the GitHub job summary markdown file. + columns : list[str] + Ordered column names from the SQL diagnostics query result. + rows : list[tuple] + Row values matching ``columns`` order. + + Returns + ------- + bool + ``True`` if content was rendered; ``False`` when inputs are empty. + """ + if not columns or not rows: + return False + + col_index = {name: idx for idx, name in enumerate(columns)} + required = [ + "finding_priority", + "finding_type", + "action_name", + "action_ref", + "vulnerable_workflow", + ] + if any(name not in col_index for name in required): + return _write_markdown_table(summary_path, columns, rows) + + sorted_rows = sorted( + rows, + key=lambda row: ( + int(row[col_index["finding_priority"]]) if str(row[col_index["finding_priority"]]).isdigit() else 0 + ), + reverse=True, + ) + display_rows = sorted_rows[:10] + group_idx = col_index.get("finding_group") + + _append_line( + summary_path, + "_Showing top 10 findings by priority. 
Expand details below for full diagnostics._", + ) + preferred_groups = ["workflow_security_issue", "third_party_action_risk"] + groups_in_rows: list[str] = [] + if group_idx is not None: + discovered_groups = [str(row[group_idx]) for row in sorted_rows] + groups_in_rows.extend([group for group in preferred_groups if group in discovered_groups]) + groups_in_rows.extend([group for group in discovered_groups if group not in groups_in_rows]) + else: + groups_in_rows = ["all_findings"] + + for group in groups_in_rows: + if group_idx is None: + group_rows = display_rows + title = "Findings" + else: + group_rows = [row for row in sorted_rows if str(row[group_idx]) == group][:10] + if not group_rows: + continue + title = _gha_group_label(group) + _append_line(summary_path) + _append_line(summary_path, f"#### {title}") + _append_line(summary_path) + if group == "workflow_security_issue": + _append_line(summary_path, "| priority | type | summary | workflow |") + _append_line(summary_path, "|---|---|---|---|") + else: + _append_line(summary_path, "| priority | type | action | version | workflow |") + _append_line(summary_path, "|---|---|---|---|---|") + for row in group_rows: + priority_raw = row[col_index["finding_priority"]] + priority = f"`{_priority_label(priority_raw)} ({priority_raw})`" + finding_type = _format_table_cell(row[col_index["finding_type"]]) + finding_summary = _format_table_cell( + _extract_finding_summary(row[col_index["finding_message"]]) if "finding_message" in col_index else "" + ) + action_name = _format_table_cell(row[col_index["action_name"]]) + action_version = _format_table_cell(row[col_index["action_ref"]]) + workflow = _format_table_cell(row[col_index["vulnerable_workflow"]]) + if group == "workflow_security_issue": + _append_line( + summary_path, + f"| {priority} | {finding_type} | {finding_summary} | {workflow} |", + ) + else: + _append_line( + summary_path, + f"| {priority} | {finding_type} | {action_name} | {action_version} | {workflow} |", + 
) + + _append_line(summary_path) + _append_line(summary_path, "<details>
") + _append_line(summary_path, "Detailed findings") + _append_line(summary_path) + detail_groups = groups_in_rows if groups_in_rows else ["all_findings"] + row_counter = 1 + for group in detail_groups: + if group_idx is None: + group_rows = sorted_rows + title = "Findings" + else: + group_rows = [row for row in sorted_rows if str(row[group_idx]) == group] + if not group_rows: + continue + title = _gha_group_label(group) + _append_line(summary_path, f"**{title}**") + for row in group_rows: + action = str(row[col_index["action_name"]]) + version = str(row[col_index["action_ref"]]) + priority = row[col_index["finding_priority"]] + finding_type = str(row[col_index["finding_type"]]) + workflow = str(row[col_index["vulnerable_workflow"]]) + if group == "workflow_security_issue": + subject = workflow + else: + subject = f"{action}@{version}" if version else action + _append_line(summary_path, f"{row_counter}. **`{subject}`** (`{finding_type}`, priority `{priority}`)") + _append_line(summary_path, f"- Workflow: `{workflow}`") + + pin_idx = col_index.get("sha_pinned") + row_group = str(row[group_idx]) if group_idx is not None else "" + if pin_idx is not None and row_group == "third_party_action_risk" and row[pin_idx] is not None: + pin_state = "yes" if bool(row[pin_idx]) else "no" + _append_line(summary_path, f"- Pinned to full commit SHA: `{pin_state}`") + + vul_idx = col_index.get("vuln_urls") + if vul_idx is not None and row[vul_idx]: + parsed = _parse_list_cell(str(row[vul_idx])) + if parsed: + _append_line(summary_path, "- Vulnerabilities:") + for item in parsed: + _append_line(summary_path, f" - {_format_list_item(item)}") + + rec_idx = col_index.get("recommended_ref") + if rec_idx is not None and row[rec_idx]: + _append_line(summary_path, f"- Recommended ref: {_format_table_cell(row[rec_idx])}") + + msg_idx = col_index.get("finding_message") + if msg_idx is not None and row[msg_idx]: + _append_line(summary_path, f"- Details: {_format_table_cell(row[msg_idx])}") + 
_append_line(summary_path) + row_counter += 1 + _append_line(summary_path, "
") + return True + + +def _write_policy_check_lists(summary_path: Path, policy_check_ids: list[str]) -> None: + + if policy_check_ids: + _append_line( + summary_path, + f"- Checks referenced in policy: {', '.join(f'`{name}`' for name in policy_check_ids)}", + ) + + +def _write_custom_policy_failure_diagnostics(summary_path: Path, db_path: Path, policy_file: Path) -> None: + check_relations, policy_check_ids = _parse_policy_checks(policy_file) + has_details = False + + _append_line(summary_path) + _append_line(summary_path, "### Policy Failure Diagnostics") + _write_policy_check_lists(summary_path, policy_check_ids) + if check_relations or policy_check_ids: + has_details = True + + if not policy_check_ids: + if not has_details: + _append_line(summary_path, "- Additional check-level details are unavailable for this failure.") + return + + with sqlite3.connect(db_path) as conn: + resolved = _resolve_existing_table(conn, "check_result") + if not resolved: + if not has_details: + _append_line(summary_path, "- Additional check-level details are unavailable for this failure.") + return + placeholders = ",".join(["?"] * len(policy_check_ids)) + cols, rows = _query_selected_columns( + conn, + resolved, + CHECK_RESULT_DEFAULT_COLUMNS, + where_clause=f"check_id IN ({placeholders})", + params=tuple(policy_check_ids), + ) + + _append_line(summary_path) + _append_line(summary_path, "#### check_result") + if _write_markdown_table(summary_path, cols, rows): + has_details = True + else: + # Remove empty section header and provide a single friendly fallback below. 
+ _append_line(summary_path, "- Additional check-level details are unavailable for this failure.") + + +def _write_existing_policy_failure_diagnostics( + summary_path: Path, db_path: Path, policy_name: str, policy_file: Path +) -> None: + check_relations, policy_check_ids = _parse_policy_checks(policy_file) + has_details = False + + _append_line(summary_path) + _append_line(summary_path, f"### Policy Failure Diagnostics ({policy_name})") + _write_policy_check_lists(summary_path, policy_check_ids) + if check_relations or policy_check_ids: + has_details = True + + sql_path = _resolve_existing_policy_sql(policy_name) + if sql_path: + sql_query = sql_path.read_text(encoding="utf-8") + with sqlite3.connect(db_path) as conn: + cols, rows = _query_sql(conn, sql_query) + if cols and rows: + _append_line(summary_path) + _append_line(summary_path, f"#### Results") + if policy_name == "check-github-actions": + rendered = write_compact_gha_vuln_diagnostics(summary_path, cols, rows) + else: + rendered = _write_markdown_table(summary_path, cols, rows) + if rendered: + has_details = True + + if not has_details: + _append_line(summary_path, "- Additional check-level details are unavailable for this failure.") + + +def main() -> None: + output_dir = Path(_env("OUTPUT_DIR", "output")) + db_path = Path(_env("DB_PATH", os.path.join(str(output_dir), "macaron.db"))) + policy_report = _env("POLICY_REPORT", os.path.join(str(output_dir), "policy_report.json")) + policy_file_value = _env("POLICY_FILE", "") + resolved_policy_file, policy_mode = _resolve_policy_source(policy_file_value) + policy_label = "" + if policy_mode == "file" and resolved_policy_file: + policy_label = str(resolved_policy_file) + elif policy_mode == "predefined" and resolved_policy_file: + policy_label = f"{policy_file_value}" + elif policy_mode == "unresolved": + policy_label = f"{policy_file_value} (unresolved)" + html_report = _env("HTML_REPORT_PATH", "") + vsa_path_value = _env("VSA_PATH", 
os.path.join(str(output_dir), "vsa.intoto.jsonl")) + vsa_path = Path(vsa_path_value) if vsa_path_value else None + + summary_output = _env("GITHUB_STEP_SUMMARY") + if not summary_output: + raise RuntimeError("GITHUB_STEP_SUMMARY is not set.") + summary_path = Path(summary_output) + + policy_provided = bool(policy_file_value.strip()) + _write_header(summary_path, db_path, policy_report, policy_label, html_report, policy_provided) + + if not db_path.is_file(): + _append_line(summary_path, ":warning: Macaron database was not generated.") + return + + if (not vsa_path or not vsa_path.is_file()) and resolved_policy_file and resolved_policy_file.is_file(): + if policy_mode == "predefined": + _write_existing_policy_failure_diagnostics(summary_path, db_path, policy_file_value, resolved_policy_file) + else: + _write_custom_policy_failure_diagnostics(summary_path, db_path, resolved_policy_file) + + +if __name__ == "__main__": + main() diff --git a/scripts/actions/write_job_summary.sh b/scripts/actions/write_job_summary.sh new file mode 100755 index 000000000..432069c59 --- /dev/null +++ b/scripts/actions/write_job_summary.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+set -euo pipefail + +OUTPUT_DIR="${OUTPUT_DIR:-output}" +DB_PATH="${DB_PATH:-${OUTPUT_DIR}/macaron.db}" +POLICY_REPORT="${POLICY_REPORT:-${OUTPUT_DIR}/policy_report.json}" +POLICY_FILE="${POLICY_FILE:-}" +HTML_REPORT_PATH="${HTML_REPORT_PATH:-}" +VSA_PATH="${VSA_PATH:-${OUTPUT_DIR}/vsa.intoto.jsonl}" +UPLOAD_REPORTS="${UPLOAD_REPORTS:-true}" +REPORTS_ARTIFACT_NAME="${REPORTS_ARTIFACT_NAME:-macaron-reports}" +RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" +REPORTS_ARTIFACT_URL="${REPORTS_ARTIFACT_URL:-${RUN_URL}}" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +python "${SCRIPT_DIR}/write_job_summary.py" diff --git a/scripts/release_scripts/run_macaron.sh b/scripts/release_scripts/run_macaron.sh index 306ae1bf7..503b414b6 100755 --- a/scripts/release_scripts/run_macaron.sh +++ b/scripts/release_scripts/run_macaron.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. # This script runs the Macaron Docker image. @@ -238,6 +238,7 @@ function mount_dir_rw_allow_create() { # Note: This function is currently unused but retained to avoid using `_mount_dir` # if not necessary, which may have unintended side effects. 
# shellcheck disable=SC2317 +# shellcheck disable=SC2329 function mount_dir_rw_forbid_create() { arg_name=$1 dir_on_host=$2 diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index addb0f881..034fc0525 100644 --- a/src/macaron/__main__.py +++ b/src/macaron/__main__.py @@ -54,11 +54,9 @@ def analyze_slsa_levels_single(analyzer_single_args: argparse.Namespace) -> None if not (analyzer_single_args.repo_path or analyzer_single_args.package_url): # We don't mention --config-path as a possible option in this log message as it going to be move soon. # See: https://github.com/oracle/macaron/issues/417 - logger.error( - """Analysis target missing. Please provide a package url (PURL) and/or repo path. + logger.error("""Analysis target missing. Please provide a package url (PURL) and/or repo path. Examples of a PURL can be seen at https://github.com/package-url/purl-spec: - pkg:github/micronaut-projects/micronaut-core.""" - ) + pkg:github/micronaut-projects/micronaut-core.""") sys.exit(os.EX_USAGE) # Set provenance expectation path. diff --git a/src/macaron/artifact/maven.py b/src/macaron/artifact/maven.py index 8b9b0721c..b39036ea4 100644 --- a/src/macaron/artifact/maven.py +++ b/src/macaron/artifact/maven.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module declares types and utilities for Maven artifacts.""" + import re from collections.abc import Sequence diff --git a/src/macaron/build_spec_generator/common_spec/maven_spec.py b/src/macaron/build_spec_generator/common_spec/maven_spec.py index de0b4c5df..6082b5a91 100644 --- a/src/macaron/build_spec_generator/common_spec/maven_spec.py +++ b/src/macaron/build_spec_generator/common_spec/maven_spec.py @@ -3,7 +3,6 @@ """This module includes build specification and helper classes for Maven packages.""" - import logging from packageurl import PackageURL diff --git a/src/macaron/build_spec_generator/common_spec/pypi_spec.py b/src/macaron/build_spec_generator/common_spec/pypi_spec.py index ee67578c9..e7ce40ee9 100644 --- a/src/macaron/build_spec_generator/common_spec/pypi_spec.py +++ b/src/macaron/build_spec_generator/common_spec/pypi_spec.py @@ -132,7 +132,7 @@ def resolve_fields(self, purl: PackageURL) -> None: logger.debug("From package JSON inferred Python constraints: %s", python_version_set) - self.data["has_binaries"] = not pypi_package_json.has_pure_wheel() + self.data["has_binaries"] = pypi_package_json.has_non_pure_wheel() if self.data["has_binaries"]: logger.debug("Can not find a pure wheel") diff --git a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py index 67d1c6308..7a3cf9539 100644 --- a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py +++ b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py @@ -46,37 +46,23 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str: except InvalidVersion as error: logger.debug("Ran into issue converting %s to a version: %s", language_version, error) raise GenerateBuildSpecError("Derived interpreter version could not be parsed") from error - if not buildspec["build_tools"]: - raise GenerateBuildSpecError("Cannot generate dockerfile when build tool is unknown") - if not 
buildspec["build_commands"]: - raise GenerateBuildSpecError("Cannot generate dockerfile when build command is unknown") - backend_install_commands: str = " && ".join(build_backend_commands(buildspec)) - build_tool_install: str = "" - if ( - buildspec["build_tools"][0] != "pip" - and buildspec["build_tools"][0] != "conda" - and buildspec["build_tools"][0] != "flit" - ): - build_tool_install = f"pip install {buildspec['build_tools'][0]} && " - elif buildspec["build_tools"][0] == "flit": - build_tool_install = ( - f"pip install {buildspec['build_tools'][0]} && if test -f \"flit.ini\"; then python -m flit.tomlify; fi && " - ) - - modern_build_command = build_tool_install + " ".join(x for x in buildspec["build_commands"][0]) + + backend_install_commands = " && ".join(build_backend_commands(buildspec)) + + modern_build_command = "python -m build --wheel -n" + legacy_build_command = ( - 'if test -f "setup.py"; then pip install wheel && python setup.py bdist_wheel; ' - "else python -m build --wheel -n; fi" + 'if test -f "setup.py"; then python setup.py bdist_wheel; else python -m build --wheel -n; fi' ) - wheel_url: str = "" + # Initialized empty so that the validation script can exit gracefully in the case we find no upstream wheel wheel_name: str = "" - - wheel_urls = buildspec["upstream_artifacts"]["wheels"] - # We currently only look for the pure wheel, if it exists - if wheel_urls: - wheel_url = list(wheel_urls)[0] - wheel_name = wheel_url.rsplit("/", 1)[-1] + wheel_url: str = "" + if "wheels" in buildspec["upstream_artifacts"]: + wheel_urls = buildspec["upstream_artifacts"]["wheels"] + if wheel_urls: + wheel_url = wheel_urls[0] + wheel_name = wheel_url.rsplit("/", 1)[-1] else: logger.debug("We could not find an upstream artifact, and therefore we cannot run validation") @@ -138,7 +124,9 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str: EOF # Run the build - RUN source /deps/bin/activate && {modern_build_command if version in SpecifierSet(">=3.6") else 
legacy_build_command} + RUN source /deps/bin/activate && /deps/bin/pip install wheel && {modern_build_command + if version in SpecifierSet(">=3.6") + else legacy_build_command} # Validate script RUN cat <<'EOF' >/validate diff --git a/src/macaron/code_analyzer/dataflow_analysis/analysis.py b/src/macaron/code_analyzer/dataflow_analysis/analysis.py index 6f7c3f35f..1fed33070 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/analysis.py +++ b/src/macaron/code_analyzer/dataflow_analysis/analysis.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Entry points to perform and use the dataflow analysis.""" @@ -30,19 +30,22 @@ def analyse_github_workflow_file(workflow_path: str, repo_path: str | None, dump core.Node Graph representation of workflow and analysis results. """ - workflow = actionparser.parse(workflow_path) + try: + workflow = actionparser.parse(workflow_path) - analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) + analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) - core.reset_debug_sequence_number() - raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_path) - core.increment_debug_sequence_number() + core.reset_debug_sequence_number() + raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_path) + core.increment_debug_sequence_number() - raw_workflow_node.analyse() + raw_workflow_node.analyse() - if dump_debug: - with open("analysis." + workflow_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f: - printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True) + if dump_debug: + with open("analysis." 
+ workflow_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f: + printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True) + except Exception as e: + raise CallGraphError("Failed to analyze github workflow '" + workflow_path + "'") from e return raw_workflow_node @@ -68,17 +71,20 @@ def analyse_github_workflow( core.Node Graph representation of workflow and analysis results. """ - analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) + try: + analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) - core.reset_debug_sequence_number() - raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_source_path) - core.increment_debug_sequence_number() + core.reset_debug_sequence_number() + raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_source_path) + core.increment_debug_sequence_number() - raw_workflow_node.analyse() + raw_workflow_node.analyse() - if dump_debug: - with open("analysis." + workflow_source_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f: - printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True) + if dump_debug: + with open("analysis." + workflow_source_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f: + printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True) + except Exception as e: + raise CallGraphError("Failed to analyze github workflow '" + workflow_source_path + "'") from e return raw_workflow_node @@ -104,19 +110,24 @@ def analyse_bash_script( core.Node Graph representation of Bash script and analysis results. 
""" - analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) - bash_context = core.OwningContextRef(bash.BashScriptContext.create_in_isolation(analysis_context, source_path)) - core.reset_debug_sequence_number() - bash_node = bash.RawBashScriptNode(facts.StringLiteral(bash_content), bash_context) - core.increment_debug_sequence_number() - - bash_node.analyse() - - if dump_debug: - with open( - "analysis." + source_path.replace("/", "_") + "." + str(hash(bash_content)) + ".dot", "w", encoding="utf-8" - ) as f: - printing.print_as_dot_graph(bash_node, f, include_properties=True, include_states=True) + try: + analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) + bash_context = core.OwningContextRef(bash.BashScriptContext.create_in_isolation(analysis_context, source_path)) + core.reset_debug_sequence_number() + bash_node = bash.RawBashScriptNode(facts.StringLiteral(bash_content), bash_context) + core.increment_debug_sequence_number() + + bash_node.analyse() + + if dump_debug: + with open( + "analysis." + source_path.replace("/", "_") + "." + str(hash(bash_content)) + ".dot", + "w", + encoding="utf-8", + ) as f: + printing.print_as_dot_graph(bash_node, f, include_properties=True, include_states=True) + except Exception as e: + raise CallGraphError("Failed to analyze bash script '" + source_path + "'") from e return bash_node diff --git a/src/macaron/code_analyzer/dataflow_analysis/bash.py b/src/macaron/code_analyzer/dataflow_analysis/bash.py index 4a4903c86..6b0f05813 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/bash.py +++ b/src/macaron/code_analyzer/dataflow_analysis/bash.py @@ -75,6 +75,8 @@ class BashScriptContext(core.Context): stdout_loc: facts.LocationSpecifier #: Filepath for Bash script file. source_filepath: str + #: Mapping of parser placeholder vars to original GitHub expression bodies. + gha_expr_map_items: tuple[tuple[str, str], ...] 
= () @staticmethod def create_from_run_step( @@ -106,6 +108,7 @@ def create_from_run_step( stdout_scope=context.ref.job_context.ref.workflow_context.ref.console.get_non_owned(), stdout_loc=facts.Console(), source_filepath=source_filepath, + gha_expr_map_items=(), ) @staticmethod @@ -136,6 +139,7 @@ def create_from_bash_script(context: core.ContextRef[BashScriptContext], source_ stdout_scope=context.ref.stdout_scope.get_non_owned(), stdout_loc=facts.Console(), source_filepath=source_filepath, + gha_expr_map_items=(), ) @staticmethod @@ -164,6 +168,7 @@ def create_in_isolation(context: core.ContextRef[core.AnalysisContext], source_f stdout_scope=core.OwningContextRef(facts.Scope("stdout")), stdout_loc=facts.Console(), source_filepath=source_filepath, + gha_expr_map_items=(), ) def with_stdin( @@ -180,6 +185,7 @@ def with_stdin( self.stdout_scope, self.stdout_loc, self.source_filepath, + self.gha_expr_map_items, ) def with_stdout( @@ -196,6 +202,33 @@ def with_stdout( stdout_scope, stdout_loc, self.source_filepath, + self.gha_expr_map_items, + ) + + def with_gha_expr_map(self, gha_expr_map: dict[str, str]) -> BashScriptContext: + """Return a modified bash script context with GitHub-expression placeholder mappings. + + Parameters + ---------- + gha_expr_map : dict[str, str] + Mapping from parser placeholder variable names to original GitHub expression bodies. + + Returns + ------- + BashScriptContext + A context copy with updated GitHub-expression mapping metadata. 
+ """ + return BashScriptContext( + self.outer_context, + self.filesystem, + self.env, + self.func_decls, + self.stdin_scope, + self.stdin_loc, + self.stdout_scope, + self.stdout_loc, + self.source_filepath, + tuple(sorted(gha_expr_map.items())), ) def get_containing_github_context(self) -> github.GitHubActionsStepContext | None: @@ -261,8 +294,9 @@ def identify_interpretations(self, state: core.State) -> dict[core.Interpretatio def build_bash_script() -> core.Node: try: - parsed_bash = bashparser.parse_raw(script_str, MACARON_PATH) - return BashScriptNode.create(parsed_bash, self.context.get_non_owned()) + parsed_bash, gha_expr_map = bashparser.parse_raw_with_gha_mapping(script_str, MACARON_PATH) + context_with_map = self.context.ref.with_gha_expr_map(gha_expr_map) + return BashScriptNode.create(parsed_bash, core.NonOwningContextRef(context_with_map)) except ParseError: return core.NoOpStatementNode() diff --git a/src/macaron/code_analyzer/dataflow_analysis/github.py b/src/macaron/code_analyzer/dataflow_analysis/github.py index 222f55fb1..6231c0ea6 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/github.py +++ b/src/macaron/code_analyzer/dataflow_analysis/github.py @@ -363,12 +363,16 @@ def create( for need in needs: actual_need = GitHubActionsWorkflowNode._find_job_id_case_insensitive(jobs, need) if actual_need is None: - raise CallGraphError("needs refers to invalid job") + raise CallGraphError( + "needs refers to invalid job '" + need + "', jobs are " + str(list(jobs.keys())) + ) edges.append(actual_need) elif isinstance(needs, str): actual_need = GitHubActionsWorkflowNode._find_job_id_case_insensitive(jobs, needs) if actual_need is None: - raise CallGraphError("needs refers to invalid job") + raise CallGraphError( + "needs refers to invalid job '" + needs + "', jobs are " + str(list(jobs.keys())) + ) edges.append(actual_need) dependency_graph[job_id] = edges diff --git a/src/macaron/code_analyzer/dataflow_analysis/github_expr.py 
b/src/macaron/code_analyzer/dataflow_analysis/github_expr.py index 8961750a4..9ecb39426 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/github_expr.py +++ b/src/macaron/code_analyzer/dataflow_analysis/github_expr.py @@ -1,13 +1,14 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Parser for GitHub Actions expression language.""" from typing import cast -from lark import Lark, Token, Tree +from lark import Lark, Token, Tree, UnexpectedInput from macaron.code_analyzer.dataflow_analysis import facts +from macaron.errors import CallGraphError # Parser for GitHub Actions expression language grammar. github_expr_parser = Lark( @@ -67,7 +68,7 @@ function_call: identifier "(" _expr ("," _expr)* ")" %import common.SIGNED_NUMBER - %import common.WS + %import unicode.WS %import common.LETTER %import common.DIGIT %import common._STRING_INNER @@ -111,8 +112,10 @@ def extract_value_from_expr_string(s: str, var_scope: facts.Scope | None) -> fac values.append(facts.StringLiteral(cur_str)) cur_expr_end = s.find("}}", cur_expr_begin) cur_expr = s[cur_expr_begin + 3 : cur_expr_end] - parse_tree = github_expr_parser.parse(cur_expr) - + try: + parse_tree = github_expr_parser.parse(cur_expr) + except UnexpectedInput as e: + raise CallGraphError("Failed to parse github expression '" + cur_expr + "' in string '" + s + "'") from e node = parse_tree.children[0] var_str = extract_expr_variable_name(node) diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py new file mode 100644 index 000000000..80364ea76 --- /dev/null +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -0,0 +1,969 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. 
All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Detect security issues and injection risks in GitHub Actions workflows.""" + +import json +import os +import re +from typing import TypedDict, cast + +from macaron.code_analyzer.dataflow_analysis import bash, core, facts +from macaron.code_analyzer.dataflow_analysis.analysis import get_containing_github_job, get_containing_github_step +from macaron.code_analyzer.dataflow_analysis.core import NodeForest, traverse_bfs +from macaron.code_analyzer.dataflow_analysis.github import ( + GitHubActionsActionStepNode, + GitHubActionsNormalJobNode, + GitHubActionsRunStepNode, + GitHubActionsWorkflowNode, +) +from macaron.code_analyzer.gha_security_analysis.recommendation import ( + Recommendation, + parse_unpinned_action_issue, + recommend_for_unpinned_action, + recommend_for_workflow_issue, + resolve_action_ref_to_sha, + resolve_action_ref_to_tag, +) +from macaron.parsers.bashparser_model import CallExpr, is_call_expr, is_lit, is_param_exp +from macaron.parsers.github_workflow_model import Workflow +from macaron.slsa_analyzer.git_url import is_commit_hash + +UNTRUSTED_PR_REFS = { + "${{ github.event.pull_request.head.ref }}", + "${{ github.head_ref }}", + "${{ github.event.pull_request.head.sha }}", + "${{ github.event.pull_request.head.repo.full_name }}", +} + +PRIORITY_CRITICAL = 100 +PRIORITY_HIGH = 80 +PRIORITY_MEDIUM = 60 +PRIORITY_LOW = 40 +PRIORITY_MIN = 20 + + +class PrioritizedIssue(TypedDict): + """A workflow security finding with priority metadata.""" + + issue: str + priority: int + + +class WorkflowFinding(TypedDict): + """Workflow-level security findings.""" + + workflow_name: str + issues: list[PrioritizedIssue] + + +def detect_github_actions_security_issues(nodes: NodeForest) -> list[WorkflowFinding]: + """Detect security issues across GitHub Actions workflow nodes. 
+ + Parameters + ---------- + nodes : NodeForest + Parsed workflow node forest used for traversing GitHub Actions workflow callgraphs. + + Returns + ------- + list[WorkflowFinding] + A list of workflow-level findings. Each item contains: + - ``workflow_name``: workflow file path. + - ``issues``: list of detected security issue messages with priorities. + """ + findings = [] + for root in nodes.root_nodes: + for callee in traverse_bfs(root): + if isinstance(callee, GitHubActionsWorkflowNode): + if result := analyze_workflow(callee, nodes=nodes): + findings.append(result) + return findings + + +def analyze_workflow(workflow_node: GitHubActionsWorkflowNode, nodes: NodeForest) -> WorkflowFinding | None: + """Analyze a GitHub Actions workflow for security issues. + + Parameters + ---------- + workflow_node : GitHubActionsWorkflowNode + The workflow node to analyze. + nodes : NodeForest + The full node forest used to resolve parent relationships while analyzing findings. + + Returns + ------- + WorkflowFinding | None + A finding dictionary with: + - ``workflow_name``: source filepath of the workflow. + - ``issues``: list of issue messages with associated priorities. + Returns ``None`` when no issues are detected. + + Notes + ----- + The analysis covers trigger hardening, permissions configuration, action pinning, + checkout risks, remote-script execution heuristics, self-hosted runner usage, and + dataflow-based expression injection patterns. 
+ """ + findings: list[PrioritizedIssue] = [] + on_keys = _extract_on_keys(workflow_node.definition) + seen_jobs: set[str] = set() + workflow_permissions_defined = "permissions" in workflow_node.definition + has_job_without_permissions = False + + for node in core.traverse_bfs(workflow_node): + if isinstance(node, GitHubActionsWorkflowNode): + _append_workflow_level_findings(findings, on_keys, node.definition) + continue + + if isinstance(node, GitHubActionsNormalJobNode): + if node.job_id in seen_jobs: + continue + seen_jobs.add(node.job_id) + if "permissions" not in node.definition: + has_job_without_permissions = True + _append_job_level_findings(findings, node) + continue + + if isinstance(node, GitHubActionsActionStepNode): + _append_action_step_findings(findings, node, on_keys) + continue + + if isinstance(node, GitHubActionsRunStepNode): + _append_run_step_findings(findings, node, nodes) + continue + + if not workflow_permissions_defined and has_job_without_permissions: + _add_finding( + findings, + ( + "missing-permissions: No explicit workflow permissions defined, and one or more jobs also omit " + "permissions; defaults may be overly broad." + ), + PRIORITY_MEDIUM, + ) + + if "pull_request_target" in on_keys and _has_privileged_trigger_risk_combo(findings): + _add_finding( + findings, + ( + "privileged-trigger: Workflow uses `pull_request_target` with additional risky patterns; " + "treat this workflow as high risk and harden immediately." 
+ ), + PRIORITY_HIGH, + ) + + if findings: + findings_sorted = sorted(findings, key=lambda finding: (-finding["priority"], finding["issue"])) + return { + "workflow_name": os.path.relpath(workflow_node.context.ref.source_filepath, os.getcwd()), + "issues": findings_sorted, + } + + return None + + +def _extract_on_keys(workflow: Workflow) -> set[str]: + """Extract the set of event names from a workflow ``on`` section.""" + on_section = workflow.get("on") + if isinstance(on_section, dict): + return set(on_section.keys()) + if isinstance(on_section, list): + return set(on_section) + return {on_section} + + +def _append_workflow_level_findings(findings: list[PrioritizedIssue], on_keys: set[str], workflow: Workflow) -> None: + """Append workflow-level hardening findings.""" + if "permissions" not in workflow: + return + + permissions = workflow["permissions"] + if isinstance(permissions, str) and permissions.lower() == "write-all": + _add_finding(findings, "overbroad-permissions: Workflow uses `permissions: write-all`.", PRIORITY_HIGH) + if isinstance(permissions, dict) and "pull_request_target" in on_keys: + for scope, level in permissions.items(): + if isinstance(level, str) and "write" in level.lower(): + _add_finding( + findings, + f"overbroad-permissions: PR-triggered workflow requests `{scope}: {level}`.", + PRIORITY_HIGH, + ) + + +def _append_job_level_findings(findings: list[PrioritizedIssue], job_node: GitHubActionsNormalJobNode) -> None: + """Append findings derived from a single job node.""" + runs_on = job_node.definition.get("runs-on") + if runs_on and "self-hosted" in str(runs_on): + _add_finding( + findings, + f"self-hosted-runner: Job `{job_node.job_id}` runs on self-hosted runners; " + "ensure isolation and never run untrusted PR code there.", + PRIORITY_MEDIUM, + ) + + +def _append_action_step_findings( + findings: list[PrioritizedIssue], + action_node: GitHubActionsActionStepNode, + on_keys: set[str], +) -> None: + """Append findings derived from an 
action step node.""" + uses_name = action_node.uses_name + uses_version = action_node.uses_version + if uses_name and not uses_name.startswith("./") and uses_version and not is_commit_hash(uses_version): + step_line = _extract_action_step_line(action_node) + line_marker = f"[step-line={step_line}] " if step_line else "" + _add_finding( + findings, + f"unpinned-third-party-action: {line_marker}{uses_name}@{uses_version}", + PRIORITY_MIN, + ) + + if uses_name == "actions/checkout": + ref = _literal_value(action_node.with_parameters.get("ref")) + if ref in UNTRUSTED_PR_REFS and "pull_request" in on_keys: + _add_finding( + findings, + f"untrusted-fork-code: A checkout step uses untrusted fork code (`ref: {ref}`) on PR event.", + PRIORITY_CRITICAL, + ) + + if "pull_request_target" in on_keys and ref in UNTRUSTED_PR_REFS: + _add_finding( + findings, + f"pr-target-untrusted-checkout: Workflow uses pull_request_target and checks out PR-controlled ref `{ref}`.", + PRIORITY_CRITICAL, + ) + + +def _append_run_step_findings( + findings: list[PrioritizedIssue], run_step_node: GitHubActionsRunStepNode, nodes: NodeForest +) -> None: + """Append findings derived from a run step node.""" + # Traversing a run-step subgraph can reach semantically identical command nodes through + # multiple CFG/AST paths (for example nested/compound command structures). Track emitted + # injection findings by stable metadata to avoid duplicate reports for the same command line. + seen_injection_keys: set[tuple[int | None, str, str, str]] = set() + for node in core.traverse_bfs(run_step_node): + # Command-level injection checks rely on parsed call argument parts from single-command nodes. + if isinstance(node, bash.BashSingleCommandNode): + _append_injection_findings(findings, node, nodes, seen_injection_keys) + continue + + # Remote script execution risk is structural: downloader output piped into an executor. 
+ if isinstance(node, bash.BashPipeNode): + _append_remote_script_exec_findings(findings, node, run_step_node, nodes) + + +def _append_remote_script_exec_findings( + findings: list[PrioritizedIssue], + pipe_node: bash.BashPipeNode, + run_step_node: GitHubActionsRunStepNode, + nodes: NodeForest, +) -> None: + """Append remote-script-exec findings discovered from parsed bash pipe nodes.""" + if not _is_remote_script_exec_pipe(pipe_node): + return + + # Map the pipe's script-relative line to workflow source line so summary links jump to YAML. + script_line = pipe_node.definition["Pos"]["Line"] + workflow_line = _map_script_line_to_workflow_line(run_step_node, script_line) + if workflow_line is None: + workflow_line = _extract_run_step_line(run_step_node) + job_node = get_containing_github_job(pipe_node, nodes.parents) + issue_payload = { + "step_line": workflow_line, + "script_line": script_line, + "job": job_node.job_id if job_node else "", + "step": _extract_step_name(run_step_node), + "command": _extract_command_text(run_step_node, script_line), + } + _add_finding( + findings, + f"remote-script-exec: {json.dumps(issue_payload)}", + PRIORITY_HIGH, + ) + + +def _is_remote_script_exec_pipe(pipe_node: bash.BashPipeNode) -> bool: + """Return whether a pipe node matches downloader-to-executor behavior.""" + lhs_words = _extract_statement_words(pipe_node.lhs) + rhs_words = _extract_statement_words(pipe_node.rhs) + if not lhs_words or not rhs_words: + return False + + downloader_cmd = lhs_words[0] + if downloader_cmd not in {"curl", "wget"}: + return False + + return _is_executor_invocation(rhs_words) + + +def _extract_statement_words(statement_node: bash.BashStatementNode) -> list[str]: + """Extract normalized literal command words from a Bash statement when available.""" + cmd = statement_node.definition.get("Cmd") + if not is_call_expr(cmd): + return [] + return _extract_call_words(cmd) + + +def _extract_call_words(call_expr: CallExpr) -> list[str]: + """Extract literal 
word values from a call expression.""" + args = call_expr["Args"] + words: list[str] = [] + for arg in args: + parts = arg["Parts"] + word = "".join(part.get("Value", "") for part in parts if is_lit(part)).strip() + if not word: + return [] + words.append(word) + if not words: + return [] + + normalized = [os.path.basename(word).lower() if idx == 0 else word for idx, word in enumerate(words)] + return normalized + + +def _is_executor_invocation(words: list[str]) -> bool: + """Return whether extracted words represent shell/archive execution.""" + if not words: + return False + direct_executors = {"bash", "sh", "tar"} + wrapper_cmds = {"sudo", "env", "command"} + + command = words[0] + if command in direct_executors: + return True + if command in wrapper_cmds and len(words) > 1: + wrapped = os.path.basename(words[1]).lower() + return wrapped in direct_executors + return False + + +def _append_injection_findings( + findings: list[PrioritizedIssue], + bash_node: bash.BashSingleCommandNode, + nodes: NodeForest, + seen_injection_keys: set[tuple[int | None, str, str, str]] | None = None, +) -> None: + """Append potential injection findings discovered from parsed bash command nodes.""" + if not is_call_expr(bash_node.definition.get("Cmd")): + return + + call_exp = cast(CallExpr, bash_node.definition["Cmd"]) + for arg in call_exp.get("Args", []): + parts = arg.get("Parts") + step_node = get_containing_github_step(bash_node, nodes.parents) + script_line = _extract_script_line_from_parts(parts) + expanded_refs = _extract_expanded_github_refs(bash_node, step_node, script_line, parts) + if _arg_has_attacker_controlled_github_ref(parts) or _has_attacker_controlled_expanded_ref(expanded_refs): + job_node = get_containing_github_job(bash_node, nodes.parents) + workflow_line = _map_script_line_to_workflow_line(step_node, script_line) + if workflow_line is None: + workflow_line = _extract_run_step_line(step_node) + job_name = job_node.job_id if job_node else "" + step_name = 
_extract_step_name(step_node) + command_text = _extract_command_text(step_node, script_line) + dedupe_key = (workflow_line, job_name, step_name, command_text) + if seen_injection_keys is not None: + # Prevent duplicate findings when the same risky command is visited via + # different traversal paths in the run-step subgraph. + if dedupe_key in seen_injection_keys: + continue + seen_injection_keys.add(dedupe_key) + issue_payload = { + "step_line": workflow_line, + "script_line": script_line, + "job": job_name, + "step": step_name, + "command": command_text, + "expanded_refs": expanded_refs, + "parts": arg.get("Parts"), + } + _add_finding(findings, f"potential-injection: {json.dumps(issue_payload)}", PRIORITY_CRITICAL) + + +def _arg_has_attacker_controlled_github_ref(parts: object) -> bool: + """Return whether argument parts contain attacker-controlled GitHub context expansion. + + Parameters + ---------- + parts : object + Parsed argument ``Parts`` payload from the Bash call expression. + + Returns + ------- + bool + ``True`` when an attacker-controlled GitHub context reference is detected. + """ + if not isinstance(parts, list): + return False + + expansion = False + pr_head_ref = False + for part in parts: + if is_param_exp(part) and part.get("Param", {}).get("Value") == "github": + expansion = True + if is_lit(part) and part.get("Value") in { + ".event.pull_request.head.ref", + ".head_ref", + ".event.issue.body", + ".event.comment.body", + }: + pr_head_ref = True + if expansion and pr_head_ref: + return True + return False + + +def _has_attacker_controlled_expanded_ref(refs: list[str]) -> bool: + """Return whether extracted refs include attacker-controlled GitHub context values. + + Parameters + ---------- + refs : list[str] + Extracted GitHub expression references. + + Returns + ------- + bool + ``True`` if a known attacker-controlled ref is present. 
+ """ + attacker_controlled = { + "github.event.pull_request.head.ref", + "github.head_ref", + "github.event.issue.body", + "github.event.comment.body", + } + return any(ref in attacker_controlled for ref in refs) + + +def _extract_expanded_github_refs( + bash_node: bash.BashSingleCommandNode, + step_node: GitHubActionsRunStepNode | None, + script_line: int | None, + parts: object, +) -> list[str]: + """Extract normalized expanded GitHub refs from mapping with a line-text fallback. + + Parameters + ---------- + bash_node : bash.BashSingleCommandNode + The Bash command node used to resolve parser placeholder mappings. + step_node : GitHubActionsRunStepNode | None + The containing run step node, used for fallback extraction from raw run script text. + script_line : int | None + 1-based line number within the inlined run script for line-targeted fallback extraction. + parts : object + Parsed argument ``Parts`` payload from the Bash call expression. + + Returns + ------- + list[str] + Ordered list of normalized GitHub expression references. + """ + refs: list[str] = [] + placeholder_map = dict(bash_node.context.ref.gha_expr_map_items) + if isinstance(parts, list): + for part in parts: + if not is_param_exp(part): + continue + placeholder = part.get("Param", {}).get("Value") + if isinstance(placeholder, str): + mapped = placeholder_map.get(placeholder) + if mapped: + refs.extend(_extract_github_refs_from_expression(mapped)) + if refs: + return _deduplicate_preserve_order(refs) + + if step_node is None: + return [] + # Fallback: some complex shell constructs (for example command substitution in compound + # test/boolean commands) may not expose mapped placeholders on the current arg parts. + # In those cases, recover refs directly from the original run-script line text. 
+ run_script = step_node.definition["run"] + script_lines = run_script.splitlines() + if script_line is not None and 1 <= script_line <= len(script_lines): + line_text = script_lines[script_line - 1] + else: + line_text = run_script + + matches = re.findall(r"\$\{\{\s*(.*?)\s*\}\}", line_text) + fallback_refs: list[str] = [] + for expr in matches: + fallback_refs.extend(_extract_github_refs_from_expression(expr)) + return _deduplicate_preserve_order(fallback_refs) + + +def _extract_github_refs_from_expression(expression: str) -> list[str]: + """Extract github-context reference paths from a GitHub Actions expression body. + + Parameters + ---------- + expression : str + Expression text inside ``${{ ... }}``. + + Returns + ------- + list[str] + Matched GitHub reference paths (for example ``github.head_ref``). + """ + return re.findall(r"github(?:\.[A-Za-z0-9_-]+)+", expression) + + +def _deduplicate_preserve_order(values: list[str]) -> list[str]: + """Deduplicate string values while preserving insertion order. + + Parameters + ---------- + values : list[str] + Input values that may contain duplicates. + + Returns + ------- + list[str] + Values in original order with duplicates removed. 
+ """ + seen: set[str] = set() + result: list[str] = [] + for value in values: + if value in seen: + continue + seen.add(value) + result.append(value) + return result + + +def _extract_step_name(step_node: GitHubActionsRunStepNode | None) -> str: + """Extract a display name for a workflow run step.""" + if step_node is None: + return "" + step_name = step_node.definition.get("name") + if isinstance(step_name, str): + return step_name + step_id = step_node.definition.get("id") + if isinstance(step_id, str): + return step_id + return "" + + +def _extract_command_text(step_node: GitHubActionsRunStepNode | None, script_line: int | None) -> str: + """Extract a compact command snippet from the run script for display in diagnostics.""" + if step_node is None: + return "" + + run_script = step_node.definition["run"] + script_lines = run_script.splitlines() + if script_line and 1 <= script_line <= len(script_lines): + return script_lines[script_line - 1].strip() + + for line in script_lines: + if line.strip(): + return line.strip() + return "" + + +def _extract_run_step_line(step_node: GitHubActionsRunStepNode | None) -> int | None: + """Extract a 1-based workflow line number for a run step when metadata is available.""" + if step_node is None: + return None + + definition = step_node.definition + line_container = getattr(definition, "lc", None) + if line_container is None: + return _infer_run_step_line_from_source(step_node) + + line = getattr(line_container, "line", None) + if isinstance(line, int) and line >= 0: + # ruamel stores line numbers as 0-based. 
+ return line + 1 + + return _infer_run_step_line_from_source(step_node) + + +def _extract_action_step_line(step_node: GitHubActionsActionStepNode | None) -> int | None: + """Extract a 1-based workflow line number for an action step when metadata is available.""" + if step_node is None: + return None + + definition = step_node.definition + line_container = getattr(definition, "lc", None) + if line_container is None: + return _infer_action_step_line_from_source(step_node) + + line = getattr(line_container, "line", None) + if isinstance(line, int) and line >= 0: + # ruamel stores line numbers as 0-based. + return line + 1 + + return _infer_action_step_line_from_source(step_node) + + +def _infer_action_step_line_from_source(step_node: GitHubActionsActionStepNode) -> int | None: + """Infer an action-step line by matching the ``uses`` value in the workflow source.""" + workflow_path = step_node.context.ref.job_context.ref.workflow_context.ref.source_filepath + if not workflow_path or not os.path.isfile(workflow_path): + return None + + uses_name = step_node.uses_name + uses_version = step_node.uses_version + if not uses_name or not uses_version: + return None + + target_uses = f"{uses_name}@{uses_version}" + step_name = step_node.definition.get("name") + step_id = step_node.definition.get("id") + step_identifier = step_name if isinstance(step_name, str) else step_id if isinstance(step_id, str) else None + + try: + with open(workflow_path, encoding="utf-8") as workflow_file: + workflow_lines = workflow_file.readlines() + except OSError: + return None + + uses_key_re = re.compile(r"^\s*(?:-\s*)?uses\s*:\s*(.*)$") + candidate_lines: list[int] = [] + for index, line in enumerate(workflow_lines): + match = uses_key_re.match(line) + if not match: + continue + uses_value = match.group(1).strip().strip("\"'") + if uses_value == target_uses: + candidate_lines.append(index + 1) + + if not candidate_lines: + return None + if len(candidate_lines) == 1 or not step_identifier: + 
return candidate_lines[0] + + for candidate_line in candidate_lines: + for lookback_index in range(max(0, candidate_line - 8 - 1), candidate_line - 1): + lookback_line = workflow_lines[lookback_index].strip() + if lookback_line in {f"name: {step_identifier}", f"id: {step_identifier}"}: + return candidate_line + + return candidate_lines[0] + + +def _extract_script_line_from_parts(parts: object) -> int | None: + """Extract the 1-based script line number from parsed shell argument parts.""" + if not isinstance(parts, list): + return None + + for part in parts: + if not isinstance(part, dict): + continue + pos = part.get("Pos") + if not isinstance(pos, dict): + continue + line = pos.get("Line") + if isinstance(line, int) and line > 0: + return line + + return None + + +def _map_script_line_to_workflow_line( + step_node: GitHubActionsRunStepNode | None, script_line: int | None +) -> int | None: + """Map a line number inside a run script to the corresponding workflow source line.""" + if step_node is None or script_line is None or script_line < 1: + return None + + workflow_path = step_node.context.ref.job_context.ref.workflow_context.ref.source_filepath + run_script = step_node.definition.get("run") + if not workflow_path or not isinstance(run_script, str) or not os.path.isfile(workflow_path): + return None + + try: + with open(workflow_path, encoding="utf-8") as workflow_file: + workflow_lines = workflow_file.readlines() + except OSError: + return None + + for block_start, block_lines in _iter_run_blocks(workflow_lines): + if _normalize_multiline_text("\n".join(block_lines)) != _normalize_multiline_text(run_script): + continue + if script_line > len(block_lines): + return None + return block_start + script_line - 1 + + return None + + +def _iter_run_blocks(workflow_lines: list[str]) -> list[tuple[int, list[str]]]: + """Collect run-step script blocks as (1-based start line, content lines).""" + run_key_re = re.compile(r"^(\s*)(?:-\s*)?run\s*:\s*(.*)$") + blocks: 
list[tuple[int, list[str]]] = [] + i = 0 + while i < len(workflow_lines): + line = workflow_lines[i] + match = run_key_re.match(line) + if not match: + i += 1 + continue + + indent = len(match.group(1)) + run_value = match.group(2).rstrip("\n") + + if run_value.strip().startswith(("|", ">")): + block_start = i + 2 + block_buffer: list[str] = [] + j = i + 1 + min_indent: int | None = None + while j < len(workflow_lines): + candidate = workflow_lines[j] + if candidate.strip(): + candidate_indent = len(candidate) - len(candidate.lstrip(" ")) + if candidate_indent <= indent: + break + if min_indent is None or candidate_indent < min_indent: + min_indent = candidate_indent + block_buffer.append(candidate.rstrip("\n")) + j += 1 + + if min_indent is None: + blocks.append((block_start, [])) + else: + dedented = [b[min_indent:] if len(b) >= min_indent else b for b in block_buffer] + blocks.append((block_start, dedented)) + i = j + continue + + inline_value = run_value.strip().strip("\"'") + blocks.append((i + 1, [inline_value])) + i += 1 + + return blocks + + +def _normalize_multiline_text(text: str) -> str: + """Normalize text for robust matching between YAML-extracted and parsed run scripts.""" + return "\n".join(line.rstrip() for line in text.strip("\n").splitlines()) + + +def _infer_run_step_line_from_source(step_node: GitHubActionsRunStepNode) -> int | None: + """Infer a run step line by matching its script against the workflow source file.""" + workflow_path = step_node.context.ref.job_context.ref.workflow_context.ref.source_filepath + if not workflow_path or not os.path.isfile(workflow_path): + return None + + run_script = step_node.definition["run"] + first_script_line = "" + for line in run_script.splitlines(): + stripped = line.strip() + if stripped: + first_script_line = stripped + break + if not first_script_line: + return None + + try: + with open(workflow_path, encoding="utf-8") as workflow_file: + workflow_lines = workflow_file.readlines() + except OSError: + 
return None + + run_key_re = re.compile(r"^\s*(?:-\s*)?run\s*:\s*(.*)$") + for index, line in enumerate(workflow_lines): + match = run_key_re.match(line) + if not match: + continue + + run_value = match.group(1).strip() + if run_value and not run_value.startswith("|") and not run_value.startswith(">"): + inline_value = run_value.strip("\"'") + if first_script_line in inline_value or inline_value in first_script_line: + return index + 1 + continue + + run_indent = len(line) - len(line.lstrip(" ")) + for nested_line in workflow_lines[index + 1 :]: + if not nested_line.strip(): + continue + nested_indent = len(nested_line) - len(nested_line.lstrip(" ")) + if nested_indent <= run_indent: + break + if first_script_line in nested_line.strip(): + return index + 1 + + return None + + +def _has_privileged_trigger_risk_combo(findings: list[PrioritizedIssue]) -> bool: + """Return whether findings contain risky patterns that amplify pull_request_target risk.""" + risky_prefixes = ( + "overbroad-permissions:", + "untrusted-fork-code:", + "remote-script-exec:", + "pr-target-untrusted-checkout:", + "potential-injection:", + "self-hosted-runner:", + ) + return any(any(finding["issue"].startswith(prefix) for prefix in risky_prefixes) for finding in findings) + + +def _literal_value(value: facts.Value | None) -> str: + """Return literal string value from a facts expression when available.""" + if isinstance(value, facts.StringLiteral): + return value.literal + return "" + + +def _add_finding(findings: list[PrioritizedIssue], issue: str, priority: int) -> None: + """Append a finding once and keep the highest priority for duplicate issues. + + Parameters + ---------- + findings : list[PrioritizedIssue] + Mutable finding list for the current workflow. + issue : str + Normalized finding identifier/message. + priority : int + Finding priority score. 
+ """ + for existing in findings: + if existing["issue"] == issue: + existing["priority"] = max(existing["priority"], priority) + return + findings.append({"issue": issue, "priority": priority}) + + +def get_workflow_issue_type(issue: str) -> str: + """Extract a normalized workflow issue subtype from issue text.""" + prefix, _, _ = issue.partition(":") + normalized = prefix.strip().replace("_", "-") + return normalized or "workflow-security-issue" + + +def get_workflow_issue_summary(finding_type: str) -> str: + """Return a concise summary for a workflow issue subtype.""" + finding_summaries = { + "privileged-trigger": "Privileged trigger can expose elevated token scope to untrusted input.", + "missing-permissions": "Workflow omits explicit permissions and may inherit broad defaults.", + "overbroad-permissions": "Workflow requests permissions broader than required.", + "untrusted-fork-code": "Workflow can execute code controlled by an untrusted fork.", + "remote-script-exec": "Workflow downloads and executes remote scripts inline.", + "pr-target-untrusted-checkout": "pull_request_target is combined with checkout of PR-controlled refs.", + "potential-injection": "Unsafe expansion of attacker-controllable GitHub context can enable command injection.", + "self-hosted-runner": "Job uses self-hosted runners, increasing blast radius for untrusted code.", + "workflow-security-issue": "Workflow includes a security issue that requires hardening.", + } + return finding_summaries.get(finding_type, "Workflow security finding detected.") + + +def build_workflow_issue_recommendation(issue: str) -> tuple[str, Recommendation, str]: + """Build normalized workflow issue recommendation metadata.""" + finding_type = get_workflow_issue_type(issue) + summary = get_workflow_issue_summary(finding_type) + recommendation = recommend_for_workflow_issue(issue) + details = _format_issue_details(finding_type, issue) + finding_message = f"Summary: {summary} Details: {details} Recommendation: 
{recommendation.message}" + return finding_type, recommendation, finding_message + + +def _format_issue_details(finding_type: str, issue: str) -> str: + """Format human-readable issue details for job summaries.""" + if finding_type not in {"potential-injection", "remote-script-exec"}: + return issue + + payload = _parse_issue_payload(issue) + if not isinstance(payload, dict): + return issue + + job_name = str(payload.get("job") or "unknown") + step_name = str(payload.get("step") or "unknown") + command_text = str(payload.get("command") or "unknown") + command_text = command_text.replace("`", "'") + refs = payload.get("expanded_refs") + refs_display = "" + if isinstance(refs, list): + refs_clean = [str(ref) for ref in refs if str(ref)] + if refs_clean: + refs_display = f" Expanded refs: `{', '.join(refs_clean)}`" + return f"Job: {job_name} Step: {step_name} Command: `{command_text}`{refs_display}" + + +def _parse_issue_payload(issue: str) -> object | None: + """Parse the serialized issue payload after the finding type prefix.""" + _, _, payload = issue.partition(":") + payload = payload.strip() + if not payload: + return None + + try: + return cast(object, json.loads(payload)) + except json.JSONDecodeError: + return None + + +def build_unpinned_action_recommendation(issue: str, api_client: object) -> tuple[str, str, Recommendation] | None: + """Build normalized recommendation metadata for an unpinned third-party action finding.""" + parsed_issue = parse_unpinned_action_issue(issue) + if not parsed_issue: + return None + + action_name, action_ref = parsed_issue + resolved_sha = resolve_action_ref_to_sha(api_client, action_name, action_ref) + resolved_tag = resolve_action_ref_to_tag(action_name, resolved_sha, action_ref) + recommendation = recommend_for_unpinned_action(action_name, resolved_sha, resolved_tag) + return action_name, action_ref, recommendation + + +def extract_workflow_issue_line(issue: str) -> int | None: + """Extract a 1-based workflow source line 
number from an issue payload. + + Parameters + ---------- + issue : str + Serialized workflow issue string produced by the detector. + + Returns + ------- + int | None + The 1-based line number when available; otherwise ``None``. + """ + step_line_match = re.search(r"\[step-line=(\d+)\]", issue) + if step_line_match: + step_line = int(step_line_match.group(1)) + if step_line > 0: + return step_line + + if not issue.startswith("potential-injection:") and not issue.startswith("remote-script-exec:"): + return None + + _, _, payload = issue.partition(":") + if not payload.strip(): + return None + + parsed_payload = _parse_issue_payload(issue) + if isinstance(parsed_payload, dict): + payload_step_line = parsed_payload.get("step_line") + if isinstance(payload_step_line, int) and payload_step_line > 0: + return payload_step_line + + parts: object | None + if isinstance(parsed_payload, list): + parts = parsed_payload + elif isinstance(parsed_payload, dict): + parts = parsed_payload.get("parts") + else: + parts = None + + if isinstance(parts, list): + for part in parts: + if not isinstance(part, dict): + continue + pos = part.get("Pos") + if not isinstance(pos, dict): + continue + line = pos.get("Line") + if isinstance(line, int) and line > 0: + return line + + match = re.search(r"""["']Line["']:\s*(\d+)""", payload) + if not match: + return None + line = int(match.group(1)) + return line if line > 0 else None diff --git a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py new file mode 100644 index 000000000..332add599 --- /dev/null +++ b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py @@ -0,0 +1,220 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Recommendation helpers for GitHub Actions security findings. 
+ +This module centralizes user-facing remediation guidance for findings generated by +GitHub Actions security analysis checks. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass + +from macaron.errors import GitTagError +from macaron.slsa_analyzer.git_service.api_client import GhAPIClient +from macaron.slsa_analyzer.git_url import find_highest_git_tag, get_tags_via_git_remote, is_commit_hash + +UNPINNED_ACTION_RE = re.compile( + r"^(?:unpinned-third-party-action:\s*)?(?:\[step-line=(?P\d+)\]\s*)?(?P[^@\s]+)@(?P[^\s]+)$" +) + + +@dataclass(frozen=True) +class Recommendation: + """Normalized recommendation payload for a finding. + + Attributes + ---------- + message : str + Human-readable recommendation text. + recommended_ref : str | None + Optional pinned reference suggestion, such as ``owner/repo@``. + """ + + message: str + recommended_ref: str | None = None + + +def recommend_for_unpinned_action( + action_name: str, resolved_sha: str | None = None, resolved_tag: str | None = None +) -> Recommendation: + """Create a recommendation for an unpinned third-party action. + + Parameters + ---------- + action_name : str + GitHub Action identifier in the form ``owner/repo``. + resolved_sha : str | None, optional + Resolved commit SHA for the action ref if available. + resolved_tag : str | None, optional + Tag corresponding to ``resolved_sha`` when available. + + Returns + ------- + Recommendation + Recommendation containing pinning guidance and a suggested immutable + action reference. 
+ """ + if resolved_sha and resolved_tag: + recommended_ref = f"{action_name}@{resolved_sha} # {resolved_tag}" + elif resolved_sha: + recommended_ref = f"{action_name}@{resolved_sha}" + else: + recommended_ref = "Unable to resolve automatically" + return Recommendation( + message="Pin this third-party action to a 40-character commit SHA.", + recommended_ref=recommended_ref, + ) + + +def parse_unpinned_action_issue(issue: str) -> tuple[str, str] | None: + """Parse an unpinned third-party action reference from issue text. + + Parameters + ---------- + issue : str + Raw issue text emitted by workflow security analysis. + + Returns + ------- + tuple[str, str] | None + Parsed ``(action_name, action_version)`` when the issue matches + ``owner/repo@ref`` format for a third-party action. ``None`` otherwise. + """ + match = UNPINNED_ACTION_RE.fullmatch(issue.strip()) + if not match: + return None + action = match.group("action") + version = match.group("version") + if action.startswith("./"): + return None + if "/" not in action: + return None + return action, version + + +def resolve_action_ref_to_sha(api_client: object, action_name: str, action_version: str) -> str | None: + """Resolve an action reference to an immutable commit SHA. + + Parameters + ---------- + api_client : object + API client instance used for GitHub API calls. + action_name : str + GitHub Action identifier in the form ``owner/repo``. + action_version : str + Action ref currently used by the workflow. + + Returns + ------- + str | None + The resolved commit SHA if resolution succeeds; otherwise ``None``. + """ + if not isinstance(api_client, GhAPIClient): + return None + if not action_name or not action_version: + return None + if is_commit_hash(action_version): + # Normalize short SHAs by resolving them through the API. 
+ return ( + action_version + if len(action_version) == 40 + else api_client.get_commit_sha_from_ref(action_name, action_version) + ) + return api_client.get_commit_sha_from_ref(action_name, action_version) + + +def resolve_action_ref_to_tag(action_name: str, resolved_sha: str | None, action_version: str = "") -> str | None: + """Resolve a commit SHA to a corresponding Git tag for an action repository. + + Parameters + ---------- + action_name : str + GitHub Action identifier in the form ``owner/repo``. + resolved_sha : str | None, optional + Resolved commit SHA for the action ref. + action_version : str, optional + Original action version/ref. If this exact ref is a tag on the same + commit, it is preferred. + + Returns + ------- + str | None + The corresponding tag name if found; otherwise ``None``. + """ + if not action_name or not resolved_sha: + return None + tags = get_tags_via_git_remote(f"https://github.com/{action_name}") + if not tags: + return None + + matching_tags = [tag for tag, tag_sha in tags.items() if tag_sha == resolved_sha] + if not matching_tags: + return None + if action_version and action_version in matching_tags: + return action_version + try: + return find_highest_git_tag(set(matching_tags)) + except GitTagError: + return matching_tags[0] + + +def recommend_for_workflow_issue(issue: str) -> Recommendation: + """Map a workflow issue string to a remediation recommendation. + + Parameters + ---------- + issue : str + Issue string emitted by workflow security analysis. + + Returns + ------- + Recommendation + Recommendation message corresponding to the detected issue category. 
+ """ + if issue.startswith("sensitive-trigger:"): + return Recommendation("Add strict event gating (actor allowlist, branch filters, and conditional checks).") + if issue.startswith("privileged-trigger:"): + return Recommendation("Avoid pull_request_target for untrusted code paths; use pull_request where possible.") + if issue.startswith("missing-permissions:"): + return Recommendation("Define explicit least-privilege permissions at workflow or job scope.") + if issue.startswith("overbroad-permissions:"): + return Recommendation("Reduce permissions to read-only scopes unless write access is strictly required.") + if issue.startswith("untrusted-fork-code:"): + return Recommendation("Do not checkout PR head refs in privileged contexts; validate source and actor first.") + if issue.startswith("persist-credentials:"): + return Recommendation("Set persist-credentials: false for checkout unless later git pushes are required.") + if issue.startswith("remote-script-exec:"): + return Recommendation("Avoid curl|bash patterns; pin script digests or vendor reviewed scripts in-repo.") + if issue.startswith("pr-target-untrusted-checkout:"): + return Recommendation("Never combine pull_request_target with checkout of PR-controlled refs.") + if issue.startswith("potential-injection:"): + return Recommendation("Treat GitHub context data as untrusted input; quote/sanitize before shell execution.") + return Recommendation("Review this workflow finding and apply least-privilege hardening controls.") + + +def recommend_for_osv_vulnerability(action_name: str, action_version: str) -> Recommendation: + """Create a recommendation for a vulnerable GitHub Action version. + + Parameters + ---------- + action_name : str + GitHub Action identifier in the form ``owner/repo``. + action_version : str + Action ref currently used by the workflow. + + Returns + ------- + Recommendation + Recommendation encouraging upgrade to a non-vulnerable release and + subsequent pinning to a commit SHA. 
+ """ + return Recommendation( + message=( + f"Upgrade `{action_name}` from `{action_version}` to a non-vulnerable release, " + "then pin the selected version to a commit SHA." + ), + recommended_ref=None, + ) diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index 96b702b8e..5a0068e2f 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. [requests] @@ -11,7 +11,7 @@ error_retries = 5 # The default timeout in seconds for downloading assets. timeout = 120 # This is the acceptable maximum size (in bytes) to download an asset. -max_download_size = 10000000 +max_download_size = 30000000 # This is the database to store Macaron's results. [database] diff --git a/src/macaron/config/global_config.py b/src/macaron/config/global_config.py index 78bedc34b..5bbf275d3 100644 --- a/src/macaron/config/global_config.py +++ b/src/macaron/config/global_config.py @@ -1,7 +1,8 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the GlobalConfig class to be used globally.""" + import logging import os from dataclasses import dataclass, field diff --git a/src/macaron/database/database_manager.py b/src/macaron/database/database_manager.py index 78e4395df..2d7e85482 100644 --- a/src/macaron/database/database_manager.py +++ b/src/macaron/database/database_manager.py @@ -1,7 +1,8 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This DatabaseManager module handles the sqlite database connection.""" + import collections.abc import functools import logging diff --git a/src/macaron/database/table_definitions.py b/src/macaron/database/table_definitions.py index 6414555c2..a3e53f5d7 100644 --- a/src/macaron/database/table_definitions.py +++ b/src/macaron/database/table_definitions.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ @@ -10,6 +10,7 @@ For table associated with a check see the check module. """ + import logging import string from datetime import datetime diff --git a/src/macaron/json_tools.py b/src/macaron/json_tools.py index a69b0eaa8..df8126074 100644 --- a/src/macaron/json_tools.py +++ b/src/macaron/json_tools.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module provides utility functions for JSON data.""" + import logging from collections.abc import Sequence from typing import TypeVar diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py index 87658f714..810d7523b 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Analyzer checks if there is typosquatting presence in the package name.""" + import logging import os diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py index 278f3eeb5..053709cd2 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Heuristics analyzer to check unchanged content in multiple releases.""" + import logging from collections import Counter diff --git a/src/macaron/output_reporter/results.py b/src/macaron/output_reporter/results.py index 2af4fc269..f2e86dcba 100644 --- a/src/macaron/output_reporter/results.py +++ b/src/macaron/output_reporter/results.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains classes that represent the result of the Macaron analysis.""" @@ -162,7 +162,8 @@ def get_dep_summary(self) -> DepSummary: analyzed_deps=0, unique_dep_repos=0, checks_summary=[ - {"check_id": check_id, "num_deps_pass": 0} for check_id in registry.get_all_checks_mapping() + {"check_id": check_id, "num_deps_pass": 0} # nosec B105 + for check_id in registry.get_all_checks_mapping() ], dep_status=[dep.get_summary() for dep in self.dependencies], ) diff --git a/src/macaron/parsers/bashparser.py b/src/macaron/parsers/bashparser.py index ac2ceed68..2b8de426a 100644 --- a/src/macaron/parsers/bashparser.py +++ b/src/macaron/parsers/bashparser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module is a Python wrapper for the compiled bashparser binary. @@ -162,6 +162,74 @@ def parse_raw(bash_content: str, macaron_path: str | None = None) -> File: raise ParseError("Error while loading the parsed bash script.") from error +def parse_raw_with_gha_mapping(bash_content: str, macaron_path: str | None = None) -> tuple[File, dict[str, str]]: + """Parse bash content and return raw AST plus GitHub expression mapping. + + Parameters + ---------- + bash_content : str + Bash script content. + macaron_path : str | None + Macaron's root path (optional). + + Returns + ------- + tuple[bashparser_model.File, dict[str, str]] + A tuple of: + - The parsed raw bash AST. + - Mapping from parser placeholder variable names to original GitHub expression bodies. 
+ + Raises + ------ + ParseError + When parsing fails with errors or output cannot be decoded. + """ + if not macaron_path: + macaron_path = global_config.macaron_path + cmd = [ + os.path.join(macaron_path, "bin", "bashparser"), + "-input", + bash_content, + "-raw-gha-map", + ] + + try: + result = subprocess.run( # nosec B603 + cmd, + capture_output=True, + check=True, + cwd=macaron_path, + timeout=defaults.getint("bashparser", "timeout", fallback=30), + ) + except ( + subprocess.CalledProcessError, + subprocess.TimeoutExpired, + FileNotFoundError, + ) as error: + raise ParseError("Error while parsing bash script.") from error + + try: + if result.returncode != 0: + raise ParseError(f"Bash script parser failed: {result.stderr.decode('utf-8')}") + + payload = cast(dict[str, object], json.loads(result.stdout.decode("utf-8"))) + ast_data = payload.get("ast") + gha_map = payload.get("gha_expr_map") + if not isinstance(ast_data, dict): + raise ParseError("Error while loading the parsed bash script.") + if not isinstance(gha_map, dict): + raise ParseError("Error while loading the parsed bash script.") + gha_map_clean: dict[str, str] = {} + for key, value in gha_map.items(): + if isinstance(key, str) and isinstance(value, str): + gha_map_clean[key] = value + + return cast(File, ast_data), gha_map_clean + + except json.JSONDecodeError as error: + raise ParseError("Error while loading the parsed bash script.") from error + + def parse_expr(bash_expr_content: str, macaron_path: str | None = None) -> list[Word]: """Parse a bash script's content. diff --git a/src/macaron/parsers/pomparser.py b/src/macaron/parsers/pomparser.py index 2597e1938..fe1f90bee 100644 --- a/src/macaron/parsers/pomparser.py +++ b/src/macaron/parsers/pomparser.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the parser for POM files.""" + import logging from xml.etree.ElementTree import Element # nosec B405 diff --git a/src/macaron/provenance/provenance_extractor.py b/src/macaron/provenance/provenance_extractor.py index 4366ab299..b4003b0d0 100644 --- a/src/macaron/provenance/provenance_extractor.py +++ b/src/macaron/provenance/provenance_extractor.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains methods for extracting repository and commit metadata from provenance files.""" + import logging import urllib.parse from abc import ABC, abstractmethod diff --git a/src/macaron/provenance/provenance_finder.py b/src/macaron/provenance/provenance_finder.py index 0c1385d0f..e841fd397 100644 --- a/src/macaron/provenance/provenance_finder.py +++ b/src/macaron/provenance/provenance_finder.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains methods for finding provenance files.""" + import json import logging import os diff --git a/src/macaron/provenance/provenance_verifier.py b/src/macaron/provenance/provenance_verifier.py index 72b457ca0..2ab200b0b 100644 --- a/src/macaron/provenance/provenance_verifier.py +++ b/src/macaron/provenance/provenance_verifier.py @@ -2,6 +2,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains methods for verifying provenance files.""" + import glob import hashlib import logging diff --git a/src/macaron/repo_finder/commit_finder.py b/src/macaron/repo_finder/commit_finder.py index 25f24272b..b7f306e03 100644 --- a/src/macaron/repo_finder/commit_finder.py +++ b/src/macaron/repo_finder/commit_finder.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the logic for matching PackageURL versions to repository commits via the tags they contain.""" + import logging import re from datetime import datetime diff --git a/src/macaron/repo_finder/repo_finder_deps_dev.py b/src/macaron/repo_finder/repo_finder_deps_dev.py index e3f92cc4c..e47111081 100644 --- a/src/macaron/repo_finder/repo_finder_deps_dev.py +++ b/src/macaron/repo_finder/repo_finder_deps_dev.py @@ -2,6 +2,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the PythonRepoFinderDD class to be used for finding repositories using deps.dev.""" + import json import logging import urllib.parse diff --git a/src/macaron/repo_finder/repo_finder_enums.py b/src/macaron/repo_finder/repo_finder_enums.py index 02e98e262..f1a256053 100644 --- a/src/macaron/repo_finder/repo_finder_enums.py +++ b/src/macaron/repo_finder/repo_finder_enums.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains Enums used to represent the outcome of Repo Finder or Commit Finder executions.""" + from enum import Enum diff --git a/src/macaron/repo_finder/repo_finder_java.py b/src/macaron/repo_finder/repo_finder_java.py index 9b2e111cf..16889603d 100644 --- a/src/macaron/repo_finder/repo_finder_java.py +++ b/src/macaron/repo_finder/repo_finder_java.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the JavaRepoFinder class to be used for finding Java repositories.""" + import logging import re import urllib.parse diff --git a/src/macaron/repo_finder/repo_finder_npm.py b/src/macaron/repo_finder/repo_finder_npm.py index 4cd337dce..20b4c4bd6 100644 --- a/src/macaron/repo_finder/repo_finder_npm.py +++ b/src/macaron/repo_finder/repo_finder_npm.py @@ -1,7 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the logic for finding repositories of NPM projects.""" + import logging from packageurl import PackageURL diff --git a/src/macaron/repo_finder/repo_finder_pypi.py b/src/macaron/repo_finder/repo_finder_pypi.py index 2941c71cf..0f2860d72 100644 --- a/src/macaron/repo_finder/repo_finder_pypi.py +++ b/src/macaron/repo_finder/repo_finder_pypi.py @@ -1,7 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the logic for finding repositories of PyPI projects.""" + import logging from packageurl import PackageURL diff --git a/src/macaron/repo_finder/repo_utils.py b/src/macaron/repo_finder/repo_utils.py index 56d48b42a..92fc243d5 100644 --- a/src/macaron/repo_finder/repo_utils.py +++ b/src/macaron/repo_finder/repo_utils.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the utility functions for repo and commit finder operations.""" + import json import logging import os diff --git a/src/macaron/repo_finder/repo_validator.py b/src/macaron/repo_finder/repo_validator.py index 4e2e7d639..acaf5fec9 100644 --- a/src/macaron/repo_finder/repo_validator.py +++ b/src/macaron/repo_finder/repo_validator.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module exists to validate URLs in terms of their use as a repository that can be analyzed.""" + import urllib.parse from collections.abc import Iterable diff --git a/src/macaron/repo_verifier/repo_verifier.py b/src/macaron/repo_verifier/repo_verifier.py index a99538fb7..7d7b9aab5 100644 --- a/src/macaron/repo_verifier/repo_verifier.py +++ b/src/macaron/repo_verifier/repo_verifier.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains code to verify whether a reported repository can be linked back to the artifact.""" + import logging from macaron.repo_verifier.repo_verifier_base import ( diff --git a/src/macaron/repo_verifier/repo_verifier_base.py b/src/macaron/repo_verifier/repo_verifier_base.py index b056eac86..dffa61141 100644 --- a/src/macaron/repo_verifier/repo_verifier_base.py +++ b/src/macaron/repo_verifier/repo_verifier_base.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the base class and core data models for repository verification.""" + import abc import logging from dataclasses import dataclass diff --git a/src/macaron/repo_verifier/repo_verifier_gradle.py b/src/macaron/repo_verifier/repo_verifier_gradle.py index 6b4960958..c6f863d62 100644 --- a/src/macaron/repo_verifier/repo_verifier_gradle.py +++ b/src/macaron/repo_verifier/repo_verifier_gradle.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains code to verify whether a Gradle-based repository can be linked back to the artifact.""" + import logging from pathlib import Path diff --git a/src/macaron/repo_verifier/repo_verifier_maven.py b/src/macaron/repo_verifier/repo_verifier_maven.py index f09d9ad3b..fc3a30b2d 100644 --- a/src/macaron/repo_verifier/repo_verifier_maven.py +++ b/src/macaron/repo_verifier/repo_verifier_maven.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. 
All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains code to verify whether a reported Maven-based repository can be linked back to the artifact.""" + import logging from urllib.parse import urlparse diff --git a/src/macaron/resources/policies/sql/check-github-actions.sql b/src/macaron/resources/policies/sql/check-github-actions.sql new file mode 100644 index 000000000..09ba2555b --- /dev/null +++ b/src/macaron/resources/policies/sql/check-github-actions.sql @@ -0,0 +1,26 @@ +-- Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +-- Failed check facts for check-github-actions policy template. +SELECT + gha_check.finding_group, + gha_check.finding_priority, + gha_check.finding_type, + gha_check.action_name, + gha_check.action_ref, + gha_check.vuln_urls, + gha_check.finding_message, + gha_check.recommended_ref, + gha_check.sha_pinned, + gha_check.caller_workflow AS vulnerable_workflow, + analysis.analysis_time +FROM github_actions_vulnerabilities_check AS gha_check +JOIN check_facts + ON check_facts.id = gha_check.id +JOIN check_result + ON check_result.id = check_facts.check_result_id +JOIN component + ON check_result.component_id = component.id +JOIN analysis + ON analysis.id = component.analysis_id +WHERE check_result.passed = 0; diff --git a/src/macaron/resources/policies/sql/malware-detection-dependencies.sql b/src/macaron/resources/policies/sql/malware-detection-dependencies.sql new file mode 100644 index 000000000..028b2445c --- /dev/null +++ b/src/macaron/resources/policies/sql/malware-detection-dependencies.sql @@ -0,0 +1,19 @@ +-- Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +-- Failed check facts for malware-detection-dependencies policy template. +SELECT + analysis.analysis_time, + component.id component_id, + component.purl component_purl, + detect_malicious_metadata_check.* +FROM detect_malicious_metadata_check +JOIN check_facts + ON check_facts.id = detect_malicious_metadata_check.id +JOIN check_result + ON check_result.id = check_facts.check_result_id +JOIN component + ON check_result.component_id = component.id +JOIN analysis + ON analysis.id = component.analysis_id + AND check_result.passed = 0; diff --git a/src/macaron/resources/policies/sql/malware-detection.sql b/src/macaron/resources/policies/sql/malware-detection.sql new file mode 100644 index 000000000..a6597e89a --- /dev/null +++ b/src/macaron/resources/policies/sql/malware-detection.sql @@ -0,0 +1,19 @@ +-- Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +-- Failed check facts for malware-detection policy template. +SELECT + analysis.analysis_time, + component.id component_id, + component.purl component_purl, + detect_malicious_metadata_check.* +FROM detect_malicious_metadata_check +JOIN check_facts + ON check_facts.id = detect_malicious_metadata_check.id +JOIN check_result + ON check_result.id = check_facts.check_result_id +JOIN component + ON check_result.component_id = component.id +JOIN analysis + ON analysis.id = component.analysis_id + AND check_result.passed = 0; diff --git a/src/macaron/resources/pypi_malware_rules/obfuscation.yaml b/src/macaron/resources/pypi_malware_rules/obfuscation.yaml index 81b2f08f8..c8283fafa 100644 --- a/src/macaron/resources/pypi_malware_rules/obfuscation.yaml +++ b/src/macaron/resources/pypi_malware_rules/obfuscation.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. 
All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. rules: @@ -11,10 +11,82 @@ rules: severity: ERROR pattern-either: - pattern: __import__('base64') - - pattern: __import__('builtins') - pattern: __import__('subprocess') - - pattern: __import__('sys') - - pattern: __import__('os') + # process execution obfuscation using inline builtins import + - pattern: __import__('builtins').eval(...) + - pattern: __import__('builtins').exec(...) + # writing to a file obfuscation using inline builtins import + - patterns: + - pattern: __import__('builtins').open(..., $MODE, ...) + - pattern-not: __import__('builtins').open(..., 'r', ...) + - pattern-not: __import__('builtins').open(..., 'rb', ...) + - pattern-not: __import__('builtins').open(..., 'rt', ...) + - pattern-not: __import__('builtins').open(..., 'br', ...) + - pattern-not: __import__('builtins').open(..., 'tr', ...) + - patterns: + - pattern: __import__('builtins').open(..., mode=$MODE, ...) + - pattern-not: __import__('builtins').open(..., mode='r', ...) + - pattern-not: __import__('builtins').open(..., mode='rb', ...) + - pattern-not: __import__('builtins').open(..., mode='rt', ...) + - pattern-not: __import__('builtins').open(..., mode='br', ...) + - pattern-not: __import__('builtins').open(..., mode='tr', ...) + - pattern: __import__('sys').setrecursionlimit(...) + - pattern: __import__('sys').remote_exec(...) + # process execution obfuscation using inline os import + - pattern: __import__('os').execl(...) + - pattern: __import__('os').execle(...) + - pattern: __import__('os').execlp(...) + - pattern: __import__('os').execlpe(...) + - pattern: __import__('os').execv(...) + - pattern: __import__('os').execve(...) + - pattern: __import__('os').execvp(...) + - pattern: __import__('os').execvpe(...) + - pattern: __import__('os').popen(...) + - pattern: __import__('os').posix_spawn(...) + - pattern: __import__('os').posix_spawnp(...) 
+ - pattern: __import__('os').spawnl(...) + - pattern: __import__('os').spawnle(...) + - pattern: __import__('os').spawnlp(...) + - pattern: __import__('os').spawnlpe(...) + - pattern: __import__('os').spawnv(...) + - pattern: __import__('os').spawnve(...) + - pattern: __import__('os').spawnvp(...) + - pattern: __import__('os').spawnvpe(...) + - pattern: __import__('os').system(...) + # environmen modification obfuscation using inline import + - pattern: __import__('os').putenv(...) + - pattern: __import__('os').unsetenv(...) + - pattern: __import__('os').environ[...] = ... + - pattern: __import__('os').environb[...] = ... + - pattern: del __import__('os').environ[...] + - pattern: del __import__('os').environb[...] + - pattern: __import__('os').environ.update(...) + - pattern: __import__('os').environb.update(...) + - pattern: __import__('os').environ.pop(...) + - pattern: __import__('os').environb.pop(...) + - pattern: __import__('os').environ.clear() + - pattern: __import__('os').environb.clear() + # writing to a file obfuscation using inline os import + - pattern: __import__('os').write(...) + - patterns: + - pattern: __import__('os').fdopen(..., $MODE, ...) + - pattern-not: __import__('os').fdopen(..., 'r', ...) + - pattern-not: __import__('os').fdopen(..., 'rb', ...) + - pattern-not: __import__('os').fdopen(..., 'rt', ...) + - pattern-not: __import__('os').fdopen(..., 'br', ...) + - pattern-not: __import__('os').fdopen(..., 'tr', ...) + - patterns: + - pattern: __import__('os').fdopen(..., mode=$MODE, ...) + - pattern-not: __import__('os').fdopen(..., mode='r', ...) + - pattern-not: __import__('os').fdopen(..., mode='rb', ...) + - pattern-not: __import__('os').fdopen(..., mode='rt', ...) + - pattern-not: __import__('os').fdopen(..., mode='br', ...) + - pattern-not: __import__('os').fdopen(..., mode='tr', ...) + - patterns: + - pattern: __import__('os').open(..., $FLAGS, ...) 
+ - metavariable-regex: + metavariable: $FLAGS + regex: .*O_(WRONLY|RDWR|APPEND|CREAT|TRUNC).* - pattern: __import__('zlib') - pattern: __import__('marshal') # python will evaluate a hex/oct string diff --git a/src/macaron/slsa_analyzer/checks/build_tool_check.py b/src/macaron/slsa_analyzer/checks/build_tool_check.py index 8432b014e..68790f279 100644 --- a/src/macaron/slsa_analyzer/checks/build_tool_check.py +++ b/src/macaron/slsa_analyzer/checks/build_tool_check.py @@ -1,9 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the implementation of the build tool detection check.""" - import logging from sqlalchemy import ForeignKey, String diff --git a/src/macaron/slsa_analyzer/checks/check_result.py b/src/macaron/slsa_analyzer/checks/check_result.py index f9d5c1ad0..12a4af0a7 100644 --- a/src/macaron/slsa_analyzer/checks/check_result.py +++ b/src/macaron/slsa_analyzer/checks/check_result.py @@ -1,7 +1,8 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the CheckResult class for storing the result of a check.""" + import json from dataclasses import dataclass from enum import Enum diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py index 48c6d445e..4fb2e92ec 100644 --- a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py +++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py @@ -1,12 +1,14 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the implementation of the GitHub Actions vulnerabilities check.""" import logging import os +import re +from enum import Enum -from sqlalchemy import ForeignKey, String +from sqlalchemy import Boolean, ForeignKey, Integer, String from sqlalchemy.orm import Mapped, mapped_column from macaron.code_analyzer.dataflow_analysis.analysis import get_containing_github_job @@ -15,6 +17,15 @@ GitHubActionsActionStepNode, GitHubActionsReusableWorkflowCallNode, ) +from macaron.code_analyzer.gha_security_analysis.detect_injection import ( + build_unpinned_action_recommendation, + build_workflow_issue_recommendation, + detect_github_actions_security_issues, + extract_workflow_issue_line, +) +from macaron.code_analyzer.gha_security_analysis.recommendation import ( + recommend_for_osv_vulnerability, +) from macaron.database.db_custom_types import DBJsonList from macaron.database.table_definitions import CheckFacts from macaron.errors import APIAccessError @@ -29,6 +40,24 @@ logger: logging.Logger = logging.getLogger(__name__) +class GitHubActionsFindingType(str, Enum): + """Enumeration of finding categories for GitHub Actions vulnerability check facts.""" + + # Note: finding_type is the 
subtype within a top-level finding_group. + # It intentionally carries more granular detail than finding_group. + KNOWN_VULNERABILITY = "known-vulnerability" + UNPINNED_THIRD_PARTY_ACTION = "unpinned-third-party-action" + + +class GitHubActionsFindingGroup(str, Enum): + """Top-level finding groups for GitHub Actions vulnerability check facts.""" + + # Note: finding_group is the high-level bucket used for reporting sections. + # finding_type refines the exact issue inside one of these groups. + THIRD_PARTY_ACTION_RISK = "third_party_action_risk" + WORKFLOW_SECURITY_ISSUE = "workflow_security_issue" + + class GitHubActionsVulnsFacts(CheckFacts): """The ORM mapping for justifications in the GitHub Actions vulnerabilities check.""" @@ -37,23 +66,57 @@ class GitHubActionsVulnsFacts(CheckFacts): #: The primary key. id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003 - #: The list of vulnerability URLs. - vulnerability_urls: Mapped[list[str]] = mapped_column( - DBJsonList, nullable=False, info={"justification": JustificationType.TEXT} + #: The GitHub Action workflow that may have various security issues. + caller_workflow: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.HREF}) + + #: The finding category (subtype). + #: - ``known-vulnerability`` for known vulnerability findings. + #: - ``unpinned-third-party-action`` for third-party actions not pinned to a commit SHA. + #: - workflow issue subtype names (for example ``overbroad-permissions``). + #: This complements ``finding_group`` instead of replacing it. + finding_type: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT}) + + #: The top-level finding group. + #: - ``third_party_action_risk`` for action dependency risks. + #: - ``workflow_security_issue`` for workflow implementation security issues. 
+ #: Use this to group rows in summaries; use ``finding_type`` for specific issue filtering. + finding_group: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT}) + + #: Human-readable finding details. + finding_message: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} ) - #: The GitHub Action Identifier. - github_actions_id: Mapped[str] = mapped_column( - String, nullable=False, info={"justification": JustificationType.TEXT} + #: Priority score for sorting and triaging findings in summary outputs. + finding_priority: Mapped[int] = mapped_column( + Integer, nullable=False, info={"justification": JustificationType.TEXT} ) - #: The GitHub Action version. - github_actions_version: Mapped[str] = mapped_column( - String, nullable=False, info={"justification": JustificationType.TEXT} + #: Recommended immutable action reference, if applicable. + recommended_ref: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} ) - #: The GitHub Action workflow that calls the vulnerable GitHub Action. - caller_workflow: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.HREF}) + #: Third-party action identifier (for ``third_party_action_risk`` findings only). + #: For workflow-security findings, this field is ``None``. + action_name: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} + ) + + #: Third-party action version/ref (for ``third_party_action_risk`` findings only). + #: For workflow-security findings, this field is ``None``. + action_ref: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} + ) + + #: Whether the action reference is pinned to a full commit SHA. 
+ sha_pinned: Mapped[bool | None] = mapped_column(Boolean, nullable=True) + + #: Related vulnerability URLs (for ``third_party_action_risk`` findings only). + #: For workflow-security findings, this field is an empty list. + vuln_urls: Mapped[list[str]] = mapped_column( + DBJsonList, nullable=False, info={"justification": JustificationType.TEXT} + ) __mapper_args__ = { "polymorphic_identity": "_github_actions_vulnerabilities_check", @@ -89,10 +152,71 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: The result of the check. """ ci_services = ctx.dynamic_data["ci_services"] - + result_tables: list[CheckFacts] = [] external_workflows: dict[str, list] = {} + for ci_info in ci_services: callgraph = ci_info["callgraph"] + gh_api_client = ci_info["service"].api_client if hasattr(ci_info["service"], "api_client") else None + if workflow_findings := detect_github_actions_security_issues(callgraph): + for finding in workflow_findings: + caller_workflow_link = "" + if gh_api_client: + caller_workflow_link = gh_api_client.get_file_link( + ctx.component.repository.full_name, + ctx.component.repository.commit_sha, + file_path=( + gh_api_client.get_relative_path_of_workflow(os.path.basename(finding["workflow_name"])) + if finding["workflow_name"] + else "" + ), + ) + for prioritized_issue in finding["issues"]: + issue = prioritized_issue["issue"] + issue_priority = int(prioritized_issue["priority"]) + issue_line = extract_workflow_issue_line(issue) + finding_workflow_link = caller_workflow_link + if issue_line and finding_workflow_link: + finding_workflow_link = f"{finding_workflow_link}#L{issue_line}" + if unpinned_action_info := build_unpinned_action_recommendation(issue, gh_api_client): + action_name, action_version, recommendation = unpinned_action_info + finding_type = GitHubActionsFindingType.UNPINNED_THIRD_PARTY_ACTION.value + result_tables.append( + GitHubActionsVulnsFacts( + vuln_urls=[], + finding_type=finding_type, + 
finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, + action_name=action_name, + action_ref=action_version, + caller_workflow=finding_workflow_link, + sha_pinned=False, + finding_message=( + f"Summary: {_short_description_for_finding_type(finding_type)} " + f"Recommendation: {recommendation.message}" + ), + finding_priority=issue_priority, + recommended_ref=recommendation.recommended_ref, + confidence=Confidence.HIGH, + ) + ) + continue + + finding_type, recommendation, finding_message = build_workflow_issue_recommendation(issue) + result_tables.append( + GitHubActionsVulnsFacts( + vuln_urls=[], + finding_type=finding_type, + finding_group=GitHubActionsFindingGroup.WORKFLOW_SECURITY_ISSUE.value, + action_name=None, + action_ref=None, + caller_workflow=finding_workflow_link, + sha_pinned=None, + finding_message=finding_message, + finding_priority=issue_priority, + recommended_ref=recommendation.recommended_ref, + confidence=Confidence.HIGH, + ) + ) for root in callgraph.root_nodes: for callee in traverse_bfs(root): if isinstance(callee, (GitHubActionsReusableWorkflowCallNode, GitHubActionsActionStepNode)): @@ -114,77 +238,88 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: continue caller_path = job.context.ref.workflow_context.ref.source_filepath + caller_path_link = caller_path + if gh_api_client: + caller_path_link = gh_api_client.get_file_link( + ctx.component.repository.full_name, + ctx.component.repository.commit_sha, + file_path=( + gh_api_client.get_relative_path_of_workflow(os.path.basename(caller_path)) + if caller_path + else "" + ), + ) ext_workflow: list = external_workflows.get(workflow_name, []) ext_workflow.append( { "version": workflow_version, - "caller_path": ci_info["service"].api_client.get_file_link( - ctx.component.repository.full_name, - ctx.component.repository.commit_sha, - file_path=( - ci_info["service"].api_client.get_relative_path_of_workflow( - os.path.basename(caller_path) - ) - if caller_path - else 
"" - ), - ), + "caller_path": caller_path_link, } ) external_workflows[workflow_name] = ext_workflow - # If no external GitHub Actions are found, return passed result. - if not external_workflows: - return CheckResultData( - result_tables=[], - result_type=CheckResultType.PASSED, - ) - - # We first send a batch query to see which GitHub Actions are potentially vulnerable. - # OSV's querybatch returns minimal results but this allows us to only make subsequent - # queries to get vulnerability details when needed. - batch_query = [{"name": k, "ecosystem": "GitHub Actions"} for k, _ in external_workflows.items()] - batch_vulns = [] - try: - batch_vulns = OSVDevService.get_vulnerabilities_package_name_batch(batch_query) - except APIAccessError as error: - logger.debug(error) + # If no external GitHub Actions are found, no need to check for known vulnerabilities. + if external_workflows: - result_tables: list[CheckFacts] = [] - for vuln_res in batch_vulns: - vulns: list = [] - workflow_name = vuln_res["name"] + # We first send a batch query to see which GitHub Actions are potentially vulnerable. + # OSV's querybatch returns minimal results but this allows us to only make subsequent + # queries to get vulnerability details when needed. 
+ batch_query = [ + {"package": {"name": k, "ecosystem": "GitHub Actions"}} for k, _ in external_workflows.items() if k + ] + batch_vulns = [] try: - vulns = OSVDevService.get_vulnerabilities_package_name(ecosystem="GitHub Actions", name=workflow_name) + batch_vulns = OSVDevService.get_vulnerabilities_package_name_batch(batch_query) except APIAccessError as error: logger.debug(error) - continue - for workflow_inv in external_workflows[workflow_name]: - vuln_mapping = [] - for vuln in vulns: - if v_id := json_extract(vuln, ["id"], str): - try: - if OSVDevService.is_version_affected( - vuln, - workflow_name, - workflow_inv["version"], - "GitHub Actions", - source_repo=f"https://github.com/{workflow_name}", - ): - vuln_mapping.append(f"https://osv.dev/vulnerability/{v_id}") - except APIAccessError as error: - logger.debug(error) - if vuln_mapping: - result_tables.append( - GitHubActionsVulnsFacts( - vulnerability_urls=vuln_mapping, - github_actions_id=workflow_name, - github_actions_version=workflow_inv["version"], - caller_workflow=workflow_inv["caller_path"], - confidence=Confidence.HIGH, - ) + + for vuln_res in batch_vulns: + vulns: list = [] + workflow_name = vuln_res["package"]["name"] + try: + vulns = OSVDevService.get_vulnerabilities_package_name( + ecosystem="GitHub Actions", name=workflow_name ) + except APIAccessError as error: + logger.debug(error) + continue + for workflow_inv in external_workflows[workflow_name]: + vuln_mapping = [] + for vuln in vulns: + if v_id := json_extract(vuln, ["id"], str): + try: + if OSVDevService.is_version_affected( + vuln, + workflow_name, + workflow_inv["version"], + "GitHub Actions", + source_repo=f"https://github.com/{workflow_name}", + ): + vuln_mapping.append(f"https://osv.dev/vulnerability/{v_id}") + except APIAccessError as error: + logger.debug(error) + if vuln_mapping: + recommendation = recommend_for_osv_vulnerability(workflow_name, workflow_inv["version"]) + finding_type = 
GitHubActionsFindingType.KNOWN_VULNERABILITY.value + result_tables.append( + GitHubActionsVulnsFacts( + vuln_urls=vuln_mapping, + finding_type=finding_type, + finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, + action_name=workflow_name, + action_ref=workflow_inv["version"], + caller_workflow=workflow_inv["caller_path"], + sha_pinned=bool(re.fullmatch(r"[0-9a-f]{40}", workflow_inv["version"])), + finding_message=( + f"Summary: {_short_description_for_finding_type(finding_type)} " + f"Recommendation: {recommendation.message}" + ), + finding_priority=100, + recommended_ref=recommendation.recommended_ref, + confidence=Confidence.HIGH, + ) + ) if result_tables: return CheckResultData( @@ -199,3 +334,23 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: registry.register(GitHubActionsVulnsCheck()) + + +def _short_description_for_finding_type(finding_type: str) -> str: + """Return a concise, human-readable summary for a finding subtype.""" + finding_summaries = { + GitHubActionsFindingType.KNOWN_VULNERABILITY.value: "Action version is known to be vulnerable.", + GitHubActionsFindingType.UNPINNED_THIRD_PARTY_ACTION.value: "Third-party action is not pinned to an immutable SHA.", + "sensitive-trigger": "Workflow uses a sensitive trigger and needs strict gating.", + "privileged-trigger": "Privileged trigger can expose elevated token scope to untrusted input.", + "missing-permissions": "Workflow omits explicit permissions and may inherit broad defaults.", + "overbroad-permissions": "Workflow requests permissions broader than required.", + "untrusted-fork-code": "Workflow can execute code controlled by an untrusted fork.", + "persist-credentials": "Persisted checkout credentials can leak token access to later steps.", + "remote-script-exec": "Workflow downloads and executes remote scripts inline.", + "pr-target-untrusted-checkout": "pull_request_target is combined with checkout of PR-controlled refs.", + "potential-injection": "Untrusted 
GitHub context data may flow into shell execution.", + "self-hosted-runner": "Job uses self-hosted runners, increasing blast radius for untrusted code.", + "workflow-security-issue": "Workflow includes a security issue that requires hardening.", + } + return finding_summaries.get(finding_type, "Workflow security finding detected.") diff --git a/src/macaron/slsa_analyzer/checks/provenance_commit_check.py b/src/macaron/slsa_analyzer/checks/provenance_commit_check.py index b2b5d7297..7e271ffea 100644 --- a/src/macaron/slsa_analyzer/checks/provenance_commit_check.py +++ b/src/macaron/slsa_analyzer/checks/provenance_commit_check.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module adds a check that determines whether the repository URL came from provenance.""" + import logging from sqlalchemy import ForeignKey, String diff --git a/src/macaron/slsa_analyzer/checks/provenance_repo_check.py b/src/macaron/slsa_analyzer/checks/provenance_repo_check.py index 1f35fef39..e1260d76c 100644 --- a/src/macaron/slsa_analyzer/checks/provenance_repo_check.py +++ b/src/macaron/slsa_analyzer/checks/provenance_repo_check.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module adds a check that determines whether the repository URL came from provenance.""" + import logging from sqlalchemy import ForeignKey, String diff --git a/src/macaron/slsa_analyzer/checks/provenance_verified_check.py b/src/macaron/slsa_analyzer/checks/provenance_verified_check.py index 65f028ec0..46ac145e7 100644 --- a/src/macaron/slsa_analyzer/checks/provenance_verified_check.py +++ b/src/macaron/slsa_analyzer/checks/provenance_verified_check.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module adds a Check that checks whether the provenance is verified.""" + import logging from sqlalchemy import ForeignKey, String diff --git a/src/macaron/slsa_analyzer/checks/vcs_check.py b/src/macaron/slsa_analyzer/checks/vcs_check.py index ec70731e2..259838477 100644 --- a/src/macaron/slsa_analyzer/checks/vcs_check.py +++ b/src/macaron/slsa_analyzer/checks/vcs_check.py @@ -1,9 +1,8 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the implementation of the VCS check.""" - import logging from sqlalchemy import ForeignKey, String diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py index b24dc5963..d222ee011 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module analyzes GitHub Actions CI.""" @@ -8,13 +8,14 @@ import glob import logging import os +import traceback from datetime import datetime, timedelta, timezone from macaron.code_analyzer.dataflow_analysis.analysis import analyse_github_workflow_file from macaron.code_analyzer.dataflow_analysis.core import Node, NodeForest from macaron.config.defaults import defaults from macaron.config.global_config import global_config -from macaron.errors import GitHubActionsValueError, ParseError +from macaron.errors import CallGraphError, GitHubActionsValueError, ParseError from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService from macaron.slsa_analyzer.git_service.api_client import GhAPIClient, get_default_gh_client from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService @@ -580,21 +581,40 @@ def build_call_graph(self, repo_path: str, macaron_path: str = "") -> NodeForest Returns ------- - CallGraph: CallGraph - The call graph built for GitHub Actions. + NodeForest + The root nodes of call graphs built for GitHub Actions workflows. """ if not macaron_path: macaron_path = global_config.macaron_path # Parse GitHub Actions workflows. files = self.get_workflows(repo_path) + return self.build_call_graph_for_files(files, repo_path) + + def build_call_graph_for_files(self, files: list[str], repo_path: str) -> NodeForest: + """Build call graphs for a given set of GitHub Actions workflow files. + + Parameters + ---------- + files : list[str] + The list of workflow file paths to analyze. + repo_path : str + The repository path used as the base context for workflow analysis. + + Returns + ------- + NodeForest + A forest containing one root node per successfully parsed workflow. + Workflows that raise ``ParseError`` are skipped. 
+ """ nodes: list[Node] = [] for workflow_path in files: try: workflow_node = analyse_github_workflow_file(workflow_path, repo_path) - except ParseError: + except (ParseError, CallGraphError): logger.debug("Skip adding workflow at %s to the callgraph.", workflow_path) + logger.debug("Reason: %s", traceback.format_exc()) continue nodes.append(workflow_node) return NodeForest(nodes) diff --git a/src/macaron/slsa_analyzer/git_service/api_client.py b/src/macaron/slsa_analyzer/git_service/api_client.py index 9921c2dc9..f49beda7c 100644 --- a/src/macaron/slsa_analyzer/git_service/api_client.py +++ b/src/macaron/slsa_analyzer/git_service/api_client.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """The module provides API clients for VCS services, such as GitHub.""" @@ -11,6 +11,7 @@ from typing import NamedTuple from macaron.config.defaults import defaults +from macaron.json_tools import json_extract from macaron.slsa_analyzer.asset import AssetLocator from macaron.util import ( construct_query, @@ -388,6 +389,28 @@ def get_commit_data_from_hash(self, full_name: str, commit_hash: str) -> dict: return response_data + def get_commit_sha_from_ref(self, full_name: str, ref: str) -> str | None: + """Resolve a Git reference (tag/branch/sha) to a 40-character commit SHA. + + Parameters + ---------- + full_name : str + The full name of the repository in the format ``owner/name``. + ref : str + The git reference to resolve (e.g. ``v5``, ``main``, ``v1.2.3``). + + Returns + ------- + str | None + The resolved commit SHA, or ``None`` if resolution fails. 
+ """ + if not full_name or not ref: + return None + + response_data = self.get_commit_data_from_hash(full_name, ref) + sha = json_extract(response_data, ["sha"], str) + return sha if sha and len(sha) == 40 else None + def search(self, target: str, query: str) -> dict: """Perform a search using GitHub REST API. diff --git a/src/macaron/slsa_analyzer/git_service/github.py b/src/macaron/slsa_analyzer/git_service/github.py index d5e1c8548..48922562b 100644 --- a/src/macaron/slsa_analyzer/git_service/github.py +++ b/src/macaron/slsa_analyzer/git_service/github.py @@ -1,7 +1,8 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the spec for the GitHub service.""" + import logging from pydriller.git import Git diff --git a/src/macaron/slsa_analyzer/git_url.py b/src/macaron/slsa_analyzer/git_url.py index 6fa019991..8c3bf25b0 100644 --- a/src/macaron/slsa_analyzer/git_url.py +++ b/src/macaron/slsa_analyzer/git_url.py @@ -3,7 +3,6 @@ """This module provides methods to perform generic actions on Git URLS.""" - import logging import os import re diff --git a/src/macaron/slsa_analyzer/package_registry/maven_central_registry.py b/src/macaron/slsa_analyzer/package_registry/maven_central_registry.py index 010cb20cf..957193229 100644 --- a/src/macaron/slsa_analyzer/package_registry/maven_central_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/maven_central_registry.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""The module provides abstractions for the Maven Central package registry.""" + import hashlib import logging import urllib.parse diff --git a/src/macaron/slsa_analyzer/package_registry/npm_registry.py b/src/macaron/slsa_analyzer/package_registry/npm_registry.py index 7d33f3986..5ff8f9709 100644 --- a/src/macaron/slsa_analyzer/package_registry/npm_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/npm_registry.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """The module provides abstractions for the npm package registry.""" + from __future__ import annotations import json diff --git a/src/macaron/slsa_analyzer/package_registry/osv_dev.py b/src/macaron/slsa_analyzer/package_registry/osv_dev.py index 9a5c96c13..b5955ffa5 100644 --- a/src/macaron/slsa_analyzer/package_registry/osv_dev.py +++ b/src/macaron/slsa_analyzer/package_registry/osv_dev.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains implementation of osv.dev service.""" @@ -102,10 +102,7 @@ def get_vulnerabilities_package_name_batch(packages: list) -> list: APIAccessError If there is an issue with querying the OSV API or if the results do not match the expected size. """ - query_data: dict[str, list] = {"queries": []} - - for pkg in packages: - query_data["queries"].append({"package": {"ecosystem": pkg["ecosystem"], "name": pkg["name"]}}) + query_data: dict[str, list] = {"queries": packages} # The results returned by OSV reports the vulnerabilities, preserving the order. 
osv_res = OSVDevService.call_osv_querybatch_api(query_data, len(packages)) diff --git a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py index 432d14aa7..6741fd208 100644 --- a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py @@ -2,6 +2,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """The module provides abstractions for the pypi package registry.""" + from __future__ import annotations import bisect @@ -964,13 +965,13 @@ def download_wheel(self) -> bool: logger.debug(error) return False - def has_pure_wheel(self) -> bool: - """Check whether the PURL has a pure wheel from its package json. + def has_non_pure_wheel(self) -> bool: + """Check whether the PURL has any non-pure wheel from its package json. Returns ------- bool - Whether the PURL has a pure wheel or not. + Whether the PURL has any non-pure wheel or not. """ if self.component_version: urls = json_extract(self.package_json, ["releases", self.component_version], list) @@ -981,16 +982,13 @@ def has_pure_wheel(self) -> bool: return False for distribution in urls: file_name: str = distribution.get("filename") or "" - # Parse out and check none and any - # Catch exceptions try: _, _, _, tags = parse_wheel_filename(file_name) - # Check if none and any are in the tags (i.e. 
the wheel is pure) - if all(tag.abi == "none" and tag.platform == "any" for tag in tags): + # A wheel is non-pure if any tag is not abi=none and platform=any + if any(tag.abi != "none" or tag.platform != "any" for tag in tags): return True except InvalidWheelFilename: logger.debug("Could not parse wheel name.") - return False return False @contextmanager diff --git a/src/macaron/slsa_analyzer/provenance/loader.py b/src/macaron/slsa_analyzer/provenance/loader.py index 3e9d9b1b0..0b7b1352b 100644 --- a/src/macaron/slsa_analyzer/provenance/loader.py +++ b/src/macaron/slsa_analyzer/provenance/loader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the loaders for SLSA provenances.""" @@ -9,7 +9,6 @@ import json import logging import zlib -from urllib.parse import urlparse from cryptography import x509 from cryptography.x509 import DuplicateExtension, UnsupportedGeneralNameType @@ -19,7 +18,7 @@ from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, validate_intoto_payload from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError, ValidateInTotoPayloadError from macaron.slsa_analyzer.specs.pypi_certificate_predicate import PyPICertificatePredicate -from macaron.util import send_get_http_raw +from macaron.util import send_get_http_raw, url_is_safe logger: logging.Logger = logging.getLogger(__name__) @@ -43,13 +42,8 @@ def _try_read_url_link_file(file_content: bytes) -> str | None: def _download_url_file_content(url: str, url_link_hostname_allowlist: list[str]) -> bytes: - hostname = urlparse(url).hostname - if hostname is None or hostname == "": + if not url_is_safe(url, allow_list=url_link_hostname_allowlist): raise LoadIntotoAttestationError("Cannot resolve URL link file: 
invalid URL") - if hostname not in url_link_hostname_allowlist: - raise LoadIntotoAttestationError( - "Cannot resolve URL link file: target hostname '" + hostname + "' is not in allowed hostnames." - ) # TODO download size limit? timeout = defaults.getint("downloads", "timeout", fallback=120) diff --git a/src/macaron/slsa_analyzer/specs/inferred_provenance.py b/src/macaron/slsa_analyzer/specs/inferred_provenance.py index ee23b021f..ca04b77bc 100644 --- a/src/macaron/slsa_analyzer/specs/inferred_provenance.py +++ b/src/macaron/slsa_analyzer/specs/inferred_provenance.py @@ -1,9 +1,8 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the inferred SLSA provenance spec.""" - from macaron.slsa_analyzer.provenance.intoto import v01 diff --git a/src/macaron/slsa_analyzer/specs/pypi_certificate_predicate.py b/src/macaron/slsa_analyzer/specs/pypi_certificate_predicate.py index 2ae7cfb6e..aef18c128 100644 --- a/src/macaron/slsa_analyzer/specs/pypi_certificate_predicate.py +++ b/src/macaron/slsa_analyzer/specs/pypi_certificate_predicate.py @@ -1,7 +1,8 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the spec for predicates derived from a PyPI attestation certificate.""" + from dataclasses import dataclass diff --git a/src/macaron/util.py b/src/macaron/util.py index 6509e2f67..b6f789493 100644 --- a/src/macaron/util.py +++ b/src/macaron/util.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module includes utilities functions for Macaron.""" @@ -21,6 +21,66 @@ logger: logging.Logger = logging.getLogger(__name__) +def url_is_safe(url: str, allow_list: list[str] | None = None, allow_login: bool = False) -> bool: + r"""Validate that a URL has an acceptable host and login component. + + Parameters + ---------- + url : str + URL string to validate. + allow_list : list[str] | None + Allowed hostnames. When provided, the parsed hostname must be in this list. + If ``None``, any non-empty hostname is accepted. + allow_login : bool, default=False + Whether username/password URL components are permitted. + + Returns + ------- + bool + ``True`` when the URL passes safety checks, otherwise ``False``. + + Examples + -------- + >>> url_is_safe("https://example.com") + True + >>> url_is_safe("https://example.com", allow_list=["example.com"]) + True + >>> url_is_safe("https://example.com", allow_list=["oracle.com"]) + False + >>> url_is_safe("https://user:test@example.com") + False + >>> url_is_safe("https://user:test@example.com", allow_login=True) + True + >>> url_is_safe("not-a-url") + False + >>> url_is_safe("127.0.0.1:6666\\@allowlist.com", ["allowlist.com"]) + False + >>> url_is_safe("https://attacker.com:6666\\@allowlist.com", ["allowlist.com"]) + False + >>> url_is_safe("https://username:attacker.com\\@allowlist.com", ["allowlist.com"]) + False + >>> url_is_safe("https://username:test@allowlist.com", ["allowlist.com"], allow_login = True) + True + """ + try: + parsed_url = urllib.parse.urlparse(url) + except ValueError: + return False + if not allow_login: + if parsed_url.username or parsed_url.password: + logger.debug("Potential attempt to redirect to an invalid URL: hostname %s", parsed_url.hostname) + return False + + hostname = parsed_url.hostname + if hostname is 
None or hostname == "": + return False + if allow_list and (hostname not in allow_list): + logger.debug("URL %s is not in allowed hostnames.", url) + return False + + return True + + def send_get_http(url: str, headers: dict) -> dict: """Send the GET HTTP request with the given url and headers. diff --git a/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py b/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py index 7363fe991..e837ab299 100644 --- a/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py +++ b/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py @@ -1,9 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains tests for the gradle_cli_command module.""" - import pytest from macaron.build_spec_generator.cli_command_parser.gradle_cli_command import GradleCLIOptions diff --git a/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py b/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py index 6b4611bd8..03d3644ba 100644 --- a/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py +++ b/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py @@ -1,9 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the tests for maven cli parser.""" - import pytest from macaron.build_spec_generator.cli_command_parser.maven_cli_parser import ( diff --git a/tests/build_spec_generator/common_spec/compare_default_buildspec.py b/tests/build_spec_generator/common_spec/compare_default_buildspec.py index 51fd9ea1c..d949f1377 100644 --- a/tests/build_spec_generator/common_spec/compare_default_buildspec.py +++ b/tests/build_spec_generator/common_spec/compare_default_buildspec.py @@ -1,9 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Script to compare a generated default buildspec.""" - import argparse import json import logging diff --git a/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr b/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr index 8ff65b0da..8b94d8833 100644 --- a/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr +++ b/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr @@ -67,7 +67,7 @@ EOF # Run the build - RUN source /deps/bin/activate && python -m build + RUN source /deps/bin/activate && /deps/bin/pip install wheel && python -m build --wheel -n # Validate script RUN cat <<'EOF' >/validate diff --git a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr new file mode 100644 index 000000000..984d2d208 --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr @@ -0,0 +1,30 @@ +# serializer version: 1 +# name: test_detect_github_actions_security_issues[injection_pattern_1.yaml] + list([ + dict({ + 'issues': 
list([ + dict({ + 'issue': 'potential-injection: {"step_line": 75, "script_line": 7, "job": "auto_format", "step": "Commit and push formatting changes", "command": "git push origin HEAD:${{ github.event.pull_request.head.ref }}", "expanded_refs": ["github.event.pull_request.head.ref"], "parts": [{"End": {"Col": 4, "Line": 7, "Offset": 171}, "Pos": {"Col": 1, "Line": 7, "Offset": 168}, "Type": "Lit", "Value": "git", "ValueEnd": {"Col": 4, "Line": 7, "Offset": 171}, "ValuePos": {"Col": 1, "Line": 7, "Offset": 168}}]}', + 'priority': 100, + }), + dict({ + 'issue': 'privileged-trigger: Workflow uses `pull_request_target` with additional risky patterns; treat this workflow as high risk and harden immediately.', + 'priority': 80, + }), + dict({ + 'issue': 'unpinned-third-party-action: [step-line=28] actions/checkout@v5', + 'priority': 20, + }), + dict({ + 'issue': 'unpinned-third-party-action: [step-line=37] poseidon/wait-for-status-checks@v0.6.0', + 'priority': 20, + }), + dict({ + 'issue': 'unpinned-third-party-action: [step-line=48] dtolnay/rust-toolchain@stable', + 'priority': 20, + }), + ]), + 'workflow_name': 'tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml', + }), + ]) +# --- diff --git a/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml b/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml new file mode 100644 index 000000000..9ef276717 --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml @@ -0,0 +1,75 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +name: PR Auto-format + +# This workflow triggers when a PR is opened/updated +on: + pull_request_target: + types: [opened, synchronize, reopened] + branches: + - main + - release + +jobs: + auto_format: + if: | + !contains(github.event.pull_request.labels.*.name, 'skip:ci') && + !contains(github.event.pull_request.head.sha, '[skip ci]') + permissions: + contents: write + pull-requests: write + checks: read + runs-on: ubuntu-latest + timeout-minutes: 60 + + steps: + - name: Checkout PR branch + uses: actions/checkout@v5 + with: + ref: ${{ github.event.pull_request.head.ref }} + repository: ${{ github.event.pull_request.head.repo.full_name }} + token: ${{ secrets.GITHUB_TOKEN }} + fetch-depth: 0 + + # Wait for all PR check runs to complete + - name: Wait for all checks to complete + uses: poseidon/wait-for-status-checks@v0.6.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + delay: 60 + interval: 30 + timeout: 7200 + + - name: CI completed successfully + run: echo "CI workflow completed successfully - proceeding with auto-format" + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + + - name: Run cargo fmt + run: | + echo "Running cargo fmt --all on PR #${{ github.event.pull_request.number }}" + cargo fmt --all + + - name: Check for formatting changes + id: check_changes + run: | + if [ -n "$(git status --porcelain)" ]; then + echo "has_changes=true" >> $GITHUB_OUTPUT + else + echo "has_changes=false" >> $GITHUB_OUTPUT + fi + + - name: Commit and push formatting changes + if: steps.check_changes.outputs.has_changes == 'true' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + git add -u + git commit -m "Auto-format code [skip ci]" + + git push origin HEAD:${{ github.event.pull_request.head.ref }} diff --git a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py 
new file mode 100644 index 000000000..cf4990a16 --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py @@ -0,0 +1,156 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Tests for GitHub Actions security analysis detection heuristics.""" + +import os +from pathlib import Path + +import pytest + +from macaron.code_analyzer.gha_security_analysis.detect_injection import ( + PrioritizedIssue, + WorkflowFinding, + _add_finding, + build_workflow_issue_recommendation, + detect_github_actions_security_issues, + extract_workflow_issue_line, +) +from macaron.slsa_analyzer.ci_service.github_actions.github_actions_ci import GitHubActions + +RESOURCES_DIR = Path(__file__).parent.joinpath("resources") + + +@pytest.mark.parametrize( + "workflow_path", + [ + "injection_pattern_1.yaml", + ], +) +def test_detect_github_actions_security_issues( + snapshot: list[WorkflowFinding], workflow_path: str, github_actions_service: GitHubActions +) -> None: + """Test GH Actions workflows injection patterns.""" + callgraph = github_actions_service.build_call_graph_for_files( + [os.path.join(RESOURCES_DIR, "workflow_files", workflow_path)], + repo_path=os.path.join(RESOURCES_DIR, "workflow_files"), + ) + assert detect_github_actions_security_issues(callgraph) == snapshot + + +def test_extract_workflow_issue_line_from_potential_injection() -> None: + """Extract the source line from a potential-injection issue payload.""" + issue = ( + "potential-injection: " + "[{'Type': 'Lit', 'Pos': {'Offset': 269, 'Line': 6, 'Col': 48}, 'Value': 'origin/'}, " + "{'Type': 'ParamExp', 'Pos': {'Offset': 276, 'Line': 6, 'Col': 55}}]" + ) + + assert extract_workflow_issue_line(issue) == 6 + + +def test_extract_workflow_issue_line_prefers_step_line_marker() -> None: + """Extract the workflow line from an explicit step-line marker.""" + 
issue = ( + "potential-injection: " + "[step-line=14] " + "[{'Type': 'Lit', 'Pos': {'Offset': 269, 'Line': 6, 'Col': 48}, 'Value': 'origin/'}]" + ) + + assert extract_workflow_issue_line(issue) == 14 + + +def test_extract_workflow_issue_line_from_structured_payload() -> None: + """Extract workflow line from structured potential-injection payload.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", "parts": []}' + ) + + assert extract_workflow_issue_line(issue) == 62 + + +def test_build_workflow_issue_recommendation_formats_potential_injection_details() -> None: + """Format concise user-facing details for potential-injection findings.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", "parts": []}' + ) + + finding_type, _, finding_message = build_workflow_issue_recommendation(issue) + + assert finding_type == "potential-injection" + assert "Unsafe expansion of attacker-controllable GitHub context can enable command injection." 
in finding_message + assert "Details: Job: retag Step: Retag Command: `git push origin/${github.head_ref}`" in finding_message + + +def test_build_workflow_issue_recommendation_includes_expanded_refs() -> None: + """Render expanded GitHub refs in potential-injection details when present.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", "expanded_refs": ["github.head_ref"]}' + ) + + _, _, finding_message = build_workflow_issue_recommendation(issue) + + assert "Expanded refs: `github.head_ref`" in finding_message + + +def test_build_workflow_issue_recommendation_includes_refs_from_compound_expression() -> None: + """Render extracted github refs when original expression contains operators.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", ' + '"expanded_refs": ["github.head_ref", "github.ref_name"]}' + ) + + _, _, finding_message = build_workflow_issue_recommendation(issue) + + assert "Expanded refs: `github.head_ref, github.ref_name`" in finding_message + + +def test_build_workflow_issue_recommendation_formats_remote_script_exec_details() -> None: + """Format concise user-facing details for remote-script-exec findings.""" + issue = ( + "remote-script-exec: " + '{"step_line": 24, "script_line": 3, "job": "build", "step": "Setup", ' + '"command": "curl -fsSL https://x | bash"}' + ) + + finding_type, _, finding_message = build_workflow_issue_recommendation(issue) + + assert finding_type == "remote-script-exec" + assert "Workflow downloads and executes remote scripts inline." 
in finding_message + assert "Details: Job: build Step: Setup Command: `curl -fsSL https://x | bash`" in finding_message + + +def test_extract_workflow_issue_line_from_remote_script_exec_payload() -> None: + """Extract workflow line from structured remote-script-exec payload.""" + issue = ( + "remote-script-exec: " + '{"step_line": 24, "script_line": 3, "job": "build", "step": "Setup", ' + '"command": "curl -fsSL https://x | bash"}' + ) + + assert extract_workflow_issue_line(issue) == 24 + + +def test_extract_workflow_issue_line_from_unpinned_action_marker() -> None: + """Extract workflow line from unpinned action issue marker.""" + issue = "unpinned-third-party-action: [step-line=62] actions/checkout@v4.2.2" + + assert extract_workflow_issue_line(issue) == 62 + + +def test_add_finding_deduplicates_and_preserves_highest_priority() -> None: + """Keep one finding entry per issue and retain the highest priority.""" + findings: list[PrioritizedIssue] = [] + _add_finding(findings, "remote-script-exec: {}", 80) + _add_finding(findings, "remote-script-exec: {}", 60) + _add_finding(findings, "remote-script-exec: {}", 100) + + assert findings == [{"issue": "remote-script-exec: {}", "priority": 100}] diff --git a/tests/code_analyzer/gha_security_analysis/test_recommendation.py b/tests/code_analyzer/gha_security_analysis/test_recommendation.py new file mode 100644 index 000000000..13a5217ed --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/test_recommendation.py @@ -0,0 +1,69 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""Tests for GitHub Actions security recommendation helpers.""" + +import pytest + +from macaron.code_analyzer.gha_security_analysis.recommendation import ( + parse_unpinned_action_issue, + recommend_for_unpinned_action, + resolve_action_ref_to_tag, +) + + +def test_recommend_for_unpinned_action_with_tag_hint() -> None: + """Return pinned action recommendation with tag hint when SHA and tag are resolved.""" + recommendation = recommend_for_unpinned_action( + "actions/checkout", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "v4.2.2", + ) + + assert recommendation.recommended_ref == "actions/checkout@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa # v4.2.2" + + +def test_recommend_for_unpinned_action_when_sha_not_resolved() -> None: + """Return fallback recommendation text when action SHA cannot be resolved.""" + recommendation = recommend_for_unpinned_action("actions/checkout") + + assert recommendation.recommended_ref == "Unable to resolve automatically" + assert recommendation.message == "Pin this third-party action to a 40-character commit SHA." 
+ + +def test_resolve_action_ref_to_tag_found(monkeypatch: pytest.MonkeyPatch) -> None: + """Resolve the matching tag when a tag points to the resolved action SHA.""" + monkeypatch.setattr( + "macaron.code_analyzer.gha_security_analysis.recommendation.get_tags_via_git_remote", + lambda repo: {"v4.2.2": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"}, + ) + + tag = resolve_action_ref_to_tag("actions/checkout", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", "v4") + + assert tag == "v4.2.2" + + +def test_resolve_action_ref_to_tag_none_when_no_match(monkeypatch: pytest.MonkeyPatch) -> None: + """Return None when no fetched tag points to the resolved action SHA.""" + monkeypatch.setattr( + "macaron.code_analyzer.gha_security_analysis.recommendation.get_tags_via_git_remote", + lambda repo: {"v4.2.2": "dddddddddddddddddddddddddddddddddddddddd"}, + ) + + tag = resolve_action_ref_to_tag("actions/checkout", "cccccccccccccccccccccccccccccccccccccccc", "v4") + + assert tag is None + + +def test_parse_unpinned_action_issue_with_step_line_prefix() -> None: + """Parse unpinned action issues that include finding type and step-line marker.""" + parsed = parse_unpinned_action_issue("unpinned-third-party-action: [step-line=62] actions/checkout@v4.2.2") + + assert parsed == ("actions/checkout", "v4.2.2") + + +def test_parse_unpinned_action_issue_plain_format() -> None: + """Parse legacy unpinned action issues without metadata prefix.""" + parsed = parse_unpinned_action_issue("actions/setup-python@v5.6.0") + + assert parsed == ("actions/setup-python", "v5.6.0") diff --git a/tests/conftest.py b/tests/conftest.py index 413de3498..6290dd8f5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""Fixtures for tests.""" + import os import urllib.parse from pathlib import Path diff --git a/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py b/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py index 8f44f409b..7452552c3 100644 --- a/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py +++ b/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the CycloneDX helper functions.""" + from pathlib import Path import pytest diff --git a/tests/integration/cases/oracle_coherence-js-client/policy.dl b/tests/integration/cases/oracle_coherence-js-client/policy.dl new file mode 100644 index 000000000..4406970d8 --- /dev/null +++ b/tests/integration/cases/oracle_coherence-js-client/policy.dl @@ -0,0 +1,23 @@ +/* Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy("check-github-actions-vulnerabilities", component_id, "Check GitHub Actions vulnerabilities for coherence-js-client") :- + check_failed(component_id, "mcn_githubactions_vulnerabilities_1"), + github_actions_vulnerabilities_check( + _, + "https://github.com/oracle/coherence-js-client/blob/39166341bc31f75b663ff439dae36170fb3e99a9/.github/workflows/trivy-scan.yml", + "known-vulnerability", + "third_party_action_risk", + _, + _, + _, + "aquasecurity/trivy-action", + "0.32.0", + _, + "[\"https://osv.dev/vulnerability/GHSA-69fq-xp46-6x23\", \"https://osv.dev/vulnerability/GHSA-9p44-j4g5-cfx5\"]" + ). 
+ +apply_policy_to("check-github-actions-vulnerabilities", component_id) :- + is_component(component_id, "pkg:github.com/oracle/coherence-js-client@39166341bc31f75b663ff439dae36170fb3e99a9"). diff --git a/tests/integration/cases/oracle_coherence-js-client/test.yaml b/tests/integration/cases/oracle_coherence-js-client/test.yaml new file mode 100644 index 000000000..43a28b268 --- /dev/null +++ b/tests/integration/cases/oracle_coherence-js-client/test.yaml @@ -0,0 +1,23 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Analyzing oracle/coherence-js-client at commit 39166341bc31f75b663ff439dae36170fb3e99a9 + and verifying that the GitHub Actions vulnerabilities check fails. + +tags: +- macaron-python-package + +steps: +- name: Run macaron analyze + kind: analyze + options: + command_args: + - -rp + - https://github.com/oracle/coherence-js-client + - -d + - 39166341bc31f75b663ff439dae36170fb3e99a9 +- name: Run macaron verify-policy to verify that the GitHub Actions vulnerabilities check fails. + kind: verify + options: + policy: policy.dl diff --git a/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl b/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl index 00b141481..8f34d5674 100644 --- a/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl +++ b/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
*/ #include "prelude.dl" diff --git a/tests/integration/cases/org_apache_logging_log4j/test.yaml b/tests/integration/cases/org_apache_logging_log4j/test.yaml index 7871c7a5a..b0820223d 100644 --- a/tests/integration/cases/org_apache_logging_log4j/test.yaml +++ b/tests/integration/cases/org_apache_logging_log4j/test.yaml @@ -27,6 +27,8 @@ steps: command_args: - -rp - https://github.com/apache/logging-log4j2 + - -d + - 028e9fad03ae7bcbf2e49ab8d32d8cfb900f3587 - name: Run macaron verify-policy to verify passed/failed checks kind: verify options: diff --git a/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec b/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec index 9fbfdddd3..3eb549766 100644 --- a/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec +++ b/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec @@ -64,7 +64,7 @@ RUN </validate diff --git a/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec b/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec index e6596fc1b..6a1614371 100644 --- a/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec +++ b/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec @@ -64,7 +64,7 @@ RUN </validate diff --git a/tests/integration/cases/pypi_pytesseract/expected_default.buildspec b/tests/integration/cases/pypi_pytesseract/expected_default.buildspec new file mode 100644 index 000000000..9daecb465 --- /dev/null +++ b/tests/integration/cases/pypi_pytesseract/expected_default.buildspec @@ -0,0 +1,39 @@ +{ + "macaron_version": "0.20.0", + "group_id": null, + "artifact_id": "pytesseract", + "version": "0.3.8", + "git_repo": "https://github.com/madmaze/pytesseract", + "git_tag": "805d3959496232edf2f0feb41af750d5702d85b7", + "newline": "lf", + "language_version": [ + ">=3.7" + ], + "ecosystem": "pypi", + "purl": "pkg:pypi/pytesseract@0.3.8", + "language": "python", + "build_tools": 
[ + "pip" + ], + "build_commands": [ + [ + "python", + "-m", + "build", + "--wheel", + "-n" + ] + ], + "has_binaries": false, + "build_requires": { + "setuptools": "==67.7.2" + }, + "build_backends": [ + "setuptools.build_meta" + ], + "upstream_artifacts": { + "sdist": [ + "https://files.pythonhosted.org/packages/a3/c9/d6e8903482bd6fb994c32722831d15842dd8b614f94ad9ca735807252671/pytesseract-0.3.8.tar.gz" + ] + } +} diff --git a/tests/integration/cases/pypi_pytesseract/expected_dockerfile.buildspec b/tests/integration/cases/pypi_pytesseract/expected_dockerfile.buildspec new file mode 100644 index 000000000..fb840efae --- /dev/null +++ b/tests/integration/cases/pypi_pytesseract/expected_dockerfile.buildspec @@ -0,0 +1,91 @@ + +#syntax=docker/dockerfile:1.10 +FROM oraclelinux:9 + +# Install core tools +RUN dnf -y install which wget tar unzip git + +# Install compiler and make +RUN dnf -y install gcc make + +# Download and unzip interpreter +RUN </validate + [ -n "" ] || { echo "No upstream artifact to validate against."; exit 1; } + # Capture artifacts generated + WHEELS=(/src/dist/*.whl) + # Ensure we only have one artifact + [ ${#WHEELS[@]} -eq 1 ] || { echo "Unexpected artifacts produced!"; exit 1; } + # BUILT_WHEEL is the artifact we built + BUILT_WHEEL=${WHEELS[0]} + # Ensure the artifact produced is not the literal returned by the glob + [ -e $BUILT_WHEEL ] || { echo "No wheels found!"; exit 1; } + # Download the wheel + wget -q + # Compare wheel names + [ $(basename $BUILT_WHEEL) == "" ] || { echo "Wheel name does not match!"; exit 1; } + # Compare file tree + (unzip -Z1 $BUILT_WHEEL | grep -v '\.dist-info' | sort) > built.tree + (unzip -Z1 "" | grep -v '\.dist-info' | sort ) > pypi_artifact.tree + diff -u built.tree pypi_artifact.tree || { echo "File trees do not match!"; exit 1; } + echo "Success!" 
+EOF + +ENTRYPOINT ["/bin/bash","/validate"] diff --git a/tests/integration/cases/pypi_pytesseract/test.yaml b/tests/integration/cases/pypi_pytesseract/test.yaml new file mode 100644 index 000000000..7407ae002 --- /dev/null +++ b/tests/integration/cases/pypi_pytesseract/test.yaml @@ -0,0 +1,45 @@ +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Integration test to ensure that has_binaries is not a false positive. + +tags: +- macaron-python-package +- tutorial + +steps: +- name: Run macaron analyze + kind: analyze + options: + command_args: + - -purl + - pkg:pypi/pytesseract@0.3.8 +- name: Generate the buildspec + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:pypi/pytesseract@0.3.8 + - --output-format + - default-buildspec +- name: Compare Buildspec. + kind: compare + options: + kind: default_build_spec + result: output/buildspec/pypi/pytesseract/macaron.buildspec + expected: expected_default.buildspec +- name: Generate the buildspec + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:pypi/pytesseract@0.3.8 + - --output-format + - dockerfile +- name: Compare Dockerfile + kind: compare + options: + kind: dockerfile_build_spec + result: output/buildspec/pypi/pytesseract/dockerfile.buildspec + expected: expected_dockerfile.buildspec diff --git a/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec b/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec index a8918d0ce..8618316e9 100644 --- a/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec +++ b/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec @@ -64,7 +64,7 @@ RUN </validate diff --git a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/excessive_spacing.py b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/excessive_spacing.py index 
4f9a77616..d1eadfa1b 100644 --- a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/excessive_spacing.py +++ b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/excessive_spacing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ @@ -21,6 +21,6 @@ def test_function(): sys.exit() # excessive spacing obfuscation. The second line here will trigger two detections, which is expected since it matches both patterns. - print("hello"); __import__('os') + print("hello"); __import__('sys') print("hi") ; __import__('base64') print("things") ;__import__('zlib') diff --git a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/expected_results.json b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/expected_results.json index 5fb7c3965..4d49c84b2 100644 --- a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/expected_results.json +++ b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/expected_results.json @@ -43,11 +43,6 @@ "start": 53, "end": 53 }, - { - "file": "obfuscation/excessive_spacing.py", - "start": 24, - "end": 24 - }, { "file": "obfuscation/excessive_spacing.py", "start": 25, @@ -63,11 +58,6 @@ "start": 23, "end": 23 }, - { - "file": "obfuscation/inline_imports.py", - "start": 24, - "end": 24 - }, { "file": "obfuscation/inline_imports.py", "start": 25, @@ -85,23 +75,58 @@ }, { "file": "obfuscation/inline_imports.py", - "start": 28, - "end": 28 + "start": 30, + "end": 30 }, { "file": "obfuscation/inline_imports.py", - "start": 29, - "end": 29 + "start": 35, + "end": 35 }, { "file": "obfuscation/inline_imports.py", - "start": 31, - "end": 31 + "start": 36, + "end": 36 }, { "file": "obfuscation/inline_imports.py", - 
"start": 32, - "end": 32 + "start": 37, + "end": 37 + }, + { + "file": "obfuscation/inline_imports.py", + "start": 38, + "end": 38 + }, + { + "file": "obfuscation/inline_imports.py", + "start": 39, + "end": 39 + }, + { + "file": "obfuscation/inline_imports.py", + "start": 40, + "end": 40 + }, + { + "file": "obfuscation/inline_imports.py", + "start": 41, + "end": 41 + }, + { + "file": "obfuscation/inline_imports.py", + "start": 42, + "end": 42 + }, + { + "file": "obfuscation/inline_imports.py", + "start": 44, + "end": 44 + }, + { + "file": "obfuscation/inline_imports.py", + "start": 45, + "end": 45 }, { "file": "obfuscation/obfuscation_tools.py", @@ -135,8 +160,8 @@ }, { "file": "obfuscation/inline_imports.py", - "start": 27, - "end": 27 + "start": 32, + "end": 32 } ] }, diff --git a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/inline_imports.py b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/inline_imports.py index 4e37c7c02..73e8ac30b 100644 --- a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/inline_imports.py +++ b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/inline_imports.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
""" @@ -21,10 +21,23 @@ def test_function(): sys.exit() __import__('base64') - __import__('builtins') + __import__('builtins') # should not be detected + __import__('builtins').eval("print('hello')") + __import__('builtins').exec("print('hello')") + __import__('builtins').open("written.txt", "w") + __import__('builtins').open("README.md", "r") # should not be detected + _ = open("README.md").read() if __import__("os").path.exists("README.md") else "" # should not be detected __import__('subprocess') __import__('sys') - print("Hello world!") ;__import__('os') + print("Hello world!") ;__import__('sys') + __import__('os').getcwd() # should not be detected + __import__('os').path.join("docs", "README.md") # should not be detected + __import__('os').putenv("CRITICAL_ENV", "1") + __import__('os').environ["CRITICAL_ENV"] = "1" + del __import__('os').environ["CRITICAL_ENV"] + __import__('os').open("written.txt", __import__('os').O_WRONLY | __import__('os').O_CREAT) + __import__('os').write(1, b"hello") + __import__('os').fdopen(1, "w") __import__('zlib') __import__('marshal') # these both just import builtins diff --git a/tests/malware_analyzer/pypi/test_anomalous_version.py b/tests/malware_analyzer/pypi/test_anomalous_version.py index ef1d141d2..45e533738 100644 --- a/tests/malware_analyzer/pypi/test_anomalous_version.py +++ b/tests/malware_analyzer/pypi/test_anomalous_version.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""Tests for heuristic detecting anomalous version numbers""" + from unittest.mock import MagicMock import pytest diff --git a/tests/malware_analyzer/pypi/test_closer_release_join_date.py b/tests/malware_analyzer/pypi/test_closer_release_join_date.py index 5eb131300..ed1232bbc 100644 --- a/tests/malware_analyzer/pypi/test_closer_release_join_date.py +++ b/tests/malware_analyzer/pypi/test_closer_release_join_date.py @@ -2,6 +2,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for closer release join date heuristic.""" + from datetime import datetime from unittest.mock import MagicMock diff --git a/tests/malware_analyzer/pypi/test_empty_project_link_analyzer.py b/tests/malware_analyzer/pypi/test_empty_project_link_analyzer.py index 5dad60add..ecb774da8 100644 --- a/tests/malware_analyzer/pypi/test_empty_project_link_analyzer.py +++ b/tests/malware_analyzer/pypi/test_empty_project_link_analyzer.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for heuristic detecting malicious metadata from PyPI""" + from unittest.mock import MagicMock import pytest diff --git a/tests/malware_analyzer/pypi/test_fake_email.py b/tests/malware_analyzer/pypi/test_fake_email.py index 56e81e035..d7e33a4f0 100644 --- a/tests/malware_analyzer/pypi/test_fake_email.py +++ b/tests/malware_analyzer/pypi/test_fake_email.py @@ -1,9 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""Tests for the FakeEmailAnalyzer heuristic.""" - import os from pathlib import Path from unittest.mock import MagicMock diff --git a/tests/malware_analyzer/pypi/test_one_release_analyzer.py b/tests/malware_analyzer/pypi/test_one_release_analyzer.py index 60ad244ab..78ce0fbf9 100644 --- a/tests/malware_analyzer/pypi/test_one_release_analyzer.py +++ b/tests/malware_analyzer/pypi/test_one_release_analyzer.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for heuristic detecting malicious metadata from PyPI""" + from unittest.mock import MagicMock import pytest diff --git a/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py b/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py index c18369bb4..11aa3c6f8 100644 --- a/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py +++ b/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests detecting malicious patterns in PyPI package sourcecode.""" + import json import os from unittest.mock import MagicMock, patch diff --git a/tests/malware_analyzer/pypi/test_similar_projects.py b/tests/malware_analyzer/pypi/test_similar_projects.py index ed61a204e..2cf13d4d6 100644 --- a/tests/malware_analyzer/pypi/test_similar_projects.py +++ b/tests/malware_analyzer/pypi/test_similar_projects.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for the SimilarProjectAnalyzer heuristic.""" + # pylint: disable=redefined-outer-name import os diff --git a/tests/malware_analyzer/pypi/test_suspicious_setup.py b/tests/malware_analyzer/pypi/test_suspicious_setup.py index ec9af0f0f..649c4d67d 100644 --- a/tests/malware_analyzer/pypi/test_suspicious_setup.py +++ b/tests/malware_analyzer/pypi/test_suspicious_setup.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for suspicious setup.py heuristic.""" + from unittest.mock import MagicMock, patch import pytest diff --git a/tests/malware_analyzer/pypi/test_typosquatting_presence.py b/tests/malware_analyzer/pypi/test_typosquatting_presence.py index 2d23233a2..468cd8c8c 100644 --- a/tests/malware_analyzer/pypi/test_typosquatting_presence.py +++ b/tests/malware_analyzer/pypi/test_typosquatting_presence.py @@ -1,9 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for the TyposquattingPresenceAnalyzer heuristic.""" - import os from pathlib import Path from unittest.mock import MagicMock diff --git a/tests/malware_analyzer/pypi/test_unchanged_release.py b/tests/malware_analyzer/pypi/test_unchanged_release.py index f1162aaea..0a04c4292 100644 --- a/tests/malware_analyzer/pypi/test_unchanged_release.py +++ b/tests/malware_analyzer/pypi/test_unchanged_release.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for heuristic detecting malicious metadata from PyPI""" + from unittest.mock import MagicMock from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult diff --git a/tests/malware_analyzer/pypi/test_wheel_absence.py b/tests/malware_analyzer/pypi/test_wheel_absence.py index c00feb2cb..37716d3cc 100644 --- a/tests/malware_analyzer/pypi/test_wheel_absence.py +++ b/tests/malware_analyzer/pypi/test_wheel_absence.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for heuristic detecting wheel (.whl) file absence from PyPI packages""" + from unittest.mock import MagicMock, patch import pytest diff --git a/tests/output_reporter/test_jinja_extensions.py b/tests/output_reporter/test_jinja_extensions.py index 8baaa528f..1e0e0d46f 100644 --- a/tests/output_reporter/test_jinja_extensions.py +++ b/tests/output_reporter/test_jinja_extensions.py @@ -1,9 +1,8 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains tests for the Jinja2 filter and test extensions.""" - from hypothesis import given from hypothesis import strategies as st diff --git a/tests/output_reporter/test_write_job_summary.py b/tests/output_reporter/test_write_job_summary.py new file mode 100644 index 000000000..210dc7ac8 --- /dev/null +++ b/tests/output_reporter/test_write_job_summary.py @@ -0,0 +1,104 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Tests for GitHub Actions job summary rendering helpers.""" + +from __future__ import annotations + +import importlib.util +from pathlib import Path +from types import ModuleType + + +def _load_write_job_summary_module() -> ModuleType: + """Load the write_job_summary script as a Python module for testing.""" + script_path = Path(Path(__file__).parents[2], "scripts", "actions", "write_job_summary.py") + spec = importlib.util.spec_from_file_location("write_job_summary", script_path) + if spec is None or spec.loader is None: + raise RuntimeError("Unable to load write_job_summary.py module.") + + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_workflow_security_table_includes_summary_column(tmp_path: Path) -> None: + """Render workflow security findings with the short summary column in compact table output.""" + module = _load_write_job_summary_module() + summary_path = Path(tmp_path, "summary.md") + columns = [ + "finding_group", + "finding_priority", + "finding_type", + "action_name", + "action_ref", + "vulnerable_workflow", + "finding_message", + ] + rows = [ + ( + "workflow_security_issue", + 80, + "remote-script-exec", + "https://github.com/org/repo/.github/workflows/build.yml", + "", + "https://github.com/org/repo/.github/workflows/build.yml", + ( + "Summary: Workflow downloads and executes remote scripts inline. 
" + "Details: remote-script-exec: A step appears to download and pipe to shell (`curl|bash`). " + "Recommendation: Avoid curl|bash patterns." + ), + ), + ] + + rendered = module.write_compact_gha_vuln_diagnostics(summary_path, columns, rows) + output = summary_path.read_text(encoding="utf-8") + + assert rendered is True + assert "| priority | type | summary | workflow |" in output + assert "Workflow downloads and executes remote scripts inline." in output + + +def test_compact_summary_keeps_all_groups_in_detailed_section(tmp_path: Path) -> None: + """Render detailed section with both finding groups even when top priorities are workflow-only.""" + module = _load_write_job_summary_module() + summary_path = Path(tmp_path, "summary.md") + columns = [ + "finding_group", + "finding_priority", + "finding_type", + "action_name", + "action_ref", + "vulnerable_workflow", + "finding_message", + ] + rows = [ + ( + "workflow_security_issue", + 100, + "potential-injection", + "", + "", + "https://github.com/org/repo/.github/workflows/ci.yml", + "Summary: Injection risk. Details: ... Recommendation: ...", + ), + ( + "third_party_action_risk", + 20, + "unpinned-third-party-action", + "actions/checkout", + "v4", + "https://github.com/org/repo/.github/workflows/ci.yml", + "Summary: Unpinned action. 
Recommendation: ...", + ), + ] + + rendered = module.write_compact_gha_vuln_diagnostics(summary_path, columns, rows) + output = summary_path.read_text(encoding="utf-8") + + assert rendered is True + assert "#### Workflow security issues" in output + assert "#### Third-party action risks" in output + assert "**Workflow security issues**" in output + assert "**Third-party action risks**" in output + assert "`actions/checkout@v4`" in output diff --git a/tests/parsers/bashparser/test_bashparser.py b/tests/parsers/bashparser/test_bashparser.py index 97c431034..a489330ac 100644 --- a/tests/parsers/bashparser/test_bashparser.py +++ b/tests/parsers/bashparser/test_bashparser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ @@ -13,7 +13,7 @@ from macaron import MACARON_PATH from macaron.errors import ParseError -from macaron.parsers.bashparser import parse, parse_file +from macaron.parsers.bashparser import parse, parse_file, parse_raw_with_gha_mapping @pytest.mark.parametrize( @@ -46,3 +46,13 @@ def test_bashparser_parse_invalid() -> None: # Parse the bash script file. 
with pytest.raises(ParseError): parse_file(file_path=file_path, macaron_path=MACARON_PATH) + + +def test_bashparser_parse_raw_with_gha_mapping() -> None: + """Test parsing raw bash script with GitHub expression mapping.""" + bash_content = 'echo "${{ github.head_ref }}"\n' + parsed_ast, gha_map = parse_raw_with_gha_mapping(bash_content, MACARON_PATH) + + assert "Stmts" in parsed_ast + assert gha_map + assert "github.head_ref" in gha_map.values() diff --git a/tests/policy_engine/compare_policy_reports.py b/tests/policy_engine/compare_policy_reports.py index 88e7d0cc9..e02cf09df 100644 --- a/tests/policy_engine/compare_policy_reports.py +++ b/tests/policy_engine/compare_policy_reports.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This script checks the policy engine report against expected results.""" + import json import logging import sys diff --git a/tests/provenance/test_provenance_extractor.py b/tests/provenance/test_provenance_extractor.py index 2f1581200..aa207d952 100644 --- a/tests/provenance/test_provenance_extractor.py +++ b/tests/provenance/test_provenance_extractor.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module tests the provenance extractor on valid example provenances.""" + import json import pytest @@ -19,8 +20,7 @@ @pytest.fixture(name="slsa_v1_gcb_1_provenance") def slsa_v1_gcb_1_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using build type gcb and sourceToBuild.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v1", "subject": [], @@ -42,15 +42,13 @@ def slsa_v1_gcb_1_provenance_() -> dict[str, JsonType]: } } } - """ - ) + """) @pytest.fixture(name="slsa_v1_gcb_2_provenance") def slsa_v1_gcb_2_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using build type gcb and configSource.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v1", "subject": [], @@ -74,15 +72,13 @@ def slsa_v1_gcb_2_provenance_() -> dict[str, JsonType]: } } } - """ - ) + """) @pytest.fixture(name="slsa_v1_github_provenance") def slsa_v1_github_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using build type GitHub.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v1", "subject": [], @@ -109,15 +105,13 @@ def slsa_v1_github_provenance_() -> dict[str, JsonType]: } } } - """ - ) + """) @pytest.fixture(name="slsa_v1_oci_provenance") def slsa_v1_oci_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using the OCI build type.""" - payload = _load_and_validate_json( - """ + payload = _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v1", "predicateType": "https://slsa.dev/provenance/v1", @@ -136,8 +130,7 @@ def slsa_v1_oci_provenance_() -> dict[str, JsonType]: } } } - """ - ) + """) # The build type is modified here to avoid issues with excessive line length. 
_json_modify( payload, @@ -150,8 +143,7 @@ def slsa_v1_oci_provenance_() -> dict[str, JsonType]: @pytest.fixture(name="slsa_v02_provenance") def slsa_v02_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v02 provenance.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v0.1", "subject": [], @@ -167,15 +159,13 @@ def slsa_v02_provenance_() -> dict[str, JsonType]: } } } - """ - ) + """) @pytest.fixture(name="slsa_v01_provenance") def slsa_v01_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v01 provenance.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v0.1", "subject": [], @@ -197,15 +187,13 @@ def slsa_v01_provenance_() -> dict[str, JsonType]: ] } } - """ - ) + """) @pytest.fixture(name="witness_gitlab_provenance") def witness_gitlab_provenance_() -> dict[str, JsonType]: """Return a Witness v0.1 provenance with a GitLab attestation.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v0.1", "subject": [], @@ -228,15 +216,13 @@ def witness_gitlab_provenance_() -> dict[str, JsonType]: ] } } - """ - ) + """) @pytest.fixture(name="witness_github_provenance") def witness_github_provenance_() -> dict[str, JsonType]: """Return a Witness v0.1 provenance with a GitHub attestation.""" - return _load_and_validate_json( - """ + return _load_and_validate_json(""" { "_type": "https://in-toto.io/Statement/v0.1", "subject": [], @@ -259,8 +245,7 @@ def witness_github_provenance_() -> dict[str, JsonType]: ] } } - """ - ) + """) @pytest.fixture(name="target_repository") diff --git a/tests/provenance/test_provenance_finder.py b/tests/provenance/test_provenance_finder.py index 774d2ff9e..3e2389873 100644 --- a/tests/provenance/test_provenance_finder.py +++ b/tests/provenance/test_provenance_finder.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle 
and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the provenance finder.""" + import os import shutil import tempfile diff --git a/tests/repo_finder/test_commit_finder.py b/tests/repo_finder/test_commit_finder.py index 3fdefcb36..a505b3df2 100644 --- a/tests/repo_finder/test_commit_finder.py +++ b/tests/repo_finder/test_commit_finder.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the commit finder.""" + import logging import os import re diff --git a/tests/repo_finder/test_repo_finder.py b/tests/repo_finder/test_repo_finder.py index 8471af70b..25a917b3b 100644 --- a/tests/repo_finder/test_repo_finder.py +++ b/tests/repo_finder/test_repo_finder.py @@ -1,7 +1,8 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the repo finder.""" + import os from pathlib import Path diff --git a/tests/repo_finder/test_repo_finder_deps_dev.py b/tests/repo_finder/test_repo_finder_deps_dev.py index 1de5fae25..10cb1a5e5 100644 --- a/tests/repo_finder/test_repo_finder_deps_dev.py +++ b/tests/repo_finder/test_repo_finder_deps_dev.py @@ -1,7 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the deps.dev repo finder.""" + import pytest from packageurl import PackageURL from pytest_httpserver import HTTPServer diff --git a/tests/repo_finder/test_report_schema.py b/tests/repo_finder/test_report_schema.py index f3fbbbde3..2f851d557 100644 --- a/tests/repo_finder/test_report_schema.py +++ b/tests/repo_finder/test_report_schema.py @@ -2,6 +2,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the report schema of the repo finder.""" + import json import os from typing import Any diff --git a/tests/repo_verifier/test_repo_verifier.py b/tests/repo_verifier/test_repo_verifier.py index 0c01a8bff..74f3cfe63 100644 --- a/tests/repo_verifier/test_repo_verifier.py +++ b/tests/repo_verifier/test_repo_verifier.py @@ -1,7 +1,8 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the repo verifier.""" + from pathlib import Path import pytest diff --git a/tests/slsa_analyzer/checks/test_provenance_repo_commit_checks.py b/tests/slsa_analyzer/checks/test_provenance_repo_commit_checks.py index fa65d2002..d1dfbdaa4 100644 --- a/tests/slsa_analyzer/checks/test_provenance_repo_commit_checks.py +++ b/tests/slsa_analyzer/checks/test_provenance_repo_commit_checks.py @@ -1,7 +1,8 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains tests for the provenance available check.""" + from pathlib import Path from typing import TypeVar diff --git a/tests/slsa_analyzer/ci_service/test_base_ci_service.py b/tests/slsa_analyzer/ci_service/test_base_ci_service.py index 510d8cf01..71bfeac0e 100644 --- a/tests/slsa_analyzer/ci_service/test_base_ci_service.py +++ b/tests/slsa_analyzer/ci_service/test_base_ci_service.py @@ -1,9 +1,8 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the base CI service.""" - from pathlib import Path import pytest diff --git a/tests/slsa_analyzer/git_service/test_github.py b/tests/slsa_analyzer/git_service/test_github.py index e01a415b7..604b0a50c 100644 --- a/tests/slsa_analyzer/git_service/test_github.py +++ b/tests/slsa_analyzer/git_service/test_github.py @@ -1,11 +1,10 @@ -# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ This module tests the GitHub git service. """ - from macaron.slsa_analyzer.git_service import GitHub from ...macaron_testcase import MacaronTestCase diff --git a/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py b/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py index de9609dfe..85d115a07 100644 --- a/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py +++ b/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for the ``JFrogMavenRegistry`` class.""" @@ -260,8 +260,7 @@ def test_extract_folder_names_from_folder_info_payload( ("args", "expected_file_names"), [ pytest.param( - { - "folder_info_payload": """ + {"folder_info_payload": """ { "children": [ { @@ -274,8 +273,7 @@ def test_extract_folder_names_from_folder_info_payload( } ] } - """ - }, + """}, ["child2"], id="Payload with both files and folders", ), diff --git a/tests/slsa_analyzer/package_registry/test_osv_dev.py b/tests/slsa_analyzer/package_registry/test_osv_dev.py index b96773f15..6856818ae 100644 --- a/tests/slsa_analyzer/package_registry/test_osv_dev.py +++ b/tests/slsa_analyzer/package_registry/test_osv_dev.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""Tests for the osv.dev service.""" @@ -222,3 +222,45 @@ def test_is_affected_version_ranges(vuln: dict, workflow: str, version: str, exp OSVDevService.is_version_affected(vuln=vuln, pkg_name=workflow, pkg_version=version, ecosystem="GitHub Actions") == expected ) + + +@pytest.mark.parametrize( + ("packages", "osv_batch_response", "expected"), + [ + pytest.param( + [{"package": {"ecosystem": "GitHub Actions", "name": "aquasecurity/trivy-action"}}], + { + "results": [ + { + "vulns": [ + {"id": "GHSA-69fq-xp46-6x23", "modified": "2026-03-24T18:02:32.837793Z"}, + {"id": "GHSA-9p44-j4g5-cfx5", "modified": "2026-02-22T23:23:29.929429Z"}, + ] + } + ] + }, + [{"package": {"ecosystem": "GitHub Actions", "name": "aquasecurity/trivy-action"}}], + id="Single vulnerable package", + ), + pytest.param( + [{"package": {"ecosystem": "GitHub Actions", "name": ""}}], + {"results": [{}]}, + [], + id="Empty package name", + ), + ], +) +def test_get_vulnerabilities_package_name_batch( + monkeypatch: pytest.MonkeyPatch, packages: list, osv_batch_response: dict[str, list], expected: list +) -> None: + """Test filtering vulnerable packages from OSV batch query results.""" + + def mock_call_osv_querybatch_api(query_data: dict, expected_size: int | None = None) -> list: + assert query_data == {"queries": packages} + assert query_data["queries"][0]["package"]["name"] == packages[0]["package"]["name"] + assert expected_size == len(packages) + return osv_batch_response["results"] + + monkeypatch.setattr(OSVDevService, "call_osv_querybatch_api", staticmethod(mock_call_osv_querybatch_api)) + + assert OSVDevService.get_vulnerabilities_package_name_batch(packages) == expected diff --git a/tests/slsa_analyzer/provenance/test_witness_provenance.py b/tests/slsa_analyzer/provenance/test_witness_provenance.py index 576787aed..576bb4005 100644 --- a/tests/slsa_analyzer/provenance/test_witness_provenance.py +++ b/tests/slsa_analyzer/provenance/test_witness_provenance.py @@ -1,4 +1,4 @@ -# Copyright (c) 
2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for witness provenance.""" @@ -68,21 +68,17 @@ def test_load_witness_predicate_types( ("payload_json", "predicate_types", "expected_result"), [ pytest.param( - json.loads( - """ + json.loads(""" { "predicateType": "https://witness.testifysec.com/attestation-collection/v0.1" } - """ - ), + """), {"https://witness.testifysec.com/attestation-collection/v0.1"}, True, id="Valid predicateType", ), pytest.param( - json.loads( - """ + json.loads(""" { "predicateType": "https://witness.net/attestation-collection/v0.1" } - """ - ), + """), {"https://witness.testifysec.com/attestation-collection/v0.1"}, False, id="Invalid predicateType", @@ -103,8 +99,7 @@ def test_is_witness_provenance_payload( ("payload_json", "expected_subjects"), [ pytest.param( - json.loads( - """ + json.loads(""" { "subject": [ { @@ -121,8 +116,7 @@ def test_is_witness_provenance_payload( } ] } - """ - ), + """), [ { "name": "https://witness.dev/attestations/product/v0.1/file:target/jackson-annotations-2.9.9.jar", @@ -140,8 +134,7 @@ def test_is_witness_provenance_payload( id="Valid payload", ), pytest.param( - json.loads( - """ + json.loads(""" { "subject": [ { @@ -158,8 +151,7 @@ def test_is_witness_provenance_payload( } ] } - """ - ), + """), [ { "name": "https://witness.dev/attestations/product/v0.1/file:target/jackson-annotations-2.9.9.jar", @@ -171,8 +163,7 @@ def test_is_witness_provenance_payload( id="Missing sha256", ), pytest.param( - json.loads( - """ + json.loads(""" { "subject": [ { @@ -189,8 +180,7 @@ def test_is_witness_provenance_payload( } ] } -""" - ), +"""), [ { "name": "https://witness.dev/attestations/product/v0.1/file:target/jackson-annotations-2.9.9.jar", diff --git a/tests/slsa_analyzer/test_git_url.py 
b/tests/slsa_analyzer/test_git_url.py index 006a92608..f84bbbdfa 100644 --- a/tests/slsa_analyzer/test_git_url.py +++ b/tests/slsa_analyzer/test_git_url.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the generic actions on Git repositories.""" @@ -220,12 +220,10 @@ def test_get_remote_vcs_url_with_user_defined_allowed_hostnames(tmp_path: Path) user_config_path = os.path.join(tmp_path, "config.ini") with open(user_config_path, "w", encoding="utf-8") as user_config_file: - user_config_file.write( - """ + user_config_file.write(""" [git_service.gitlab.self_hosted] hostname = internal.gitlab.org - """ - ) + """) # We don't have to worry about modifying the ``defaults`` object causing test # pollution here, since we reload the ``defaults`` object before every test with the # ``setup_test`` fixture. diff --git a/tests/test_util.py b/tests/test_util.py index 168d0a880..fa68b6123 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,9 +1,10 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ This module test the Util methods """ + from collections.abc import Callable from unittest import TestCase from unittest.mock import call, patch diff --git a/tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl b/tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl new file mode 100644 index 000000000..8bbeba44f --- /dev/null +++ b/tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl @@ -0,0 +1,16 @@ +/* Copyright (c) 2026 - 2026, Oracle and/or its affiliates. 
All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy( + "attest-macaron-supply-chain", + component_id, + "Require trusted VCS metadata, and non-hosted repository." +) :- + check_passed(component_id, "mcn_version_control_system_1"), + is_repo(repo_id, "github.com/oracle/macaron", component_id), + not_self_hosted_git(repo_id, _). + +apply_policy_to("attest-macaron-supply-chain", component_id) :- + is_component(component_id, _). diff --git a/tests/vsa/test_vsa.py b/tests/vsa/test_vsa.py index dbe8b768c..04d0732c1 100644 --- a/tests/vsa/test_vsa.py +++ b/tests/vsa/test_vsa.py @@ -1,9 +1,8 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for VSA generation.""" - import pytest from macaron.vsa.vsa import get_common_purl_from_artifact_purls, get_components_passing_policy