diff --git a/.github/workflows/_build_docker.yaml b/.github/workflows/_build_docker.yaml index 8c5ea7dba..d503bfff9 100644 --- a/.github/workflows/_build_docker.yaml +++ b/.github/workflows/_build_docker.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. # This is a reuseable workflow to build and test the Docker image. Note that this workflow does not @@ -63,6 +63,22 @@ jobs: IMAGE_NAME: ghcr.io/oracle/macaron run: make build-docker + # Export the built image so downstream jobs/workflows can load and reuse + # the exact same image without pushing to a registry. + - name: Export test Docker image + run: docker save ghcr.io/oracle/macaron:test --output /tmp/macaron-test-image.tar + + # Upload the image tarball for the reusable action test workflow. + - name: Upload test Docker image artifact + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: macaron-test-image + path: /tmp/macaron-test-image.tar + if-no-files-found: error + retention-days: 1 + + # Install helper tooling used by integration test utilities that validate + # the built Docker image behavior. - name: Install dependencies for integration test utility run: make setup-integration-test-utility-for-docker @@ -74,3 +90,14 @@ jobs: DOCKER_PULL: never GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: make integration-test-docker + + test-macaron-action: + # Reuse the action test workflow against the exact Docker image built above. + # The image is transferred via artifact to avoid pushing to a registry. 
+ needs: [build-docker] + permissions: + contents: read + uses: ./.github/workflows/test_macaron_action.yaml + with: + docker_image_artifact_name: macaron-test-image + macaron_image_tag: test diff --git a/.github/workflows/macaron-analysis.yaml b/.github/workflows/macaron-analysis.yaml index aca12d881..d0da663d1 100644 --- a/.github/workflows/macaron-analysis.yaml +++ b/.github/workflows/macaron-analysis.yaml @@ -35,29 +35,9 @@ jobs: # Note: adjust the policy_purl to refer to your repository URL. - name: Run Macaron action id: run_macaron - continue-on-error: true uses: oracle/macaron@fda4dda04aa7228fcaba162804891806cf5a1375 # v0.22.0 with: repo_path: ./ policy_file: check-github-actions policy_purl: pkg:github.com/oracle/macaron@.* - - - name: Upload Macaron reports - if: ${{ always() }} - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 - with: - name: macaron-reports - path: | - output/reports/github_com/oracle/macaron/macaron.html - output/reports/github_com/oracle/macaron/macaron.json - output/macaron.db - if-no-files-found: warn - retention-days: 90 - - - name: Check Verification Summary Attestation check passes - if: ${{ always() }} - run: | - if [ ! -f output/vsa.intoto.jsonl ]; then - echo "The check-github-actions policy failed, therefore VSA was not generated at output/vsa.intoto.jsonl. Check the uploaded reports." - exit 1 - fi + reports_retention_days: 90 diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index f560c027f..4c72dffda 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -1,62 +1,106 @@ # Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
-name: Test Macaron Action (tutorials) +name: Test Macaron Action on: - push: - paths: - - action.yaml - pull_request: - paths: - - action.yaml - workflow_dispatch: + workflow_call: + # Optional overrides used by reusable callers (for example _build_docker.yaml). + # Defaults target the test image artifact produced by our Docker build workflow. + inputs: + docker_image_artifact_name: + required: false + type: string + default: macaron-test-image + macaron_image_tag: + required: false + type: string + default: test permissions: - id-token: write - attestations: write + contents: read jobs: tutorial-commit-finder: name: Analyzing and comparing different versions of an artifact runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze arrow@1.3.0) uses: ./ with: package_url: pkg:pypi/arrow@1.3.0 output_dir: macaron_output/commit_finder + upload_reports: 'false' - name: Run Macaron (analyze arrow@0.15.0) uses: ./ with: package_url: pkg:pypi/arrow@0.15.0 output_dir: macaron_output/commit_finder + upload_reports: 'false' - name: Run Macaron (verify policy - has-hosted-build) + id: verify_has_hosted_build + # This verification is expected to fail for this tutorial scenario. 
+ continue-on-error: true uses: ./ with: policy_file: ./tests/tutorial_resources/commit_finder/has-hosted-build.dl output_dir: macaron_output/commit_finder + upload_reports: 'false' + - name: Assert expected failure (has-hosted-build) + if: ${{ always() }} + run: | + # Keep this workflow green only when the verify step actually fails. + if [ "${{ steps.verify_has_hosted_build.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." + exit 1 + fi tutorial-detect-malicious-package: name: Detecting malicious packages runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze django@5.0.6 without dependencies) uses: ./ with: package_url: pkg:pypi/django@5.0.6 output_dir: macaron_output/detect_malicious_package + upload_reports: 'false' - name: Run Macaron (verify policy - check-django) uses: ./ with: policy_file: ./tests/tutorial_resources/detect_malicious_package/check-django.dl output_dir: macaron_output/detect_malicious_package + upload_reports: 'false' - name: Setup Python for analyzed venv uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 @@ -83,101 +127,159 @@ jobs: output_dir: macaron_output/detect_malicious_package deps_depth: '1' python_venv: /tmp/.django_venv + upload_reports: 'false' - name: Run Macaron (verify policy - check-dependencies) uses: ./ with: policy_file: ./tests/tutorial_resources/detect_malicious_package/check-dependencies.dl output_dir: 
macaron_output/detect_malicious_package + upload_reports: 'false' tutorial-detect-vulnerable-actions: name: How to detect vulnerable GitHub Actions runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze repo - apache/logging-log4j2) uses: ./ with: repo_path: https://github.com/apache/logging-log4j2 output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'false' + write_job_summary: 'false' - name: Run Macaron (verify policy - github_actions_vulns for repo) + id: verify_github_actions_vulns_repo_tutorial uses: ./ with: policy_file: ./tests/tutorial_resources/detect_vulnerable_github_actions/check_github_actions_vuln_repo.dl output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'true' + reports_artifact_name: macaron-reports-vulnerable-actions-repo + write_job_summary: 'true' - name: Run Macaron (analyze purl - log4j-core example) uses: ./ with: package_url: pkg:maven/org.apache.logging.log4j/log4j-core@2.25.3 output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'false' + write_job_summary: 'false' - name: Run Macaron (verify policy - github_actions_vulns for purl) + id: verify_github_actions_vulns_purl_tutorial uses: ./ with: policy_file: ./tests/tutorial_resources/detect_vulnerable_github_actions/check_github_actions_vuln_purl.dl output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'true' + reports_artifact_name: 
macaron-reports-vulnerable-actions-purl + write_job_summary: 'true' tutorial-provenance: name: Provenance discovery, extraction, and verification runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze semver 7.7.2) uses: ./ with: package_url: pkg:npm/semver@7.7.2 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - semver) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_semver.dl output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (analyze toga 0.5.1 - PyPI provenance) uses: ./ with: package_url: pkg:pypi/toga@0.5.1 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - toga PyPI) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_toga.dl output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (analyze toga 0.4.8 - GitHub attestation) uses: ./ with: package_url: pkg:pypi/toga@0.4.8 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - toga GitHub) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_toga.dl output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (analyze urllib3 2.0.0a1 - GitHub attestation) uses: ./ with: package_url: pkg:pypi/urllib3@2.0.0a1 
output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - urllib3) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_urllib3.dl output_dir: macaron_output/provenance + upload_reports: 'false' tutorial-detect-malicious-java-dep: name: Detecting Java dependencies manually uploaded to Maven Central runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze example-maven-app with SBOM) uses: ./ with: @@ -186,24 +288,39 @@ jobs: output_dir: macaron_output/detect_malicious_java_dep sbom_path: ./tests/tutorial_resources/detect_malicious_java_dep/example-sbom.json deps_depth: '1' + upload_reports: 'false' - name: Run Macaron (verify policy - detect-malicious-upload) uses: ./ with: policy_file: ./tests/tutorial_resources/detect_malicious_java_dep/example-maven-app.dl output_dir: macaron_output/detect_malicious_java_dep + upload_reports: 'false' tutorial-exclude-include-checks: name: Exclude and include checks in Macaron runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: 
/tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze micronaut-core with default checks) uses: ./ with: package_url: pkg:maven/io.micronaut/micronaut-core@4.3.10 output_dir: macaron_output/exclude_include_checks/normal + upload_reports: 'false' - name: Run Macaron (analyze micronaut-core excluding witness check via defaults.ini) uses: ./ @@ -211,3 +328,86 @@ jobs: package_url: pkg:maven/io.micronaut/micronaut-core@4.3.10 defaults_path: ./tests/tutorial_resources/exclude_include_checks/defaults_exclude_witness.ini output_dir: macaron_output/exclude_include_checks/excluded + upload_reports: 'false' + + test-detect-vulnerable-actions: + name: How to detect vulnerable GitHub Actions + runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar + + - name: Run Macaron (analyze github_actions_vulns for https://github.com/oracle/coherence-js-client) + id: verify_github_actions_vulns_repo_test + # This integration target is intentionally vulnerable; failure is expected. 
+ continue-on-error: true + uses: ./ + with: + repo_path: https://github.com/oracle/coherence-js-client + digest: 39166341bc31f75b663ff439dae36170fb3e99a9 + policy_file: check-github-actions + policy_purl: pkg:github.com/oracle/coherence-js-client@.* + output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'true' + reports_artifact_name: macaron-vulnerable-actions-fail-diagnosis + write_job_summary: 'true' + - name: Assert expected failure (github_actions_vulns for repo test) + if: ${{ always() }} + run: | + # Explicitly assert failure so regressions are visible in CI results. + if [ "${{ steps.verify_github_actions_vulns_repo_test.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." + exit 1 + fi + + test-detect-potential-injection: + name: How to detect potential injection in GitHub Actions + runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar + + - name: Run Macaron (analyze github_actions_vulns for https://github.com/oracle/graalpython) + id: verify_github_actions_vulns_repo_test + # This integration target is intentionally vulnerable; failure is expected. 
+ continue-on-error: true + uses: ./ + with: + repo_path: https://github.com/oracle/graalpython + digest: f5f7e67823a699213ab06c86440da94ead672467 + policy_file: check-github-actions + policy_purl: pkg:github.com/oracle/graalpython@.* + output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'false' + reports_artifact_name: macaron-injection-actions-fail-diagnosis + write_job_summary: 'true' + - name: Assert expected failure (github_actions_vulns for repo test) + if: ${{ always() }} + run: | + # Explicitly assert failure so regressions are visible in CI results. + if [ "${{ steps.verify_github_actions_vulns_repo_test.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." + exit 1 + fi diff --git a/action.yaml b/action.yaml index 418f37705..10e5dc924 100644 --- a/action.yaml +++ b/action.yaml @@ -43,6 +43,18 @@ inputs: output_dir: description: The output destination path for Macaron. default: output + upload_reports: + description: Upload Macaron reports as a workflow artifact. + default: 'true' + reports_artifact_name: + description: Name of the uploaded reports artifact. + default: macaron-reports + reports_retention_days: + description: Retention period in days for uploaded reports. + default: '90' + write_job_summary: + description: Write a human-friendly summary to the workflow run page. + default: 'true' upload_attestation: description: 'Upload the generated VSA report. default : false' default: false @@ -51,12 +63,24 @@ inputs: default: ${{ github.workspace }} outputs: + html_report_path: + description: Path to the generated HTML analysis report (if available). + value: ${{ steps.run-macaron-analysis.outputs.html_report_path }} + report_dir: + description: Directory containing HTML/JSON reports. + value: ${{ steps.collect-reports.outputs.report_dir }} + db_path: + description: Path to the generated Macaron SQLite database. 
+ value: ${{ steps.collect-reports.outputs.db_path }} policy_report: description: Paths to the Macaron analysis report - value: ${{ steps.run-macaron-policy-verification.outputs.policy_report }} + value: ${{ steps.collect-reports.outputs.policy_report }} vsa_report: description: Verification Summary Attestation - value: ${{ steps.run-macaron-policy-verification.outputs.vsa_report }} + value: ${{ steps.collect-reports.outputs.vsa_report }} + vsa_generated: + description: Whether VSA was generated. + value: ${{ steps.collect-reports.outputs.vsa_generated }} runs: using: composite @@ -103,10 +127,85 @@ runs: POLICY_FILE: ${{ inputs.policy_file }} POLICY_PURL: ${{ inputs.policy_purl }} + - name: Collect report paths + id: collect-reports + if: ${{ always() }} + run: | + OUTPUT_DIR="${OUTPUT_DIR:-output}" + POLICY_REPORT="${OUTPUT_DIR}/policy_report.json" + VSA_REPORT="${OUTPUT_DIR}/vsa.intoto.jsonl" + DB_PATH="${OUTPUT_DIR}/macaron.db" + REPORT_DIR="${OUTPUT_DIR}/reports" + + if [ -f "${VSA_REPORT}" ]; then + VSA_VALUE="${VSA_REPORT}" + VSA_GENERATED=true + else + VSA_VALUE="VSA Not Generated." 
+ VSA_GENERATED=false + fi + + echo "report_dir=${REPORT_DIR}" >> "${GITHUB_OUTPUT}" + echo "db_path=${DB_PATH}" >> "${GITHUB_OUTPUT}" + echo "policy_report=${POLICY_REPORT}" >> "${GITHUB_OUTPUT}" + echo "vsa_report=${VSA_VALUE}" >> "${GITHUB_OUTPUT}" + echo "vsa_generated=${VSA_GENERATED}" >> "${GITHUB_OUTPUT}" + + { + echo "reports_path<<EOF" + echo "${REPORT_DIR}" + echo "${DB_PATH}" + echo "${POLICY_REPORT}" + echo "EOF" + } >> "${GITHUB_OUTPUT}" + shell: bash + env: + OUTPUT_DIR: ${{ inputs.output_dir }} + REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} + + - name: Upload Macaron Reports + id: upload-macaron-reports + if: ${{ always() && inputs.upload_reports == 'true' }} + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: ${{ inputs.reports_artifact_name }} + path: ${{ steps.collect-reports.outputs.reports_path }} + if-no-files-found: warn + retention-days: ${{ inputs.reports_retention_days }} + + - name: Summarize Macaron Results + if: ${{ always() && inputs.write_job_summary == 'true' }} + run: | + bash "$GITHUB_ACTION_PATH/scripts/actions/write_job_summary.sh" + shell: bash + env: + OUTPUT_DIR: ${{ inputs.output_dir }} + VSA_GENERATED: ${{ steps.collect-reports.outputs.vsa_generated }} + POLICY_REPORT: ${{ steps.collect-reports.outputs.policy_report }} + POLICY_FILE: ${{ inputs.policy_file }} + HTML_REPORT_PATH: ${{ steps.run-macaron-analysis.outputs.html_report_path }} + UPLOAD_REPORTS: ${{ inputs.upload_reports }} + REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} + REPORTS_ARTIFACT_URL: ${{ steps.upload-macaron-reports.outputs.artifact-url }} + + - name: Enforce VSA generation + if: ${{ always() && inputs.policy_file != '' }} + run: | + if [ "${VSA_GENERATED}" != "true" ]; then + echo "Policy verification failed. VSA was not generated at ${OUTPUT_DIR}/vsa.intoto.jsonl. Check uploaded reports." 
+ exit 1 + fi + shell: bash + env: + OUTPUT_DIR: ${{ inputs.output_dir }} + VSA_GENERATED: ${{ steps.collect-reports.outputs.vsa_generated }} + POLICY_FILE: ${{ inputs.policy_file }} + - name: Upload Attestation - if: ${{ inputs.upload_attestation == 'true' && steps.run-macaron-policy-verification.outputs.vsa_report != 'VSA Not Generated.' }} + if: ${{ inputs.upload_attestation == 'true' && steps.collect-reports.outputs.vsa_generated == 'true' }} uses: actions/attest@daf44fb950173508f38bd2406030372c1d1162b1 #3.0.0 with: subject-path: ${{ inputs.subject_path }} predicate-type: https://slsa.dev/verification_summary/v1 - predicate-path: ${{ steps.run-macaron-policy-verification.outputs.vsa_report }} + predicate-path: ${{ steps.collect-reports.outputs.vsa_report }} diff --git a/docs/source/pages/macaron_action.rst b/docs/source/pages/macaron_action.rst index 55ce483ae..8deb62842 100644 --- a/docs/source/pages/macaron_action.rst +++ b/docs/source/pages/macaron_action.rst @@ -24,16 +24,18 @@ When you use this action, you can reference it directly in your workflow. For a repo_path: 'https://github.com/example/project' policy_file: check-github-actions policy_purl: 'pkg:github.com/example/project' - output_dir: 'macaron-output' + reports_retention_days: 90 -If you upload the results like in this `workflow `_ check this :ref:`documentation ` to see how to read and understand them. +By default, the action posts a human-friendly results summary to the GitHub Actions run page (job summary). If you upload the results like in this `workflow `_, check this :ref:`documentation ` to see how to read and understand them. Example: policy verification only ---------------------------------- To run only the policy verification step (when you already have an output -database), call the action with ``policy_file`` and set ``output_dir`` to the -directory containing ``macaron.db``: +database), call the action with ``policy_file``. 
If the previous analysis step +used the default output path, you can omit ``output_dir`` here. If you set a +custom ``output_dir`` in the previous step, use the same value here so policy +verification reads the matching ``macaron.db``. .. code-block:: yaml @@ -41,7 +43,6 @@ directory containing ``macaron.db``: uses: oracle/macaron@fda4dda04aa7228fcaba162804891806cf5a1375 # v0.22.0 with: policy_file: policy.dl - output_dir: macaron-output Inputs ------ @@ -101,6 +102,18 @@ options. Key inputs are listed below (see ``action.yaml`` for the full list): * - ``output_dir`` - Directory where Macaron writes results (database, reports, artifacts). - ``output`` + * - ``upload_reports`` + - When ``true``, upload generated Macaron reports as a workflow artifact. + - ``true`` + * - ``reports_artifact_name`` + - Name of the uploaded reports artifact. + - ``macaron-reports`` + * - ``reports_retention_days`` + - Retention period in days for uploaded reports artifacts. + - ``90`` + * - ``write_job_summary`` + - When ``true``, write a human-friendly summary to the workflow run page. + - ``true`` * - ``upload_attestation`` - When ``true``, the action will attempt to upload a generated verification attestation (VSA) after policy verification. The attestation will be available @@ -114,8 +127,9 @@ options. Key inputs are listed below (see ``action.yaml`` for the full list): Outputs ------- -The composite action exposes the following outputs (set by the -``run_macaron_policy_verification.sh`` script when applicable): +The composite action exposes the following outputs (set by the action steps, +primarily ``Collect report paths``, with some values populated only when +analysis/policy verification generated them): .. list-table:: :header-rows: 1 @@ -123,6 +137,12 @@ The composite action exposes the following outputs (set by the * - Output - Description + * - ``html_report_path`` + - Path to the generated HTML analysis report (when available). 
+ * - ``report_dir`` + - Directory containing generated HTML/JSON reports. + * - ``db_path`` + - Path to the generated Macaron SQLite database (typically ``/macaron.db``). * - ``policy_report`` - Path to the generated policy report JSON file produced by ``macaron verify-policy``. This file contains the policy evaluation @@ -133,6 +153,8 @@ The composite action exposes the following outputs (set by the during verification, the action emits the string ``"VSA Not Generated."`` instead of a path. The attestation will be available under the ``Actions/management`` tab. + * - ``vsa_generated`` + - ``true`` when a VSA was generated; otherwise ``false``. Default Policies ---------------- @@ -172,7 +194,7 @@ How the action works which assembles the ``macaron analyze`` command from the inputs and runs it. Results are written into ``output_dir``. -3. ``Run Macaron Policy Verification``: if a policy file or PURL is supplied, +3. ``Run Macaron Policy Verification``: if ``policy_file`` is supplied, the corresponding script runs ``macaron verify-policy`` against the - analysis database and writes ``policy_report`` and ``vsa_report`` to - ``$GITHUB_OUTPUT`` when produced. + analysis database (using ``policy_purl`` when provided) and writes + policy-related outputs when produced. diff --git a/golang/cmd/bashparser/bashparser.go b/golang/cmd/bashparser/bashparser.go index 50cc6fec2..530bed89e 100644 --- a/golang/cmd/bashparser/bashparser.go +++ b/golang/cmd/bashparser/bashparser.go @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ package main @@ -30,13 +30,18 @@ func main() { input := flag.String("input", "", "The bash script content to be parsed. 
Input is prioritized over file option.") out_path := flag.String("output", "", "The output file path to store the JSON content.") raw := flag.Bool("raw", false, "Return raw parse-tree") + rawGHAMap := flag.Bool("raw-gha-map", false, "Return raw parse-tree with GitHub expression mapping") flag.Parse() var json_content string var parse_err error if len(*input) > 0 { // Read the bash script from command line argument. - json_content, parse_err = bashparser.Parse(*input, *raw) + if *rawGHAMap { + json_content, parse_err = bashparser.ParseRawWithGitHubExprMap(*input) + } else { + json_content, parse_err = bashparser.Parse(*input, *raw) + } } else if len(*file_path) <= 0 { fmt.Fprintln(os.Stderr, "Missing bash script input or file path.") flag.PrintDefaults() @@ -48,7 +53,11 @@ func main() { fmt.Fprintln(os.Stderr, read_err.Error()) os.Exit(1) } - json_content, parse_err = bashparser.Parse(string(data), *raw) + if *rawGHAMap { + json_content, parse_err = bashparser.ParseRawWithGitHubExprMap(string(data)) + } else { + json_content, parse_err = bashparser.Parse(string(data), *raw) + } } if parse_err != nil { diff --git a/golang/internal/bashparser/bashparser.go b/golang/internal/bashparser/bashparser.go index b88e43a6e..fdfc63c2f 100644 --- a/golang/internal/bashparser/bashparser.go +++ b/golang/internal/bashparser/bashparser.go @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ // Package bashparser parses the bash scripts and provides parsed objects in JSON. @@ -7,6 +7,7 @@ package bashparser import ( "bytes" "encoding/json" + "fmt" "regexp" "strings" @@ -19,22 +20,63 @@ type CMDResult struct { Commands [][]string `json:"commands"` } -// ParseCommands parses the bash script to find bash commands. 
-// It returns the parsed commands in JSON format. -func ParseCommands(data string) (string, error) { +// RawWithGHAMapResult is used to export the raw bash AST with a GitHub-expression mapping. +type RawWithGHAMapResult struct { + AST any `json:"ast"` + GHAExprMap map[string]string `json:"gha_expr_map"` +} + +func preprocessGitHubActionsExpr(data string) (string, error) { // Replace GitHub Actions's expressions with ``$MACARON_UNKNOWN``` variable because the bash parser // doesn't recognize such expressions. For example: ``${{ foo }}`` will be replaced by ``$MACARON_UNKNOWN``. // Note that we don't use greedy matching, so if we have `${{ ${{ foo }} }}`, it will not be replaced by // `$MACARON_UNKNOWN`. // See: https://docs.github.com/en/actions/learn-github-actions/expressions. - var re, reg_error = regexp.Compile(`\$\{\{.*?\}\}`) + re, reg_error := regexp.Compile(`\$\{\{.*?\}\}`) if reg_error != nil { return "", reg_error } - // We replace the GH Actions variables with "$MACARON_UNKNOWN". - data = string(re.ReplaceAll([]byte(data), []byte("$$MACARON_UNKNOWN"))) - data_str := strings.NewReader(data) + return string(re.ReplaceAll([]byte(data), []byte("$$MACARON_UNKNOWN"))), nil +} + +func preprocessGitHubActionsExprWithMap(data string) (string, map[string]string, error) { + // Replace GitHub Actions expressions with unique bash-safe placeholders and return + // a mapping from placeholder variable names to the original expression body. + // + // Example: + // input: echo "${{ github.head_ref }}" + // output: echo "$MACARON_GHA_0001", {"MACARON_GHA_0001": "github.head_ref"} + // + // This preserves expression identity for downstream analysis while keeping the + // transformed script parseable by the bash parser. 
+ re, reg_error := regexp.Compile(`\$\{\{.*?\}\}`) + if reg_error != nil { + return "", nil, reg_error + } + + index := 0 + ghaMap := make(map[string]string) + processed := re.ReplaceAllStringFunc(data, func(match string) string { + index += 1 + key := fmt.Sprintf("MACARON_GHA_%04d", index) + expr := strings.TrimSpace(strings.TrimSuffix(strings.TrimPrefix(match, "${{"), "}}")) + ghaMap[key] = expr + return "$" + key + }) + + return processed, ghaMap, nil +} + +// ParseCommands parses the bash script to find bash commands. +// It returns the parsed commands in JSON format. +func ParseCommands(data string) (string, error) { + processed, preprocessErr := preprocessGitHubActionsExpr(data) + if preprocessErr != nil { + return "", preprocessErr + } + + data_str := strings.NewReader(processed) data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") if parse_err != nil { return "", parse_err @@ -71,19 +113,12 @@ func ParseCommands(data string) (string, error) { } func ParseRaw(data string) (string, error) { - // Replace GitHub Actions's expressions with ``$MACARON_UNKNOWN``` variable because the bash parser - // doesn't recognize such expressions. For example: ``${{ foo }}`` will be replaced by ``$MACARON_UNKNOWN``. - // Note that we don't use greedy matching, so if we have `${{ ${{ foo }} }}`, it will not be replaced by - // `$MACARON_UNKNOWN`. - // See: https://docs.github.com/en/actions/learn-github-actions/expressions. - var re, reg_error = regexp.Compile(`\$\{\{.*?\}\}`) - if reg_error != nil { - return "", reg_error + processed, preprocessErr := preprocessGitHubActionsExpr(data) + if preprocessErr != nil { + return "", preprocessErr } - // We replace the GH Actions variables with "$MACARON_UNKNOWN". 
- data = string(re.ReplaceAll([]byte(data), []byte("$$MACARON_UNKNOWN"))) - data_str := strings.NewReader(data) + data_str := strings.NewReader(processed) data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") if parse_err != nil { return "", parse_err @@ -98,6 +133,41 @@ func ParseRaw(data string) (string, error) { return b.String(), nil } +// ParseRawWithGitHubExprMap parses raw bash AST and returns it with a GitHub-expression placeholder mapping. +func ParseRawWithGitHubExprMap(data string) (string, error) { + processed, ghaMap, preprocessErr := preprocessGitHubActionsExprWithMap(data) + if preprocessErr != nil { + return "", preprocessErr + } + + data_str := strings.NewReader(processed) + data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") + if parse_err != nil { + return "", parse_err + } + + b := new(strings.Builder) + encode_err := typedjson.Encode(b, data_parsed) + if encode_err != nil { + return "", encode_err + } + + var astObj any + if unmarshalErr := json.Unmarshal([]byte(b.String()), &astObj); unmarshalErr != nil { + return "", unmarshalErr + } + + result := RawWithGHAMapResult{ + AST: astObj, + GHAExprMap: ghaMap, + } + resultBytes, marshalErr := json.MarshalIndent(result, "", " ") + if marshalErr != nil { + return "", marshalErr + } + return string(resultBytes), nil +} + func Parse(data string, raw bool) (string, error) { if raw { return ParseRaw(data) diff --git a/golang/internal/bashparser/bashparser_test.go b/golang/internal/bashparser/bashparser_test.go index 3825f459b..abf02055a 100644 --- a/golang/internal/bashparser/bashparser_test.go +++ b/golang/internal/bashparser/bashparser_test.go @@ -6,6 +6,7 @@ package bashparser import ( "encoding/json" "os" + "strings" "testing" ) @@ -32,3 +33,50 @@ func Test_parse_valid_bash_script(t *testing.T) { t.Errorf("Cannot unmarshal the returned JSON content from parsing %s: %v.", json_content, err) } } + +func Test_parse_raw_with_gha_expr_map(t *testing.T) { + input := `echo "${{ 
github.head_ref }}" && echo "${{ needs.prepare.outputs.fullVersion }}"` + json_content, parse_err := ParseRawWithGitHubExprMap(input) + if parse_err != nil || json_content == "" { + t.Fatalf("expected successful parse with mapping, got error: %v", parse_err) + } + + var result map[string]any + if err := json.Unmarshal([]byte(json_content), &result); err != nil { + t.Fatalf("cannot unmarshal parser output: %v", err) + } + + ast, astOK := result["ast"] + if !astOK || ast == nil { + t.Fatalf("expected non-empty ast field") + } + + mapRaw, mapOK := result["gha_expr_map"] + if !mapOK { + t.Fatalf("expected gha_expr_map field") + } + ghaMap, ok := mapRaw.(map[string]any) + if !ok { + t.Fatalf("expected gha_expr_map to be an object") + } + if len(ghaMap) != 2 { + t.Fatalf("expected 2 mapped expressions, got %d", len(ghaMap)) + } +} + +func Test_preprocess_github_actions_expr_with_map_replaces_with_single_dollar_var(t *testing.T) { + input := `echo "${{ github.head_ref }}"` + processed, ghaMap, err := preprocessGitHubActionsExprWithMap(input) + if err != nil { + t.Fatalf("unexpected preprocess error: %v", err) + } + if strings.Contains(processed, "$$MACARON_GHA_") { + t.Fatalf("expected single-dollar placeholder, got %q", processed) + } + if !strings.Contains(processed, "$MACARON_GHA_0001") { + t.Fatalf("expected placeholder var in processed script, got %q", processed) + } + if ghaMap["MACARON_GHA_0001"] != "github.head_ref" { + t.Fatalf("unexpected gha mapping: %#v", ghaMap) + } +} diff --git a/pyproject.toml b/pyproject.toml index ede72bdb5..67794b851 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -224,6 +224,9 @@ load-plugins = [ "pylint.extensions.set_membership", "pylint.extensions.typing", ] +# Disable unsubscriptable-object because Pylint has false positives and this check +# overlaps with mypy's checks. 
Enable the check when the related issue is resolved: +# https://github.com/pylint-dev/pylint/issues/9549 disable = [ "fixme", "line-too-long", # Replaced by Flake8 Bugbear B950 check. @@ -242,6 +245,7 @@ disable = [ "too-many-return-statements", "too-many-statements", "duplicate-code", + "unsubscriptable-object", ] [tool.pylint.MISCELLANEOUS] diff --git a/scripts/actions/run_macaron_analysis.sh b/scripts/actions/run_macaron_analysis.sh index 34305479c..ccde3e646 100644 --- a/scripts/actions/run_macaron_analysis.sh +++ b/scripts/actions/run_macaron_analysis.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. set -euo pipefail @@ -54,4 +54,28 @@ if [ -n "${PROVENANCE_EXPECTATION:-}" ]; then fi echo "Executing: $CMD" -eval "$CMD" + +output_file="$(mktemp)" +set +e +eval "$CMD" 2>&1 | tee "$output_file" +# Capture analyze command's exit code from the pipeline (index 0), then restore fail-fast mode. +status=${PIPESTATUS[0]} +set -e + +if [ "${status}" -ne 0 ]; then + rm -f "$output_file" + exit "${status}" +fi + +if [ -n "${GITHUB_OUTPUT:-}" ]; then + html_report_path="$( + sed -n 's/^[[:space:]]*HTML[[:space:]]\+Report[[:space:]]\+//p' "$output_file" \ + | sed 's/[[:space:]]*$//' \ + | tail -n 1 + )" + if [ -n "$html_report_path" ]; then + echo "html_report_path=${html_report_path}" >> "$GITHUB_OUTPUT" + fi +fi + +rm -f "$output_file" diff --git a/scripts/actions/setup_macaron.sh b/scripts/actions/setup_macaron.sh index a002bb534..cd519253c 100644 --- a/scripts/actions/setup_macaron.sh +++ b/scripts/actions/setup_macaron.sh @@ -1,12 +1,23 @@ #!/usr/bin/env bash -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. 
All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. set -euo pipefail MACARON_DIR="${RUNNER_TEMP:-/tmp}/macaron" mkdir -p "$MACARON_DIR" +# If a test image tag is explicitly provided (for reusable workflow testing), +# use the local run script from this checkout and preserve the provided tag. +if [ -n "${MACARON_IMAGE_TAG:-}" ]; then + SCRIPT_NAME="run_macaron.sh" + cp "$GITHUB_ACTION_PATH/scripts/release_scripts/run_macaron.sh" "$MACARON_DIR/$SCRIPT_NAME" + chmod +x "$MACARON_DIR/$SCRIPT_NAME" + echo "MACARON=$MACARON_DIR/$SCRIPT_NAME" >> "$GITHUB_ENV" + echo "MACARON_IMAGE_TAG=${MACARON_IMAGE_TAG}" >> "$GITHUB_ENV" + exit 0 +fi + ACTION_DIR="${RUNNER_TEMP:-/tmp}/macaron-action" rm -rf "$ACTION_DIR" mkdir -p "$ACTION_DIR" diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py new file mode 100644 index 000000000..bec011787 --- /dev/null +++ b/scripts/actions/write_job_summary.py @@ -0,0 +1,545 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""Generate GitHub Actions job summary content for Macaron action runs.""" + +from __future__ import annotations + +import json +import os +import re +import sqlite3 +from pathlib import Path +from urllib.parse import urlsplit + +CHECK_RESULT_DEFAULT_COLUMNS = [ + "component_id", + "check_id", + "passed", +] + + +def _env(name: str, default: str = "") -> str: + return os.environ.get(name, default) + + +def _append_line(summary_path: Path, line: str = "") -> None: + with summary_path.open("a", encoding="utf-8") as summary: + summary.write(f"{line}\n") + + +def _resolve_policy_source(policy_input: str) -> tuple[Path | None, str]: + """Resolve a policy input to either a local file or a predefined policy template path.""" + if not policy_input: + return None, "" + + candidate = Path(policy_input) + if candidate.is_file(): + return candidate, "file" + + action_path = _env("GITHUB_ACTION_PATH", "") + if action_path: + template_path = Path( + os.path.join( + action_path, + "src", + "macaron", + "resources", + "policies", + "datalog", + f"{policy_input}.dl.template", + ) + ) + if template_path.is_file(): + return template_path, "predefined" + + return None, "unresolved" + + +def _resolve_existing_policy_sql(policy_name: str) -> Path | None: + """Resolve SQL diagnostics query for a predefined policy name.""" + action_path = _env("GITHUB_ACTION_PATH", "") + if not action_path: + return None + sql_path = Path(os.path.join(action_path, "src", "macaron", "resources", "policies", "sql", f"{policy_name}.sql")) + return sql_path if sql_path.is_file() else None + + +def _write_header( + summary_path: Path, + db_path: Path, + policy_report: str, + policy_file: str, + html_report: str, + policy_provided: bool, +) -> None: + upload_reports = _env("UPLOAD_REPORTS", "true").lower() == "true" + output_dir = _env("OUTPUT_DIR", "output") + reports_artifact_name = _env("REPORTS_ARTIFACT_NAME", "macaron-reports") + run_url = ( + f"{_env('GITHUB_SERVER_URL', 'https://github.com')}/" + 
f"{_env('GITHUB_REPOSITORY')}/actions/runs/{_env('GITHUB_RUN_ID')}" + ) + reports_artifact_url = _env("REPORTS_ARTIFACT_URL", run_url) + vsa_generated = _env("VSA_GENERATED", "").lower() + if vsa_generated in {"true", "false"}: + policy_succeeded = vsa_generated == "true" + else: + vsa_path = _env("VSA_PATH", f"{output_dir}/vsa.intoto.jsonl") + policy_succeeded = bool(vsa_path) and Path(vsa_path).is_file() + + _append_line(summary_path, "## Macaron Analysis Results") + _append_line(summary_path) + if upload_reports: + _append_line(summary_path, "Download reports from this artifact link:") + _append_line(summary_path, f"- [`{reports_artifact_name}`]({reports_artifact_url})") + _append_line(summary_path) + _append_line(summary_path, "Generated files:") + if html_report: + _append_line(summary_path, f"- HTML report: `{html_report}`") + _append_line(summary_path, f"- Database: `{db_path}`") + if policy_provided: + _append_line(summary_path, f"- Policy report: `{policy_report}`") + _append_line(summary_path) + + if policy_provided: + _append_line(summary_path, "Policy:") + if policy_file: + _append_line(summary_path, f"- Policy file: `{policy_file}`") + if policy_succeeded: + _append_line(summary_path, "- Policy status: :white_check_mark: Policy verification succeeded.") + else: + _append_line(summary_path, "- Policy status: :x: Policy verification failed.") + else: + _append_line(summary_path, "Policy:") + _append_line(summary_path, "- No policy was provided.") + _append_line(summary_path) + + +def _parse_policy_checks(policy_file: Path) -> tuple[list[str], list[str]]: + policy_text = policy_file.read_text(encoding="utf-8") + check_relations = sorted(set(re.findall(r"\b(check_[A-Za-z0-9_]+)\s*\(", policy_text))) + policy_check_ids = sorted(set(re.findall(r'"(mcn_[a-zA-Z0-9_]+)"', policy_text))) + return check_relations, policy_check_ids + + +def _resolve_existing_table(conn: sqlite3.Connection, table_name: str) -> str | None: + """Resolve a logical table name to an 
existing SQLite table name.""" + candidates = [table_name] + if not table_name.startswith("_"): + candidates.append(f"_{table_name}") + + cur = conn.cursor() + for candidate in candidates: + cur.execute("SELECT 1 FROM sqlite_master WHERE type IN ('table', 'view') AND name = ? LIMIT 1", (candidate,)) + if cur.fetchone(): + return candidate + return None + + +def _get_existing_columns(conn: sqlite3.Connection, table_name: str) -> list[str]: + cur = conn.cursor() + cur.execute(f"PRAGMA table_info({table_name})") + return [row[1] for row in cur.fetchall()] + + +def _query_selected_columns( + conn: sqlite3.Connection, + table_name: str, + desired_columns: list[str], + where_clause: str = "", + params: tuple[object, ...] = (), +) -> tuple[list[str], list[tuple]]: + available = _get_existing_columns(conn, table_name) + selected = [c for c in desired_columns if c in available] + if not selected: + return [], [] + + sql = f"SELECT {', '.join(selected)} FROM {table_name}" + if where_clause: + sql = f"{sql} WHERE {where_clause}" + sql = f"{sql} ORDER BY 1" + cur = conn.cursor() + cur.execute(sql, params) + return selected, cur.fetchall() + + +def _query_sql(conn: sqlite3.Connection, sql_query: str) -> tuple[list[str], list[tuple]]: + # Python's sqlite cursor.execute() can fail when the SQL begins with line comments. + # Strip leading SQL line comments while preserving the query body. 
+    sanitized_lines = []
+    for line in sql_query.splitlines():
+        if line.lstrip().startswith("--"):
+            continue
+        sanitized_lines.append(line)
+    sanitized_query = "\n".join(sanitized_lines).strip()
+    if not sanitized_query:
+        return [], []
+
+    cur = conn.cursor()
+    cur.execute(sanitized_query)
+    rows = cur.fetchall()
+    columns = [col[0] for col in (cur.description or [])]
+    return columns, rows
+
+
+def _write_markdown_table(summary_path: Path, columns: list[str], rows: list[tuple]) -> bool:
+    if not columns or not rows:
+        return False
+
+    _append_line(summary_path, f"| {' | '.join(columns)} |")
+    _append_line(summary_path, f"|{'|'.join(['---'] * len(columns))}|")
+    for row in rows:
+        values = [_format_table_cell(value) for value in row]
+        _append_line(summary_path, f"| {' | '.join(values)} |")
+    return True
+
+
+def _format_table_cell(value: object) -> str:
+    text = str(value)
+    parsed_list = _parse_list_cell(text)
+    if parsed_list is not None:
+        items = [_format_list_item(item) for item in parsed_list]
+        # Join list items with <br> so they render as separate lines inside a
+        # single markdown table cell (a literal newline would break the table row).
+        return "<br>".join(f"- {item}" for item in items) if items else "`[]`"
+
+    if text.startswith(("http://", "https://")):
+        parsed = urlsplit(text)
+        segments = [part for part in parsed.path.split("/") if part]
+        label = segments[-1] if segments else parsed.netloc
+        return f"[`{label}`]({text})"
+    return f"`{_sanitize_for_markdown_table_code(text)}`"
+
+
+def _parse_list_cell(text: str) -> list[object] | None:
+    stripped = text.strip()
+    if not (stripped.startswith("[") and stripped.endswith("]")):
+        return None
+    try:
+        loaded = json.loads(stripped)
+    except json.JSONDecodeError:
+        return None
+    return loaded if isinstance(loaded, list) else None
+
+
+def _format_list_item(value: object) -> str:
+    text = str(value)
+    if text.startswith(("http://", "https://")):
+        parsed = urlsplit(text)
+        segments = [part for part in parsed.path.split("/") if part]
+        label = segments[-1] if segments else parsed.netloc
+        return f"[`{label}`]({text})"
+    return f"`{_sanitize_for_markdown_table_code(text)}`"
+
+
+def _sanitize_for_markdown_table_code(text: str) -> str:
+    """Sanitize inline-code content for markdown table cells."""
+    return text.replace("`", "'").replace("|", "\\|").replace("\n", " ")
+
+
+def _priority_label(priority: object) -> str:
+    """Map numeric priority to a concise severity-like label."""
+    try:
+        value = int(priority)
+    except (TypeError, ValueError):
+        return str(priority)
+
+    if value >= 90:
+        return "critical"
+    if value >= 70:
+        return "high"
+    if value >= 50:
+        return "medium"
+    return "low"
+
+
+def _gha_group_label(group: str) -> str:
+    # finding_group is the top-level section key; finding_type is rendered per-row as the subtype.
+ if group == "third_party_action_risk": + return "Third-party action risks" + if group == "workflow_security_issue": + return "Workflow security issues" + return group + + +def _extract_finding_summary(message: object) -> str: + """Extract a compact summary from a finding message.""" + text = str(message).strip() + if not text: + return "" + + # Expected format: "Summary: ... Details: ... Recommendation: ..." + match = re.search(r"Summary:\s*(.*?)(?:\s+Details:\s*|\s+Recommendation:\s*|$)", text, flags=re.IGNORECASE) + if match: + return match.group(1).strip() + + return text + + +def write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], rows: list[tuple]) -> bool: + """Write compact GitHub Actions vulnerability diagnostics to the job summary. + + Parameters + ---------- + summary_path : Path + Path to the GitHub job summary markdown file. + columns : list[str] + Ordered column names from the SQL diagnostics query result. + rows : list[tuple] + Row values matching ``columns`` order. + + Returns + ------- + bool + ``True`` if content was rendered; ``False`` when inputs are empty. + """ + if not columns or not rows: + return False + + col_index = {name: idx for idx, name in enumerate(columns)} + required = [ + "finding_priority", + "finding_type", + "action_name", + "action_ref", + "vulnerable_workflow", + ] + if any(name not in col_index for name in required): + return _write_markdown_table(summary_path, columns, rows) + + sorted_rows = sorted( + rows, + key=lambda row: ( + int(row[col_index["finding_priority"]]) if str(row[col_index["finding_priority"]]).isdigit() else 0 + ), + reverse=True, + ) + display_rows = sorted_rows[:10] + group_idx = col_index.get("finding_group") + + _append_line( + summary_path, + "_Showing top 10 findings by priority. 
Expand details below for full diagnostics._",
+    )
+    preferred_groups = ["workflow_security_issue", "third_party_action_risk"]
+    groups_in_rows: list[str] = []
+    if group_idx is not None:
+        discovered_groups = [str(row[group_idx]) for row in sorted_rows]
+        groups_in_rows.extend([group for group in preferred_groups if group in discovered_groups])
+        # Append the remaining groups in discovery order, skipping duplicates as we go.
+        # (A single list comprehension checked membership against the pre-extend list,
+        # so a non-preferred group seen in several rows was appended once per row and
+        # its section rendered multiple times.)
+        for group in discovered_groups:
+            if group not in groups_in_rows:
+                groups_in_rows.append(group)
+    else:
+        groups_in_rows = ["all_findings"]
+
+    for group in groups_in_rows:
+        if group_idx is None:
+            group_rows = display_rows
+            title = "Findings"
+        else:
+            group_rows = [row for row in sorted_rows if str(row[group_idx]) == group][:10]
+            if not group_rows:
+                continue
+            title = _gha_group_label(group)
+        _append_line(summary_path)
+        _append_line(summary_path, f"#### {title}")
+        _append_line(summary_path)
+        if group == "workflow_security_issue":
+            _append_line(summary_path, "| priority | type | summary | workflow |")
+            _append_line(summary_path, "|---|---|---|---|")
+        else:
+            _append_line(summary_path, "| priority | type | action | version | workflow |")
+            _append_line(summary_path, "|---|---|---|---|---|")
+        for row in group_rows:
+            priority_raw = row[col_index["finding_priority"]]
+            priority = f"`{_priority_label(priority_raw)} ({priority_raw})`"
+            finding_type = _format_table_cell(row[col_index["finding_type"]])
+            finding_summary = _format_table_cell(
+                _extract_finding_summary(row[col_index["finding_message"]]) if "finding_message" in col_index else ""
+            )
+            action_name = _format_table_cell(row[col_index["action_name"]])
+            action_version = _format_table_cell(row[col_index["action_ref"]])
+            workflow = _format_table_cell(row[col_index["vulnerable_workflow"]])
+            if group == "workflow_security_issue":
+                _append_line(
+                    summary_path,
+                    f"| {priority} | {finding_type} | {finding_summary} | {workflow} |",
+                )
+            else:
+                _append_line(
+                    summary_path,
+                    f"| {priority} | {finding_type} | {action_name} | {action_version} | {workflow} |",
+                
)
+
+    _append_line(summary_path)
+    _append_line(summary_path, "<details>")
+    _append_line(summary_path, "<summary>Detailed findings</summary>")
+    _append_line(summary_path)
+    detail_groups = groups_in_rows if groups_in_rows else ["all_findings"]
+    row_counter = 1
+    for group in detail_groups:
+        if group_idx is None:
+            group_rows = sorted_rows
+            title = "Findings"
+        else:
+            group_rows = [row for row in sorted_rows if str(row[group_idx]) == group]
+            if not group_rows:
+                continue
+            title = _gha_group_label(group)
+        _append_line(summary_path, f"**{title}**")
+        for row in group_rows:
+            action = str(row[col_index["action_name"]])
+            version = str(row[col_index["action_ref"]])
+            priority = row[col_index["finding_priority"]]
+            finding_type = str(row[col_index["finding_type"]])
+            workflow = str(row[col_index["vulnerable_workflow"]])
+            if group == "workflow_security_issue":
+                subject = workflow
+            else:
+                subject = f"{action}@{version}" if version else action
+            _append_line(summary_path, f"{row_counter}. **`{subject}`** (`{finding_type}`, priority `{priority}`)")
+            _append_line(summary_path, f"- Workflow: `{workflow}`")
+
+            pin_idx = col_index.get("sha_pinned")
+            row_group = str(row[group_idx]) if group_idx is not None else ""
+            if pin_idx is not None and row_group == "third_party_action_risk" and row[pin_idx] is not None:
+                pin_state = "yes" if bool(row[pin_idx]) else "no"
+                _append_line(summary_path, f"- Pinned to full commit SHA: `{pin_state}`")
+
+            vul_idx = col_index.get("vuln_urls")
+            if vul_idx is not None and row[vul_idx]:
+                parsed = _parse_list_cell(str(row[vul_idx]))
+                if parsed:
+                    _append_line(summary_path, "- Vulnerabilities:")
+                    for item in parsed:
+                        _append_line(summary_path, f"  - {_format_list_item(item)}")
+
+            rec_idx = col_index.get("recommended_ref")
+            if rec_idx is not None and row[rec_idx]:
+                _append_line(summary_path, f"- Recommended ref: {_format_table_cell(row[rec_idx])}")
+
+            msg_idx = col_index.get("finding_message")
+            if msg_idx is not None and row[msg_idx]:
+                _append_line(summary_path, f"- Details: {_format_table_cell(row[msg_idx])}")
+            
_append_line(summary_path)
+            row_counter += 1
+    _append_line(summary_path, "</details>")
+    return True
+
+
+def _write_policy_check_lists(summary_path: Path, policy_check_ids: list[str]) -> None:
+
+    if policy_check_ids:
+        _append_line(
+            summary_path,
+            f"- Checks referenced in policy: {', '.join(f'`{name}`' for name in policy_check_ids)}",
+        )
+
+
+def _write_custom_policy_failure_diagnostics(summary_path: Path, db_path: Path, policy_file: Path) -> None:
+    check_relations, policy_check_ids = _parse_policy_checks(policy_file)
+    has_details = False
+
+    _append_line(summary_path)
+    _append_line(summary_path, "### Policy Failure Diagnostics")
+    _write_policy_check_lists(summary_path, policy_check_ids)
+    if check_relations or policy_check_ids:
+        has_details = True
+
+    if not policy_check_ids:
+        if not has_details:
+            _append_line(summary_path, "- Additional check-level details are unavailable for this failure.")
+        return
+
+    with sqlite3.connect(db_path) as conn:
+        resolved = _resolve_existing_table(conn, "check_result")
+        if not resolved:
+            if not has_details:
+                _append_line(summary_path, "- Additional check-level details are unavailable for this failure.")
+            return
+        placeholders = ",".join(["?"] * len(policy_check_ids))
+        cols, rows = _query_selected_columns(
+            conn,
+            resolved,
+            CHECK_RESULT_DEFAULT_COLUMNS,
+            where_clause=f"check_id IN ({placeholders})",
+            params=tuple(policy_check_ids),
+        )
+
+    _append_line(summary_path)
+    _append_line(summary_path, "#### check_result")
+    if _write_markdown_table(summary_path, cols, rows):
+        has_details = True
+    else:
+        # NOTE(review): the "#### check_result" header written above is NOT removed
+        # here; when the table is empty we only emit a single friendly fallback below.
+        _append_line(summary_path, "- Additional check-level details are unavailable for this failure.")
+
+
+def _write_existing_policy_failure_diagnostics(
+    summary_path: Path, db_path: Path, policy_name: str, policy_file: Path
+) -> None:
+    check_relations, policy_check_ids = _parse_policy_checks(policy_file)
+    has_details = False
+
+    _append_line(summary_path)
+    _append_line(summary_path, f"### Policy Failure Diagnostics ({policy_name})")
+    _write_policy_check_lists(summary_path, policy_check_ids)
+    if check_relations or policy_check_ids:
+        has_details = True
+
+    sql_path = _resolve_existing_policy_sql(policy_name)
+    if sql_path:
+        sql_query = sql_path.read_text(encoding="utf-8")
+        with sqlite3.connect(db_path) as conn:
+            cols, rows = _query_sql(conn, sql_query)
+        if cols and rows:
+            _append_line(summary_path)
+            # Plain string: an f-string without interpolation triggers pylint W1309.
+            _append_line(summary_path, "#### Results")
+            if policy_name == "check-github-actions":
+                rendered = write_compact_gha_vuln_diagnostics(summary_path, cols, rows)
+            else:
+                rendered = _write_markdown_table(summary_path, cols, rows)
+            if rendered:
+                has_details = True
+
+    if not has_details:
+        _append_line(summary_path, "- Additional check-level details are unavailable for this failure.")
+
+
+def main() -> None:
+    output_dir = Path(_env("OUTPUT_DIR", "output"))
+    db_path = Path(_env("DB_PATH", os.path.join(str(output_dir), "macaron.db")))
+    policy_report = _env("POLICY_REPORT", os.path.join(str(output_dir), "policy_report.json"))
+    policy_file_value = _env("POLICY_FILE", "")
+    resolved_policy_file, policy_mode = _resolve_policy_source(policy_file_value)
+    policy_label = ""
+    if policy_mode == "file" and resolved_policy_file:
+        policy_label = str(resolved_policy_file)
+    elif policy_mode == "predefined" and resolved_policy_file:
+        policy_label = f"{policy_file_value}"
+    elif policy_mode == "unresolved":
+        policy_label = f"{policy_file_value} (unresolved)"
+    html_report = _env("HTML_REPORT_PATH", "")
+    vsa_path_value = _env("VSA_PATH", 
os.path.join(str(output_dir), "vsa.intoto.jsonl")) + vsa_path = Path(vsa_path_value) if vsa_path_value else None + + summary_output = _env("GITHUB_STEP_SUMMARY") + if not summary_output: + raise RuntimeError("GITHUB_STEP_SUMMARY is not set.") + summary_path = Path(summary_output) + + policy_provided = bool(policy_file_value.strip()) + _write_header(summary_path, db_path, policy_report, policy_label, html_report, policy_provided) + + if not db_path.is_file(): + _append_line(summary_path, ":warning: Macaron database was not generated.") + return + + if (not vsa_path or not vsa_path.is_file()) and resolved_policy_file and resolved_policy_file.is_file(): + if policy_mode == "predefined": + _write_existing_policy_failure_diagnostics(summary_path, db_path, policy_file_value, resolved_policy_file) + else: + _write_custom_policy_failure_diagnostics(summary_path, db_path, resolved_policy_file) + + +if __name__ == "__main__": + main() diff --git a/scripts/actions/write_job_summary.sh b/scripts/actions/write_job_summary.sh new file mode 100755 index 000000000..432069c59 --- /dev/null +++ b/scripts/actions/write_job_summary.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+set -euo pipefail
+
+# Export the inputs: the Python helper reads these via os.environ, so defaults
+# computed here (not just values already exported by the workflow) must be
+# visible in the child process environment.
+export OUTPUT_DIR="${OUTPUT_DIR:-output}"
+export DB_PATH="${DB_PATH:-${OUTPUT_DIR}/macaron.db}"
+export POLICY_REPORT="${POLICY_REPORT:-${OUTPUT_DIR}/policy_report.json}"
+export POLICY_FILE="${POLICY_FILE:-}"
+export HTML_REPORT_PATH="${HTML_REPORT_PATH:-}"
+export VSA_PATH="${VSA_PATH:-${OUTPUT_DIR}/vsa.intoto.jsonl}"
+export UPLOAD_REPORTS="${UPLOAD_REPORTS:-true}"
+export REPORTS_ARTIFACT_NAME="${REPORTS_ARTIFACT_NAME:-macaron-reports}"
+RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
+export REPORTS_ARTIFACT_URL="${REPORTS_ARTIFACT_URL:-${RUN_URL}}"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+python "${SCRIPT_DIR}/write_job_summary.py"
diff --git a/src/macaron/code_analyzer/dataflow_analysis/bash.py b/src/macaron/code_analyzer/dataflow_analysis/bash.py
index 4a4903c86..6b0f05813 100644
--- a/src/macaron/code_analyzer/dataflow_analysis/bash.py
+++ b/src/macaron/code_analyzer/dataflow_analysis/bash.py
@@ -75,6 +75,8 @@ class BashScriptContext(core.Context):
     stdout_loc: facts.LocationSpecifier
     #: Filepath for Bash script file.
     source_filepath: str
+    #: Mapping of parser placeholder vars to original GitHub expression bodies.
+    gha_expr_map_items: tuple[tuple[str, str], ...] 
= () @staticmethod def create_from_run_step( @@ -106,6 +108,7 @@ def create_from_run_step( stdout_scope=context.ref.job_context.ref.workflow_context.ref.console.get_non_owned(), stdout_loc=facts.Console(), source_filepath=source_filepath, + gha_expr_map_items=(), ) @staticmethod @@ -136,6 +139,7 @@ def create_from_bash_script(context: core.ContextRef[BashScriptContext], source_ stdout_scope=context.ref.stdout_scope.get_non_owned(), stdout_loc=facts.Console(), source_filepath=source_filepath, + gha_expr_map_items=(), ) @staticmethod @@ -164,6 +168,7 @@ def create_in_isolation(context: core.ContextRef[core.AnalysisContext], source_f stdout_scope=core.OwningContextRef(facts.Scope("stdout")), stdout_loc=facts.Console(), source_filepath=source_filepath, + gha_expr_map_items=(), ) def with_stdin( @@ -180,6 +185,7 @@ def with_stdin( self.stdout_scope, self.stdout_loc, self.source_filepath, + self.gha_expr_map_items, ) def with_stdout( @@ -196,6 +202,33 @@ def with_stdout( stdout_scope, stdout_loc, self.source_filepath, + self.gha_expr_map_items, + ) + + def with_gha_expr_map(self, gha_expr_map: dict[str, str]) -> BashScriptContext: + """Return a modified bash script context with GitHub-expression placeholder mappings. + + Parameters + ---------- + gha_expr_map : dict[str, str] + Mapping from parser placeholder variable names to original GitHub expression bodies. + + Returns + ------- + BashScriptContext + A context copy with updated GitHub-expression mapping metadata. 
+ """ + return BashScriptContext( + self.outer_context, + self.filesystem, + self.env, + self.func_decls, + self.stdin_scope, + self.stdin_loc, + self.stdout_scope, + self.stdout_loc, + self.source_filepath, + tuple(sorted(gha_expr_map.items())), ) def get_containing_github_context(self) -> github.GitHubActionsStepContext | None: @@ -261,8 +294,9 @@ def identify_interpretations(self, state: core.State) -> dict[core.Interpretatio def build_bash_script() -> core.Node: try: - parsed_bash = bashparser.parse_raw(script_str, MACARON_PATH) - return BashScriptNode.create(parsed_bash, self.context.get_non_owned()) + parsed_bash, gha_expr_map = bashparser.parse_raw_with_gha_mapping(script_str, MACARON_PATH) + context_with_map = self.context.ref.with_gha_expr_map(gha_expr_map) + return BashScriptNode.create(parsed_bash, core.NonOwningContextRef(context_with_map)) except ParseError: return core.NoOpStatementNode() diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py new file mode 100644 index 000000000..80364ea76 --- /dev/null +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -0,0 +1,969 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
def detect_github_actions_security_issues(nodes: "NodeForest") -> "list[WorkflowFinding]":
    """Detect security issues across all GitHub Actions workflows in a node forest.

    Parameters
    ----------
    nodes : NodeForest
        Parsed workflow node forest; each root is traversed breadth-first to
        locate workflow nodes.

    Returns
    -------
    list[WorkflowFinding]
        One entry per workflow that produced at least one issue. Each entry
        contains the workflow file path (``workflow_name``) and its
        prioritized issue list (``issues``).
    """
    collected: list[WorkflowFinding] = []
    for root in nodes.root_nodes:
        for candidate in traverse_bfs(root):
            if not isinstance(candidate, GitHubActionsWorkflowNode):
                continue
            # analyze_workflow returns None when a workflow is clean.
            finding = analyze_workflow(candidate, nodes=nodes)
            if finding is not None:
                collected.append(finding)
    return collected
+ """ + findings: list[PrioritizedIssue] = [] + on_keys = _extract_on_keys(workflow_node.definition) + seen_jobs: set[str] = set() + workflow_permissions_defined = "permissions" in workflow_node.definition + has_job_without_permissions = False + + for node in core.traverse_bfs(workflow_node): + if isinstance(node, GitHubActionsWorkflowNode): + _append_workflow_level_findings(findings, on_keys, node.definition) + continue + + if isinstance(node, GitHubActionsNormalJobNode): + if node.job_id in seen_jobs: + continue + seen_jobs.add(node.job_id) + if "permissions" not in node.definition: + has_job_without_permissions = True + _append_job_level_findings(findings, node) + continue + + if isinstance(node, GitHubActionsActionStepNode): + _append_action_step_findings(findings, node, on_keys) + continue + + if isinstance(node, GitHubActionsRunStepNode): + _append_run_step_findings(findings, node, nodes) + continue + + if not workflow_permissions_defined and has_job_without_permissions: + _add_finding( + findings, + ( + "missing-permissions: No explicit workflow permissions defined, and one or more jobs also omit " + "permissions; defaults may be overly broad." + ), + PRIORITY_MEDIUM, + ) + + if "pull_request_target" in on_keys and _has_privileged_trigger_risk_combo(findings): + _add_finding( + findings, + ( + "privileged-trigger: Workflow uses `pull_request_target` with additional risky patterns; " + "treat this workflow as high risk and harden immediately." 
+ ), + PRIORITY_HIGH, + ) + + if findings: + findings_sorted = sorted(findings, key=lambda finding: (-finding["priority"], finding["issue"])) + return { + "workflow_name": os.path.relpath(workflow_node.context.ref.source_filepath, os.getcwd()), + "issues": findings_sorted, + } + + return None + + +def _extract_on_keys(workflow: Workflow) -> set[str]: + """Extract the set of event names from a workflow ``on`` section.""" + on_section = workflow.get("on") + if isinstance(on_section, dict): + return set(on_section.keys()) + if isinstance(on_section, list): + return set(on_section) + return {on_section} + + +def _append_workflow_level_findings(findings: list[PrioritizedIssue], on_keys: set[str], workflow: Workflow) -> None: + """Append workflow-level hardening findings.""" + if "permissions" not in workflow: + return + + permissions = workflow["permissions"] + if isinstance(permissions, str) and permissions.lower() == "write-all": + _add_finding(findings, "overbroad-permissions: Workflow uses `permissions: write-all`.", PRIORITY_HIGH) + if isinstance(permissions, dict) and "pull_request_target" in on_keys: + for scope, level in permissions.items(): + if isinstance(level, str) and "write" in level.lower(): + _add_finding( + findings, + f"overbroad-permissions: PR-triggered workflow requests `{scope}: {level}`.", + PRIORITY_HIGH, + ) + + +def _append_job_level_findings(findings: list[PrioritizedIssue], job_node: GitHubActionsNormalJobNode) -> None: + """Append findings derived from a single job node.""" + runs_on = job_node.definition.get("runs-on") + if runs_on and "self-hosted" in str(runs_on): + _add_finding( + findings, + f"self-hosted-runner: Job `{job_node.job_id}` runs on self-hosted runners; " + "ensure isolation and never run untrusted PR code there.", + PRIORITY_MEDIUM, + ) + + +def _append_action_step_findings( + findings: list[PrioritizedIssue], + action_node: GitHubActionsActionStepNode, + on_keys: set[str], +) -> None: + """Append findings derived from an 
action step node.""" + uses_name = action_node.uses_name + uses_version = action_node.uses_version + if uses_name and not uses_name.startswith("./") and uses_version and not is_commit_hash(uses_version): + step_line = _extract_action_step_line(action_node) + line_marker = f"[step-line={step_line}] " if step_line else "" + _add_finding( + findings, + f"unpinned-third-party-action: {line_marker}{uses_name}@{uses_version}", + PRIORITY_MIN, + ) + + if uses_name == "actions/checkout": + ref = _literal_value(action_node.with_parameters.get("ref")) + if ref in UNTRUSTED_PR_REFS and "pull_request" in on_keys: + _add_finding( + findings, + f"untrusted-fork-code: A checkout step uses untrusted fork code (`ref: {ref}`) on PR event.", + PRIORITY_CRITICAL, + ) + + if "pull_request_target" in on_keys and ref in UNTRUSTED_PR_REFS: + _add_finding( + findings, + f"pr-target-untrusted-checkout: Workflow uses pull_request_target and checks out PR-controlled ref `{ref}`.", + PRIORITY_CRITICAL, + ) + + +def _append_run_step_findings( + findings: list[PrioritizedIssue], run_step_node: GitHubActionsRunStepNode, nodes: NodeForest +) -> None: + """Append findings derived from a run step node.""" + # Traversing a run-step subgraph can reach semantically identical command nodes through + # multiple CFG/AST paths (for example nested/compound command structures). Track emitted + # injection findings by stable metadata to avoid duplicate reports for the same command line. + seen_injection_keys: set[tuple[int | None, str, str, str]] = set() + for node in core.traverse_bfs(run_step_node): + # Command-level injection checks rely on parsed call argument parts from single-command nodes. + if isinstance(node, bash.BashSingleCommandNode): + _append_injection_findings(findings, node, nodes, seen_injection_keys) + continue + + # Remote script execution risk is structural: downloader output piped into an executor. 
def _is_remote_script_exec_pipe(pipe_node: "bash.BashPipeNode") -> bool:
    """Return whether a pipe connects a downloader to a shell/archive executor.

    Parameters
    ----------
    pipe_node : bash.BashPipeNode
        Parsed Bash pipe node whose two sides are inspected.

    Returns
    -------
    bool
        ``True`` when the left side is a literal ``curl``/``wget`` invocation
        and the right side executes its output (shell or archive tool).
    """
    left_words = _extract_statement_words(pipe_node.lhs)
    right_words = _extract_statement_words(pipe_node.rhs)
    # Both sides must resolve to plain literal command words to be classified.
    if left_words and right_words and left_words[0] in {"curl", "wget"}:
        return _is_executor_invocation(right_words)
    return False
word values from a call expression.""" + args = call_expr["Args"] + words: list[str] = [] + for arg in args: + parts = arg["Parts"] + word = "".join(part.get("Value", "") for part in parts if is_lit(part)).strip() + if not word: + return [] + words.append(word) + if not words: + return [] + + normalized = [os.path.basename(word).lower() if idx == 0 else word for idx, word in enumerate(words)] + return normalized + + +def _is_executor_invocation(words: list[str]) -> bool: + """Return whether extracted words represent shell/archive execution.""" + if not words: + return False + direct_executors = {"bash", "sh", "tar"} + wrapper_cmds = {"sudo", "env", "command"} + + command = words[0] + if command in direct_executors: + return True + if command in wrapper_cmds and len(words) > 1: + wrapped = os.path.basename(words[1]).lower() + return wrapped in direct_executors + return False + + +def _append_injection_findings( + findings: list[PrioritizedIssue], + bash_node: bash.BashSingleCommandNode, + nodes: NodeForest, + seen_injection_keys: set[tuple[int | None, str, str, str]] | None = None, +) -> None: + """Append potential injection findings discovered from parsed bash command nodes.""" + if not is_call_expr(bash_node.definition.get("Cmd")): + return + + call_exp = cast(CallExpr, bash_node.definition["Cmd"]) + for arg in call_exp.get("Args", []): + parts = arg.get("Parts") + step_node = get_containing_github_step(bash_node, nodes.parents) + script_line = _extract_script_line_from_parts(parts) + expanded_refs = _extract_expanded_github_refs(bash_node, step_node, script_line, parts) + if _arg_has_attacker_controlled_github_ref(parts) or _has_attacker_controlled_expanded_ref(expanded_refs): + job_node = get_containing_github_job(bash_node, nodes.parents) + workflow_line = _map_script_line_to_workflow_line(step_node, script_line) + if workflow_line is None: + workflow_line = _extract_run_step_line(step_node) + job_name = job_node.job_id if job_node else "" + step_name = 
_extract_step_name(step_node) + command_text = _extract_command_text(step_node, script_line) + dedupe_key = (workflow_line, job_name, step_name, command_text) + if seen_injection_keys is not None: + # Prevent duplicate findings when the same risky command is visited via + # different traversal paths in the run-step subgraph. + if dedupe_key in seen_injection_keys: + continue + seen_injection_keys.add(dedupe_key) + issue_payload = { + "step_line": workflow_line, + "script_line": script_line, + "job": job_name, + "step": step_name, + "command": command_text, + "expanded_refs": expanded_refs, + "parts": arg.get("Parts"), + } + _add_finding(findings, f"potential-injection: {json.dumps(issue_payload)}", PRIORITY_CRITICAL) + + +def _arg_has_attacker_controlled_github_ref(parts: object) -> bool: + """Return whether argument parts contain attacker-controlled GitHub context expansion. + + Parameters + ---------- + parts : object + Parsed argument ``Parts`` payload from the Bash call expression. + + Returns + ------- + bool + ``True`` when an attacker-controlled GitHub context reference is detected. + """ + if not isinstance(parts, list): + return False + + expansion = False + pr_head_ref = False + for part in parts: + if is_param_exp(part) and part.get("Param", {}).get("Value") == "github": + expansion = True + if is_lit(part) and part.get("Value") in { + ".event.pull_request.head.ref", + ".head_ref", + ".event.issue.body", + ".event.comment.body", + }: + pr_head_ref = True + if expansion and pr_head_ref: + return True + return False + + +def _has_attacker_controlled_expanded_ref(refs: list[str]) -> bool: + """Return whether extracted refs include attacker-controlled GitHub context values. + + Parameters + ---------- + refs : list[str] + Extracted GitHub expression references. + + Returns + ------- + bool + ``True`` if a known attacker-controlled ref is present. 
+ """ + attacker_controlled = { + "github.event.pull_request.head.ref", + "github.head_ref", + "github.event.issue.body", + "github.event.comment.body", + } + return any(ref in attacker_controlled for ref in refs) + + +def _extract_expanded_github_refs( + bash_node: bash.BashSingleCommandNode, + step_node: GitHubActionsRunStepNode | None, + script_line: int | None, + parts: object, +) -> list[str]: + """Extract normalized expanded GitHub refs from mapping with a line-text fallback. + + Parameters + ---------- + bash_node : bash.BashSingleCommandNode + The Bash command node used to resolve parser placeholder mappings. + step_node : GitHubActionsRunStepNode | None + The containing run step node, used for fallback extraction from raw run script text. + script_line : int | None + 1-based line number within the inlined run script for line-targeted fallback extraction. + parts : object + Parsed argument ``Parts`` payload from the Bash call expression. + + Returns + ------- + list[str] + Ordered list of normalized GitHub expression references. + """ + refs: list[str] = [] + placeholder_map = dict(bash_node.context.ref.gha_expr_map_items) + if isinstance(parts, list): + for part in parts: + if not is_param_exp(part): + continue + placeholder = part.get("Param", {}).get("Value") + if isinstance(placeholder, str): + mapped = placeholder_map.get(placeholder) + if mapped: + refs.extend(_extract_github_refs_from_expression(mapped)) + if refs: + return _deduplicate_preserve_order(refs) + + if step_node is None: + return [] + # Fallback: some complex shell constructs (for example command substitution in compound + # test/boolean commands) may not expose mapped placeholders on the current arg parts. + # In those cases, recover refs directly from the original run-script line text. 
+ run_script = step_node.definition["run"] + script_lines = run_script.splitlines() + if script_line is not None and 1 <= script_line <= len(script_lines): + line_text = script_lines[script_line - 1] + else: + line_text = run_script + + matches = re.findall(r"\$\{\{\s*(.*?)\s*\}\}", line_text) + fallback_refs: list[str] = [] + for expr in matches: + fallback_refs.extend(_extract_github_refs_from_expression(expr)) + return _deduplicate_preserve_order(fallback_refs) + + +def _extract_github_refs_from_expression(expression: str) -> list[str]: + """Extract github-context reference paths from a GitHub Actions expression body. + + Parameters + ---------- + expression : str + Expression text inside ``${{ ... }}``. + + Returns + ------- + list[str] + Matched GitHub reference paths (for example ``github.head_ref``). + """ + return re.findall(r"github(?:\.[A-Za-z0-9_-]+)+", expression) + + +def _deduplicate_preserve_order(values: list[str]) -> list[str]: + """Deduplicate string values while preserving insertion order. + + Parameters + ---------- + values : list[str] + Input values that may contain duplicates. + + Returns + ------- + list[str] + Values in original order with duplicates removed. 
+ """ + seen: set[str] = set() + result: list[str] = [] + for value in values: + if value in seen: + continue + seen.add(value) + result.append(value) + return result + + +def _extract_step_name(step_node: GitHubActionsRunStepNode | None) -> str: + """Extract a display name for a workflow run step.""" + if step_node is None: + return "" + step_name = step_node.definition.get("name") + if isinstance(step_name, str): + return step_name + step_id = step_node.definition.get("id") + if isinstance(step_id, str): + return step_id + return "" + + +def _extract_command_text(step_node: GitHubActionsRunStepNode | None, script_line: int | None) -> str: + """Extract a compact command snippet from the run script for display in diagnostics.""" + if step_node is None: + return "" + + run_script = step_node.definition["run"] + script_lines = run_script.splitlines() + if script_line and 1 <= script_line <= len(script_lines): + return script_lines[script_line - 1].strip() + + for line in script_lines: + if line.strip(): + return line.strip() + return "" + + +def _extract_run_step_line(step_node: GitHubActionsRunStepNode | None) -> int | None: + """Extract a 1-based workflow line number for a run step when metadata is available.""" + if step_node is None: + return None + + definition = step_node.definition + line_container = getattr(definition, "lc", None) + if line_container is None: + return _infer_run_step_line_from_source(step_node) + + line = getattr(line_container, "line", None) + if isinstance(line, int) and line >= 0: + # ruamel stores line numbers as 0-based. 
+ return line + 1 + + return _infer_run_step_line_from_source(step_node) + + +def _extract_action_step_line(step_node: GitHubActionsActionStepNode | None) -> int | None: + """Extract a 1-based workflow line number for an action step when metadata is available.""" + if step_node is None: + return None + + definition = step_node.definition + line_container = getattr(definition, "lc", None) + if line_container is None: + return _infer_action_step_line_from_source(step_node) + + line = getattr(line_container, "line", None) + if isinstance(line, int) and line >= 0: + # ruamel stores line numbers as 0-based. + return line + 1 + + return _infer_action_step_line_from_source(step_node) + + +def _infer_action_step_line_from_source(step_node: GitHubActionsActionStepNode) -> int | None: + """Infer an action-step line by matching the ``uses`` value in the workflow source.""" + workflow_path = step_node.context.ref.job_context.ref.workflow_context.ref.source_filepath + if not workflow_path or not os.path.isfile(workflow_path): + return None + + uses_name = step_node.uses_name + uses_version = step_node.uses_version + if not uses_name or not uses_version: + return None + + target_uses = f"{uses_name}@{uses_version}" + step_name = step_node.definition.get("name") + step_id = step_node.definition.get("id") + step_identifier = step_name if isinstance(step_name, str) else step_id if isinstance(step_id, str) else None + + try: + with open(workflow_path, encoding="utf-8") as workflow_file: + workflow_lines = workflow_file.readlines() + except OSError: + return None + + uses_key_re = re.compile(r"^\s*(?:-\s*)?uses\s*:\s*(.*)$") + candidate_lines: list[int] = [] + for index, line in enumerate(workflow_lines): + match = uses_key_re.match(line) + if not match: + continue + uses_value = match.group(1).strip().strip("\"'") + if uses_value == target_uses: + candidate_lines.append(index + 1) + + if not candidate_lines: + return None + if len(candidate_lines) == 1 or not step_identifier: + 
return candidate_lines[0] + + for candidate_line in candidate_lines: + for lookback_index in range(max(0, candidate_line - 8 - 1), candidate_line - 1): + lookback_line = workflow_lines[lookback_index].strip() + if lookback_line in {f"name: {step_identifier}", f"id: {step_identifier}"}: + return candidate_line + + return candidate_lines[0] + + +def _extract_script_line_from_parts(parts: object) -> int | None: + """Extract the 1-based script line number from parsed shell argument parts.""" + if not isinstance(parts, list): + return None + + for part in parts: + if not isinstance(part, dict): + continue + pos = part.get("Pos") + if not isinstance(pos, dict): + continue + line = pos.get("Line") + if isinstance(line, int) and line > 0: + return line + + return None + + +def _map_script_line_to_workflow_line( + step_node: GitHubActionsRunStepNode | None, script_line: int | None +) -> int | None: + """Map a line number inside a run script to the corresponding workflow source line.""" + if step_node is None or script_line is None or script_line < 1: + return None + + workflow_path = step_node.context.ref.job_context.ref.workflow_context.ref.source_filepath + run_script = step_node.definition.get("run") + if not workflow_path or not isinstance(run_script, str) or not os.path.isfile(workflow_path): + return None + + try: + with open(workflow_path, encoding="utf-8") as workflow_file: + workflow_lines = workflow_file.readlines() + except OSError: + return None + + for block_start, block_lines in _iter_run_blocks(workflow_lines): + if _normalize_multiline_text("\n".join(block_lines)) != _normalize_multiline_text(run_script): + continue + if script_line > len(block_lines): + return None + return block_start + script_line - 1 + + return None + + +def _iter_run_blocks(workflow_lines: list[str]) -> list[tuple[int, list[str]]]: + """Collect run-step script blocks as (1-based start line, content lines).""" + run_key_re = re.compile(r"^(\s*)(?:-\s*)?run\s*:\s*(.*)$") + blocks: 
list[tuple[int, list[str]]] = [] + i = 0 + while i < len(workflow_lines): + line = workflow_lines[i] + match = run_key_re.match(line) + if not match: + i += 1 + continue + + indent = len(match.group(1)) + run_value = match.group(2).rstrip("\n") + + if run_value.strip().startswith(("|", ">")): + block_start = i + 2 + block_buffer: list[str] = [] + j = i + 1 + min_indent: int | None = None + while j < len(workflow_lines): + candidate = workflow_lines[j] + if candidate.strip(): + candidate_indent = len(candidate) - len(candidate.lstrip(" ")) + if candidate_indent <= indent: + break + if min_indent is None or candidate_indent < min_indent: + min_indent = candidate_indent + block_buffer.append(candidate.rstrip("\n")) + j += 1 + + if min_indent is None: + blocks.append((block_start, [])) + else: + dedented = [b[min_indent:] if len(b) >= min_indent else b for b in block_buffer] + blocks.append((block_start, dedented)) + i = j + continue + + inline_value = run_value.strip().strip("\"'") + blocks.append((i + 1, [inline_value])) + i += 1 + + return blocks + + +def _normalize_multiline_text(text: str) -> str: + """Normalize text for robust matching between YAML-extracted and parsed run scripts.""" + return "\n".join(line.rstrip() for line in text.strip("\n").splitlines()) + + +def _infer_run_step_line_from_source(step_node: GitHubActionsRunStepNode) -> int | None: + """Infer a run step line by matching its script against the workflow source file.""" + workflow_path = step_node.context.ref.job_context.ref.workflow_context.ref.source_filepath + if not workflow_path or not os.path.isfile(workflow_path): + return None + + run_script = step_node.definition["run"] + first_script_line = "" + for line in run_script.splitlines(): + stripped = line.strip() + if stripped: + first_script_line = stripped + break + if not first_script_line: + return None + + try: + with open(workflow_path, encoding="utf-8") as workflow_file: + workflow_lines = workflow_file.readlines() + except OSError: + 
return None + + run_key_re = re.compile(r"^\s*(?:-\s*)?run\s*:\s*(.*)$") + for index, line in enumerate(workflow_lines): + match = run_key_re.match(line) + if not match: + continue + + run_value = match.group(1).strip() + if run_value and not run_value.startswith("|") and not run_value.startswith(">"): + inline_value = run_value.strip("\"'") + if first_script_line in inline_value or inline_value in first_script_line: + return index + 1 + continue + + run_indent = len(line) - len(line.lstrip(" ")) + for nested_line in workflow_lines[index + 1 :]: + if not nested_line.strip(): + continue + nested_indent = len(nested_line) - len(nested_line.lstrip(" ")) + if nested_indent <= run_indent: + break + if first_script_line in nested_line.strip(): + return index + 1 + + return None + + +def _has_privileged_trigger_risk_combo(findings: list[PrioritizedIssue]) -> bool: + """Return whether findings contain risky patterns that amplify pull_request_target risk.""" + risky_prefixes = ( + "overbroad-permissions:", + "untrusted-fork-code:", + "remote-script-exec:", + "pr-target-untrusted-checkout:", + "potential-injection:", + "self-hosted-runner:", + ) + return any(any(finding["issue"].startswith(prefix) for prefix in risky_prefixes) for finding in findings) + + +def _literal_value(value: facts.Value | None) -> str: + """Return literal string value from a facts expression when available.""" + if isinstance(value, facts.StringLiteral): + return value.literal + return "" + + +def _add_finding(findings: list[PrioritizedIssue], issue: str, priority: int) -> None: + """Append a finding once and keep the highest priority for duplicate issues. + + Parameters + ---------- + findings : list[PrioritizedIssue] + Mutable finding list for the current workflow. + issue : str + Normalized finding identifier/message. + priority : int + Finding priority score. 
def get_workflow_issue_type(issue: str) -> str:
    """Extract a normalized workflow issue subtype from issue text.

    The subtype is the text before the first ``:``, trimmed and with
    underscores folded to hyphens. A missing or empty subtype yields the
    generic ``workflow-security-issue``.
    """
    subtype = issue.split(":", 1)[0].strip().replace("_", "-")
    if subtype:
        return subtype
    return "workflow-security-issue"
{recommendation.message}" + return finding_type, recommendation, finding_message + + +def _format_issue_details(finding_type: str, issue: str) -> str: + """Format human-readable issue details for job summaries.""" + if finding_type not in {"potential-injection", "remote-script-exec"}: + return issue + + payload = _parse_issue_payload(issue) + if not isinstance(payload, dict): + return issue + + job_name = str(payload.get("job") or "unknown") + step_name = str(payload.get("step") or "unknown") + command_text = str(payload.get("command") or "unknown") + command_text = command_text.replace("`", "'") + refs = payload.get("expanded_refs") + refs_display = "" + if isinstance(refs, list): + refs_clean = [str(ref) for ref in refs if str(ref)] + if refs_clean: + refs_display = f" Expanded refs: `{', '.join(refs_clean)}`" + return f"Job: {job_name} Step: {step_name} Command: `{command_text}`{refs_display}" + + +def _parse_issue_payload(issue: str) -> object | None: + """Parse the serialized issue payload after the finding type prefix.""" + _, _, payload = issue.partition(":") + payload = payload.strip() + if not payload: + return None + + try: + return cast(object, json.loads(payload)) + except json.JSONDecodeError: + return None + + +def build_unpinned_action_recommendation(issue: str, api_client: object) -> tuple[str, str, Recommendation] | None: + """Build normalized recommendation metadata for an unpinned third-party action finding.""" + parsed_issue = parse_unpinned_action_issue(issue) + if not parsed_issue: + return None + + action_name, action_ref = parsed_issue + resolved_sha = resolve_action_ref_to_sha(api_client, action_name, action_ref) + resolved_tag = resolve_action_ref_to_tag(action_name, resolved_sha, action_ref) + recommendation = recommend_for_unpinned_action(action_name, resolved_sha, resolved_tag) + return action_name, action_ref, recommendation + + +def extract_workflow_issue_line(issue: str) -> int | None: + """Extract a 1-based workflow source line 
number from an issue payload. + + Parameters + ---------- + issue : str + Serialized workflow issue string produced by the detector. + + Returns + ------- + int | None + The 1-based line number when available; otherwise ``None``. + """ + step_line_match = re.search(r"\[step-line=(\d+)\]", issue) + if step_line_match: + step_line = int(step_line_match.group(1)) + if step_line > 0: + return step_line + + if not issue.startswith("potential-injection:") and not issue.startswith("remote-script-exec:"): + return None + + _, _, payload = issue.partition(":") + if not payload.strip(): + return None + + parsed_payload = _parse_issue_payload(issue) + if isinstance(parsed_payload, dict): + payload_step_line = parsed_payload.get("step_line") + if isinstance(payload_step_line, int) and payload_step_line > 0: + return payload_step_line + + parts: object | None + if isinstance(parsed_payload, list): + parts = parsed_payload + elif isinstance(parsed_payload, dict): + parts = parsed_payload.get("parts") + else: + parts = None + + if isinstance(parts, list): + for part in parts: + if not isinstance(part, dict): + continue + pos = part.get("Pos") + if not isinstance(pos, dict): + continue + line = pos.get("Line") + if isinstance(line, int) and line > 0: + return line + + match = re.search(r"""["']Line["']:\s*(\d+)""", payload) + if not match: + return None + line = int(match.group(1)) + return line if line > 0 else None diff --git a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py new file mode 100644 index 000000000..332add599 --- /dev/null +++ b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py @@ -0,0 +1,220 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Recommendation helpers for GitHub Actions security findings. 
# Matches an optional "unpinned-third-party-action:" prefix and an optional
# "[step-line=N]" marker, then captures "owner/repo@ref". The named groups
# are required by parse_unpinned_action_issue() (``action``, ``version``).
# NOTE: the group names had been stripped (e.g. ``(?P\d+)``), which is an
# invalid pattern and raises re.error at import time; restored here.
UNPINNED_ACTION_RE = re.compile(
    r"^(?:unpinned-third-party-action:\s*)?"
    r"(?:\[step-line=(?P<step_line>\d+)\]\s*)?"
    r"(?P<action>[^@\s]+)@(?P<version>[^\s]+)$"
)
+ """ + if resolved_sha and resolved_tag: + recommended_ref = f"{action_name}@{resolved_sha} # {resolved_tag}" + elif resolved_sha: + recommended_ref = f"{action_name}@{resolved_sha}" + else: + recommended_ref = "Unable to resolve automatically" + return Recommendation( + message="Pin this third-party action to a 40-character commit SHA.", + recommended_ref=recommended_ref, + ) + + +def parse_unpinned_action_issue(issue: str) -> tuple[str, str] | None: + """Parse an unpinned third-party action reference from issue text. + + Parameters + ---------- + issue : str + Raw issue text emitted by workflow security analysis. + + Returns + ------- + tuple[str, str] | None + Parsed ``(action_name, action_version)`` when the issue matches + ``owner/repo@ref`` format for a third-party action. ``None`` otherwise. + """ + match = UNPINNED_ACTION_RE.fullmatch(issue.strip()) + if not match: + return None + action = match.group("action") + version = match.group("version") + if action.startswith("./"): + return None + if "/" not in action: + return None + return action, version + + +def resolve_action_ref_to_sha(api_client: object, action_name: str, action_version: str) -> str | None: + """Resolve an action reference to an immutable commit SHA. + + Parameters + ---------- + api_client : object + API client instance used for GitHub API calls. + action_name : str + GitHub Action identifier in the form ``owner/repo``. + action_version : str + Action ref currently used by the workflow. + + Returns + ------- + str | None + The resolved commit SHA if resolution succeeds; otherwise ``None``. + """ + if not isinstance(api_client, GhAPIClient): + return None + if not action_name or not action_version: + return None + if is_commit_hash(action_version): + # Normalize short SHAs by resolving them through the API. 
+ return ( + action_version + if len(action_version) == 40 + else api_client.get_commit_sha_from_ref(action_name, action_version) + ) + return api_client.get_commit_sha_from_ref(action_name, action_version) + + +def resolve_action_ref_to_tag(action_name: str, resolved_sha: str | None, action_version: str = "") -> str | None: + """Resolve a commit SHA to a corresponding Git tag for an action repository. + + Parameters + ---------- + action_name : str + GitHub Action identifier in the form ``owner/repo``. + resolved_sha : str | None, optional + Resolved commit SHA for the action ref. + action_version : str, optional + Original action version/ref. If this exact ref is a tag on the same + commit, it is preferred. + + Returns + ------- + str | None + The corresponding tag name if found; otherwise ``None``. + """ + if not action_name or not resolved_sha: + return None + tags = get_tags_via_git_remote(f"https://github.com/{action_name}") + if not tags: + return None + + matching_tags = [tag for tag, tag_sha in tags.items() if tag_sha == resolved_sha] + if not matching_tags: + return None + if action_version and action_version in matching_tags: + return action_version + try: + return find_highest_git_tag(set(matching_tags)) + except GitTagError: + return matching_tags[0] + + +def recommend_for_workflow_issue(issue: str) -> Recommendation: + """Map a workflow issue string to a remediation recommendation. + + Parameters + ---------- + issue : str + Issue string emitted by workflow security analysis. + + Returns + ------- + Recommendation + Recommendation message corresponding to the detected issue category. 
+ """ + if issue.startswith("sensitive-trigger:"): + return Recommendation("Add strict event gating (actor allowlist, branch filters, and conditional checks).") + if issue.startswith("privileged-trigger:"): + return Recommendation("Avoid pull_request_target for untrusted code paths; use pull_request where possible.") + if issue.startswith("missing-permissions:"): + return Recommendation("Define explicit least-privilege permissions at workflow or job scope.") + if issue.startswith("overbroad-permissions:"): + return Recommendation("Reduce permissions to read-only scopes unless write access is strictly required.") + if issue.startswith("untrusted-fork-code:"): + return Recommendation("Do not checkout PR head refs in privileged contexts; validate source and actor first.") + if issue.startswith("persist-credentials:"): + return Recommendation("Set persist-credentials: false for checkout unless later git pushes are required.") + if issue.startswith("remote-script-exec:"): + return Recommendation("Avoid curl|bash patterns; pin script digests or vendor reviewed scripts in-repo.") + if issue.startswith("pr-target-untrusted-checkout:"): + return Recommendation("Never combine pull_request_target with checkout of PR-controlled refs.") + if issue.startswith("potential-injection:"): + return Recommendation("Treat GitHub context data as untrusted input; quote/sanitize before shell execution.") + return Recommendation("Review this workflow finding and apply least-privilege hardening controls.") + + +def recommend_for_osv_vulnerability(action_name: str, action_version: str) -> Recommendation: + """Create a recommendation for a vulnerable GitHub Action version. + + Parameters + ---------- + action_name : str + GitHub Action identifier in the form ``owner/repo``. + action_version : str + Action ref currently used by the workflow. + + Returns + ------- + Recommendation + Recommendation encouraging upgrade to a non-vulnerable release and + subsequent pinning to a commit SHA. 
+ """ + return Recommendation( + message=( + f"Upgrade `{action_name}` from `{action_version}` to a non-vulnerable release, " + "then pin the selected version to a commit SHA." + ), + recommended_ref=None, + ) diff --git a/src/macaron/parsers/bashparser.py b/src/macaron/parsers/bashparser.py index ac2ceed68..2b8de426a 100644 --- a/src/macaron/parsers/bashparser.py +++ b/src/macaron/parsers/bashparser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module is a Python wrapper for the compiled bashparser binary. @@ -162,6 +162,74 @@ def parse_raw(bash_content: str, macaron_path: str | None = None) -> File: raise ParseError("Error while loading the parsed bash script.") from error +def parse_raw_with_gha_mapping(bash_content: str, macaron_path: str | None = None) -> tuple[File, dict[str, str]]: + """Parse bash content and return raw AST plus GitHub expression mapping. + + Parameters + ---------- + bash_content : str + Bash script content. + macaron_path : str | None + Macaron's root path (optional). + + Returns + ------- + tuple[bashparser_model.File, dict[str, str]] + A tuple of: + - The parsed raw bash AST. + - Mapping from parser placeholder variable names to original GitHub expression bodies. + + Raises + ------ + ParseError + When parsing fails with errors or output cannot be decoded. 
+ """ + if not macaron_path: + macaron_path = global_config.macaron_path + cmd = [ + os.path.join(macaron_path, "bin", "bashparser"), + "-input", + bash_content, + "-raw-gha-map", + ] + + try: + result = subprocess.run( # nosec B603 + cmd, + capture_output=True, + check=True, + cwd=macaron_path, + timeout=defaults.getint("bashparser", "timeout", fallback=30), + ) + except ( + subprocess.CalledProcessError, + subprocess.TimeoutExpired, + FileNotFoundError, + ) as error: + raise ParseError("Error while parsing bash script.") from error + + try: + if result.returncode != 0: + raise ParseError(f"Bash script parser failed: {result.stderr.decode('utf-8')}") + + payload = cast(dict[str, object], json.loads(result.stdout.decode("utf-8"))) + ast_data = payload.get("ast") + gha_map = payload.get("gha_expr_map") + if not isinstance(ast_data, dict): + raise ParseError("Error while loading the parsed bash script.") + if not isinstance(gha_map, dict): + raise ParseError("Error while loading the parsed bash script.") + gha_map_clean: dict[str, str] = {} + for key, value in gha_map.items(): + if isinstance(key, str) and isinstance(value, str): + gha_map_clean[key] = value + + return cast(File, ast_data), gha_map_clean + + except json.JSONDecodeError as error: + raise ParseError("Error while loading the parsed bash script.") from error + + def parse_expr(bash_expr_content: str, macaron_path: str | None = None) -> list[Word]: """Parse a bash script's content. diff --git a/src/macaron/resources/policies/sql/check-github-actions.sql b/src/macaron/resources/policies/sql/check-github-actions.sql new file mode 100644 index 000000000..09ba2555b --- /dev/null +++ b/src/macaron/resources/policies/sql/check-github-actions.sql @@ -0,0 +1,26 @@ +-- Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +-- Failed check facts for check-github-actions policy template. +SELECT + gha_check.finding_group, + gha_check.finding_priority, + gha_check.finding_type, + gha_check.action_name, + gha_check.action_ref, + gha_check.vuln_urls, + gha_check.finding_message, + gha_check.recommended_ref, + gha_check.sha_pinned, + gha_check.caller_workflow AS vulnerable_workflow, + analysis.analysis_time +FROM github_actions_vulnerabilities_check AS gha_check +JOIN check_facts + ON check_facts.id = gha_check.id +JOIN check_result + ON check_result.id = check_facts.check_result_id +JOIN component + ON check_result.component_id = component.id +JOIN analysis + ON analysis.id = component.analysis_id +WHERE check_result.passed = 0; diff --git a/src/macaron/resources/policies/sql/malware-detection-dependencies.sql b/src/macaron/resources/policies/sql/malware-detection-dependencies.sql new file mode 100644 index 000000000..028b2445c --- /dev/null +++ b/src/macaron/resources/policies/sql/malware-detection-dependencies.sql @@ -0,0 +1,19 @@ +-- Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +-- Failed check facts for malware-detection-dependencies policy template. 
+SELECT + analysis.analysis_time, + component.id component_id, + component.purl component_purl, + detect_malicious_metadata_check.* +FROM detect_malicious_metadata_check +JOIN check_facts + ON check_facts.id = detect_malicious_metadata_check.id +JOIN check_result + ON check_result.id = check_facts.check_result_id +JOIN component + ON check_result.component_id = component.id +JOIN analysis + ON analysis.id = component.analysis_id + AND check_result.passed = 0; diff --git a/src/macaron/resources/policies/sql/malware-detection.sql b/src/macaron/resources/policies/sql/malware-detection.sql new file mode 100644 index 000000000..a6597e89a --- /dev/null +++ b/src/macaron/resources/policies/sql/malware-detection.sql @@ -0,0 +1,19 @@ +-- Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +-- Failed check facts for malware-detection policy template. +SELECT + analysis.analysis_time, + component.id component_id, + component.purl component_purl, + detect_malicious_metadata_check.* +FROM detect_malicious_metadata_check +JOIN check_facts + ON check_facts.id = detect_malicious_metadata_check.id +JOIN check_result + ON check_result.id = check_facts.check_result_id +JOIN component + ON check_result.component_id = component.id +JOIN analysis + ON analysis.id = component.analysis_id + AND check_result.passed = 0; diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py index 3b350091c..4fb2e92ec 100644 --- a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py +++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py @@ -5,8 +5,10 @@ import logging import os +import re +from enum import Enum -from sqlalchemy import ForeignKey, String +from sqlalchemy import Boolean, ForeignKey, Integer, String from sqlalchemy.orm 
import Mapped, mapped_column from macaron.code_analyzer.dataflow_analysis.analysis import get_containing_github_job @@ -15,6 +17,15 @@ GitHubActionsActionStepNode, GitHubActionsReusableWorkflowCallNode, ) +from macaron.code_analyzer.gha_security_analysis.detect_injection import ( + build_unpinned_action_recommendation, + build_workflow_issue_recommendation, + detect_github_actions_security_issues, + extract_workflow_issue_line, +) +from macaron.code_analyzer.gha_security_analysis.recommendation import ( + recommend_for_osv_vulnerability, +) from macaron.database.db_custom_types import DBJsonList from macaron.database.table_definitions import CheckFacts from macaron.errors import APIAccessError @@ -29,6 +40,24 @@ logger: logging.Logger = logging.getLogger(__name__) +class GitHubActionsFindingType(str, Enum): + """Enumeration of finding categories for GitHub Actions vulnerability check facts.""" + + # Note: finding_type is the subtype within a top-level finding_group. + # It intentionally carries more granular detail than finding_group. + KNOWN_VULNERABILITY = "known-vulnerability" + UNPINNED_THIRD_PARTY_ACTION = "unpinned-third-party-action" + + +class GitHubActionsFindingGroup(str, Enum): + """Top-level finding groups for GitHub Actions vulnerability check facts.""" + + # Note: finding_group is the high-level bucket used for reporting sections. + # finding_type refines the exact issue inside one of these groups. + THIRD_PARTY_ACTION_RISK = "third_party_action_risk" + WORKFLOW_SECURITY_ISSUE = "workflow_security_issue" + + class GitHubActionsVulnsFacts(CheckFacts): """The ORM mapping for justifications in the GitHub Actions vulnerabilities check.""" @@ -37,23 +66,57 @@ class GitHubActionsVulnsFacts(CheckFacts): #: The primary key. id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003 - #: The list of vulnerability URLs. 
- vulnerability_urls: Mapped[list[str]] = mapped_column( - DBJsonList, nullable=False, info={"justification": JustificationType.TEXT} + #: The GitHub Action workflow that may have various security issues. + caller_workflow: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.HREF}) + + #: The finding category (subtype). + #: - ``known-vulnerability`` for known vulnerability findings. + #: - ``unpinned-third-party-action`` for third-party actions not pinned to a commit SHA. + #: - workflow issue subtype names (for example ``overbroad-permissions``). + #: This complements ``finding_group`` instead of replacing it. + finding_type: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT}) + + #: The top-level finding group. + #: - ``third_party_action_risk`` for action dependency risks. + #: - ``workflow_security_issue`` for workflow implementation security issues. + #: Use this to group rows in summaries; use ``finding_type`` for specific issue filtering. + finding_group: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT}) + + #: Human-readable finding details. + finding_message: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} ) - #: The GitHub Action Identifier. - github_actions_id: Mapped[str] = mapped_column( - String, nullable=False, info={"justification": JustificationType.TEXT} + #: Priority score for sorting and triaging findings in summary outputs. + finding_priority: Mapped[int] = mapped_column( + Integer, nullable=False, info={"justification": JustificationType.TEXT} ) - #: The GitHub Action version. - github_actions_version: Mapped[str] = mapped_column( - String, nullable=False, info={"justification": JustificationType.TEXT} + #: Recommended immutable action reference, if applicable. 
+ recommended_ref: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} ) - #: The GitHub Action workflow that calls the vulnerable GitHub Action. - caller_workflow: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.HREF}) + #: Third-party action identifier (for ``third_party_action_risk`` findings only). + #: For workflow-security findings, this field is ``None``. + action_name: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} + ) + + #: Third-party action version/ref (for ``third_party_action_risk`` findings only). + #: For workflow-security findings, this field is ``None``. + action_ref: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} + ) + + #: Whether the action reference is pinned to a full commit SHA. + sha_pinned: Mapped[bool | None] = mapped_column(Boolean, nullable=True) + + #: Related vulnerability URLs (for ``third_party_action_risk`` findings only). + #: For workflow-security findings, this field is an empty list. + vuln_urls: Mapped[list[str]] = mapped_column( + DBJsonList, nullable=False, info={"justification": JustificationType.TEXT} + ) __mapper_args__ = { "polymorphic_identity": "_github_actions_vulnerabilities_check", @@ -89,10 +152,71 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: The result of the check. 
""" ci_services = ctx.dynamic_data["ci_services"] - + result_tables: list[CheckFacts] = [] external_workflows: dict[str, list] = {} + for ci_info in ci_services: callgraph = ci_info["callgraph"] + gh_api_client = ci_info["service"].api_client if hasattr(ci_info["service"], "api_client") else None + if workflow_findings := detect_github_actions_security_issues(callgraph): + for finding in workflow_findings: + caller_workflow_link = "" + if gh_api_client: + caller_workflow_link = gh_api_client.get_file_link( + ctx.component.repository.full_name, + ctx.component.repository.commit_sha, + file_path=( + gh_api_client.get_relative_path_of_workflow(os.path.basename(finding["workflow_name"])) + if finding["workflow_name"] + else "" + ), + ) + for prioritized_issue in finding["issues"]: + issue = prioritized_issue["issue"] + issue_priority = int(prioritized_issue["priority"]) + issue_line = extract_workflow_issue_line(issue) + finding_workflow_link = caller_workflow_link + if issue_line and finding_workflow_link: + finding_workflow_link = f"{finding_workflow_link}#L{issue_line}" + if unpinned_action_info := build_unpinned_action_recommendation(issue, gh_api_client): + action_name, action_version, recommendation = unpinned_action_info + finding_type = GitHubActionsFindingType.UNPINNED_THIRD_PARTY_ACTION.value + result_tables.append( + GitHubActionsVulnsFacts( + vuln_urls=[], + finding_type=finding_type, + finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, + action_name=action_name, + action_ref=action_version, + caller_workflow=finding_workflow_link, + sha_pinned=False, + finding_message=( + f"Summary: {_short_description_for_finding_type(finding_type)} " + f"Recommendation: {recommendation.message}" + ), + finding_priority=issue_priority, + recommended_ref=recommendation.recommended_ref, + confidence=Confidence.HIGH, + ) + ) + continue + + finding_type, recommendation, finding_message = build_workflow_issue_recommendation(issue) + result_tables.append( + 
GitHubActionsVulnsFacts( + vuln_urls=[], + finding_type=finding_type, + finding_group=GitHubActionsFindingGroup.WORKFLOW_SECURITY_ISSUE.value, + action_name=None, + action_ref=None, + caller_workflow=finding_workflow_link, + sha_pinned=None, + finding_message=finding_message, + finding_priority=issue_priority, + recommended_ref=recommendation.recommended_ref, + confidence=Confidence.HIGH, + ) + ) for root in callgraph.root_nodes: for callee in traverse_bfs(root): if isinstance(callee, (GitHubActionsReusableWorkflowCallNode, GitHubActionsActionStepNode)): @@ -114,80 +238,88 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: continue caller_path = job.context.ref.workflow_context.ref.source_filepath + caller_path_link = caller_path + if gh_api_client: + caller_path_link = gh_api_client.get_file_link( + ctx.component.repository.full_name, + ctx.component.repository.commit_sha, + file_path=( + gh_api_client.get_relative_path_of_workflow(os.path.basename(caller_path)) + if caller_path + else "" + ), + ) ext_workflow: list = external_workflows.get(workflow_name, []) ext_workflow.append( { "version": workflow_version, - "caller_path": ci_info["service"].api_client.get_file_link( - ctx.component.repository.full_name, - ctx.component.repository.commit_sha, - file_path=( - ci_info["service"].api_client.get_relative_path_of_workflow( - os.path.basename(caller_path) - ) - if caller_path - else "" - ), - ), + "caller_path": caller_path_link, } ) external_workflows[workflow_name] = ext_workflow - # If no external GitHub Actions are found, return passed result. - if not external_workflows: - return CheckResultData( - result_tables=[], - result_type=CheckResultType.PASSED, - ) - - # We first send a batch query to see which GitHub Actions are potentially vulnerable. - # OSV's querybatch returns minimal results but this allows us to only make subsequent - # queries to get vulnerability details when needed. 
- batch_query = [ - {"package": {"name": k, "ecosystem": "GitHub Actions"}} for k, _ in external_workflows.items() if k - ] - batch_vulns = [] - try: - batch_vulns = OSVDevService.get_vulnerabilities_package_name_batch(batch_query) - except APIAccessError as error: - logger.debug(error) + # If no external GitHub Actions are found, no need to check for known vulnerabilities. + if external_workflows: - result_tables: list[CheckFacts] = [] - for pkg in batch_vulns: - vuln_res = pkg["package"] - vulns: list = [] - workflow_name = vuln_res["name"] + # We first send a batch query to see which GitHub Actions are potentially vulnerable. + # OSV's querybatch returns minimal results but this allows us to only make subsequent + # queries to get vulnerability details when needed. + batch_query = [ + {"package": {"name": k, "ecosystem": "GitHub Actions"}} for k, _ in external_workflows.items() if k + ] + batch_vulns = [] try: - vulns = OSVDevService.get_vulnerabilities_package_name(ecosystem="GitHub Actions", name=workflow_name) + batch_vulns = OSVDevService.get_vulnerabilities_package_name_batch(batch_query) except APIAccessError as error: logger.debug(error) - continue - for workflow_inv in external_workflows[workflow_name]: - vuln_mapping = [] - for vuln in vulns: - if v_id := json_extract(vuln, ["id"], str): - try: - if OSVDevService.is_version_affected( - vuln, - workflow_name, - workflow_inv["version"], - "GitHub Actions", - source_repo=f"https://github.com/{workflow_name}", - ): - vuln_mapping.append(f"https://osv.dev/vulnerability/{v_id}") - except APIAccessError as error: - logger.debug(error) - if vuln_mapping: - result_tables.append( - GitHubActionsVulnsFacts( - vulnerability_urls=vuln_mapping, - github_actions_id=workflow_name, - github_actions_version=workflow_inv["version"], - caller_workflow=workflow_inv["caller_path"], - confidence=Confidence.HIGH, - ) + + for vuln_res in batch_vulns: + vulns: list = [] + workflow_name = vuln_res["package"]["name"] + try: + 
vulns = OSVDevService.get_vulnerabilities_package_name( + ecosystem="GitHub Actions", name=workflow_name ) + except APIAccessError as error: + logger.debug(error) + continue + for workflow_inv in external_workflows[workflow_name]: + vuln_mapping = [] + for vuln in vulns: + if v_id := json_extract(vuln, ["id"], str): + try: + if OSVDevService.is_version_affected( + vuln, + workflow_name, + workflow_inv["version"], + "GitHub Actions", + source_repo=f"https://github.com/{workflow_name}", + ): + vuln_mapping.append(f"https://osv.dev/vulnerability/{v_id}") + except APIAccessError as error: + logger.debug(error) + if vuln_mapping: + recommendation = recommend_for_osv_vulnerability(workflow_name, workflow_inv["version"]) + finding_type = GitHubActionsFindingType.KNOWN_VULNERABILITY.value + result_tables.append( + GitHubActionsVulnsFacts( + vuln_urls=vuln_mapping, + finding_type=finding_type, + finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, + action_name=workflow_name, + action_ref=workflow_inv["version"], + caller_workflow=workflow_inv["caller_path"], + sha_pinned=bool(re.fullmatch(r"[0-9a-f]{40}", workflow_inv["version"])), + finding_message=( + f"Summary: {_short_description_for_finding_type(finding_type)} " + f"Recommendation: {recommendation.message}" + ), + finding_priority=100, + recommended_ref=recommendation.recommended_ref, + confidence=Confidence.HIGH, + ) + ) if result_tables: return CheckResultData( @@ -202,3 +334,23 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: registry.register(GitHubActionsVulnsCheck()) + + +def _short_description_for_finding_type(finding_type: str) -> str: + """Return a concise, human-readable summary for a finding subtype.""" + finding_summaries = { + GitHubActionsFindingType.KNOWN_VULNERABILITY.value: "Action version is known to be vulnerable.", + GitHubActionsFindingType.UNPINNED_THIRD_PARTY_ACTION.value: "Third-party action is not pinned to an immutable SHA.", + "sensitive-trigger": "Workflow 
uses a sensitive trigger and needs strict gating.", + "privileged-trigger": "Privileged trigger can expose elevated token scope to untrusted input.", + "missing-permissions": "Workflow omits explicit permissions and may inherit broad defaults.", + "overbroad-permissions": "Workflow requests permissions broader than required.", + "untrusted-fork-code": "Workflow can execute code controlled by an untrusted fork.", + "persist-credentials": "Persisted checkout credentials can leak token access to later steps.", + "remote-script-exec": "Workflow downloads and executes remote scripts inline.", + "pr-target-untrusted-checkout": "pull_request_target is combined with checkout of PR-controlled refs.", + "potential-injection": "Untrusted GitHub context data may flow into shell execution.", + "self-hosted-runner": "Job uses self-hosted runners, increasing blast radius for untrusted code.", + "workflow-security-issue": "Workflow includes a security issue that requires hardening.", + } + return finding_summaries.get(finding_type, "Workflow security finding detected.") diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py index 4700e5e85..d222ee011 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py @@ -581,14 +581,32 @@ def build_call_graph(self, repo_path: str, macaron_path: str = "") -> NodeForest Returns ------- - CallGraph: CallGraph - The call graph built for GitHub Actions. + NodeForest + The root nodes of call graphs built for GitHub Actions workflows. """ if not macaron_path: macaron_path = global_config.macaron_path # Parse GitHub Actions workflows. 
files = self.get_workflows(repo_path) + return self.build_call_graph_for_files(files, repo_path) + + def build_call_graph_for_files(self, files: list[str], repo_path: str) -> NodeForest: + """Build call graphs for a given set of GitHub Actions workflow files. + + Parameters + ---------- + files : list[str] + The list of workflow file paths to analyze. + repo_path : str + The repository path used as the base context for workflow analysis. + + Returns + ------- + NodeForest + A forest containing one root node per successfully parsed workflow. + Workflows that raise ``ParseError`` are skipped. + """ nodes: list[Node] = [] for workflow_path in files: try: diff --git a/src/macaron/slsa_analyzer/git_service/api_client.py b/src/macaron/slsa_analyzer/git_service/api_client.py index 9921c2dc9..f49beda7c 100644 --- a/src/macaron/slsa_analyzer/git_service/api_client.py +++ b/src/macaron/slsa_analyzer/git_service/api_client.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """The module provides API clients for VCS services, such as GitHub.""" @@ -11,6 +11,7 @@ from typing import NamedTuple from macaron.config.defaults import defaults +from macaron.json_tools import json_extract from macaron.slsa_analyzer.asset import AssetLocator from macaron.util import ( construct_query, @@ -388,6 +389,28 @@ def get_commit_data_from_hash(self, full_name: str, commit_hash: str) -> dict: return response_data + def get_commit_sha_from_ref(self, full_name: str, ref: str) -> str | None: + """Resolve a Git reference (tag/branch/sha) to a 40-character commit SHA. + + Parameters + ---------- + full_name : str + The full name of the repository in the format ``owner/name``. + ref : str + The git reference to resolve (e.g. ``v5``, ``main``, ``v1.2.3``). 
+ + Returns + ------- + str | None + The resolved commit SHA, or ``None`` if resolution fails. + """ + if not full_name or not ref: + return None + + response_data = self.get_commit_data_from_hash(full_name, ref) + sha = json_extract(response_data, ["sha"], str) + return sha if sha and len(sha) == 40 else None + def search(self, target: str, query: str) -> dict: """Perform a search using GitHub REST API. diff --git a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr new file mode 100644 index 000000000..984d2d208 --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr @@ -0,0 +1,30 @@ +# serializer version: 1 +# name: test_detect_github_actions_security_issues[injection_pattern_1.yaml] + list([ + dict({ + 'issues': list([ + dict({ + 'issue': 'potential-injection: {"step_line": 75, "script_line": 7, "job": "auto_format", "step": "Commit and push formatting changes", "command": "git push origin HEAD:${{ github.event.pull_request.head.ref }}", "expanded_refs": ["github.event.pull_request.head.ref"], "parts": [{"End": {"Col": 4, "Line": 7, "Offset": 171}, "Pos": {"Col": 1, "Line": 7, "Offset": 168}, "Type": "Lit", "Value": "git", "ValueEnd": {"Col": 4, "Line": 7, "Offset": 171}, "ValuePos": {"Col": 1, "Line": 7, "Offset": 168}}]}', + 'priority': 100, + }), + dict({ + 'issue': 'privileged-trigger: Workflow uses `pull_request_target` with additional risky patterns; treat this workflow as high risk and harden immediately.', + 'priority': 80, + }), + dict({ + 'issue': 'unpinned-third-party-action: [step-line=28] actions/checkout@v5', + 'priority': 20, + }), + dict({ + 'issue': 'unpinned-third-party-action: [step-line=37] poseidon/wait-for-status-checks@v0.6.0', + 'priority': 20, + }), + dict({ + 'issue': 'unpinned-third-party-action: [step-line=48] dtolnay/rust-toolchain@stable', + 'priority': 
20, + }), + ]), + 'workflow_name': 'tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml', + }), + ]) +# --- diff --git a/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml b/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml new file mode 100644 index 000000000..9ef276717 --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml @@ -0,0 +1,75 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +name: PR Auto-format + +# This workflow triggers when a PR is opened/updated +on: + pull_request_target: + types: [opened, synchronize, reopened] + branches: + - main + - release + +jobs: + auto_format: + if: | + !contains(github.event.pull_request.labels.*.name, 'skip:ci') && + !contains(github.event.pull_request.head.sha, '[skip ci]') + permissions: + contents: write + pull-requests: write + checks: read + runs-on: ubuntu-latest + timeout-minutes: 60 + + steps: + - name: Checkout PR branch + uses: actions/checkout@v5 + with: + ref: ${{ github.event.pull_request.head.ref }} + repository: ${{ github.event.pull_request.head.repo.full_name }} + token: ${{ secrets.GITHUB_TOKEN }} + fetch-depth: 0 + + # Wait for all PR check runs to complete + - name: Wait for all checks to complete + uses: poseidon/wait-for-status-checks@v0.6.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + delay: 60 + interval: 30 + timeout: 7200 + + - name: CI completed successfully + run: echo "CI workflow completed successfully - proceeding with auto-format" + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + + - name: Run cargo fmt + run: | + echo "Running cargo fmt --all on PR #${{ github.event.pull_request.number }}" + cargo fmt --all + + - name: Check 
for formatting changes + id: check_changes + run: | + if [ -n "$(git status --porcelain)" ]; then + echo "has_changes=true" >> $GITHUB_OUTPUT + else + echo "has_changes=false" >> $GITHUB_OUTPUT + fi + + - name: Commit and push formatting changes + if: steps.check_changes.outputs.has_changes == 'true' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + git add -u + git commit -m "Auto-format code [skip ci]" + + git push origin HEAD:${{ github.event.pull_request.head.ref }} diff --git a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py new file mode 100644 index 000000000..cf4990a16 --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py @@ -0,0 +1,156 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""Tests for GitHub Actions security analysis detection heuristics.""" + +import os +from pathlib import Path + +import pytest + +from macaron.code_analyzer.gha_security_analysis.detect_injection import ( + PrioritizedIssue, + WorkflowFinding, + _add_finding, + build_workflow_issue_recommendation, + detect_github_actions_security_issues, + extract_workflow_issue_line, +) +from macaron.slsa_analyzer.ci_service.github_actions.github_actions_ci import GitHubActions + +RESOURCES_DIR = Path(__file__).parent.joinpath("resources") + + +@pytest.mark.parametrize( + "workflow_path", + [ + "injection_pattern_1.yaml", + ], +) +def test_detect_github_actions_security_issues( + snapshot: list[WorkflowFinding], workflow_path: str, github_actions_service: GitHubActions +) -> None: + """Test GH Actions workflows injection patterns.""" + callgraph = github_actions_service.build_call_graph_for_files( + [os.path.join(RESOURCES_DIR, "workflow_files", workflow_path)], + repo_path=os.path.join(RESOURCES_DIR, "workflow_files"), + ) + assert detect_github_actions_security_issues(callgraph) == snapshot + + +def test_extract_workflow_issue_line_from_potential_injection() -> None: + """Extract the source line from a potential-injection issue payload.""" + issue = ( + "potential-injection: " + "[{'Type': 'Lit', 'Pos': {'Offset': 269, 'Line': 6, 'Col': 48}, 'Value': 'origin/'}, " + "{'Type': 'ParamExp', 'Pos': {'Offset': 276, 'Line': 6, 'Col': 55}}]" + ) + + assert extract_workflow_issue_line(issue) == 6 + + +def test_extract_workflow_issue_line_prefers_step_line_marker() -> None: + """Extract the workflow line from an explicit step-line marker.""" + issue = ( + "potential-injection: " + "[step-line=14] " + "[{'Type': 'Lit', 'Pos': {'Offset': 269, 'Line': 6, 'Col': 48}, 'Value': 'origin/'}]" + ) + + assert extract_workflow_issue_line(issue) == 14 + + +def test_extract_workflow_issue_line_from_structured_payload() -> None: + """Extract workflow line from structured potential-injection 
payload.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", "parts": []}' + ) + + assert extract_workflow_issue_line(issue) == 62 + + +def test_build_workflow_issue_recommendation_formats_potential_injection_details() -> None: + """Format concise user-facing details for potential-injection findings.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", "parts": []}' + ) + + finding_type, _, finding_message = build_workflow_issue_recommendation(issue) + + assert finding_type == "potential-injection" + assert "Unsafe expansion of attacker-controllable GitHub context can enable command injection." in finding_message + assert "Details: Job: retag Step: Retag Command: `git push origin/${github.head_ref}`" in finding_message + + +def test_build_workflow_issue_recommendation_includes_expanded_refs() -> None: + """Render expanded GitHub refs in potential-injection details when present.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", "expanded_refs": ["github.head_ref"]}' + ) + + _, _, finding_message = build_workflow_issue_recommendation(issue) + + assert "Expanded refs: `github.head_ref`" in finding_message + + +def test_build_workflow_issue_recommendation_includes_refs_from_compound_expression() -> None: + """Render extracted github refs when original expression contains operators.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", ' + '"expanded_refs": ["github.head_ref", "github.ref_name"]}' + ) + + _, _, finding_message = build_workflow_issue_recommendation(issue) + + assert "Expanded refs: `github.head_ref, 
github.ref_name`" in finding_message + + +def test_build_workflow_issue_recommendation_formats_remote_script_exec_details() -> None: + """Format concise user-facing details for remote-script-exec findings.""" + issue = ( + "remote-script-exec: " + '{"step_line": 24, "script_line": 3, "job": "build", "step": "Setup", ' + '"command": "curl -fsSL https://x | bash"}' + ) + + finding_type, _, finding_message = build_workflow_issue_recommendation(issue) + + assert finding_type == "remote-script-exec" + assert "Workflow downloads and executes remote scripts inline." in finding_message + assert "Details: Job: build Step: Setup Command: `curl -fsSL https://x | bash`" in finding_message + + +def test_extract_workflow_issue_line_from_remote_script_exec_payload() -> None: + """Extract workflow line from structured remote-script-exec payload.""" + issue = ( + "remote-script-exec: " + '{"step_line": 24, "script_line": 3, "job": "build", "step": "Setup", ' + '"command": "curl -fsSL https://x | bash"}' + ) + + assert extract_workflow_issue_line(issue) == 24 + + +def test_extract_workflow_issue_line_from_unpinned_action_marker() -> None: + """Extract workflow line from unpinned action issue marker.""" + issue = "unpinned-third-party-action: [step-line=62] actions/checkout@v4.2.2" + + assert extract_workflow_issue_line(issue) == 62 + + +def test_add_finding_deduplicates_and_preserves_highest_priority() -> None: + """Keep one finding entry per issue and retain the highest priority.""" + findings: list[PrioritizedIssue] = [] + _add_finding(findings, "remote-script-exec: {}", 80) + _add_finding(findings, "remote-script-exec: {}", 60) + _add_finding(findings, "remote-script-exec: {}", 100) + + assert findings == [{"issue": "remote-script-exec: {}", "priority": 100}] diff --git a/tests/code_analyzer/gha_security_analysis/test_recommendation.py b/tests/code_analyzer/gha_security_analysis/test_recommendation.py new file mode 100644 index 000000000..13a5217ed --- /dev/null +++ 
b/tests/code_analyzer/gha_security_analysis/test_recommendation.py @@ -0,0 +1,69 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Tests for GitHub Actions security recommendation helpers.""" + +import pytest + +from macaron.code_analyzer.gha_security_analysis.recommendation import ( + parse_unpinned_action_issue, + recommend_for_unpinned_action, + resolve_action_ref_to_tag, +) + + +def test_recommend_for_unpinned_action_with_tag_hint() -> None: + """Return pinned action recommendation with tag hint when SHA and tag are resolved.""" + recommendation = recommend_for_unpinned_action( + "actions/checkout", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "v4.2.2", + ) + + assert recommendation.recommended_ref == "actions/checkout@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa # v4.2.2" + + +def test_recommend_for_unpinned_action_when_sha_not_resolved() -> None: + """Return fallback recommendation text when action SHA cannot be resolved.""" + recommendation = recommend_for_unpinned_action("actions/checkout") + + assert recommendation.recommended_ref == "Unable to resolve automatically" + assert recommendation.message == "Pin this third-party action to a 40-character commit SHA." 
+ + +def test_resolve_action_ref_to_tag_found(monkeypatch: pytest.MonkeyPatch) -> None: + """Resolve the matching tag when a tag points to the resolved action SHA.""" + monkeypatch.setattr( + "macaron.code_analyzer.gha_security_analysis.recommendation.get_tags_via_git_remote", + lambda repo: {"v4.2.2": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"}, + ) + + tag = resolve_action_ref_to_tag("actions/checkout", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", "v4") + + assert tag == "v4.2.2" + + +def test_resolve_action_ref_to_tag_none_when_no_match(monkeypatch: pytest.MonkeyPatch) -> None: + """Return None when no fetched tag points to the resolved action SHA.""" + monkeypatch.setattr( + "macaron.code_analyzer.gha_security_analysis.recommendation.get_tags_via_git_remote", + lambda repo: {"v4.2.2": "dddddddddddddddddddddddddddddddddddddddd"}, + ) + + tag = resolve_action_ref_to_tag("actions/checkout", "cccccccccccccccccccccccccccccccccccccccc", "v4") + + assert tag is None + + +def test_parse_unpinned_action_issue_with_step_line_prefix() -> None: + """Parse unpinned action issues that include finding type and step-line marker.""" + parsed = parse_unpinned_action_issue("unpinned-third-party-action: [step-line=62] actions/checkout@v4.2.2") + + assert parsed == ("actions/checkout", "v4.2.2") + + +def test_parse_unpinned_action_issue_plain_format() -> None: + """Parse legacy unpinned action issues without metadata prefix.""" + parsed = parse_unpinned_action_issue("actions/setup-python@v5.6.0") + + assert parsed == ("actions/setup-python", "v5.6.0") diff --git a/tests/integration/cases/oracle_coherence-js-client/policy.dl b/tests/integration/cases/oracle_coherence-js-client/policy.dl index 5b814eb39..4406970d8 100644 --- a/tests/integration/cases/oracle_coherence-js-client/policy.dl +++ b/tests/integration/cases/oracle_coherence-js-client/policy.dl @@ -7,10 +7,16 @@ Policy("check-github-actions-vulnerabilities", component_id, "Check GitHub Actio check_failed(component_id, 
"mcn_githubactions_vulnerabilities_1"), github_actions_vulnerabilities_check( _, - "[\"https://osv.dev/vulnerability/GHSA-69fq-xp46-6x23\", \"https://osv.dev/vulnerability/GHSA-9p44-j4g5-cfx5\"]", + "https://github.com/oracle/coherence-js-client/blob/39166341bc31f75b663ff439dae36170fb3e99a9/.github/workflows/trivy-scan.yml", + "known-vulnerability", + "third_party_action_risk", + _, + _, + _, "aquasecurity/trivy-action", "0.32.0", - "https://github.com/oracle/coherence-js-client/blob/39166341bc31f75b663ff439dae36170fb3e99a9/.github/workflows/trivy-scan.yml" + _, + "[\"https://osv.dev/vulnerability/GHSA-69fq-xp46-6x23\", \"https://osv.dev/vulnerability/GHSA-9p44-j4g5-cfx5\"]" ). apply_policy_to("check-github-actions-vulnerabilities", component_id) :- diff --git a/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl b/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl index 00b141481..8f34d5674 100644 --- a/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl +++ b/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
*/ #include "prelude.dl" diff --git a/tests/integration/cases/org_apache_logging_log4j/test.yaml b/tests/integration/cases/org_apache_logging_log4j/test.yaml index 7871c7a5a..b0820223d 100644 --- a/tests/integration/cases/org_apache_logging_log4j/test.yaml +++ b/tests/integration/cases/org_apache_logging_log4j/test.yaml @@ -27,6 +27,8 @@ steps: command_args: - -rp - https://github.com/apache/logging-log4j2 + - -d + - 028e9fad03ae7bcbf2e49ab8d32d8cfb900f3587 - name: Run macaron verify-policy to verify passed/failed checks kind: verify options: diff --git a/tests/output_reporter/test_write_job_summary.py b/tests/output_reporter/test_write_job_summary.py new file mode 100644 index 000000000..210dc7ac8 --- /dev/null +++ b/tests/output_reporter/test_write_job_summary.py @@ -0,0 +1,104 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Tests for GitHub Actions job summary rendering helpers.""" + +from __future__ import annotations + +import importlib.util +from pathlib import Path +from types import ModuleType + + +def _load_write_job_summary_module() -> ModuleType: + """Load the write_job_summary script as a Python module for testing.""" + script_path = Path(Path(__file__).parents[2], "scripts", "actions", "write_job_summary.py") + spec = importlib.util.spec_from_file_location("write_job_summary", script_path) + if spec is None or spec.loader is None: + raise RuntimeError("Unable to load write_job_summary.py module.") + + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_workflow_security_table_includes_summary_column(tmp_path: Path) -> None: + """Render workflow security findings with the short summary column in compact table output.""" + module = _load_write_job_summary_module() + summary_path = Path(tmp_path, "summary.md") + columns = [ + "finding_group", + 
"finding_priority", + "finding_type", + "action_name", + "action_ref", + "vulnerable_workflow", + "finding_message", + ] + rows = [ + ( + "workflow_security_issue", + 80, + "remote-script-exec", + "https://github.com/org/repo/.github/workflows/build.yml", + "", + "https://github.com/org/repo/.github/workflows/build.yml", + ( + "Summary: Workflow downloads and executes remote scripts inline. " + "Details: remote-script-exec: A step appears to download and pipe to shell (`curl|bash`). " + "Recommendation: Avoid curl|bash patterns." + ), + ), + ] + + rendered = module.write_compact_gha_vuln_diagnostics(summary_path, columns, rows) + output = summary_path.read_text(encoding="utf-8") + + assert rendered is True + assert "| priority | type | summary | workflow |" in output + assert "Workflow downloads and executes remote scripts inline." in output + + +def test_compact_summary_keeps_all_groups_in_detailed_section(tmp_path: Path) -> None: + """Render detailed section with both finding groups even when top priorities are workflow-only.""" + module = _load_write_job_summary_module() + summary_path = Path(tmp_path, "summary.md") + columns = [ + "finding_group", + "finding_priority", + "finding_type", + "action_name", + "action_ref", + "vulnerable_workflow", + "finding_message", + ] + rows = [ + ( + "workflow_security_issue", + 100, + "potential-injection", + "", + "", + "https://github.com/org/repo/.github/workflows/ci.yml", + "Summary: Injection risk. Details: ... Recommendation: ...", + ), + ( + "third_party_action_risk", + 20, + "unpinned-third-party-action", + "actions/checkout", + "v4", + "https://github.com/org/repo/.github/workflows/ci.yml", + "Summary: Unpinned action. 
Recommendation: ...", + ), + ] + + rendered = module.write_compact_gha_vuln_diagnostics(summary_path, columns, rows) + output = summary_path.read_text(encoding="utf-8") + + assert rendered is True + assert "#### Workflow security issues" in output + assert "#### Third-party action risks" in output + assert "**Workflow security issues**" in output + assert "**Third-party action risks**" in output + assert "`actions/checkout@v4`" in output diff --git a/tests/parsers/bashparser/test_bashparser.py b/tests/parsers/bashparser/test_bashparser.py index 97c431034..a489330ac 100644 --- a/tests/parsers/bashparser/test_bashparser.py +++ b/tests/parsers/bashparser/test_bashparser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ @@ -13,7 +13,7 @@ from macaron import MACARON_PATH from macaron.errors import ParseError -from macaron.parsers.bashparser import parse, parse_file +from macaron.parsers.bashparser import parse, parse_file, parse_raw_with_gha_mapping @pytest.mark.parametrize( @@ -46,3 +46,13 @@ def test_bashparser_parse_invalid() -> None: # Parse the bash script file. 
with pytest.raises(ParseError): parse_file(file_path=file_path, macaron_path=MACARON_PATH) + + +def test_bashparser_parse_raw_with_gha_mapping() -> None: + """Test parsing raw bash script with GitHub expression mapping.""" + bash_content = 'echo "${{ github.head_ref }}"\n' + parsed_ast, gha_map = parse_raw_with_gha_mapping(bash_content, MACARON_PATH) + + assert "Stmts" in parsed_ast + assert gha_map + assert "github.head_ref" in gha_map.values() diff --git a/tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl b/tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl new file mode 100644 index 000000000..8bbeba44f --- /dev/null +++ b/tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl @@ -0,0 +1,16 @@ +/* Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy( + "attest-macaron-supply-chain", + component_id, + "Require trusted VCS metadata, and non-hosted repository." +) :- + check_passed(component_id, "mcn_version_control_system_1"), + is_repo(repo_id, "github.com/oracle/macaron", component_id), + not_self_hosted_git(repo_id, _). + +apply_policy_to("attest-macaron-supply-chain", component_id) :- + is_component(component_id, _).