From 38b999343a368cff7fa2256a8576c3633b302aa1 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Tue, 3 Mar 2026 16:48:42 +1000 Subject: [PATCH 01/30] feat!: add more inputs to Macaron Action and improve GitHub Action analysis Signed-off-by: behnazh-w --- .github/workflows/macaron-analysis.yaml | 27 +- .github/workflows/test_macaron_action.yaml | 50 ++- action.yaml | 208 ++++++++++++- golang/internal/bashparser/bashparser.go | 20 +- scripts/actions/write_job_summary.sh | 79 +++++ .../gha_security_analysis/detect_injection.py | 291 ++++++++++++++++++ .../github_actions_vulnerability_check.py | 108 ++++--- .../github_actions/github_actions_ci.py | 22 +- .../test_gha_security_analysis.ambr | 15 + .../workflow_files/injection_pattern_1.yaml | 75 +++++ .../test_gha_security_analysis.py | 31 ++ 11 files changed, 844 insertions(+), 82 deletions(-) create mode 100755 scripts/actions/write_job_summary.sh create mode 100644 src/macaron/code_analyzer/gha_security_analysis/detect_injection.py create mode 100644 tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr create mode 100644 tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml create mode 100644 tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py diff --git a/.github/workflows/macaron-analysis.yaml b/.github/workflows/macaron-analysis.yaml index aca12d881..8d7e599e6 100644 --- a/.github/workflows/macaron-analysis.yaml +++ b/.github/workflows/macaron-analysis.yaml @@ -36,28 +36,13 @@ jobs: - name: Run Macaron action id: run_macaron continue-on-error: true - uses: oracle/macaron@fda4dda04aa7228fcaba162804891806cf5a1375 # v0.22.0 + uses: ./ with: repo_path: ./ policy_file: check-github-actions policy_purl: pkg:github.com/oracle/macaron@.* - - - name: Upload Macaron reports - if: ${{ always() }} - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 - with: - name: macaron-reports - path: | - 
output/reports/github_com/oracle/macaron/macaron.html - output/reports/github_com/oracle/macaron/macaron.json - output/macaron.db - if-no-files-found: warn - retention-days: 90 - - - name: Check Verification Summary Attestation check passes - if: ${{ always() }} - run: | - if [ ! -f output/vsa.intoto.jsonl ]; then - echo "The check-github-actions policy failed, therefore VSA was not generated at output/vsa.intoto.jsonl. Check the uploaded reports." - exit 1 - fi + upload_reports: true + reports_artifact_name: macaron-reports + reports_retention_days: 90 + write_job_summary: true + require_vsa: true diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index f560c027f..35078bfa0 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -1,10 +1,12 @@ # Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
-name: Test Macaron Action (tutorials) +name: Test Macaron Action on: push: + branches: + - main paths: - action.yaml pull_request: @@ -28,18 +30,21 @@ jobs: with: package_url: pkg:pypi/arrow@1.3.0 output_dir: macaron_output/commit_finder + upload_reports: 'false' - name: Run Macaron (analyze arrow@0.15.0) uses: ./ with: package_url: pkg:pypi/arrow@0.15.0 output_dir: macaron_output/commit_finder + upload_reports: 'false' - name: Run Macaron (verify policy - has-hosted-build) uses: ./ with: policy_file: ./tests/tutorial_resources/commit_finder/has-hosted-build.dl output_dir: macaron_output/commit_finder + upload_reports: 'false' tutorial-detect-malicious-package: name: Detecting malicious packages @@ -51,12 +56,14 @@ jobs: with: package_url: pkg:pypi/django@5.0.6 output_dir: macaron_output/detect_malicious_package + upload_reports: 'false' - name: Run Macaron (verify policy - check-django) uses: ./ with: policy_file: ./tests/tutorial_resources/detect_malicious_package/check-django.dl output_dir: macaron_output/detect_malicious_package + upload_reports: 'false' - name: Setup Python for analyzed venv uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 @@ -83,12 +90,14 @@ jobs: output_dir: macaron_output/detect_malicious_package deps_depth: '1' python_venv: /tmp/.django_venv + upload_reports: 'false' - name: Run Macaron (verify policy - check-dependencies) uses: ./ with: policy_file: ./tests/tutorial_resources/detect_malicious_package/check-dependencies.dl output_dir: macaron_output/detect_malicious_package + upload_reports: 'false' tutorial-detect-vulnerable-actions: name: How to detect vulnerable GitHub Actions @@ -101,24 +110,34 @@ jobs: with: repo_path: https://github.com/apache/logging-log4j2 output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'false' + write_job_summary: 'false' - name: Run Macaron (verify policy - github_actions_vulns for repo) uses: ./ with: policy_file: 
./tests/tutorial_resources/detect_vulnerable_github_actions/check_github_actions_vuln_repo.dl output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'true' + reports_artifact_name: macaron-reports-vulnerable-actions-repo + write_job_summary: 'true' - name: Run Macaron (analyze purl - log4j-core example) uses: ./ with: package_url: pkg:maven/org.apache.logging.log4j/log4j-core@2.25.3 output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'false' + write_job_summary: 'false' - name: Run Macaron (verify policy - github_actions_vulns for purl) uses: ./ with: policy_file: ./tests/tutorial_resources/detect_vulnerable_github_actions/check_github_actions_vuln_purl.dl output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'true' + reports_artifact_name: macaron-reports-vulnerable-actions-purl + write_job_summary: 'true' tutorial-provenance: name: Provenance discovery, extraction, and verification @@ -130,48 +149,56 @@ jobs: with: package_url: pkg:npm/semver@7.7.2 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - semver) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_semver.dl output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (analyze toga 0.5.1 - PyPI provenance) uses: ./ with: package_url: pkg:pypi/toga@0.5.1 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - toga PyPI) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_toga.dl output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (analyze toga 0.4.8 - GitHub attestation) uses: ./ with: package_url: pkg:pypi/toga@0.4.8 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - toga GitHub) uses: ./ with: policy_file: 
./tests/tutorial_resources/provenance/has-verified-provenance_toga.dl output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (analyze urllib3 2.0.0a1 - GitHub attestation) uses: ./ with: package_url: pkg:pypi/urllib3@2.0.0a1 output_dir: macaron_output/provenance + upload_reports: 'false' - name: Run Macaron (verify provenance - urllib3) uses: ./ with: policy_file: ./tests/tutorial_resources/provenance/has-verified-provenance_urllib3.dl output_dir: macaron_output/provenance + upload_reports: 'false' tutorial-detect-malicious-java-dep: name: Detecting Java dependencies manually uploaded to Maven Central @@ -186,12 +213,14 @@ jobs: output_dir: macaron_output/detect_malicious_java_dep sbom_path: ./tests/tutorial_resources/detect_malicious_java_dep/example-sbom.json deps_depth: '1' + upload_reports: 'false' - name: Run Macaron (verify policy - detect-malicious-upload) uses: ./ with: policy_file: ./tests/tutorial_resources/detect_malicious_java_dep/example-maven-app.dl output_dir: macaron_output/detect_malicious_java_dep + upload_reports: 'false' tutorial-exclude-include-checks: name: Exclude and include checks in Macaron @@ -204,6 +233,7 @@ jobs: with: package_url: pkg:maven/io.micronaut/micronaut-core@4.3.10 output_dir: macaron_output/exclude_include_checks/normal + upload_reports: 'false' - name: Run Macaron (analyze micronaut-core excluding witness check via defaults.ini) uses: ./ @@ -211,3 +241,21 @@ jobs: package_url: pkg:maven/io.micronaut/micronaut-core@4.3.10 defaults_path: ./tests/tutorial_resources/exclude_include_checks/defaults_exclude_witness.ini output_dir: macaron_output/exclude_include_checks/excluded + upload_reports: 'false' + + tutorial-upload-attestation: + name: Upload verification summary attestation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Run Macaron (analyze macaron repo + custom policy + upload attestation) + uses: ./ + with: + 
repo_path: ./ + policy_file: ./tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl + output_dir: macaron_output/upload_attestation + upload_attestation: 'true' + require_vsa: 'true' + upload_reports: 'true' + reports_artifact_name: macaron-reports-upload-attestation + write_job_summary: 'true' diff --git a/action.yaml b/action.yaml index 418f37705..6538e83fe 100644 --- a/action.yaml +++ b/action.yaml @@ -43,6 +43,24 @@ inputs: output_dir: description: The output destination path for Macaron. default: output + upload_reports: + description: Upload Macaron reports as a workflow artifact. + default: 'true' + reports_artifact_name: + description: Name of the uploaded reports artifact. + default: macaron-reports + reports_retention_days: + description: Retention period in days for uploaded reports. + default: '90' + write_job_summary: + description: Write a human-friendly summary to the workflow run page. + default: 'true' + post_commit_comment: + description: Post or update a commit comment with Macaron results. + default: 'false' + require_vsa: + description: Fail the action if VSA is not generated. + default: 'false' upload_attestation: description: 'Upload the generated VSA report. default : false' default: false @@ -51,12 +69,24 @@ inputs: default: ${{ github.workspace }} outputs: + report_dir: + description: Directory containing HTML/JSON reports. + value: ${{ steps.collect-reports.outputs.report_dir }} + db_path: + description: Path to the generated Macaron SQLite database. 
+ value: ${{ steps.collect-reports.outputs.db_path }} policy_report: description: Paths to the Macaron analysis report - value: ${{ steps.run-macaron-policy-verification.outputs.policy_report }} + value: ${{ steps.collect-reports.outputs.policy_report }} vsa_report: description: Verification Summary Attestation - value: ${{ steps.run-macaron-policy-verification.outputs.vsa_report }} + value: ${{ steps.collect-reports.outputs.vsa_report }} + vsa_generated: + description: Whether VSA was generated. + value: ${{ steps.collect-reports.outputs.vsa_generated }} + vsa_artifact_name: + description: Name of the uploaded VSA artifact (if generated). + value: ${{ steps.collect-reports.outputs.vsa_artifact_name }} runs: using: composite @@ -103,10 +133,180 @@ runs: POLICY_FILE: ${{ inputs.policy_file }} POLICY_PURL: ${{ inputs.policy_purl }} + - name: Collect report paths + id: collect-reports + if: ${{ always() }} + run: | + OUTPUT_DIR="${OUTPUT_DIR:-output}" + POLICY_REPORT="${OUTPUT_DIR}/policy_report.json" + VSA_REPORT="${OUTPUT_DIR}/vsa.intoto.jsonl" + DB_PATH="${OUTPUT_DIR}/macaron.db" + REPORT_DIR="${OUTPUT_DIR}/reports" + + if [ -f "${VSA_REPORT}" ]; then + VSA_VALUE="${VSA_REPORT}" + VSA_GENERATED=true + else + VSA_VALUE="VSA Not Generated." 
+ VSA_GENERATED=false + fi + + echo "report_dir=${REPORT_DIR}" >> "${GITHUB_OUTPUT}" + echo "db_path=${DB_PATH}" >> "${GITHUB_OUTPUT}" + echo "policy_report=${POLICY_REPORT}" >> "${GITHUB_OUTPUT}" + echo "vsa_report=${VSA_VALUE}" >> "${GITHUB_OUTPUT}" + echo "vsa_generated=${VSA_GENERATED}" >> "${GITHUB_OUTPUT}" + echo "vsa_artifact_name=${REPORTS_ARTIFACT_NAME}-vsa" >> "${GITHUB_OUTPUT}" + + { + echo "reports_path<> "${GITHUB_OUTPUT}" + shell: bash + env: + OUTPUT_DIR: ${{ inputs.output_dir }} + REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} + + - name: Upload Macaron Reports + if: ${{ always() && inputs.upload_reports == 'true' }} + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: ${{ inputs.reports_artifact_name }} + path: ${{ steps.collect-reports.outputs.reports_path }} + if-no-files-found: warn + retention-days: ${{ inputs.reports_retention_days }} + + - name: Upload VSA Artifact + if: ${{ always() && inputs.upload_reports == 'true' && steps.collect-reports.outputs.vsa_generated == 'true' }} + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: ${{ steps.collect-reports.outputs.vsa_artifact_name }} + path: ${{ inputs.output_dir }}/vsa.intoto.jsonl + if-no-files-found: warn + retention-days: ${{ inputs.reports_retention_days }} + + - name: Summarize Macaron Results + if: ${{ always() && inputs.write_job_summary == 'true' }} + run: | + bash "$GITHUB_ACTION_PATH/scripts/actions/write_job_summary.sh" + shell: bash + env: + OUTPUT_DIR: ${{ inputs.output_dir }} + VSA_GENERATED: ${{ steps.collect-reports.outputs.vsa_generated }} + POLICY_REPORT: ${{ steps.collect-reports.outputs.policy_report }} + REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} + VSA_ARTIFACT_NAME: ${{ steps.collect-reports.outputs.vsa_artifact_name }} + + - name: Post Macaron commit comment + if: ${{ always() && inputs.post_commit_comment == 'true' }} + uses: 
actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v8.0.0 + env: + OUTPUT_DIR: ${{ inputs.output_dir }} + POLICY_REPORT: ${{ steps.collect-reports.outputs.policy_report }} + VSA_REPORT: ${{ steps.collect-reports.outputs.vsa_report }} + VSA_GENERATED: ${{ steps.collect-reports.outputs.vsa_generated }} + REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} + with: + script: | + const fs = require("fs"); + const path = require("path"); + + const marker = ""; + const outputDir = process.env.OUTPUT_DIR || "output"; + const policyReportPath = process.env.POLICY_REPORT || path.join(outputDir, "policy_report.json"); + const vsaGenerated = process.env.VSA_GENERATED === "true"; + const vsaReport = process.env.VSA_REPORT || "VSA Not Generated."; + const artifactName = process.env.REPORTS_ARTIFACT_NAME || "macaron-reports"; + + let parsed = {}; + if (fs.existsSync(policyReportPath)) { + try { + parsed = JSON.parse(fs.readFileSync(policyReportPath, "utf8")); + } catch (err) { + parsed = {}; + } + } + + const relation = parsed.check_github_actions_vulnerabilities; + const lines = []; + lines.push(marker); + lines.push("## Macaron Results"); + lines.push(""); + lines.push(`- Workflow run: ${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`); + lines.push(`- Reports artifact: \`${artifactName}\``); + lines.push(`- VSA generated: \`${vsaGenerated}\``); + if (vsaGenerated) { + lines.push(`- VSA file: \`${vsaReport}\``); + } + lines.push(""); + + if (Array.isArray(relation) && relation.length > 0) { + lines.push("### Vulnerable GitHub Actions"); + lines.push(""); + lines.push("| ID | Action | Version | Vulnerabilities | Workflow |"); + lines.push("|---|---|---|---|---|"); + for (const row of relation) { + const [id, urls, actionId, version, callerWorkflow] = row; + lines.push(`| \`${id}\` | \`${actionId}\` | \`${version}\` | \`${urls}\` | ${callerWorkflow} |`); + } + } else { + lines.push(":white_check_mark: No 
`check_github_actions_vulnerabilities` findings in `policy_report.json`."); + } + + const body = lines.join("\n"); + + const list = await github.rest.repos.listCommentsForCommit({ + owner: context.repo.owner, + repo: context.repo.repo, + commit_sha: context.sha, + per_page: 100, + }); + + const existing = list.data.find((comment) => { + return ( + comment.user && + comment.user.type === "Bot" && + typeof comment.body === "string" && + comment.body.includes(marker) + ); + }); + + if (existing) { + await github.rest.repos.updateCommitComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body, + }); + } else { + await github.rest.repos.createCommitComment({ + owner: context.repo.owner, + repo: context.repo.repo, + commit_sha: context.sha, + body, + }); + } + + - name: Enforce VSA generation + if: ${{ always() && inputs.require_vsa == 'true' }} + run: | + if [ "${VSA_GENERATED}" != "true" ]; then + echo "VSA was not generated at ${OUTPUT_DIR}/vsa.intoto.jsonl. Check uploaded reports." + exit 1 + fi + shell: bash + env: + OUTPUT_DIR: ${{ inputs.output_dir }} + VSA_GENERATED: ${{ steps.collect-reports.outputs.vsa_generated }} + - name: Upload Attestation - if: ${{ inputs.upload_attestation == 'true' && steps.run-macaron-policy-verification.outputs.vsa_report != 'VSA Not Generated.' 
}} + if: ${{ inputs.upload_attestation == 'true' && steps.collect-reports.outputs.vsa_generated == 'true' }} uses: actions/attest@daf44fb950173508f38bd2406030372c1d1162b1 #3.0.0 with: subject-path: ${{ inputs.subject_path }} predicate-type: https://slsa.dev/verification_summary/v1 - predicate-path: ${{ steps.run-macaron-policy-verification.outputs.vsa_report }} + predicate-path: ${{ steps.collect-reports.outputs.vsa_report }} diff --git a/golang/internal/bashparser/bashparser.go b/golang/internal/bashparser/bashparser.go index b88e43a6e..cbd105df8 100644 --- a/golang/internal/bashparser/bashparser.go +++ b/golang/internal/bashparser/bashparser.go @@ -32,8 +32,14 @@ func ParseCommands(data string) (string, error) { return "", reg_error } - // We replace the GH Actions variables with "$MACARON_UNKNOWN". - data = string(re.ReplaceAll([]byte(data), []byte("$$MACARON_UNKNOWN"))) + // Remove '{', '}', and spaces from each matched value. + data = string(re.ReplaceAllFunc([]byte(data), func(m []byte) []byte { + s := string(m) + s = strings.ReplaceAll(s, "{", "") + s = strings.ReplaceAll(s, "}", "") + s = strings.ReplaceAll(s, " ", "") + return []byte(s) + })) data_str := strings.NewReader(data) data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") if parse_err != nil { @@ -81,8 +87,14 @@ func ParseRaw(data string) (string, error) { return "", reg_error } - // We replace the GH Actions variables with "$MACARON_UNKNOWN". - data = string(re.ReplaceAll([]byte(data), []byte("$$MACARON_UNKNOWN"))) + // Remove '{', '}', and spaces from each matched value. 
+ data = string(re.ReplaceAllFunc([]byte(data), func(m []byte) []byte { + s := string(m) + s = strings.ReplaceAll(s, "{", "") + s = strings.ReplaceAll(s, "}", "") + s = strings.ReplaceAll(s, " ", "") + return []byte(s) + })) data_str := strings.NewReader(data) data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") if parse_err != nil { diff --git a/scripts/actions/write_job_summary.sh b/scripts/actions/write_job_summary.sh new file mode 100755 index 000000000..4b45c6386 --- /dev/null +++ b/scripts/actions/write_job_summary.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash + +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. +set -euo pipefail + +OUTPUT_DIR="${OUTPUT_DIR:-output}" +DB_PATH="${OUTPUT_DIR}/macaron.db" +POLICY_REPORT="${POLICY_REPORT:-${OUTPUT_DIR}/policy_report.json}" +VSA_PATH="${OUTPUT_DIR}/vsa.intoto.jsonl" +VSA_GENERATED="${VSA_GENERATED:-false}" +REPORTS_ARTIFACT_NAME="${REPORTS_ARTIFACT_NAME:-macaron-reports}" +VSA_ARTIFACT_NAME="${VSA_ARTIFACT_NAME:-${REPORTS_ARTIFACT_NAME}-vsa}" +ARTIFACTS_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/artifacts" + +{ + echo "## Macaron GitHub Actions Vulnerability Results" + echo + echo "- Database: [\`${DB_PATH}\`](${ARTIFACTS_URL})" + echo "- Policy report: [\`${POLICY_REPORT}\`](${ARTIFACTS_URL})" + echo "- VSA generated: \`${VSA_GENERATED}\`" + echo "- Download artifact: [\`${REPORTS_ARTIFACT_NAME}\`](${ARTIFACTS_URL})" + if [ "${VSA_GENERATED}" = "true" ]; then + echo "- Download VSA: [\`${VSA_ARTIFACT_NAME}\`](${ARTIFACTS_URL})" + fi + echo +} >> "${GITHUB_STEP_SUMMARY}" + +if [ ! -f "${DB_PATH}" ]; then + echo ":warning: Macaron database was not generated." 
>> "${GITHUB_STEP_SUMMARY}" + exit 0 +fi + +python - <<'PY' +import json +import os +import sqlite3 + +db_path = os.path.join(os.environ.get("OUTPUT_DIR", "output"), "macaron.db") +summary_path = os.environ["GITHUB_STEP_SUMMARY"] + +with sqlite3.connect(db_path) as conn: + cur = conn.cursor() + cur.execute( + """ + SELECT github_actions_id, github_actions_version, vulnerability_urls, caller_workflow + FROM github_actions_vulnerabilities_check + ORDER BY id + """ + ) + rows = cur.fetchall() + +with open(summary_path, "a", encoding="utf-8") as f: + if not rows: + f.write(":white_check_mark: No vulnerable GitHub Actions detected.\n") + else: + f.write("| Action | Version | Vulnerabilities | Workflow |\n") + f.write("|---|---|---|---|\n") + for action_id, version, vulnerability_urls, caller_workflow in rows: + vuln_value = vulnerability_urls + try: + parsed = json.loads(vulnerability_urls) + if isinstance(parsed, list): + vuln_value = ", ".join(parsed) + except (json.JSONDecodeError, TypeError): + pass + + f.write( + f"| `{action_id}` | `{version}` | `{vuln_value}` | {caller_workflow} |\n" + ) +PY + +if [ -f "${VSA_PATH}" ]; then + echo >> "${GITHUB_STEP_SUMMARY}" + echo ":white_check_mark: VSA was generated at \`${VSA_PATH}\`." >> "${GITHUB_STEP_SUMMARY}" +else + echo >> "${GITHUB_STEP_SUMMARY}" + echo ":warning: VSA was not generated at \`${VSA_PATH}\`." >> "${GITHUB_STEP_SUMMARY}" +fi diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py new file mode 100644 index 000000000..8da4c6ace --- /dev/null +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -0,0 +1,291 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""Detect security issues and injection risks in GitHub Actions workflows.""" + +import re +from typing import cast + +from macaron.code_analyzer.dataflow_analysis import bash, core +from macaron.code_analyzer.dataflow_analysis.core import NodeForest, traverse_bfs +from macaron.code_analyzer.dataflow_analysis.github import GitHubActionsWorkflowNode +from macaron.parsers.bashparser_model import CallExpr, is_call_expr, is_lit, is_param_exp +from macaron.parsers.github_workflow_model import is_normal_job + +REMOTE_SCRIPT_RE = re.compile(r"(curl|wget)\s+.*\|\s*(bash|sh|tar)", re.IGNORECASE) +SHA_PINNED_USES_RE = re.compile(r".+@([0-9a-f]{40})$") # commit SHA pinning + +UNTRUSTED_PR_REFS = { + "${{ github.event.pull_request.head.ref }}", + "${{ github.head_ref }}", + "${{ github.event.pull_request.head.sha }}", + "${{ github.event.pull_request.head.repo.full_name }}", +} + +DANGEROUS_TRIGGERS = { + "pull_request_target", # elevated token context + "workflow_run", # can chain privileged workflows + "repository_dispatch", # external event injection risk if misused + "issue_comment", # often used to trigger runs; needs strict gating +} + + +def detect_github_actions_security_issues(nodes: NodeForest) -> list[dict[str, list[str]]]: + """Detect security issues across GitHub Actions workflow nodes. + + Parameters + ---------- + nodes : NodeForest + Parsed workflow node forest used for traversing GitHub Actions workflow callgraphs. + + Returns + ------- + list[dict[str, list[str]]] + A list of workflow-level findings. Each item contains: + - ``workflow_name``: workflow file path. + - ``issues``: list of detected security issue messages. 
+ """ + findings = [] + for root in nodes.root_nodes: + for callee in traverse_bfs(root): + if isinstance(callee, GitHubActionsWorkflowNode): + if result := analyze_workflow(callee, nodes): + findings.append(result) + return findings + + +def analyze_workflow( + workflow_node: GitHubActionsWorkflowNode, +) -> dict[str, object] | None: + """Analyze a GitHub Actions workflow for security issues. + + Parameters + ---------- + workflow_node : GitHubActionsWorkflowNode + The workflow node to analyze. + + Returns + ------- + dict[str, object] | None + A finding dictionary with: + - ``workflow_name``: source filepath of the workflow. + - ``issues``: list of issue messages. + Returns ``None`` when no issues are detected. + + Notes + ----- + The analysis covers trigger hardening, permissions configuration, action pinning, + checkout risks, remote-script execution heuristics, self-hosted runner usage, and + dataflow-based expression injection patterns. + """ + wf = workflow_node.definition + findings: list[str] = [] + + on_section = wf.get("on") + on_keys = set() + if isinstance(on_section, dict): + on_keys = set(on_section.keys()) + elif isinstance(on_section, list): + on_keys = set(on_section) + elif isinstance(on_section, str): + on_keys = {on_section} + + # --- A. Triggers that often need extra hardening / gating --- + sensitive = sorted(on_keys.intersection(DANGEROUS_TRIGGERS)) + if sensitive: + findings.append( + f"sensitive-trigger: Workflow uses {sensitive}. Ensure strict gating (e.g., actor allowlist, " + "branch protection, and minimal permissions)." + ) + + # --- B. Privileged trigger check (existing) --- + if "pull_request_target" in on_keys: + findings.append( + "privileged-trigger: Workflow uses `pull_request_target`, which runs with elevated permissions." + ) + + # --- C. 
Missing workflow permissions (existing) --- + if "permissions" not in wf: + findings.append("missing-permissions: No explicit workflow permissions defined; defaults may be overly broad.") + else: + # --- C2. Overly broad workflow permissions (new heuristic) --- + perms = wf.get("permissions") + if isinstance(perms, str) and perms.lower() == "write-all": + findings.append("overbroad-permissions: Workflow uses `permissions: write-all`.") + if isinstance(perms, dict): + # Example policy: flag any write permissions on PR-triggered workflows + if "pull_request_target" in on_keys: + for scope, level in perms.items(): + if isinstance(level, str) and "write" in level.lower(): + findings.append( + f"overbroad-permissions: PR-triggered workflow requests " f"`{scope}: {level}`." + ) + + # Walk jobs/steps for step-level checks. + jobs = wf.get("jobs", {}) if isinstance(wf.get("jobs"), dict) else {} + for job_name, job in jobs.items(): + if not is_normal_job(job): + continue + + # --- D. Self-hosted runners (new) --- + runs_on = job.get("runs-on") + if runs_on: + runs_on_str = str(runs_on) + if "self-hosted" in runs_on_str: + findings.append( + f"self-hosted-runner: Job `{job_name}` runs on self-hosted runners; " + "ensure isolation and never run untrusted PR code there." + ) + + steps = job.get("steps", []) if isinstance(job.get("steps"), list) else [] + + for step in steps: + uses = step.get("uses", "") if isinstance(step, dict) else "" + run = step.get("run", "") if isinstance(step, dict) else "" + + # --- E. Action SHA pinning (new) --- + if uses: + # Ignore local actions "./.github/actions/..." + if not uses.startswith("./") and not SHA_PINNED_USES_RE.match(uses): + # findings.append(f"unpinned-action: Job `{job_name}` uses `{uses}` not pinned to a commit SHA.") + findings.append(uses) + + # --- F. 
Checkout untrusted fork refs on PR event (existing, expanded) --- + if uses and "actions/checkout" in uses: + with_section = step.get("with", {}) if isinstance(step.get("with"), dict) else {} + ref = with_section.get("ref", "") + if ref in UNTRUSTED_PR_REFS and "pull_request" in on_keys: + findings.append( + f"untrusted-fork-code: Job `{job_name}` checks out " + f"untrusted fork code (`ref: {ref}`) on PR event." + ) + + # --- G. persist-credentials (new) --- + # Default is true for checkout; many orgs prefer setting false explicitly. + persist = with_section.get("persist-credentials", None) + if persist is True or (isinstance(persist, str) and persist.lower() == "true"): + findings.append( + f"persist-credentials: Job `{job_name}` uses checkout " + "with `persist-credentials: true`; may expose " + "GITHUB_TOKEN to subsequent git commands." + ) + + # --- H. Remote script execution: curl|bash (new heuristic) --- + if isinstance(run, str) and REMOTE_SCRIPT_RE.search(run): + findings.append( + f"remote-script-exec: Job `{job_name}` step appears to " "download and pipe to shell (`curl|bash`)." + ) + + # --- I. Extra dangerous combo: pull_request_target + checkout PR head ref (new) --- + if "pull_request_target" in on_keys and uses and "actions/checkout" in uses: + with_section = step.get("with", {}) if isinstance(step.get("with"), dict) else {} + ref = with_section.get("ref", "") + if ref in UNTRUSTED_PR_REFS: + findings.append( + f"pr-target-untrusted-checkout: Job `{job_name}` uses " + f"pull_request_target and checks out PR-controlled " + f"ref `{ref}`." + ) + + # --- J. 
Your existing dataflow-based injection heuristic (kept) --- + for node in core.traverse_bfs(workflow_node): + if isinstance(node, bash.BashSingleCommandNode): + # step_node = get_containing_github_step(node, nodes.parents) + if is_call_expr(node.definition.get("Cmd")): + call_exp = cast(CallExpr, node.definition["Cmd"]) + for arg in call_exp.get("Args", []): + expansion = False + pr_head_ref = False + for part in arg.get("Parts", []): + if is_param_exp(part) and part.get("Param", {}).get("Value") == "github": + expansion = True + if is_lit(part) and part.get("Value") in { + ".event.pull_request.head.ref", + ".head_ref", + ".event.issue.body", + ".event.comment.body", + }: + pr_head_ref = True + if expansion and pr_head_ref: + findings.append(f"potential-injection: {arg.get('Parts')}") + + if findings: + return {"workflow_name": workflow_node.context.ref.source_filepath, "issues": findings} + + return None + + +# def analyze_workflow(workflow_node: GitHubActionsWorkflowNode, nodes: NodeForest) -> list[dict[str, str]]: +# """ +# Analyze a GitHub Actions workflow for common security misconfigurations. + +# Issues Detected: +# - Privileged triggers such as pull_request_target +# - Execution of untrusted code from forked PRs +# - Inline shell scripts or unvalidated input usage +# - Missing permissions or authorization checks +# """ +# wf = workflow_node.definition +# findings = [] + +# for node in core.traverse_bfs(workflow_node): +# if isinstance(node, bash.BashSingleCommandNode): +# # The step in GitHub Actions job that triggers the path in the callgraph. 
+# step_node = get_containing_github_step(node, nodes.parents) +# if is_call_expr(node.definition["Cmd"]): +# call_exp = cast(CallExpr, node.definition["Cmd"]) +# for arg in call_exp["Args"]: +# expansion = False +# pr_head_ref = False +# for part in arg["Parts"]: +# if is_param_exp(part) and part["Param"]["Value"] == "github": +# expansion = True +# if is_lit(part) and part["Value"] == ".event.pull_request.head.ref": +# pr_head_ref = True +# if expansion and pr_head_ref: +# findings.append( +# f"Potential injection: {arg['Parts']}" +# ) + +# # --- 1. Privileged trigger check --- +# if isinstance(wf.get("on"), dict) and "pull_request_target" in wf["on"]: +# findings.append( +# "privileged-trigger: Workflow uses `pull_request_target`, which runs with elevated permissions." +# ) + +# # --- 2. Untrusted code execution --- +# if isinstance(wf.get("on"), dict) and "pull_request" in wf["on"]: +# for job_name, job in wf["jobs"].items(): +# if is_normal_job(job) and "steps" in job: +# for step in job["steps"]: +# uses = step.get("uses", "") +# if "actions/checkout" in uses: +# ref = step.get("with", {}).get("ref", "") +# if ref in ["${{ github.event.pull_request.head.ref }}", "${{ github.head_ref }}"]: +# findings.append( +# f"untrusted-fork-code Job `{job_name}` checks out untrusted fork code on PR event." +# ) + +# # --- 3. Inline shell or unvalidated inputs --- +# # for job_name, job in wf["jobs"].items(): +# # if is_normal_job(job) and "steps" in job: +# # for step in job["steps"]: +# # script = get_run_step(step) +# # if script and ("${{ github" in script or "${{ inputs" in script): +# # findings.append( +# # f"unvalidated-input-script: Step `{step.get('name', job_name)}` runs inline shell with expressions." +# # ) +# # elif script and re.search(r"(curl|wget|bash\s+-c)", script): +# # findings.append( +# # f"inline-shell-risk Step `{step.get('name', job_name)}` runs shell commands directly." +# # ) + +# # --- 4. 
Authorization check --- +# if "permissions" not in wf: +# findings.append("missing-permissions: No explicit workflow permissions defined; defaults may be overly broad.") + +# if findings: +# result: dict[str, list[str]] = {"workflow_name": wf.get("name"), "issues": findings} +# return result + +# return None diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py index 3b350091c..5e56310a6 100644 --- a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py +++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py @@ -15,6 +15,7 @@ GitHubActionsActionStepNode, GitHubActionsReusableWorkflowCallNode, ) +from macaron.code_analyzer.gha_security_analysis.detect_injection import detect_github_actions_security_issues from macaron.database.db_custom_types import DBJsonList from macaron.database.table_definitions import CheckFacts from macaron.errors import APIAccessError @@ -89,10 +90,23 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: The result of the check. 
""" ci_services = ctx.dynamic_data["ci_services"] - + result_tables: list[CheckFacts] = [] external_workflows: dict[str, list] = {} + for ci_info in ci_services: callgraph = ci_info["callgraph"] + if findings := detect_github_actions_security_issues(callgraph): + for finding in findings: + for issue in finding["issues"]: + result_tables.append( + GitHubActionsVulnsFacts( + vulnerability_urls=[issue], + github_actions_id=finding["workflow_name"], + github_actions_version="None", + caller_workflow="None", + confidence=Confidence.HIGH, + ) + ) for root in callgraph.root_nodes: for callee in traverse_bfs(root): if isinstance(callee, (GitHubActionsReusableWorkflowCallNode, GitHubActionsActionStepNode)): @@ -134,60 +148,54 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: ) external_workflows[workflow_name] = ext_workflow - # If no external GitHub Actions are found, return passed result. - if not external_workflows: - return CheckResultData( - result_tables=[], - result_type=CheckResultType.PASSED, - ) + # If no external GitHub Actions are found, no need to check for known vulnerabilities. + if external_workflows: - # We first send a batch query to see which GitHub Actions are potentially vulnerable. - # OSV's querybatch returns minimal results but this allows us to only make subsequent - # queries to get vulnerability details when needed. - batch_query = [ - {"package": {"name": k, "ecosystem": "GitHub Actions"}} for k, _ in external_workflows.items() if k - ] - batch_vulns = [] - try: - batch_vulns = OSVDevService.get_vulnerabilities_package_name_batch(batch_query) - except APIAccessError as error: - logger.debug(error) - - result_tables: list[CheckFacts] = [] - for pkg in batch_vulns: - vuln_res = pkg["package"] - vulns: list = [] - workflow_name = vuln_res["name"] + # We first send a batch query to see which GitHub Actions are potentially vulnerable. 
+ # OSV's querybatch returns minimal results but this allows us to only make subsequent + # queries to get vulnerability details when needed. + batch_query = [{"package": {"name": k, "ecosystem": "GitHub Actions"}} for k, _ in external_workflows.items() if k] + batch_vulns = [] try: - vulns = OSVDevService.get_vulnerabilities_package_name(ecosystem="GitHub Actions", name=workflow_name) + batch_vulns = OSVDevService.get_vulnerabilities_package_name_batch(batch_query) except APIAccessError as error: logger.debug(error) - continue - for workflow_inv in external_workflows[workflow_name]: - vuln_mapping = [] - for vuln in vulns: - if v_id := json_extract(vuln, ["id"], str): - try: - if OSVDevService.is_version_affected( - vuln, - workflow_name, - workflow_inv["version"], - "GitHub Actions", - source_repo=f"https://github.com/{workflow_name}", - ): - vuln_mapping.append(f"https://osv.dev/vulnerability/{v_id}") - except APIAccessError as error: - logger.debug(error) - if vuln_mapping: - result_tables.append( - GitHubActionsVulnsFacts( - vulnerability_urls=vuln_mapping, - github_actions_id=workflow_name, - github_actions_version=workflow_inv["version"], - caller_workflow=workflow_inv["caller_path"], - confidence=Confidence.HIGH, - ) + + for vuln_res in batch_vulns: + vulns: list = [] + workflow_name = vuln_res["name"] + try: + vulns = OSVDevService.get_vulnerabilities_package_name( + ecosystem="GitHub Actions", name=workflow_name ) + except APIAccessError as error: + logger.debug(error) + continue + for workflow_inv in external_workflows[workflow_name]: + vuln_mapping = [] + for vuln in vulns: + if v_id := json_extract(vuln, ["id"], str): + try: + if OSVDevService.is_version_affected( + vuln, + workflow_name, + workflow_inv["version"], + "GitHub Actions", + source_repo=f"https://github.com/{workflow_name}", + ): + vuln_mapping.append(f"https://osv.dev/vulnerability/{v_id}") + except APIAccessError as error: + logger.debug(error) + if vuln_mapping: + result_tables.append( + 
GitHubActionsVulnsFacts( + vulnerability_urls=vuln_mapping, + github_actions_id=workflow_name, + github_actions_version=workflow_inv["version"], + caller_workflow=workflow_inv["caller_path"], + confidence=Confidence.HIGH, + ) + ) if result_tables: return CheckResultData( diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py index 4700e5e85..d222ee011 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py @@ -581,14 +581,32 @@ def build_call_graph(self, repo_path: str, macaron_path: str = "") -> NodeForest Returns ------- - CallGraph: CallGraph - The call graph built for GitHub Actions. + NodeForest + The root nodes of call graphs built for GitHub Actions workflows. """ if not macaron_path: macaron_path = global_config.macaron_path # Parse GitHub Actions workflows. files = self.get_workflows(repo_path) + return self.build_call_graph_for_files(files, repo_path) + + def build_call_graph_for_files(self, files: list[str], repo_path: str) -> NodeForest: + """Build call graphs for a given set of GitHub Actions workflow files. + + Parameters + ---------- + files : list[str] + The list of workflow file paths to analyze. + repo_path : str + The repository path used as the base context for workflow analysis. + + Returns + ------- + NodeForest + A forest containing one root node per successfully parsed workflow. + Workflows that raise ``ParseError`` are skipped. 
+ """ nodes: list[Node] = [] for workflow_path in files: try: diff --git a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr new file mode 100644 index 000000000..5ffd2eab3 --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr @@ -0,0 +1,15 @@ +# serializer version: 1 +# name: test_detect_github_actions_security_issues[injection_pattern_1.yaml] + list([ + dict({ + 'issues': list([ + "Potential injection: [{'Type': 'Lit', 'Pos': {'Offset': 184, 'Line': 7, 'Col': 17}, 'End': {'Offset': 189, 'Line': 7, 'Col': 22}, 'ValuePos': {'Offset': 184, 'Line': 7, 'Col': 17}, 'ValueEnd': {'Offset': 189, 'Line': 7, 'Col': 22}, 'Value': 'HEAD:'}, {'Type': 'ParamExp', 'Pos': {'Offset': 189, 'Line': 7, 'Col': 22}, 'End': {'Offset': 196, 'Line': 7, 'Col': 29}, 'Dollar': {'Offset': 189, 'Line': 7, 'Col': 22}, 'Short': True, 'Param': {'Pos': {'Offset': 190, 'Line': 7, 'Col': 23}, 'End': {'Offset': 196, 'Line': 7, 'Col': 29}, 'ValuePos': {'Offset': 190, 'Line': 7, 'Col': 23}, 'ValueEnd': {'Offset': 196, 'Line': 7, 'Col': 29}, 'Value': 'github'}}, {'Type': 'Lit', 'Pos': {'Offset': 196, 'Line': 7, 'Col': 29}, 'End': {'Offset': 224, 'Line': 7, 'Col': 57}, 'ValuePos': {'Offset': 196, 'Line': 7, 'Col': 29}, 'ValueEnd': {'Offset': 224, 'Line': 7, 'Col': 57}, 'Value': '.event.pull_request.head.ref'}]", + 'privileged-trigger: Workflow uses `pull_request_target`, which runs with elevated permissions.', + 'unvalidated-input-script: Step `Run cargo fmt` runs inline shell with expressions.', + 'unvalidated-input-script: Step `Commit and push formatting changes` runs inline shell with expressions.', + 'missing-permissionsNo explicit workflow permissions defined; defaults may be overly broad.', + ]), + 'workflow_name': 'PR Auto-format', + }), + ]) +# --- diff --git 
a/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml b/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml new file mode 100644 index 000000000..9ef276717 --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml @@ -0,0 +1,75 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +name: PR Auto-format + +# This workflow triggers when a PR is opened/updated +on: + pull_request_target: + types: [opened, synchronize, reopened] + branches: + - main + - release + +jobs: + auto_format: + if: | + !contains(github.event.pull_request.labels.*.name, 'skip:ci') && + !contains(github.event.pull_request.head.sha, '[skip ci]') + permissions: + contents: write + pull-requests: write + checks: read + runs-on: ubuntu-latest + timeout-minutes: 60 + + steps: + - name: Checkout PR branch + uses: actions/checkout@v5 + with: + ref: ${{ github.event.pull_request.head.ref }} + repository: ${{ github.event.pull_request.head.repo.full_name }} + token: ${{ secrets.GITHUB_TOKEN }} + fetch-depth: 0 + + # Wait for all PR check runs to complete + - name: Wait for all checks to complete + uses: poseidon/wait-for-status-checks@v0.6.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + delay: 60 + interval: 30 + timeout: 7200 + + - name: CI completed successfully + run: echo "CI workflow completed successfully - proceeding with auto-format" + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + + - name: Run cargo fmt + run: | + echo "Running cargo fmt --all on PR #${{ github.event.pull_request.number }}" + cargo fmt --all + + - name: Check for formatting changes + id: check_changes + run: | + if [ -n "$(git status --porcelain)" ]; then + echo "has_changes=true" >> $GITHUB_OUTPUT + else + echo 
"has_changes=false" >> $GITHUB_OUTPUT + fi + + - name: Commit and push formatting changes + if: steps.check_changes.outputs.has_changes == 'true' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + git add -u + git commit -m "Auto-format code [skip ci]" + + git push origin HEAD:${{ github.event.pull_request.head.ref }} diff --git a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py new file mode 100644 index 000000000..e14f66447 --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py @@ -0,0 +1,31 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Tests for GitHub Actions security analysis detection heuristics.""" + +import os +from pathlib import Path + +import pytest + +from macaron.code_analyzer.gha_security_analysis.detect_injection import detect_github_actions_security_issues +from macaron.slsa_analyzer.ci_service.github_actions.github_actions_ci import GitHubActions + +RESOURCES_DIR = Path(__file__).parent.joinpath("resources") + + +@pytest.mark.parametrize( + "workflow_path", + [ + "injection_pattern_1.yaml", + ], +) +def test_detect_github_actions_security_issues( + snapshot: dict, workflow_path: str, github_actions_service: GitHubActions +) -> None: + """Test GH Actions workflows injection patterns.""" + callgraph = github_actions_service.build_call_graph_for_files( + [os.path.join(RESOURCES_DIR, "workflow_files", workflow_path)], + repo_path=os.path.join(RESOURCES_DIR, "workflow_files"), + ) + assert detect_github_actions_security_issues(callgraph) == snapshot From 3d2d5173c6a1ffff03d3abd0f32ccf49dfa478f2 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sat, 28 Mar 2026 13:45:18 +1000 Subject: [PATCH 02/30] 
chore: add missing policy and disable some injection checks Signed-off-by: behnazh-w --- .github/workflows/test_macaron_action.yaml | 4 +- action.yaml | 6 +- pyproject.toml | 4 + scripts/actions/write_job_summary.sh | 12 +- .../gha_security_analysis/detect_injection.py | 141 +++++++++--------- .../github_actions_vulnerability_check.py | 6 +- .../test_gha_security_analysis.ambr | 8 +- .../test_gha_security_analysis.py | 2 +- .../provenance/attest-macaron-supply-chain.dl | 16 ++ 9 files changed, 114 insertions(+), 85 deletions(-) create mode 100644 tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index 35078bfa0..e61f5fb94 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -6,7 +6,7 @@ name: Test Macaron Action on: push: branches: - - main + - main paths: - action.yaml pull_request: @@ -253,8 +253,10 @@ jobs: with: repo_path: ./ policy_file: ./tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl + policy_purl: pkg:github.com/oracle/macaron@.* output_dir: macaron_output/upload_attestation upload_attestation: 'true' + subject_path: ./macaron_output/upload_attestation/vsa.intoto.jsonl require_vsa: 'true' upload_reports: 'true' reports_artifact_name: macaron-reports-upload-attestation diff --git a/action.yaml b/action.yaml index 6538e83fe..eb0a9e2b6 100644 --- a/action.yaml +++ b/action.yaml @@ -172,6 +172,7 @@ runs: REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} - name: Upload Macaron Reports + id: upload-macaron-reports if: ${{ always() && inputs.upload_reports == 'true' }} uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: @@ -181,8 +182,9 @@ runs: retention-days: ${{ inputs.reports_retention_days }} - name: Upload VSA Artifact + id: upload-vsa-artifact if: ${{ always() && inputs.upload_reports == 'true' && 
steps.collect-reports.outputs.vsa_generated == 'true' }} - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: ${{ steps.collect-reports.outputs.vsa_artifact_name }} path: ${{ inputs.output_dir }}/vsa.intoto.jsonl @@ -200,6 +202,8 @@ runs: POLICY_REPORT: ${{ steps.collect-reports.outputs.policy_report }} REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} VSA_ARTIFACT_NAME: ${{ steps.collect-reports.outputs.vsa_artifact_name }} + REPORTS_ARTIFACT_URL: ${{ steps.upload-macaron-reports.outputs.artifact-url }} + VSA_ARTIFACT_URL: ${{ steps.upload-vsa-artifact.outputs.artifact-url }} - name: Post Macaron commit comment if: ${{ always() && inputs.post_commit_comment == 'true' }} diff --git a/pyproject.toml b/pyproject.toml index ede72bdb5..67794b851 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -224,6 +224,9 @@ load-plugins = [ "pylint.extensions.set_membership", "pylint.extensions.typing", ] +# Disable unsubscriptable-object because Pylint has false positives and this check +# overlaps with mypy's checks. Enable the check when the related issue is resolved: +# https://github.com/pylint-dev/pylint/issues/9549 disable = [ "fixme", "line-too-long", # Replaced by Flake8 Bugbear B950 check. 
@@ -242,6 +245,7 @@ disable = [ "too-many-return-statements", "too-many-statements", "duplicate-code", + "unsubscriptable-object", ] [tool.pylint.MISCELLANEOUS] diff --git a/scripts/actions/write_job_summary.sh b/scripts/actions/write_job_summary.sh index 4b45c6386..819bd4f23 100755 --- a/scripts/actions/write_job_summary.sh +++ b/scripts/actions/write_job_summary.sh @@ -11,17 +11,19 @@ VSA_PATH="${OUTPUT_DIR}/vsa.intoto.jsonl" VSA_GENERATED="${VSA_GENERATED:-false}" REPORTS_ARTIFACT_NAME="${REPORTS_ARTIFACT_NAME:-macaron-reports}" VSA_ARTIFACT_NAME="${VSA_ARTIFACT_NAME:-${REPORTS_ARTIFACT_NAME}-vsa}" -ARTIFACTS_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/artifacts" +RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" +REPORTS_ARTIFACT_URL="${REPORTS_ARTIFACT_URL:-${RUN_URL}}" +VSA_ARTIFACT_URL="${VSA_ARTIFACT_URL:-${RUN_URL}}" { echo "## Macaron GitHub Actions Vulnerability Results" echo - echo "- Database: [\`${DB_PATH}\`](${ARTIFACTS_URL})" - echo "- Policy report: [\`${POLICY_REPORT}\`](${ARTIFACTS_URL})" + echo "- Database: [\`${DB_PATH}\`](${REPORTS_ARTIFACT_URL})" + echo "- Policy report: [\`${POLICY_REPORT}\`](${REPORTS_ARTIFACT_URL})" echo "- VSA generated: \`${VSA_GENERATED}\`" - echo "- Download artifact: [\`${REPORTS_ARTIFACT_NAME}\`](${ARTIFACTS_URL})" + echo "- Download artifact: [\`${REPORTS_ARTIFACT_NAME}\`](${REPORTS_ARTIFACT_URL})" if [ "${VSA_GENERATED}" = "true" ]; then - echo "- Download VSA: [\`${VSA_ARTIFACT_NAME}\`](${ARTIFACTS_URL})" + echo "- Download VSA: [\`${VSA_ARTIFACT_NAME}\`](${VSA_ARTIFACT_URL})" fi echo } >> "${GITHUB_STEP_SUMMARY}" diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index 8da4c6ace..2a7bc8996 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ 
b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -10,7 +10,8 @@ from macaron.code_analyzer.dataflow_analysis.core import NodeForest, traverse_bfs from macaron.code_analyzer.dataflow_analysis.github import GitHubActionsWorkflowNode from macaron.parsers.bashparser_model import CallExpr, is_call_expr, is_lit, is_param_exp -from macaron.parsers.github_workflow_model import is_normal_job + +# from macaron.parsers.github_workflow_model import is_normal_job REMOTE_SCRIPT_RE = re.compile(r"(curl|wget)\s+.*\|\s*(bash|sh|tar)", re.IGNORECASE) SHA_PINNED_USES_RE = re.compile(r".+@([0-9a-f]{40})$") # commit SHA pinning @@ -30,7 +31,7 @@ } -def detect_github_actions_security_issues(nodes: NodeForest) -> list[dict[str, list[str]]]: +def detect_github_actions_security_issues(nodes: NodeForest) -> list[dict[str, str | list[str]]]: """Detect security issues across GitHub Actions workflow nodes. Parameters @@ -40,7 +41,7 @@ def detect_github_actions_security_issues(nodes: NodeForest) -> list[dict[str, l Returns ------- - list[dict[str, list[str]]] + list[dict[str, str | list[str]]] A list of workflow-level findings. Each item contains: - ``workflow_name``: workflow file path. - ``issues``: list of detected security issue messages. @@ -49,14 +50,14 @@ def detect_github_actions_security_issues(nodes: NodeForest) -> list[dict[str, l for root in nodes.root_nodes: for callee in traverse_bfs(root): if isinstance(callee, GitHubActionsWorkflowNode): - if result := analyze_workflow(callee, nodes): + if result := analyze_workflow(callee): findings.append(result) return findings def analyze_workflow( workflow_node: GitHubActionsWorkflowNode, -) -> dict[str, object] | None: +) -> dict[str, str | list[str]] | None: """Analyze a GitHub Actions workflow for security issues. Parameters @@ -121,71 +122,71 @@ def analyze_workflow( f"overbroad-permissions: PR-triggered workflow requests " f"`{scope}: {level}`." ) - # Walk jobs/steps for step-level checks. 
- jobs = wf.get("jobs", {}) if isinstance(wf.get("jobs"), dict) else {} - for job_name, job in jobs.items(): - if not is_normal_job(job): - continue - - # --- D. Self-hosted runners (new) --- - runs_on = job.get("runs-on") - if runs_on: - runs_on_str = str(runs_on) - if "self-hosted" in runs_on_str: - findings.append( - f"self-hosted-runner: Job `{job_name}` runs on self-hosted runners; " - "ensure isolation and never run untrusted PR code there." - ) - - steps = job.get("steps", []) if isinstance(job.get("steps"), list) else [] - - for step in steps: - uses = step.get("uses", "") if isinstance(step, dict) else "" - run = step.get("run", "") if isinstance(step, dict) else "" - - # --- E. Action SHA pinning (new) --- - if uses: - # Ignore local actions "./.github/actions/..." - if not uses.startswith("./") and not SHA_PINNED_USES_RE.match(uses): - # findings.append(f"unpinned-action: Job `{job_name}` uses `{uses}` not pinned to a commit SHA.") - findings.append(uses) - - # --- F. Checkout untrusted fork refs on PR event (existing, expanded) --- - if uses and "actions/checkout" in uses: - with_section = step.get("with", {}) if isinstance(step.get("with"), dict) else {} - ref = with_section.get("ref", "") - if ref in UNTRUSTED_PR_REFS and "pull_request" in on_keys: - findings.append( - f"untrusted-fork-code: Job `{job_name}` checks out " - f"untrusted fork code (`ref: {ref}`) on PR event." - ) - - # --- G. persist-credentials (new) --- - # Default is true for checkout; many orgs prefer setting false explicitly. - persist = with_section.get("persist-credentials", None) - if persist is True or (isinstance(persist, str) and persist.lower() == "true"): - findings.append( - f"persist-credentials: Job `{job_name}` uses checkout " - "with `persist-credentials: true`; may expose " - "GITHUB_TOKEN to subsequent git commands." - ) - - # --- H. 
Remote script execution: curl|bash (new heuristic) --- - if isinstance(run, str) and REMOTE_SCRIPT_RE.search(run): - findings.append( - f"remote-script-exec: Job `{job_name}` step appears to " "download and pipe to shell (`curl|bash`)." - ) - - # --- I. Extra dangerous combo: pull_request_target + checkout PR head ref (new) --- - if "pull_request_target" in on_keys and uses and "actions/checkout" in uses: - with_section = step.get("with", {}) if isinstance(step.get("with"), dict) else {} - ref = with_section.get("ref", "") - if ref in UNTRUSTED_PR_REFS: - findings.append( - f"pr-target-untrusted-checkout: Job `{job_name}` uses " - f"pull_request_target and checks out PR-controlled " - f"ref `{ref}`." - ) + # # Walk jobs/steps for step-level checks. + # jobs = wf.get("jobs", {}) if isinstance(wf.get("jobs"), dict) else {} + # for job_name, job in jobs.items(): + # if not is_normal_job(job): + # continue + + # # --- D. Self-hosted runners (new) --- + # runs_on = job.get("runs-on") + # if runs_on: + # runs_on_str = str(runs_on) + # if "self-hosted" in runs_on_str: + # findings.append( + # f"self-hosted-runner: Job `{job_name}` runs on self-hosted runners; " + # "ensure isolation and never run untrusted PR code there." + # ) + + # steps = job.get("steps", []) if isinstance(job.get("steps"), list) else [] + + # for step in steps: + # uses = step.get("uses", "") if isinstance(step, dict) else "" + # run = step.get("run", "") if isinstance(step, dict) else "" + + # # --- E. Action SHA pinning (new) --- + # if uses: + # # Ignore local actions "./.github/actions/..." + # if not uses.startswith("./") and not SHA_PINNED_USES_RE.match(uses): + # # findings.append(f"unpinned-action: Job `{job_name}` uses `{uses}` not pinned to a commit SHA.") + # findings.append(uses) + + # # --- F. 
Checkout untrusted fork refs on PR event (existing, expanded) --- + # if uses and "actions/checkout" in uses: + # with_section = step.get("with", {}) if isinstance(step.get("with"), dict) else {} + # ref = with_section.get("ref", "") + # if ref in UNTRUSTED_PR_REFS and "pull_request" in on_keys: + # findings.append( + # f"untrusted-fork-code: Job `{job_name}` checks out " + # f"untrusted fork code (`ref: {ref}`) on PR event." + # ) + + # # --- G. persist-credentials (new) --- + # # Default is true for checkout; many orgs prefer setting false explicitly. + # persist = with_section.get("persist-credentials", None) + # if persist is True or (isinstance(persist, str) and persist.lower() == "true"): + # findings.append( + # f"persist-credentials: Job `{job_name}` uses checkout " + # "with `persist-credentials: true`; may expose " + # "GITHUB_TOKEN to subsequent git commands." + # ) + + # # --- H. Remote script execution: curl|bash (new heuristic) --- + # if isinstance(run, str) and REMOTE_SCRIPT_RE.search(run): + # findings.append( + # f"remote-script-exec: Job `{job_name}` step appears to " "download and pipe to shell (`curl|bash`)." + # ) + + # # --- I. Extra dangerous combo: pull_request_target + checkout PR head ref (new) --- + # if "pull_request_target" in on_keys and uses and "actions/checkout" in uses: + # with_section = step.get("with", {}) if isinstance(step.get("with"), dict) else {} + # ref = with_section.get("ref", "") + # if ref in UNTRUSTED_PR_REFS: + # findings.append( + # f"pr-target-untrusted-checkout: Job `{job_name}` uses " + # f"pull_request_target and checks out PR-controlled " + # f"ref `{ref}`." + # ) # --- J. 
Your existing dataflow-based injection heuristic (kept) --- for node in core.traverse_bfs(workflow_node): diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py index 5e56310a6..34eca8aff 100644 --- a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py +++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py @@ -154,7 +154,9 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: # We first send a batch query to see which GitHub Actions are potentially vulnerable. # OSV's querybatch returns minimal results but this allows us to only make subsequent # queries to get vulnerability details when needed. - batch_query = [{"package": {"name": k, "ecosystem": "GitHub Actions"}} for k, _ in external_workflows.items() if k] + batch_query = [ + {"package": {"name": k, "ecosystem": "GitHub Actions"}} for k, _ in external_workflows.items() if k + ] batch_vulns = [] try: batch_vulns = OSVDevService.get_vulnerabilities_package_name_batch(batch_query) @@ -163,7 +165,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: for vuln_res in batch_vulns: vulns: list = [] - workflow_name = vuln_res["name"] + workflow_name = vuln_res["package"]["name"] try: vulns = OSVDevService.get_vulnerabilities_package_name( ecosystem="GitHub Actions", name=workflow_name diff --git a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr index 5ffd2eab3..c01a5a96f 100644 --- a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr +++ b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr @@ -3,13 +3,11 @@ list([ dict({ 'issues': list([ - "Potential injection: [{'Type': 'Lit', 'Pos': {'Offset': 184, 'Line': 7, 'Col': 17}, 'End': {'Offset': 189, 'Line': 7, 
'Col': 22}, 'ValuePos': {'Offset': 184, 'Line': 7, 'Col': 17}, 'ValueEnd': {'Offset': 189, 'Line': 7, 'Col': 22}, 'Value': 'HEAD:'}, {'Type': 'ParamExp', 'Pos': {'Offset': 189, 'Line': 7, 'Col': 22}, 'End': {'Offset': 196, 'Line': 7, 'Col': 29}, 'Dollar': {'Offset': 189, 'Line': 7, 'Col': 22}, 'Short': True, 'Param': {'Pos': {'Offset': 190, 'Line': 7, 'Col': 23}, 'End': {'Offset': 196, 'Line': 7, 'Col': 29}, 'ValuePos': {'Offset': 190, 'Line': 7, 'Col': 23}, 'ValueEnd': {'Offset': 196, 'Line': 7, 'Col': 29}, 'Value': 'github'}}, {'Type': 'Lit', 'Pos': {'Offset': 196, 'Line': 7, 'Col': 29}, 'End': {'Offset': 224, 'Line': 7, 'Col': 57}, 'ValuePos': {'Offset': 196, 'Line': 7, 'Col': 29}, 'ValueEnd': {'Offset': 224, 'Line': 7, 'Col': 57}, 'Value': '.event.pull_request.head.ref'}]", + "sensitive-trigger: Workflow uses ['pull_request_target']. Ensure strict gating (e.g., actor allowlist, branch protection, and minimal permissions).", 'privileged-trigger: Workflow uses `pull_request_target`, which runs with elevated permissions.', - 'unvalidated-input-script: Step `Run cargo fmt` runs inline shell with expressions.', - 'unvalidated-input-script: Step `Commit and push formatting changes` runs inline shell with expressions.', - 'missing-permissionsNo explicit workflow permissions defined; defaults may be overly broad.', + 'missing-permissions: No explicit workflow permissions defined; defaults may be overly broad.', ]), - 'workflow_name': 'PR Auto-format', + 'workflow_name': '/home/behnaz/research/github/macaron/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml', }), ]) # --- diff --git a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py index e14f66447..658a6ebf0 100644 --- a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py +++ b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py @@ -21,7 +21,7 
@@ ], ) def test_detect_github_actions_security_issues( - snapshot: dict, workflow_path: str, github_actions_service: GitHubActions + snapshot: list[dict[str, str | list[str]]], workflow_path: str, github_actions_service: GitHubActions ) -> None: """Test GH Actions workflows injection patterns.""" callgraph = github_actions_service.build_call_graph_for_files( diff --git a/tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl b/tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl new file mode 100644 index 000000000..8bbeba44f --- /dev/null +++ b/tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl @@ -0,0 +1,16 @@ +/* Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy( + "attest-macaron-supply-chain", + component_id, + "Require trusted VCS metadata, and non-hosted repository." +) :- + check_passed(component_id, "mcn_version_control_system_1"), + is_repo(repo_id, "github.com/oracle/macaron", component_id), + not_self_hosted_git(repo_id, _). + +apply_policy_to("attest-macaron-supply-chain", component_id) :- + is_component(component_id, _). 
From 15472da4a66d797c991ac58e864af06fdc5e7f08 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sat, 28 Mar 2026 16:30:39 +1000 Subject: [PATCH 03/30] chore: improve the action summary Signed-off-by: behnazh-w --- .github/workflows/test_macaron_action.yaml | 36 +++++++++++----------- action.yaml | 4 +++ scripts/actions/run_macaron_analysis.sh | 24 +++++++++++++-- scripts/actions/write_job_summary.sh | 35 ++++++++++----------- 4 files changed, 60 insertions(+), 39 deletions(-) diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index e61f5fb94..4368d8d3c 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -243,21 +243,21 @@ jobs: output_dir: macaron_output/exclude_include_checks/excluded upload_reports: 'false' - tutorial-upload-attestation: - name: Upload verification summary attestation - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - - name: Run Macaron (analyze macaron repo + custom policy + upload attestation) - uses: ./ - with: - repo_path: ./ - policy_file: ./tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl - policy_purl: pkg:github.com/oracle/macaron@.* - output_dir: macaron_output/upload_attestation - upload_attestation: 'true' - subject_path: ./macaron_output/upload_attestation/vsa.intoto.jsonl - require_vsa: 'true' - upload_reports: 'true' - reports_artifact_name: macaron-reports-upload-attestation - write_job_summary: 'true' + # tutorial-upload-attestation: + # name: Upload verification summary attestation + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + # - name: Run Macaron (analyze macaron repo + custom policy + upload attestation) + # uses: ./ + # with: + # repo_path: ./ + # policy_file: ./tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl + # policy_purl: 
pkg:github.com/oracle/macaron@.* + # output_dir: macaron_output/upload_attestation + # upload_attestation: 'true' + # subject_path: ./macaron_output/upload_attestation/vsa.intoto.jsonl + # require_vsa: 'true' + # upload_reports: 'true' + # reports_artifact_name: macaron-reports-upload-attestation + # write_job_summary: 'true' diff --git a/action.yaml b/action.yaml index eb0a9e2b6..a01642ee6 100644 --- a/action.yaml +++ b/action.yaml @@ -69,6 +69,9 @@ inputs: default: ${{ github.workspace }} outputs: + html_report_path: + description: Path to the generated HTML analysis report (if available). + value: ${{ steps.run-macaron-analysis.outputs.html_report_path }} report_dir: description: Directory containing HTML/JSON reports. value: ${{ steps.collect-reports.outputs.report_dir }} @@ -200,6 +203,7 @@ runs: OUTPUT_DIR: ${{ inputs.output_dir }} VSA_GENERATED: ${{ steps.collect-reports.outputs.vsa_generated }} POLICY_REPORT: ${{ steps.collect-reports.outputs.policy_report }} + HTML_REPORT_PATH: ${{ steps.run-macaron-analysis.outputs.html_report_path }} REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} VSA_ARTIFACT_NAME: ${{ steps.collect-reports.outputs.vsa_artifact_name }} REPORTS_ARTIFACT_URL: ${{ steps.upload-macaron-reports.outputs.artifact-url }} diff --git a/scripts/actions/run_macaron_analysis.sh b/scripts/actions/run_macaron_analysis.sh index 34305479c..d89a05705 100644 --- a/scripts/actions/run_macaron_analysis.sh +++ b/scripts/actions/run_macaron_analysis.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
set -euo pipefail @@ -54,4 +54,24 @@ if [ -n "${PROVENANCE_EXPECTATION:-}" ]; then fi echo "Executing: $CMD" -eval "$CMD" + +output_file="$(mktemp)" +set +e +eval "$CMD" 2>&1 | tee "$output_file" +# Capture analyze command's exit code from the pipeline (index 0), then restore fail-fast mode. +status=${PIPESTATUS[0]} +set -e + +if [ "${status}" -ne 0 ]; then + rm -f "$output_file" + exit "${status}" +fi + +if [ -n "${GITHUB_OUTPUT:-}" ]; then + html_report_path="$(sed -n 's/^HTML Report[[:space:]]\+//p' "$output_file" | tail -n 1)" + if [ -n "$html_report_path" ]; then + echo "html_report_path=${html_report_path}" >> "$GITHUB_OUTPUT" + fi +fi + +rm -f "$output_file" diff --git a/scripts/actions/write_job_summary.sh b/scripts/actions/write_job_summary.sh index 819bd4f23..603d413c4 100755 --- a/scripts/actions/write_job_summary.sh +++ b/scripts/actions/write_job_summary.sh @@ -7,23 +7,28 @@ set -euo pipefail OUTPUT_DIR="${OUTPUT_DIR:-output}" DB_PATH="${OUTPUT_DIR}/macaron.db" POLICY_REPORT="${POLICY_REPORT:-${OUTPUT_DIR}/policy_report.json}" -VSA_PATH="${OUTPUT_DIR}/vsa.intoto.jsonl" -VSA_GENERATED="${VSA_GENERATED:-false}" +HTML_REPORT_PATH="${HTML_REPORT_PATH:-}" +VSA_PATH="${VSA_PATH:-${OUTPUT_DIR}/vsa.intoto.jsonl}" REPORTS_ARTIFACT_NAME="${REPORTS_ARTIFACT_NAME:-macaron-reports}" -VSA_ARTIFACT_NAME="${VSA_ARTIFACT_NAME:-${REPORTS_ARTIFACT_NAME}-vsa}" RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" REPORTS_ARTIFACT_URL="${REPORTS_ARTIFACT_URL:-${RUN_URL}}" -VSA_ARTIFACT_URL="${VSA_ARTIFACT_URL:-${RUN_URL}}" { - echo "## Macaron GitHub Actions Vulnerability Results" + echo "## Macaron Analysis Results" echo - echo "- Database: [\`${DB_PATH}\`](${REPORTS_ARTIFACT_URL})" - echo "- Policy report: [\`${POLICY_REPORT}\`](${REPORTS_ARTIFACT_URL})" - echo "- VSA generated: \`${VSA_GENERATED}\`" - echo "- Download artifact: [\`${REPORTS_ARTIFACT_NAME}\`](${REPORTS_ARTIFACT_URL})" - if [ "${VSA_GENERATED}" = "true" ]; 
then - echo "- Download VSA: [\`${VSA_ARTIFACT_NAME}\`](${VSA_ARTIFACT_URL})" + echo "Download reports from this artifact link:" + echo "- [\`${REPORTS_ARTIFACT_NAME}\`](${REPORTS_ARTIFACT_URL})" + echo + echo "Generated files:" + if [ -n "${HTML_REPORT_PATH}" ]; then + echo "- HTML report: \`${HTML_REPORT_PATH}\`" + fi + echo "- Database: \`${DB_PATH}\`" + echo "- Policy report: \`${POLICY_REPORT}\`" + if [ -n "${VSA_PATH}" ] && [ -f "${VSA_PATH}" ]; then + echo "- Policy status: :white_check_mark: Policy verification succeeded." + else + echo "- Policy status: :x: Policy verification failed." fi echo } >> "${GITHUB_STEP_SUMMARY}" @@ -71,11 +76,3 @@ with open(summary_path, "a", encoding="utf-8") as f: f"| `{action_id}` | `{version}` | `{vuln_value}` | {caller_workflow} |\n" ) PY - -if [ -f "${VSA_PATH}" ]; then - echo >> "${GITHUB_STEP_SUMMARY}" - echo ":white_check_mark: VSA was generated at \`${VSA_PATH}\`." >> "${GITHUB_STEP_SUMMARY}" -else - echo >> "${GITHUB_STEP_SUMMARY}" - echo ":warning: VSA was not generated at \`${VSA_PATH}\`." 
>> "${GITHUB_STEP_SUMMARY}" -fi From 2a1b12311fbab8dac759bb7091f3a544a52aab07 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sat, 28 Mar 2026 17:15:36 +1000 Subject: [PATCH 04/30] chore: test check based summary Signed-off-by: behnazh-w --- .github/workflows/test_macaron_action.yaml | 24 ++ action.yaml | 1 + scripts/actions/write_job_summary.py | 309 +++++++++++++++++++++ scripts/actions/write_job_summary.sh | 69 +---- 4 files changed, 338 insertions(+), 65 deletions(-) create mode 100644 scripts/actions/write_job_summary.py diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index 4368d8d3c..92f33d087 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -261,3 +261,27 @@ jobs: # upload_reports: 'true' # reports_artifact_name: macaron-reports-upload-attestation # write_job_summary: 'true' + + test-detect-vulnerable-actions: + name: How to detect vulnerable GitHub Actions + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + + - name: Run Macaron (analyze repo - https://github.com/oracle/coherence-js-client) + uses: ./ + with: + repo_path: https://github.com/oracle/coherence-js-client + digest: 39166341bc31f75b663ff439dae36170fb3e99a9 + output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'false' + write_job_summary: 'false' + + - name: Run Macaron (verify policy - github_actions_vulns for repo) + uses: ./ + with: + policy_file: check-github-actions + output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'true' + reports_artifact_name: macaron-reports-vulnerable-actions-coherence + write_job_summary: 'true' diff --git a/action.yaml b/action.yaml index a01642ee6..f9c8432f0 100644 --- a/action.yaml +++ b/action.yaml @@ -203,6 +203,7 @@ runs: OUTPUT_DIR: ${{ inputs.output_dir }} VSA_GENERATED: ${{ steps.collect-reports.outputs.vsa_generated }} POLICY_REPORT: 
${{ steps.collect-reports.outputs.policy_report }} + POLICY_FILE: ${{ inputs.policy_file }} HTML_REPORT_PATH: ${{ steps.run-macaron-analysis.outputs.html_report_path }} REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} VSA_ARTIFACT_NAME: ${{ steps.collect-reports.outputs.vsa_artifact_name }} diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py new file mode 100644 index 000000000..e057cd979 --- /dev/null +++ b/scripts/actions/write_job_summary.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Generate GitHub Actions job summary content for Macaron action runs.""" + +from __future__ import annotations + +import json +import os +import re +import sqlite3 +from pathlib import Path + +CHECK_RESULT_DEFAULT_COLUMNS = ["id", "check_id", "passed", "component_id"] +EXISTING_POLICY_TABLE_COLUMNS: dict[str, dict[str, list[str]]] = { + "check-github-actions": { + "check_result": CHECK_RESULT_DEFAULT_COLUMNS, + "github_actions_vulnerabilities_check": [ + "id", + "github_actions_id", + "github_actions_version", + "caller_workflow", + "vulnerability_urls", + ], + } +} + + +def _env(name: str, default: str = "") -> str: + return os.environ.get(name, default) + + +def _append_line(summary_path: Path, line: str = "") -> None: + with summary_path.open("a", encoding="utf-8") as summary: + summary.write(f"{line}\n") + + +def _resolve_policy_source(policy_input: str) -> tuple[Path | None, str]: + """Resolve a policy input to either a local file or a predefined policy template path.""" + if not policy_input: + return None, "" + + candidate = Path(policy_input) + if candidate.is_file(): + return candidate, "file" + + action_path = _env("GITHUB_ACTION_PATH", "") + if action_path: + template_path = ( + Path(action_path) + / "src" + / "macaron" + / "resources" 
+ / "policies" + / "datalog" + / (f"{policy_input}.dl.template") + ) + if template_path.is_file(): + return template_path, "predefined" + + return None, "unresolved" + + +def _write_header(summary_path: Path, db_path: Path, policy_report: str, policy_file: str, html_report: str) -> None: + reports_artifact_name = _env("REPORTS_ARTIFACT_NAME", "macaron-reports") + run_url = ( + f"{_env('GITHUB_SERVER_URL', 'https://github.com')}/" + f"{_env('GITHUB_REPOSITORY')}/actions/runs/{_env('GITHUB_RUN_ID')}" + ) + reports_artifact_url = _env("REPORTS_ARTIFACT_URL", run_url) + vsa_path = _env("VSA_PATH", "output/vsa.intoto.jsonl") + policy_succeeded = bool(vsa_path) and Path(vsa_path).is_file() + + _append_line(summary_path, "## Macaron Analysis Results") + _append_line(summary_path) + _append_line(summary_path, "Download reports from this artifact link:") + _append_line(summary_path, f"- [`{reports_artifact_name}`]({reports_artifact_url})") + _append_line(summary_path) + _append_line(summary_path, "Generated files:") + if html_report: + _append_line(summary_path, f"- HTML report: `{html_report}`") + _append_line(summary_path, f"- Database: `{db_path}`") + _append_line(summary_path, f"- Policy report: `{policy_report}`") + if policy_file: + _append_line(summary_path, f"- Policy file: `{policy_file}`") + if policy_succeeded: + _append_line(summary_path, "- Policy status: :white_check_mark: Policy verification succeeded.") + else: + _append_line(summary_path, "- Policy status: :x: Policy verification failed.") + _append_line(summary_path) + + +def _write_vulnerability_table(summary_path: Path, db_path: Path) -> None: + with sqlite3.connect(db_path) as conn: + cur = conn.cursor() + cur.execute(""" + SELECT github_actions_id, github_actions_version, vulnerability_urls, caller_workflow + FROM github_actions_vulnerabilities_check + ORDER BY id + """) + rows = cur.fetchall() + + if not rows: + _append_line(summary_path, ":white_check_mark: No vulnerable GitHub Actions detected.") + 
return + + _append_line(summary_path, "| Action | Version | Vulnerabilities | Workflow |") + _append_line(summary_path, "|---|---|---|---|") + for action_id, version, vulnerability_urls, caller_workflow in rows: + vuln_value = vulnerability_urls + try: + parsed = json.loads(vulnerability_urls) + if isinstance(parsed, list): + vuln_value = ", ".join(parsed) + except (json.JSONDecodeError, TypeError): + pass + _append_line(summary_path, f"| `{action_id}` | `{version}` | `{vuln_value}` | {caller_workflow} |") + + +def _parse_policy_checks(policy_file: Path) -> tuple[list[str], list[str]]: + policy_text = policy_file.read_text(encoding="utf-8") + check_relations = sorted(set(re.findall(r"\b(check_[A-Za-z0-9_]+)\s*\(", policy_text))) + policy_check_ids = sorted(set(re.findall(r'"(mcn_[a-zA-Z0-9_]+)"', policy_text))) + return check_relations, policy_check_ids + + +def _resolve_existing_table(conn: sqlite3.Connection, table_name: str) -> str | None: + """Resolve a logical table name to an existing SQLite table name.""" + candidates = [table_name] + if not table_name.startswith("_"): + candidates.append(f"_{table_name}") + + cur = conn.cursor() + for candidate in candidates: + cur.execute("SELECT 1 FROM sqlite_master WHERE type IN ('table', 'view') AND name = ? LIMIT 1", (candidate,)) + if cur.fetchone(): + return candidate + return None + + +def _get_existing_columns(conn: sqlite3.Connection, table_name: str) -> list[str]: + cur = conn.cursor() + cur.execute(f"PRAGMA table_info({table_name})") + return [row[1] for row in cur.fetchall()] + + +def _query_selected_columns( + conn: sqlite3.Connection, + table_name: str, + desired_columns: list[str], + where_clause: str = "", + params: tuple[object, ...] 
= (), +) -> tuple[list[str], list[tuple]]: + available = _get_existing_columns(conn, table_name) + selected = [c for c in desired_columns if c in available] + if not selected: + return [], [] + + sql = f"SELECT {', '.join(selected)} FROM {table_name}" + if where_clause: + sql = f"{sql} WHERE {where_clause}" + sql = f"{sql} ORDER BY 1" + cur = conn.cursor() + cur.execute(sql, params) + return selected, cur.fetchall() + + +def _write_markdown_table(summary_path: Path, columns: list[str], rows: list[tuple]) -> bool: + if not columns or not rows: + return False + + _append_line(summary_path, f"| {' | '.join(columns)} |") + _append_line(summary_path, f"|{'|'.join(['---'] * len(columns))}|") + for row in rows: + values = [f"`{value}`" for value in row] + _append_line(summary_path, f"| {' | '.join(values)} |") + return True + + +def _write_policy_check_lists(summary_path: Path, check_relations: list[str], policy_check_ids: list[str]) -> None: + if check_relations: + _append_line( + summary_path, + f"- `check_*` relations in policy: {', '.join(f'`{name}`' for name in check_relations)}", + ) + + if policy_check_ids: + _append_line( + summary_path, + f"- `mcn_*` checks referenced in policy: {', '.join(f'`{name}`' for name in policy_check_ids)}", + ) + + +def _write_custom_policy_failure_diagnostics(summary_path: Path, db_path: Path, policy_file: Path) -> None: + check_relations, policy_check_ids = _parse_policy_checks(policy_file) + has_details = False + + _append_line(summary_path) + _append_line(summary_path, "### Policy Failure Diagnostics") + _write_policy_check_lists(summary_path, check_relations, policy_check_ids) + if check_relations or policy_check_ids: + has_details = True + + if not policy_check_ids: + if not has_details: + _append_line(summary_path, "- Additional check-level details are unavailable for this failure.") + return + + with sqlite3.connect(db_path) as conn: + resolved = _resolve_existing_table(conn, "check_result") + if not resolved: + if not 
has_details: + _append_line(summary_path, "- Additional check-level details are unavailable for this failure.") + return + placeholders = ",".join(["?"] * len(policy_check_ids)) + cols, rows = _query_selected_columns( + conn, + resolved, + CHECK_RESULT_DEFAULT_COLUMNS, + where_clause=f"check_id IN ({placeholders})", + params=tuple(policy_check_ids), + ) + + _append_line(summary_path) + _append_line(summary_path, "#### check_result") + if _write_markdown_table(summary_path, cols, rows): + has_details = True + else: + # Remove empty section header and provide a single friendly fallback below. + _append_line(summary_path, "- Additional check-level details are unavailable for this failure.") + + +def _write_existing_policy_failure_diagnostics( + summary_path: Path, db_path: Path, policy_name: str, policy_file: Path +) -> None: + check_relations, policy_check_ids = _parse_policy_checks(policy_file) + table_config = EXISTING_POLICY_TABLE_COLUMNS.get(policy_name, {"check_result": CHECK_RESULT_DEFAULT_COLUMNS}) + has_details = False + + _append_line(summary_path) + _append_line(summary_path, f"### Policy Failure Diagnostics ({policy_name})") + _write_policy_check_lists(summary_path, check_relations, policy_check_ids) + if check_relations or policy_check_ids: + has_details = True + + with sqlite3.connect(db_path) as conn: + for logical_table, desired_columns in table_config.items(): + resolved = _resolve_existing_table(conn, logical_table) + if not resolved: + continue + + where_clause = "" + params: tuple[object, ...] 
= () + if logical_table == "check_result" and policy_check_ids: + placeholders = ",".join(["?"] * len(policy_check_ids)) + where_clause = f"check_id IN ({placeholders})" + params = tuple(policy_check_ids) + cols, rows = _query_selected_columns(conn, resolved, desired_columns, where_clause, params) + if cols and rows: + _append_line(summary_path) + _append_line(summary_path, f"#### {logical_table}") + if _write_markdown_table(summary_path, cols, rows): + has_details = True + + if not has_details: + _append_line(summary_path, "- Additional check-level details are unavailable for this failure.") + + +def main() -> None: + output_dir = Path(_env("OUTPUT_DIR", "output")) + db_path = Path(_env("DB_PATH", str(output_dir / "macaron.db"))) + policy_report = _env("POLICY_REPORT", str(output_dir / "policy_report.json")) + policy_file_value = _env("POLICY_FILE", "") + resolved_policy_file, policy_mode = _resolve_policy_source(policy_file_value) + policy_label = "" + if policy_mode == "file" and resolved_policy_file: + policy_label = str(resolved_policy_file) + elif policy_mode == "predefined" and resolved_policy_file: + policy_label = f"{policy_file_value} (predefined template: {resolved_policy_file})" + elif policy_mode == "unresolved": + policy_label = f"{policy_file_value} (unresolved)" + html_report = _env("HTML_REPORT_PATH", "") + vsa_path_value = _env("VSA_PATH", str(output_dir / "vsa.intoto.jsonl")) + vsa_path = Path(vsa_path_value) if vsa_path_value else None + + summary_output = _env("GITHUB_STEP_SUMMARY") + if not summary_output: + raise RuntimeError("GITHUB_STEP_SUMMARY is not set.") + summary_path = Path(summary_output) + + _write_header(summary_path, db_path, policy_report, policy_label, html_report) + + if not db_path.is_file(): + _append_line(summary_path, ":warning: Macaron database was not generated.") + return + + _write_vulnerability_table(summary_path, db_path) + + if (not vsa_path or not vsa_path.is_file()) and resolved_policy_file and 
resolved_policy_file.is_file(): + if policy_mode == "predefined": + _write_existing_policy_failure_diagnostics(summary_path, db_path, policy_file_value, resolved_policy_file) + else: + _write_custom_policy_failure_diagnostics(summary_path, db_path, resolved_policy_file) + + +if __name__ == "__main__": + main() diff --git a/scripts/actions/write_job_summary.sh b/scripts/actions/write_job_summary.sh index 603d413c4..b5cab46dd 100755 --- a/scripts/actions/write_job_summary.sh +++ b/scripts/actions/write_job_summary.sh @@ -5,74 +5,13 @@ set -euo pipefail OUTPUT_DIR="${OUTPUT_DIR:-output}" -DB_PATH="${OUTPUT_DIR}/macaron.db" +DB_PATH="${DB_PATH:-${OUTPUT_DIR}/macaron.db}" POLICY_REPORT="${POLICY_REPORT:-${OUTPUT_DIR}/policy_report.json}" +POLICY_FILE="${POLICY_FILE:-}" HTML_REPORT_PATH="${HTML_REPORT_PATH:-}" VSA_PATH="${VSA_PATH:-${OUTPUT_DIR}/vsa.intoto.jsonl}" REPORTS_ARTIFACT_NAME="${REPORTS_ARTIFACT_NAME:-macaron-reports}" RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" REPORTS_ARTIFACT_URL="${REPORTS_ARTIFACT_URL:-${RUN_URL}}" - -{ - echo "## Macaron Analysis Results" - echo - echo "Download reports from this artifact link:" - echo "- [\`${REPORTS_ARTIFACT_NAME}\`](${REPORTS_ARTIFACT_URL})" - echo - echo "Generated files:" - if [ -n "${HTML_REPORT_PATH}" ]; then - echo "- HTML report: \`${HTML_REPORT_PATH}\`" - fi - echo "- Database: \`${DB_PATH}\`" - echo "- Policy report: \`${POLICY_REPORT}\`" - if [ -n "${VSA_PATH}" ] && [ -f "${VSA_PATH}" ]; then - echo "- Policy status: :white_check_mark: Policy verification succeeded." - else - echo "- Policy status: :x: Policy verification failed." - fi - echo -} >> "${GITHUB_STEP_SUMMARY}" - -if [ ! -f "${DB_PATH}" ]; then - echo ":warning: Macaron database was not generated." 
>> "${GITHUB_STEP_SUMMARY}" - exit 0 -fi - -python - <<'PY' -import json -import os -import sqlite3 - -db_path = os.path.join(os.environ.get("OUTPUT_DIR", "output"), "macaron.db") -summary_path = os.environ["GITHUB_STEP_SUMMARY"] - -with sqlite3.connect(db_path) as conn: - cur = conn.cursor() - cur.execute( - """ - SELECT github_actions_id, github_actions_version, vulnerability_urls, caller_workflow - FROM github_actions_vulnerabilities_check - ORDER BY id - """ - ) - rows = cur.fetchall() - -with open(summary_path, "a", encoding="utf-8") as f: - if not rows: - f.write(":white_check_mark: No vulnerable GitHub Actions detected.\n") - else: - f.write("| Action | Version | Vulnerabilities | Workflow |\n") - f.write("|---|---|---|---|\n") - for action_id, version, vulnerability_urls, caller_workflow in rows: - vuln_value = vulnerability_urls - try: - parsed = json.loads(vulnerability_urls) - if isinstance(parsed, list): - vuln_value = ", ".join(parsed) - except (json.JSONDecodeError, TypeError): - pass - - f.write( - f"| `{action_id}` | `{version}` | `{vuln_value}` | {caller_workflow} |\n" - ) -PY +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +python "${SCRIPT_DIR}/write_job_summary.py" From 37e0917a1fd6231507e757d85132198ef17cf06e Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 08:35:29 +1000 Subject: [PATCH 05/30] feat: add sql files for existing policies Signed-off-by: behnazh-w --- Makefile | 4 +- scripts/actions/write_job_summary.py | 99 ++++++------------- .../policies/sql/check-github-actions.sql | 16 +++ .../sql/malware-detection-dependencies.sql | 16 +++ .../policies/sql/malware-detection.sql | 16 +++ 5 files changed, 80 insertions(+), 71 deletions(-) create mode 100644 src/macaron/resources/policies/sql/check-github-actions.sql create mode 100644 src/macaron/resources/policies/sql/malware-detection-dependencies.sql create mode 100644 src/macaron/resources/policies/sql/malware-detection.sql diff --git a/Makefile b/Makefile index 
13394ddb3..e32313f10 100644 --- a/Makefile +++ b/Makefile @@ -386,10 +386,10 @@ integration-test-update: # set to the build date/epoch. For more details, see: https://flit.pypa.io/en/latest/reproducible.html .PHONY: dist dist: dist/$(PACKAGE_WHEEL_DIST_NAME).whl dist/$(PACKAGE_SDIST_NAME).tar.gz dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-docs-html.zip dist/$(PACKAGE_WHEEL_DIST_NAME)-build-epoch.txt -dist/$(PACKAGE_WHEEL_DIST_NAME).whl: check test integration-test +dist/$(PACKAGE_WHEEL_DIST_NAME).whl: check SOURCE_DATE_EPOCH=$(SOURCE_DATE_EPOCH) flit build --setup-py --format wheel mv dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-py3-none-any.whl dist/$(PACKAGE_WHEEL_DIST_NAME).whl -dist/$(PACKAGE_SDIST_NAME).tar.gz: check test integration-test +dist/$(PACKAGE_SDIST_NAME).tar.gz: check SOURCE_DATE_EPOCH=$(SOURCE_DATE_EPOCH) flit build --setup-py --format sdist dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-docs-html.zip: docs python -m zipfile -c dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-docs-html.zip docs/_build/html diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py index e057cd979..b98721be0 100644 --- a/scripts/actions/write_job_summary.py +++ b/scripts/actions/write_job_summary.py @@ -7,25 +7,12 @@ from __future__ import annotations -import json import os import re import sqlite3 from pathlib import Path CHECK_RESULT_DEFAULT_COLUMNS = ["id", "check_id", "passed", "component_id"] -EXISTING_POLICY_TABLE_COLUMNS: dict[str, dict[str, list[str]]] = { - "check-github-actions": { - "check_result": CHECK_RESULT_DEFAULT_COLUMNS, - "github_actions_vulnerabilities_check": [ - "id", - "github_actions_id", - "github_actions_version", - "caller_workflow", - "vulnerability_urls", - ], - } -} def _env(name: str, default: str = "") -> str: @@ -63,6 +50,15 @@ def _resolve_policy_source(policy_input: str) -> tuple[Path | None, str]: return None, "unresolved" +def _resolve_existing_policy_sql(policy_name: str) -> Path | None: + """Resolve SQL diagnostics query 
for a predefined policy name.""" + action_path = _env("GITHUB_ACTION_PATH", "") + if not action_path: + return None + sql_path = Path(action_path) / "src" / "macaron" / "resources" / "policies" / "sql" / f"{policy_name}.sql" + return sql_path if sql_path.is_file() else None + + def _write_header(summary_path: Path, db_path: Path, policy_report: str, policy_file: str, html_report: str) -> None: reports_artifact_name = _env("REPORTS_ARTIFACT_NAME", "macaron-reports") run_url = ( @@ -92,33 +88,6 @@ def _write_header(summary_path: Path, db_path: Path, policy_report: str, policy_ _append_line(summary_path) -def _write_vulnerability_table(summary_path: Path, db_path: Path) -> None: - with sqlite3.connect(db_path) as conn: - cur = conn.cursor() - cur.execute(""" - SELECT github_actions_id, github_actions_version, vulnerability_urls, caller_workflow - FROM github_actions_vulnerabilities_check - ORDER BY id - """) - rows = cur.fetchall() - - if not rows: - _append_line(summary_path, ":white_check_mark: No vulnerable GitHub Actions detected.") - return - - _append_line(summary_path, "| Action | Version | Vulnerabilities | Workflow |") - _append_line(summary_path, "|---|---|---|---|") - for action_id, version, vulnerability_urls, caller_workflow in rows: - vuln_value = vulnerability_urls - try: - parsed = json.loads(vulnerability_urls) - if isinstance(parsed, list): - vuln_value = ", ".join(parsed) - except (json.JSONDecodeError, TypeError): - pass - _append_line(summary_path, f"| `{action_id}` | `{version}` | `{vuln_value}` | {caller_workflow} |") - - def _parse_policy_checks(policy_file: Path) -> tuple[list[str], list[str]]: policy_text = policy_file.read_text(encoding="utf-8") check_relations = sorted(set(re.findall(r"\b(check_[A-Za-z0-9_]+)\s*\(", policy_text))) @@ -167,6 +136,14 @@ def _query_selected_columns( return selected, cur.fetchall() +def _query_sql(conn: sqlite3.Connection, sql_query: str) -> tuple[list[str], list[tuple]]: + cur = conn.cursor() + 
cur.execute(sql_query) + rows = cur.fetchall() + columns = [col[0] for col in (cur.description or [])] + return columns, rows + + def _write_markdown_table(summary_path: Path, columns: list[str], rows: list[tuple]) -> bool: if not columns or not rows: return False @@ -179,12 +156,7 @@ def _write_markdown_table(summary_path: Path, columns: list[str], rows: list[tup return True -def _write_policy_check_lists(summary_path: Path, check_relations: list[str], policy_check_ids: list[str]) -> None: - if check_relations: - _append_line( - summary_path, - f"- `check_*` relations in policy: {', '.join(f'`{name}`' for name in check_relations)}", - ) +def _write_policy_check_lists(summary_path: Path, policy_check_ids: list[str]) -> None: if policy_check_ids: _append_line( @@ -199,7 +171,7 @@ def _write_custom_policy_failure_diagnostics(summary_path: Path, db_path: Path, _append_line(summary_path) _append_line(summary_path, "### Policy Failure Diagnostics") - _write_policy_check_lists(summary_path, check_relations, policy_check_ids) + _write_policy_check_lists(summary_path, policy_check_ids) if check_relations or policy_check_ids: has_details = True @@ -236,33 +208,24 @@ def _write_existing_policy_failure_diagnostics( summary_path: Path, db_path: Path, policy_name: str, policy_file: Path ) -> None: check_relations, policy_check_ids = _parse_policy_checks(policy_file) - table_config = EXISTING_POLICY_TABLE_COLUMNS.get(policy_name, {"check_result": CHECK_RESULT_DEFAULT_COLUMNS}) has_details = False _append_line(summary_path) _append_line(summary_path, f"### Policy Failure Diagnostics ({policy_name})") - _write_policy_check_lists(summary_path, check_relations, policy_check_ids) + _write_policy_check_lists(summary_path, policy_check_ids) if check_relations or policy_check_ids: has_details = True - with sqlite3.connect(db_path) as conn: - for logical_table, desired_columns in table_config.items(): - resolved = _resolve_existing_table(conn, logical_table) - if not resolved: - continue 
- - where_clause = "" - params: tuple[object, ...] = () - if logical_table == "check_result" and policy_check_ids: - placeholders = ",".join(["?"] * len(policy_check_ids)) - where_clause = f"check_id IN ({placeholders})" - params = tuple(policy_check_ids) - cols, rows = _query_selected_columns(conn, resolved, desired_columns, where_clause, params) - if cols and rows: - _append_line(summary_path) - _append_line(summary_path, f"#### {logical_table}") - if _write_markdown_table(summary_path, cols, rows): - has_details = True + sql_path = _resolve_existing_policy_sql(policy_name) + if sql_path: + sql_query = sql_path.read_text(encoding="utf-8") + with sqlite3.connect(db_path) as conn: + cols, rows = _query_sql(conn, sql_query) + if cols and rows: + _append_line(summary_path) + _append_line(summary_path, f"#### SQL Results ({sql_path.name})") + if _write_markdown_table(summary_path, cols, rows): + has_details = True if not has_details: _append_line(summary_path, "- Additional check-level details are unavailable for this failure.") @@ -296,8 +259,6 @@ def main() -> None: _append_line(summary_path, ":warning: Macaron database was not generated.") return - _write_vulnerability_table(summary_path, db_path) - if (not vsa_path or not vsa_path.is_file()) and resolved_policy_file and resolved_policy_file.is_file(): if policy_mode == "predefined": _write_existing_policy_failure_diagnostics(summary_path, db_path, policy_file_value, resolved_policy_file) diff --git a/src/macaron/resources/policies/sql/check-github-actions.sql b/src/macaron/resources/policies/sql/check-github-actions.sql new file mode 100644 index 000000000..b1071576c --- /dev/null +++ b/src/macaron/resources/policies/sql/check-github-actions.sql @@ -0,0 +1,16 @@ +-- Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +-- Failed check facts for check-github-actions policy template. 
+SELECT + c.id AS component_id, + c.purl AS component_purl, + gha.* +FROM github_actions_vulnerabilities_check AS gha +JOIN check_facts AS cf + ON cf.id = gha.id +JOIN check_result AS cr + ON cr.id = cf.check_result_id +JOIN component AS c + ON cr.component_id = c.id +WHERE cr.passed = 0; diff --git a/src/macaron/resources/policies/sql/malware-detection-dependencies.sql b/src/macaron/resources/policies/sql/malware-detection-dependencies.sql new file mode 100644 index 000000000..aebe483eb --- /dev/null +++ b/src/macaron/resources/policies/sql/malware-detection-dependencies.sql @@ -0,0 +1,16 @@ +-- Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +-- Failed check facts for malware-detection-dependencies policy template. +SELECT + c.id AS component_id, + c.purl AS component_purl, + md.* +FROM detect_malicious_metadata_check AS md +JOIN check_facts AS cf + ON cf.id = md.id +JOIN check_result AS cr + ON cr.id = cf.check_result_id +JOIN component AS c + ON cr.component_id = c.id + AND cr.passed = 0; diff --git a/src/macaron/resources/policies/sql/malware-detection.sql b/src/macaron/resources/policies/sql/malware-detection.sql new file mode 100644 index 000000000..89b023e5b --- /dev/null +++ b/src/macaron/resources/policies/sql/malware-detection.sql @@ -0,0 +1,16 @@ +-- Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +-- Failed check facts for malware-detection policy template. 
+SELECT + c.id AS component_id, + c.purl AS component_purl, + md.* +FROM detect_malicious_metadata_check AS md +JOIN check_facts AS cf + ON cf.id = md.id +JOIN check_result AS cr + ON cr.id = cf.check_result_id +JOIN component AS c + ON cr.component_id = c.id + AND cr.passed = 0; From 85d80192c721f2ed445451d2e6dff4e68b7b1e65 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 10:02:56 +1000 Subject: [PATCH 06/30] chore: refine the summary output Signed-off-by: behnazh-w --- .github/workflows/test_macaron_action.yaml | 2 +- action.yaml | 17 +----- scripts/actions/run_macaron_analysis.sh | 2 +- scripts/actions/write_job_summary.py | 54 ++++++++++++------- scripts/actions/write_job_summary.sh | 1 + .../policies/sql/check-github-actions.sql | 26 +++++---- .../sql/malware-detection-dependencies.sql | 25 +++++---- .../policies/sql/malware-detection.sql | 25 +++++---- 8 files changed, 83 insertions(+), 69 deletions(-) diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index 92f33d087..c5d5612cb 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -283,5 +283,5 @@ jobs: policy_file: check-github-actions output_dir: macaron_output/detect_vulnerable_github_actions upload_reports: 'true' - reports_artifact_name: macaron-reports-vulnerable-actions-coherence + reports_artifact_name: macaron-vulnerable-actions-fail-diagnosis write_job_summary: 'true' diff --git a/action.yaml b/action.yaml index f9c8432f0..05c83de41 100644 --- a/action.yaml +++ b/action.yaml @@ -87,9 +87,6 @@ outputs: vsa_generated: description: Whether VSA was generated. value: ${{ steps.collect-reports.outputs.vsa_generated }} - vsa_artifact_name: - description: Name of the uploaded VSA artifact (if generated). 
- value: ${{ steps.collect-reports.outputs.vsa_artifact_name }} runs: using: composite @@ -159,7 +156,6 @@ runs: echo "policy_report=${POLICY_REPORT}" >> "${GITHUB_OUTPUT}" echo "vsa_report=${VSA_VALUE}" >> "${GITHUB_OUTPUT}" echo "vsa_generated=${VSA_GENERATED}" >> "${GITHUB_OUTPUT}" - echo "vsa_artifact_name=${REPORTS_ARTIFACT_NAME}-vsa" >> "${GITHUB_OUTPUT}" { echo "reports_path<> "$GITHUB_OUTPUT" fi diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py index b98721be0..d5f4d5a43 100644 --- a/scripts/actions/write_job_summary.py +++ b/scripts/actions/write_job_summary.py @@ -59,7 +59,15 @@ def _resolve_existing_policy_sql(policy_name: str) -> Path | None: return sql_path if sql_path.is_file() else None -def _write_header(summary_path: Path, db_path: Path, policy_report: str, policy_file: str, html_report: str) -> None: +def _write_header( + summary_path: Path, + db_path: Path, + policy_report: str, + policy_file: str, + html_report: str, + policy_provided: bool, +) -> None: + upload_reports = _env("UPLOAD_REPORTS", "true").lower() == "true" reports_artifact_name = _env("REPORTS_ARTIFACT_NAME", "macaron-reports") run_url = ( f"{_env('GITHUB_SERVER_URL', 'https://github.com')}/" @@ -71,20 +79,29 @@ def _write_header(summary_path: Path, db_path: Path, policy_report: str, policy_ _append_line(summary_path, "## Macaron Analysis Results") _append_line(summary_path) - _append_line(summary_path, "Download reports from this artifact link:") - _append_line(summary_path, f"- [`{reports_artifact_name}`]({reports_artifact_url})") - _append_line(summary_path) - _append_line(summary_path, "Generated files:") - if html_report: - _append_line(summary_path, f"- HTML report: `{html_report}`") - _append_line(summary_path, f"- Database: `{db_path}`") - _append_line(summary_path, f"- Policy report: `{policy_report}`") - if policy_file: - _append_line(summary_path, f"- Policy file: `{policy_file}`") - if policy_succeeded: - _append_line(summary_path, 
"- Policy status: :white_check_mark: Policy verification succeeded.") + if upload_reports: + _append_line(summary_path, "Download reports from this artifact link:") + _append_line(summary_path, f"- [`{reports_artifact_name}`]({reports_artifact_url})") + _append_line(summary_path) + _append_line(summary_path, "Generated files:") + if html_report: + _append_line(summary_path, f"- HTML report: `{html_report}`") + _append_line(summary_path, f"- Database: `{db_path}`") + if policy_provided: + _append_line(summary_path, f"- Policy report: `{policy_report}`") + _append_line(summary_path) + + if policy_provided: + _append_line(summary_path, "Policy:") + if policy_file: + _append_line(summary_path, f"- Policy file: `{policy_file}`") + if policy_succeeded: + _append_line(summary_path, "- Policy status: :white_check_mark: Policy verification succeeded.") + else: + _append_line(summary_path, "- Policy status: :x: Policy verification failed.") else: - _append_line(summary_path, "- Policy status: :x: Policy verification failed.") + _append_line(summary_path, "Policy:") + _append_line(summary_path, "- No policy was provided.") _append_line(summary_path) @@ -161,7 +178,7 @@ def _write_policy_check_lists(summary_path: Path, policy_check_ids: list[str]) - if policy_check_ids: _append_line( summary_path, - f"- `mcn_*` checks referenced in policy: {', '.join(f'`{name}`' for name in policy_check_ids)}", + f"- Checks referenced in policy: {', '.join(f'`{name}`' for name in policy_check_ids)}", ) @@ -223,7 +240,7 @@ def _write_existing_policy_failure_diagnostics( cols, rows = _query_sql(conn, sql_query) if cols and rows: _append_line(summary_path) - _append_line(summary_path, f"#### SQL Results ({sql_path.name})") + _append_line(summary_path, f"#### Results") if _write_markdown_table(summary_path, cols, rows): has_details = True @@ -241,7 +258,7 @@ def main() -> None: if policy_mode == "file" and resolved_policy_file: policy_label = str(resolved_policy_file) elif policy_mode == 
"predefined" and resolved_policy_file: - policy_label = f"{policy_file_value} (predefined template: {resolved_policy_file})" + policy_label = f"{policy_file_value}" elif policy_mode == "unresolved": policy_label = f"{policy_file_value} (unresolved)" html_report = _env("HTML_REPORT_PATH", "") @@ -253,7 +270,8 @@ def main() -> None: raise RuntimeError("GITHUB_STEP_SUMMARY is not set.") summary_path = Path(summary_output) - _write_header(summary_path, db_path, policy_report, policy_label, html_report) + policy_provided = bool(policy_file_value.strip()) + _write_header(summary_path, db_path, policy_report, policy_label, html_report, policy_provided) if not db_path.is_file(): _append_line(summary_path, ":warning: Macaron database was not generated.") diff --git a/scripts/actions/write_job_summary.sh b/scripts/actions/write_job_summary.sh index b5cab46dd..432069c59 100755 --- a/scripts/actions/write_job_summary.sh +++ b/scripts/actions/write_job_summary.sh @@ -10,6 +10,7 @@ POLICY_REPORT="${POLICY_REPORT:-${OUTPUT_DIR}/policy_report.json}" POLICY_FILE="${POLICY_FILE:-}" HTML_REPORT_PATH="${HTML_REPORT_PATH:-}" VSA_PATH="${VSA_PATH:-${OUTPUT_DIR}/vsa.intoto.jsonl}" +UPLOAD_REPORTS="${UPLOAD_REPORTS:-true}" REPORTS_ARTIFACT_NAME="${REPORTS_ARTIFACT_NAME:-macaron-reports}" RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" REPORTS_ARTIFACT_URL="${REPORTS_ARTIFACT_URL:-${RUN_URL}}" diff --git a/src/macaron/resources/policies/sql/check-github-actions.sql b/src/macaron/resources/policies/sql/check-github-actions.sql index b1071576c..761f74ea8 100644 --- a/src/macaron/resources/policies/sql/check-github-actions.sql +++ b/src/macaron/resources/policies/sql/check-github-actions.sql @@ -3,14 +3,18 @@ -- Failed check facts for check-github-actions policy template. 
SELECT - c.id AS component_id, - c.purl AS component_purl, - gha.* -FROM github_actions_vulnerabilities_check AS gha -JOIN check_facts AS cf - ON cf.id = gha.id -JOIN check_result AS cr - ON cr.id = cf.check_result_id -JOIN component AS c - ON cr.component_id = c.id -WHERE cr.passed = 0; + analysis.analysis_time, + gha_check.vulnerability_urls as vulnerability, + gha_check.github_actions_id as third-party_action_name, + gha_check.github_actions_version as third-party_action_version, + gha_check.caller_workflow as vulnerable_workflow +FROM github_actions_vulnerabilities_check as gha_check +JOIN check_facts + ON check_facts.id = gha_check.id +JOIN check_result + ON check_result.id = check_facts.check_result_id +JOIN component + ON check_result.component_id = component.id +JOIN analysis + ON analysis.id = component.analysis_id +WHERE check_result.passed = 0; diff --git a/src/macaron/resources/policies/sql/malware-detection-dependencies.sql b/src/macaron/resources/policies/sql/malware-detection-dependencies.sql index aebe483eb..028b2445c 100644 --- a/src/macaron/resources/policies/sql/malware-detection-dependencies.sql +++ b/src/macaron/resources/policies/sql/malware-detection-dependencies.sql @@ -3,14 +3,17 @@ -- Failed check facts for malware-detection-dependencies policy template. 
SELECT - c.id AS component_id, - c.purl AS component_purl, - md.* -FROM detect_malicious_metadata_check AS md -JOIN check_facts AS cf - ON cf.id = md.id -JOIN check_result AS cr - ON cr.id = cf.check_result_id -JOIN component AS c - ON cr.component_id = c.id - AND cr.passed = 0; + analysis.analysis_time, + component.id component_id, + component.purl component_purl, + detect_malicious_metadata_check.* +FROM detect_malicious_metadata_check +JOIN check_facts + ON check_facts.id = detect_malicious_metadata_check.id +JOIN check_result + ON check_result.id = check_facts.check_result_id +JOIN component + ON check_result.component_id = component.id +JOIN analysis + ON analysis.id = component.analysis_id + AND check_result.passed = 0; diff --git a/src/macaron/resources/policies/sql/malware-detection.sql b/src/macaron/resources/policies/sql/malware-detection.sql index 89b023e5b..a6597e89a 100644 --- a/src/macaron/resources/policies/sql/malware-detection.sql +++ b/src/macaron/resources/policies/sql/malware-detection.sql @@ -3,14 +3,17 @@ -- Failed check facts for malware-detection policy template. 
SELECT - c.id AS component_id, - c.purl AS component_purl, - md.* -FROM detect_malicious_metadata_check AS md -JOIN check_facts AS cf - ON cf.id = md.id -JOIN check_result AS cr - ON cr.id = cf.check_result_id -JOIN component AS c - ON cr.component_id = c.id - AND cr.passed = 0; + analysis.analysis_time, + component.id component_id, + component.purl component_purl, + detect_malicious_metadata_check.* +FROM detect_malicious_metadata_check +JOIN check_facts + ON check_facts.id = detect_malicious_metadata_check.id +JOIN check_result + ON check_result.id = check_facts.check_result_id +JOIN component + ON check_result.component_id = component.id +JOIN analysis + ON analysis.id = component.analysis_id + AND check_result.passed = 0; From 42558dd57bbddb8de7c0efe4c3a33654103f05e1 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 10:29:14 +1000 Subject: [PATCH 07/30] fix: fix issues in summaries Signed-off-by: behnazh-w --- scripts/actions/run_macaron_analysis.sh | 6 ++++- scripts/actions/write_job_summary.py | 22 ++++++++++++++++--- .../policies/sql/check-github-actions.sql | 10 ++++----- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/scripts/actions/run_macaron_analysis.sh b/scripts/actions/run_macaron_analysis.sh index a43f11058..ccde3e646 100644 --- a/scripts/actions/run_macaron_analysis.sh +++ b/scripts/actions/run_macaron_analysis.sh @@ -68,7 +68,11 @@ if [ "${status}" -ne 0 ]; then fi if [ -n "${GITHUB_OUTPUT:-}" ]; then - html_report_path="$(sed -n 's/^[[:space:]]*HTML[[:space:]]\+Report[[:space:]]\+//p' "$output_file" | tail -n 1)" + html_report_path="$( + sed -n 's/^[[:space:]]*HTML[[:space:]]\+Report[[:space:]]\+//p' "$output_file" \ + | sed 's/[[:space:]]*$//' \ + | tail -n 1 + )" if [ -n "$html_report_path" ]; then echo "html_report_path=${html_report_path}" >> "$GITHUB_OUTPUT" fi diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py index d5f4d5a43..8cb6097dc 100644 --- 
a/scripts/actions/write_job_summary.py +++ b/scripts/actions/write_job_summary.py @@ -68,14 +68,19 @@ def _write_header( policy_provided: bool, ) -> None: upload_reports = _env("UPLOAD_REPORTS", "true").lower() == "true" + output_dir = _env("OUTPUT_DIR", "output") reports_artifact_name = _env("REPORTS_ARTIFACT_NAME", "macaron-reports") run_url = ( f"{_env('GITHUB_SERVER_URL', 'https://github.com')}/" f"{_env('GITHUB_REPOSITORY')}/actions/runs/{_env('GITHUB_RUN_ID')}" ) reports_artifact_url = _env("REPORTS_ARTIFACT_URL", run_url) - vsa_path = _env("VSA_PATH", "output/vsa.intoto.jsonl") - policy_succeeded = bool(vsa_path) and Path(vsa_path).is_file() + vsa_generated = _env("VSA_GENERATED", "").lower() + if vsa_generated in {"true", "false"}: + policy_succeeded = vsa_generated == "true" + else: + vsa_path = _env("VSA_PATH", f"{output_dir}/vsa.intoto.jsonl") + policy_succeeded = bool(vsa_path) and Path(vsa_path).is_file() _append_line(summary_path, "## Macaron Analysis Results") _append_line(summary_path) @@ -154,8 +159,19 @@ def _query_selected_columns( def _query_sql(conn: sqlite3.Connection, sql_query: str) -> tuple[list[str], list[tuple]]: + # Python's sqlite cursor.execute() can fail when the SQL begins with line comments. + # Strip leading SQL line comments while preserving the query body. 
+ sanitized_lines = [] + for line in sql_query.splitlines(): + if line.lstrip().startswith("--"): + continue + sanitized_lines.append(line) + sanitized_query = "\n".join(sanitized_lines).strip() + if not sanitized_query: + return [], [] + cur = conn.cursor() - cur.execute(sql_query) + cur.execute(sanitized_query) rows = cur.fetchall() columns = [col[0] for col in (cur.description or [])] return columns, rows diff --git a/src/macaron/resources/policies/sql/check-github-actions.sql b/src/macaron/resources/policies/sql/check-github-actions.sql index 761f74ea8..0ce7adf2c 100644 --- a/src/macaron/resources/policies/sql/check-github-actions.sql +++ b/src/macaron/resources/policies/sql/check-github-actions.sql @@ -4,11 +4,11 @@ -- Failed check facts for check-github-actions policy template. SELECT analysis.analysis_time, - gha_check.vulnerability_urls as vulnerability, - gha_check.github_actions_id as third-party_action_name, - gha_check.github_actions_version as third-party_action_version, - gha_check.caller_workflow as vulnerable_workflow -FROM github_actions_vulnerabilities_check as gha_check + gha_check.vulnerability_urls AS vulnerability, + gha_check.github_actions_id AS third_party_action_name, + gha_check.github_actions_version AS third_party_action_version, + gha_check.caller_workflow AS vulnerable_workflow +FROM github_actions_vulnerabilities_check AS gha_check JOIN check_facts ON check_facts.id = gha_check.id JOIN check_result From d0b409eec0b7925b1e400801b3daa0b2b074e288 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 10:48:15 +1000 Subject: [PATCH 08/30] test: should fail when policy verify fails Signed-off-by: behnazh-w --- .github/workflows/macaron-analysis.yaml | 1 - .github/workflows/test_macaron_action.yaml | 1 - action.yaml | 8 +++----- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/macaron-analysis.yaml b/.github/workflows/macaron-analysis.yaml index 8d7e599e6..ff0f875dd 100644 --- 
a/.github/workflows/macaron-analysis.yaml +++ b/.github/workflows/macaron-analysis.yaml @@ -45,4 +45,3 @@ jobs: reports_artifact_name: macaron-reports reports_retention_days: 90 write_job_summary: true - require_vsa: true diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index c5d5612cb..5fe11ea26 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -257,7 +257,6 @@ jobs: # output_dir: macaron_output/upload_attestation # upload_attestation: 'true' # subject_path: ./macaron_output/upload_attestation/vsa.intoto.jsonl - # require_vsa: 'true' # upload_reports: 'true' # reports_artifact_name: macaron-reports-upload-attestation # write_job_summary: 'true' diff --git a/action.yaml b/action.yaml index 05c83de41..0afbcfbfd 100644 --- a/action.yaml +++ b/action.yaml @@ -58,9 +58,6 @@ inputs: post_commit_comment: description: Post or update a commit comment with Macaron results. default: 'false' - require_vsa: - description: Fail the action if VSA is not generated. - default: 'false' upload_attestation: description: 'Upload the generated VSA report. default : false' default: false @@ -286,16 +283,17 @@ runs: } - name: Enforce VSA generation - if: ${{ always() && inputs.require_vsa == 'true' }} + if: ${{ always() && inputs.policy_file != '' }} run: | if [ "${VSA_GENERATED}" != "true" ]; then - echo "VSA was not generated at ${OUTPUT_DIR}/vsa.intoto.jsonl. Check uploaded reports." + echo "Policy verification failed. VSA was not generated at ${OUTPUT_DIR}/vsa.intoto.jsonl. Check uploaded reports." 
exit 1 fi shell: bash env: OUTPUT_DIR: ${{ inputs.output_dir }} VSA_GENERATED: ${{ steps.collect-reports.outputs.vsa_generated }} + POLICY_FILE: ${{ inputs.policy_file }} - name: Upload Attestation if: ${{ inputs.upload_attestation == 'true' && steps.collect-reports.outputs.vsa_generated == 'true' }} From a6f863944b8f208178d6a539cab1b66fc42853a5 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 11:00:43 +1000 Subject: [PATCH 09/30] chore: added expected failure guards Signed-off-by: behnazh-w --- .github/workflows/test_macaron_action.yaml | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index 5fe11ea26..09b81d14c 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -40,11 +40,22 @@ jobs: upload_reports: 'false' - name: Run Macaron (verify policy - has-hosted-build) + id: verify_has_hosted_build + # This verification is expected to fail for this tutorial scenario. + continue-on-error: true uses: ./ with: policy_file: ./tests/tutorial_resources/commit_finder/has-hosted-build.dl output_dir: macaron_output/commit_finder upload_reports: 'false' + - name: Assert expected failure (has-hosted-build) + if: ${{ always() }} + run: | + # Keep this workflow green only when the verify step actually fails. + if [ "${{ steps.verify_has_hosted_build.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." 
+ exit 1 + fi tutorial-detect-malicious-package: name: Detecting malicious packages @@ -114,6 +125,7 @@ jobs: write_job_summary: 'false' - name: Run Macaron (verify policy - github_actions_vulns for repo) + id: verify_github_actions_vulns_repo_tutorial uses: ./ with: policy_file: ./tests/tutorial_resources/detect_vulnerable_github_actions/check_github_actions_vuln_repo.dl @@ -277,6 +289,9 @@ jobs: write_job_summary: 'false' - name: Run Macaron (verify policy - github_actions_vulns for repo) + id: verify_github_actions_vulns_repo_test + # This integration target is intentionally vulnerable; failure is expected. + continue-on-error: true uses: ./ with: policy_file: check-github-actions @@ -284,3 +299,11 @@ jobs: upload_reports: 'true' reports_artifact_name: macaron-vulnerable-actions-fail-diagnosis write_job_summary: 'true' + - name: Assert expected failure (github_actions_vulns for repo test) + if: ${{ always() }} + run: | + # Explicitly assert failure so regressions are visible in CI results. + if [ "${{ steps.verify_github_actions_vulns_repo_test.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." 
+ exit 1 + fi From b4f1250309d9d202285bfeac3fae5f5df3c6f1d1 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 11:24:46 +1000 Subject: [PATCH 10/30] chore: improve the summary table Signed-off-by: behnazh-w --- scripts/actions/write_job_summary.py | 40 ++++++++++++++++++- .../policies/sql/check-github-actions.sql | 6 +-- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py index 8cb6097dc..aff9fb7cb 100644 --- a/scripts/actions/write_job_summary.py +++ b/scripts/actions/write_job_summary.py @@ -7,10 +7,12 @@ from __future__ import annotations +import json import os import re import sqlite3 from pathlib import Path +from urllib.parse import urlsplit CHECK_RESULT_DEFAULT_COLUMNS = ["id", "check_id", "passed", "component_id"] @@ -184,11 +186,47 @@ def _write_markdown_table(summary_path: Path, columns: list[str], rows: list[tup _append_line(summary_path, f"| {' | '.join(columns)} |") _append_line(summary_path, f"|{'|'.join(['---'] * len(columns))}|") for row in rows: - values = [f"`{value}`" for value in row] + values = [_format_table_cell(value) for value in row] _append_line(summary_path, f"| {' | '.join(values)} |") return True +def _format_table_cell(value: object) -> str: + text = str(value) + parsed_list = _parse_list_cell(text) + if parsed_list is not None: + items = [_format_list_item(item) for item in parsed_list] + return "
".join(f"- {item}" for item in items) if items else "`[]`" + + if text.startswith(("http://", "https://")): + parsed = urlsplit(text) + segments = [part for part in parsed.path.split("/") if part] + label = segments[-1] if segments else parsed.netloc + return f"[`{label}`]({text})" + return f"`{text}`" + + +def _parse_list_cell(text: str) -> list[object] | None: + stripped = text.strip() + if not (stripped.startswith("[") and stripped.endswith("]")): + return None + try: + loaded = json.loads(stripped) + except json.JSONDecodeError: + return None + return loaded if isinstance(loaded, list) else None + + +def _format_list_item(value: object) -> str: + text = str(value) + if text.startswith(("http://", "https://")): + parsed = urlsplit(text) + segments = [part for part in parsed.path.split("/") if part] + label = segments[-1] if segments else parsed.netloc + return f"[`{label}`]({text})" + return f"`{text}`" + + def _write_policy_check_lists(summary_path: Path, policy_check_ids: list[str]) -> None: if policy_check_ids: diff --git a/src/macaron/resources/policies/sql/check-github-actions.sql b/src/macaron/resources/policies/sql/check-github-actions.sql index 0ce7adf2c..ab6d3f53e 100644 --- a/src/macaron/resources/policies/sql/check-github-actions.sql +++ b/src/macaron/resources/policies/sql/check-github-actions.sql @@ -3,11 +3,11 @@ -- Failed check facts for check-github-actions policy template. 
SELECT - analysis.analysis_time, - gha_check.vulnerability_urls AS vulnerability, gha_check.github_actions_id AS third_party_action_name, gha_check.github_actions_version AS third_party_action_version, - gha_check.caller_workflow AS vulnerable_workflow + gha_check.vulnerability_urls AS vulnerability, + gha_check.caller_workflow AS vulnerable_workflow, + analysis.analysis_time FROM github_actions_vulnerabilities_check AS gha_check JOIN check_facts ON check_facts.id = gha_check.id From dd5ae3e80fba463153c5498e1878c57f0e96051d Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 11:37:24 +1000 Subject: [PATCH 11/30] chore: test other patterns Signed-off-by: behnazh-w --- Makefile | 4 +- .../gha_security_analysis/detect_injection.py | 130 +++++++++--------- 2 files changed, 67 insertions(+), 67 deletions(-) diff --git a/Makefile b/Makefile index e32313f10..35b1fef30 100644 --- a/Makefile +++ b/Makefile @@ -386,10 +386,10 @@ integration-test-update: # set to the build date/epoch. For more details, see: https://flit.pypa.io/en/latest/reproducible.html .PHONY: dist dist: dist/$(PACKAGE_WHEEL_DIST_NAME).whl dist/$(PACKAGE_SDIST_NAME).tar.gz dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-docs-html.zip dist/$(PACKAGE_WHEEL_DIST_NAME)-build-epoch.txt -dist/$(PACKAGE_WHEEL_DIST_NAME).whl: check +dist/$(PACKAGE_WHEEL_DIST_NAME).whl: SOURCE_DATE_EPOCH=$(SOURCE_DATE_EPOCH) flit build --setup-py --format wheel mv dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-py3-none-any.whl dist/$(PACKAGE_WHEEL_DIST_NAME).whl -dist/$(PACKAGE_SDIST_NAME).tar.gz: check +dist/$(PACKAGE_SDIST_NAME).tar.gz: SOURCE_DATE_EPOCH=$(SOURCE_DATE_EPOCH) flit build --setup-py --format sdist dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-docs-html.zip: docs python -m zipfile -c dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-docs-html.zip docs/_build/html diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index 
2a7bc8996..af0547c34 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -122,71 +122,71 @@ def analyze_workflow( f"overbroad-permissions: PR-triggered workflow requests " f"`{scope}: {level}`." ) - # # Walk jobs/steps for step-level checks. - # jobs = wf.get("jobs", {}) if isinstance(wf.get("jobs"), dict) else {} - # for job_name, job in jobs.items(): - # if not is_normal_job(job): - # continue - - # # --- D. Self-hosted runners (new) --- - # runs_on = job.get("runs-on") - # if runs_on: - # runs_on_str = str(runs_on) - # if "self-hosted" in runs_on_str: - # findings.append( - # f"self-hosted-runner: Job `{job_name}` runs on self-hosted runners; " - # "ensure isolation and never run untrusted PR code there." - # ) - - # steps = job.get("steps", []) if isinstance(job.get("steps"), list) else [] - - # for step in steps: - # uses = step.get("uses", "") if isinstance(step, dict) else "" - # run = step.get("run", "") if isinstance(step, dict) else "" - - # # --- E. Action SHA pinning (new) --- - # if uses: - # # Ignore local actions "./.github/actions/..." - # if not uses.startswith("./") and not SHA_PINNED_USES_RE.match(uses): - # # findings.append(f"unpinned-action: Job `{job_name}` uses `{uses}` not pinned to a commit SHA.") - # findings.append(uses) - - # # --- F. Checkout untrusted fork refs on PR event (existing, expanded) --- - # if uses and "actions/checkout" in uses: - # with_section = step.get("with", {}) if isinstance(step.get("with"), dict) else {} - # ref = with_section.get("ref", "") - # if ref in UNTRUSTED_PR_REFS and "pull_request" in on_keys: - # findings.append( - # f"untrusted-fork-code: Job `{job_name}` checks out " - # f"untrusted fork code (`ref: {ref}`) on PR event." - # ) - - # # --- G. persist-credentials (new) --- - # # Default is true for checkout; many orgs prefer setting false explicitly. 
- # persist = with_section.get("persist-credentials", None) - # if persist is True or (isinstance(persist, str) and persist.lower() == "true"): - # findings.append( - # f"persist-credentials: Job `{job_name}` uses checkout " - # "with `persist-credentials: true`; may expose " - # "GITHUB_TOKEN to subsequent git commands." - # ) - - # # --- H. Remote script execution: curl|bash (new heuristic) --- - # if isinstance(run, str) and REMOTE_SCRIPT_RE.search(run): - # findings.append( - # f"remote-script-exec: Job `{job_name}` step appears to " "download and pipe to shell (`curl|bash`)." - # ) - - # # --- I. Extra dangerous combo: pull_request_target + checkout PR head ref (new) --- - # if "pull_request_target" in on_keys and uses and "actions/checkout" in uses: - # with_section = step.get("with", {}) if isinstance(step.get("with"), dict) else {} - # ref = with_section.get("ref", "") - # if ref in UNTRUSTED_PR_REFS: - # findings.append( - # f"pr-target-untrusted-checkout: Job `{job_name}` uses " - # f"pull_request_target and checks out PR-controlled " - # f"ref `{ref}`." - # ) + # Walk jobs/steps for step-level checks. + jobs = wf.get("jobs", {}) if isinstance(wf.get("jobs"), dict) else {} + for job_name, job in jobs.items(): + if not is_normal_job(job): + continue + + # --- D. Self-hosted runners (new) --- + runs_on = job.get("runs-on") + if runs_on: + runs_on_str = str(runs_on) + if "self-hosted" in runs_on_str: + findings.append( + f"self-hosted-runner: Job `{job_name}` runs on self-hosted runners; " + "ensure isolation and never run untrusted PR code there." + ) + + steps = job.get("steps", []) if isinstance(job.get("steps"), list) else [] + + for step in steps: + uses = step.get("uses", "") if isinstance(step, dict) else "" + run = step.get("run", "") if isinstance(step, dict) else "" + + # --- E. Action SHA pinning (new) --- + if uses: + # Ignore local actions "./.github/actions/..." 
+ if not uses.startswith("./") and not SHA_PINNED_USES_RE.match(uses): + # findings.append(f"unpinned-action: Job `{job_name}` uses `{uses}` not pinned to a commit SHA.") + findings.append(uses) + + # --- F. Checkout untrusted fork refs on PR event (existing, expanded) --- + if uses and "actions/checkout" in uses: + with_section = step.get("with", {}) if isinstance(step.get("with"), dict) else {} + ref = with_section.get("ref", "") + if ref in UNTRUSTED_PR_REFS and "pull_request" in on_keys: + findings.append( + f"untrusted-fork-code: Job `{job_name}` checks out " + f"untrusted fork code (`ref: {ref}`) on PR event." + ) + + # --- G. persist-credentials (new) --- + # Default is true for checkout; many orgs prefer setting false explicitly. + persist = with_section.get("persist-credentials", None) + if persist is True or (isinstance(persist, str) and persist.lower() == "true"): + findings.append( + f"persist-credentials: Job `{job_name}` uses checkout " + "with `persist-credentials: true`; may expose " + "GITHUB_TOKEN to subsequent git commands." + ) + + # --- H. Remote script execution: curl|bash (new heuristic) --- + if isinstance(run, str) and REMOTE_SCRIPT_RE.search(run): + findings.append( + f"remote-script-exec: Job `{job_name}` step appears to " "download and pipe to shell (`curl|bash`)." + ) + + # --- I. Extra dangerous combo: pull_request_target + checkout PR head ref (new) --- + if "pull_request_target" in on_keys and uses and "actions/checkout" in uses: + with_section = step.get("with", {}) if isinstance(step.get("with"), dict) else {} + ref = with_section.get("ref", "") + if ref in UNTRUSTED_PR_REFS: + findings.append( + f"pr-target-untrusted-checkout: Job `{job_name}` uses " + f"pull_request_target and checks out PR-controlled " + f"ref `{ref}`." + ) # --- J. 
Your existing dataflow-based injection heuristic (kept) --- for node in core.traverse_bfs(workflow_node): From 6b3ab0ba16ac46510edcbd355c82fafc7020f845 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 12:06:14 +1000 Subject: [PATCH 12/30] test: debug local failure Signed-off-by: behnazh-w --- .github/workflows/_build_docker.yaml | 16 ++++++++-------- .github/workflows/test_macaron_action.yaml | 14 ++++---------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/.github/workflows/_build_docker.yaml b/.github/workflows/_build_docker.yaml index 8c5ea7dba..a209a5b9e 100644 --- a/.github/workflows/_build_docker.yaml +++ b/.github/workflows/_build_docker.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. # This is a reuseable workflow to build and test the Docker image. Note that this workflow does not @@ -67,10 +67,10 @@ jobs: run: make setup-integration-test-utility-for-docker # Run the integration tests against the built Docker image. - - name: Test the Docker image - env: - # This environment variable will be picked up by run_macaron.sh. - MACARON_IMAGE_TAG: test - DOCKER_PULL: never - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: make integration-test-docker + # - name: Test the Docker image + # env: + # # This environment variable will be picked up by run_macaron.sh. 
+ # MACARON_IMAGE_TAG: test + # DOCKER_PULL: never + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # run: make integration-test-docker diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index 09b81d14c..2f5f12ad3 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -279,21 +279,15 @@ jobs: steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - - name: Run Macaron (analyze repo - https://github.com/oracle/coherence-js-client) - uses: ./ - with: - repo_path: https://github.com/oracle/coherence-js-client - digest: 39166341bc31f75b663ff439dae36170fb3e99a9 - output_dir: macaron_output/detect_vulnerable_github_actions - upload_reports: 'false' - write_job_summary: 'false' - - - name: Run Macaron (verify policy - github_actions_vulns for repo) + - name: Run Macaron (analyze github_actions_vulns for https://github.com/oracle/coherence-js-client) id: verify_github_actions_vulns_repo_test # This integration target is intentionally vulnerable; failure is expected. 
continue-on-error: true uses: ./ with: + repo_path: https://github.com/oracle/coherence-js-client + digest: 39166341bc31f75b663ff439dae36170fb3e99a9 + package_url: pkg:github.com/oracle/coherence-js-client@.* policy_file: check-github-actions output_dir: macaron_output/detect_vulnerable_github_actions upload_reports: 'true' From e36ce0a9dfd2019b7d45cf77c4fc97ca835c503d Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 12:22:44 +1000 Subject: [PATCH 13/30] fix: fix import and policy purl input Signed-off-by: behnazh-w --- .github/workflows/test_macaron_action.yaml | 2 +- scripts/actions/setup_macaron.sh | 13 +++++++++++-- .../gha_security_analysis/detect_injection.py | 3 +-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index 2f5f12ad3..a07dd3319 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -287,8 +287,8 @@ jobs: with: repo_path: https://github.com/oracle/coherence-js-client digest: 39166341bc31f75b663ff439dae36170fb3e99a9 - package_url: pkg:github.com/oracle/coherence-js-client@.* policy_file: check-github-actions + policy_purl: pkg:github.com/oracle/coherence-js-client@.* output_dir: macaron_output/detect_vulnerable_github_actions upload_reports: 'true' reports_artifact_name: macaron-vulnerable-actions-fail-diagnosis diff --git a/scripts/actions/setup_macaron.sh b/scripts/actions/setup_macaron.sh index a002bb534..7ecb94ef9 100644 --- a/scripts/actions/setup_macaron.sh +++ b/scripts/actions/setup_macaron.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
set -euo pipefail @@ -13,7 +13,16 @@ mkdir -p "$ACTION_DIR" git clone --filter=blob:none --no-checkout https://github.com/oracle/macaron.git "$ACTION_DIR" -TARGET_REF="${ACTION_REF:-main}" +# For self-tests in oracle/macaron (uses: ./), prefer the current workflow commit. +# Keep existing behavior for all other usage patterns. +if [ -z "${ACTION_REF:-}" ] \ + && [ "${GITHUB_REPOSITORY:-}" = "oracle/macaron" ] \ + && [ -n "${GITHUB_SHA:-}" ] \ + && [ "${GITHUB_ACTION_PATH:-}" = "${GITHUB_WORKSPACE:-}" ]; then + TARGET_REF="${GITHUB_SHA}" +else + TARGET_REF="${ACTION_REF:-main}" +fi MACARON_IMAGE_TAG="" cd "$ACTION_DIR" if [[ "$TARGET_REF" =~ ^[0-9a-f]{40}$ ]]; then diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index af0547c34..f070406f8 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -10,8 +10,7 @@ from macaron.code_analyzer.dataflow_analysis.core import NodeForest, traverse_bfs from macaron.code_analyzer.dataflow_analysis.github import GitHubActionsWorkflowNode from macaron.parsers.bashparser_model import CallExpr, is_call_expr, is_lit, is_param_exp - -# from macaron.parsers.github_workflow_model import is_normal_job +from macaron.parsers.github_workflow_model import is_normal_job REMOTE_SCRIPT_RE = re.compile(r"(curl|wget)\s+.*\|\s*(bash|sh|tar)", re.IGNORECASE) SHA_PINNED_USES_RE = re.compile(r".+@([0-9a-f]{40})$") # commit SHA pinning From 67913806420d033d3cd1f5e89691d1fe12236233 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 14:59:32 +1000 Subject: [PATCH 14/30] test: run action on the test image Signed-off-by: behnazh-w --- .github/workflows/_build_docker.yaml | 22 +++++ .github/workflows/test_macaron_action.yaml | 108 ++++++++++++++++++--- scripts/actions/setup_macaron.sh | 22 +++-- 3 files changed, 131 insertions(+), 21 
deletions(-) diff --git a/.github/workflows/_build_docker.yaml b/.github/workflows/_build_docker.yaml index a209a5b9e..710f4d510 100644 --- a/.github/workflows/_build_docker.yaml +++ b/.github/workflows/_build_docker.yaml @@ -63,6 +63,17 @@ jobs: IMAGE_NAME: ghcr.io/oracle/macaron run: make build-docker + - name: Export test Docker image + run: docker save ghcr.io/oracle/macaron:test --output /tmp/macaron-test-image.tar + + - name: Upload test Docker image artifact + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: macaron-test-image + path: /tmp/macaron-test-image.tar + if-no-files-found: error + retention-days: 1 + - name: Install dependencies for integration test utility run: make setup-integration-test-utility-for-docker @@ -74,3 +85,14 @@ jobs: # DOCKER_PULL: never # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # run: make integration-test-docker + + test-macaron-action: + # Reuse the action test workflow against the exact Docker image built above. + # The image is transferred via artifact to avoid pushing to a registry. + needs: [build-docker] + permissions: + contents: read + uses: ./.github/workflows/test_macaron_action.yaml + with: + docker_image_artifact_name: macaron-test-image + macaron_image_tag: test diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index a07dd3319..0e599252e 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -4,26 +4,40 @@ name: Test Macaron Action on: - push: - branches: - - main - paths: - - action.yaml - pull_request: - paths: - - action.yaml - workflow_dispatch: + workflow_call: + # Optional overrides used by reusable callers (for example _build_docker.yaml). + # Defaults target the test image artifact produced by our Docker build workflow. 
+ inputs: + docker_image_artifact_name: + required: false + type: string + default: macaron-test-image + macaron_image_tag: + required: false + type: string + default: test permissions: - id-token: write - attestations: write + contents: read jobs: tutorial-commit-finder: name: Analyzing and comparing different versions of an artifact runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze arrow@1.3.0) uses: ./ @@ -60,8 +74,20 @@ jobs: tutorial-detect-malicious-package: name: Detecting malicious packages runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze django@5.0.6 without dependencies) uses: ./ with: @@ -113,8 +139,20 @@ jobs: tutorial-detect-vulnerable-actions: name: How to detect vulnerable GitHub Actions runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: 
actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze repo - apache/logging-log4j2) uses: ./ @@ -154,8 +192,20 @@ jobs: tutorial-provenance: name: Provenance discovery, extraction, and verification runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze semver 7.7.2) uses: ./ with: @@ -215,8 +265,20 @@ jobs: tutorial-detect-malicious-java-dep: name: Detecting Java dependencies manually uploaded to Maven Central runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run 
Macaron (analyze example-maven-app with SBOM) uses: ./ with: @@ -237,8 +299,20 @@ jobs: tutorial-exclude-include-checks: name: Exclude and include checks in Macaron runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze micronaut-core with default checks) uses: ./ @@ -276,8 +350,20 @@ jobs: test-detect-vulnerable-actions: name: How to detect vulnerable GitHub Actions runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar - name: Run Macaron (analyze github_actions_vulns for https://github.com/oracle/coherence-js-client) id: verify_github_actions_vulns_repo_test diff --git a/scripts/actions/setup_macaron.sh b/scripts/actions/setup_macaron.sh index 7ecb94ef9..cd519253c 100644 --- a/scripts/actions/setup_macaron.sh +++ b/scripts/actions/setup_macaron.sh @@ -7,22 +7,24 @@ set -euo pipefail MACARON_DIR="${RUNNER_TEMP:-/tmp}/macaron" mkdir -p "$MACARON_DIR" +# If a test image tag is explicitly provided (for 
reusable workflow testing), +# use the local run script from this checkout and preserve the provided tag. +if [ -n "${MACARON_IMAGE_TAG:-}" ]; then + SCRIPT_NAME="run_macaron.sh" + cp "$GITHUB_ACTION_PATH/scripts/release_scripts/run_macaron.sh" "$MACARON_DIR/$SCRIPT_NAME" + chmod +x "$MACARON_DIR/$SCRIPT_NAME" + echo "MACARON=$MACARON_DIR/$SCRIPT_NAME" >> "$GITHUB_ENV" + echo "MACARON_IMAGE_TAG=${MACARON_IMAGE_TAG}" >> "$GITHUB_ENV" + exit 0 +fi + ACTION_DIR="${RUNNER_TEMP:-/tmp}/macaron-action" rm -rf "$ACTION_DIR" mkdir -p "$ACTION_DIR" git clone --filter=blob:none --no-checkout https://github.com/oracle/macaron.git "$ACTION_DIR" -# For self-tests in oracle/macaron (uses: ./), prefer the current workflow commit. -# Keep existing behavior for all other usage patterns. -if [ -z "${ACTION_REF:-}" ] \ - && [ "${GITHUB_REPOSITORY:-}" = "oracle/macaron" ] \ - && [ -n "${GITHUB_SHA:-}" ] \ - && [ "${GITHUB_ACTION_PATH:-}" = "${GITHUB_WORKSPACE:-}" ]; then - TARGET_REF="${GITHUB_SHA}" -else - TARGET_REF="${ACTION_REF:-main}" -fi +TARGET_REF="${ACTION_REF:-main}" MACARON_IMAGE_TAG="" cd "$ACTION_DIR" if [[ "$TARGET_REF" =~ ^[0-9a-f]{40}$ ]]; then From b528124e24363bb1c3b9871f4a3f7c03571985ac Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 15:30:27 +1000 Subject: [PATCH 15/30] test: expect the gha tests to fail Signed-off-by: behnazh-w --- .github/workflows/_build_docker.yaml | 2 +- .github/workflows/test_macaron_action.yaml | 21 +++++ action.yaml | 93 ---------------------- scripts/actions/write_job_summary.py | 6 +- 4 files changed, 27 insertions(+), 95 deletions(-) diff --git a/.github/workflows/_build_docker.yaml b/.github/workflows/_build_docker.yaml index 710f4d510..76f7c43bb 100644 --- a/.github/workflows/_build_docker.yaml +++ b/.github/workflows/_build_docker.yaml @@ -72,7 +72,7 @@ jobs: name: macaron-test-image path: /tmp/macaron-test-image.tar if-no-files-found: error - retention-days: 1 + retention-days: 5 - name: Install dependencies for 
integration test utility run: make setup-integration-test-utility-for-docker diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index 0e599252e..652c15dec 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -164,6 +164,8 @@ jobs: - name: Run Macaron (verify policy - github_actions_vulns for repo) id: verify_github_actions_vulns_repo_tutorial + # This verification is expected to fail and surface vulnerable actions. + continue-on-error: true uses: ./ with: policy_file: ./tests/tutorial_resources/detect_vulnerable_github_actions/check_github_actions_vuln_repo.dl @@ -171,6 +173,14 @@ jobs: upload_reports: 'true' reports_artifact_name: macaron-reports-vulnerable-actions-repo write_job_summary: 'true' + - name: Assert expected failure (github_actions_vulns for repo tutorial) + if: ${{ always() }} + run: | + # Ensure this tutorial case remains a failing policy verification. + if [ "${{ steps.verify_github_actions_vulns_repo_tutorial.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." + exit 1 + fi - name: Run Macaron (analyze purl - log4j-core example) uses: ./ @@ -181,6 +191,9 @@ jobs: write_job_summary: 'false' - name: Run Macaron (verify policy - github_actions_vulns for purl) + id: verify_github_actions_vulns_purl_tutorial + # This verification is expected to fail and detect vulnerable actions. + continue-on-error: true uses: ./ with: policy_file: ./tests/tutorial_resources/detect_vulnerable_github_actions/check_github_actions_vuln_purl.dl @@ -188,6 +201,14 @@ jobs: upload_reports: 'true' reports_artifact_name: macaron-reports-vulnerable-actions-purl write_job_summary: 'true' + - name: Assert expected failure (github_actions_vulns for purl tutorial) + if: ${{ always() }} + run: | + # Ensure this tutorial case remains a failing policy verification. 
+ if [ "${{ steps.verify_github_actions_vulns_purl_tutorial.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." + exit 1 + fi tutorial-provenance: name: Provenance discovery, extraction, and verification diff --git a/action.yaml b/action.yaml index 0afbcfbfd..10e5dc924 100644 --- a/action.yaml +++ b/action.yaml @@ -55,9 +55,6 @@ inputs: write_job_summary: description: Write a human-friendly summary to the workflow run page. default: 'true' - post_commit_comment: - description: Post or update a commit comment with Macaron results. - default: 'false' upload_attestation: description: 'Upload the generated VSA report. default : false' default: false @@ -192,96 +189,6 @@ runs: REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} REPORTS_ARTIFACT_URL: ${{ steps.upload-macaron-reports.outputs.artifact-url }} - - name: Post Macaron commit comment - if: ${{ always() && inputs.post_commit_comment == 'true' }} - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v8.0.0 - env: - OUTPUT_DIR: ${{ inputs.output_dir }} - POLICY_REPORT: ${{ steps.collect-reports.outputs.policy_report }} - VSA_REPORT: ${{ steps.collect-reports.outputs.vsa_report }} - VSA_GENERATED: ${{ steps.collect-reports.outputs.vsa_generated }} - REPORTS_ARTIFACT_NAME: ${{ inputs.reports_artifact_name }} - with: - script: | - const fs = require("fs"); - const path = require("path"); - - const marker = ""; - const outputDir = process.env.OUTPUT_DIR || "output"; - const policyReportPath = process.env.POLICY_REPORT || path.join(outputDir, "policy_report.json"); - const vsaGenerated = process.env.VSA_GENERATED === "true"; - const vsaReport = process.env.VSA_REPORT || "VSA Not Generated."; - const artifactName = process.env.REPORTS_ARTIFACT_NAME || "macaron-reports"; - - let parsed = {}; - if (fs.existsSync(policyReportPath)) { - try { - parsed = JSON.parse(fs.readFileSync(policyReportPath, "utf8")); - } catch (err) { - parsed = {}; - } - } - - const 
relation = parsed.check_github_actions_vulnerabilities; - const lines = []; - lines.push(marker); - lines.push("## Macaron Results"); - lines.push(""); - lines.push(`- Workflow run: ${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`); - lines.push(`- Reports artifact: \`${artifactName}\``); - lines.push(`- VSA generated: \`${vsaGenerated}\``); - if (vsaGenerated) { - lines.push(`- VSA file: \`${vsaReport}\``); - } - lines.push(""); - - if (Array.isArray(relation) && relation.length > 0) { - lines.push("### Vulnerable GitHub Actions"); - lines.push(""); - lines.push("| ID | Action | Version | Vulnerabilities | Workflow |"); - lines.push("|---|---|---|---|---|"); - for (const row of relation) { - const [id, urls, actionId, version, callerWorkflow] = row; - lines.push(`| \`${id}\` | \`${actionId}\` | \`${version}\` | \`${urls}\` | ${callerWorkflow} |`); - } - } else { - lines.push(":white_check_mark: No `check_github_actions_vulnerabilities` findings in `policy_report.json`."); - } - - const body = lines.join("\n"); - - const list = await github.rest.repos.listCommentsForCommit({ - owner: context.repo.owner, - repo: context.repo.repo, - commit_sha: context.sha, - per_page: 100, - }); - - const existing = list.data.find((comment) => { - return ( - comment.user && - comment.user.type === "Bot" && - typeof comment.body === "string" && - comment.body.includes(marker) - ); - }); - - if (existing) { - await github.rest.repos.updateCommitComment({ - owner: context.repo.owner, - repo: context.repo.repo, - comment_id: existing.id, - body, - }); - } else { - await github.rest.repos.createCommitComment({ - owner: context.repo.owner, - repo: context.repo.repo, - commit_sha: context.sha, - body, - }); - } - - name: Enforce VSA generation if: ${{ always() && inputs.policy_file != '' }} run: | diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py index aff9fb7cb..93717e758 100644 --- 
a/scripts/actions/write_job_summary.py +++ b/scripts/actions/write_job_summary.py @@ -14,7 +14,11 @@ from pathlib import Path from urllib.parse import urlsplit -CHECK_RESULT_DEFAULT_COLUMNS = ["id", "check_id", "passed", "component_id"] +CHECK_RESULT_DEFAULT_COLUMNS = [ + "component_id", + "check_id", + "passed", +] def _env(name: str, default: str = "") -> str: From dc7aa16136bf528d6388e61219b0a7c3e447b6f8 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 20:03:54 +1000 Subject: [PATCH 16/30] chore: change the table schema Signed-off-by: behnazh-w --- .github/workflows/_build_docker.yaml | 7 +- .../gha_security_analysis/recommendation.py | 110 ++++++++++++++++++ .../policies/sql/check-github-actions.sql | 6 +- .../github_actions_vulnerability_check.py | 88 +++++++++++++- .../slsa_analyzer/git_service/api_client.py | 25 +++- .../oracle_coherence-js-client/policy.dl | 6 +- 6 files changed, 234 insertions(+), 8 deletions(-) create mode 100644 src/macaron/code_analyzer/gha_security_analysis/recommendation.py diff --git a/.github/workflows/_build_docker.yaml b/.github/workflows/_build_docker.yaml index 76f7c43bb..36c4ae6ea 100644 --- a/.github/workflows/_build_docker.yaml +++ b/.github/workflows/_build_docker.yaml @@ -63,17 +63,22 @@ jobs: IMAGE_NAME: ghcr.io/oracle/macaron run: make build-docker + # Export the built image so downstream jobs/workflows can load and reuse + # the exact same image without pushing to a registry. - name: Export test Docker image run: docker save ghcr.io/oracle/macaron:test --output /tmp/macaron-test-image.tar + # Upload the image tarball for the reusable action test workflow. 
- name: Upload test Docker image artifact uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: macaron-test-image path: /tmp/macaron-test-image.tar if-no-files-found: error - retention-days: 5 + retention-days: 1 + # Install helper tooling used by integration test utilities that validate + # the built Docker image behavior. - name: Install dependencies for integration test utility run: make setup-integration-test-utility-for-docker diff --git a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py new file mode 100644 index 000000000..5202856c1 --- /dev/null +++ b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py @@ -0,0 +1,110 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Recommendation helpers for GitHub Actions security findings. + +This module centralizes user-facing remediation guidance for findings generated by +GitHub Actions security analysis checks. +""" + +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True) +class Recommendation: + """Normalized recommendation payload for a finding. + + Attributes + ---------- + message : str + Human-readable recommendation text. + recommended_ref : str | None + Optional pinned reference suggestion, such as ``owner/repo@``. + """ + + message: str + recommended_ref: str | None = None + + +def recommend_for_unpinned_action(action_name: str, resolved_sha: str | None = None) -> Recommendation: + """Create a recommendation for an unpinned third-party action. + + Parameters + ---------- + action_name : str + GitHub Action identifier in the form ``owner/repo``. + resolved_sha : str | None, optional + Resolved commit SHA for the action ref if available. 
+ + Returns + ------- + Recommendation + Recommendation containing pinning guidance and a suggested immutable + action reference. + """ + recommended_ref = f"{action_name}@{resolved_sha}" if resolved_sha else f"{action_name}@<40-char-commit-sha>" + return Recommendation( + message="Pin this third-party action to a full 40-character commit SHA to prevent tag drift or takeover risk.", + recommended_ref=recommended_ref, + ) + + +def recommend_for_workflow_issue(issue: str) -> Recommendation: + """Map a workflow issue string to a remediation recommendation. + + Parameters + ---------- + issue : str + Issue string emitted by workflow security analysis. + + Returns + ------- + Recommendation + Recommendation message corresponding to the detected issue category. + """ + if issue.startswith("sensitive-trigger:"): + return Recommendation("Add strict event gating (actor allowlist, branch filters, and conditional checks).") + if issue.startswith("privileged-trigger:"): + return Recommendation("Avoid pull_request_target for untrusted code paths; use pull_request where possible.") + if issue.startswith("missing-permissions:"): + return Recommendation("Define explicit least-privilege permissions at workflow or job scope.") + if issue.startswith("overbroad-permissions:"): + return Recommendation("Reduce permissions to read-only scopes unless write access is strictly required.") + if issue.startswith("untrusted-fork-code:"): + return Recommendation("Do not checkout PR head refs in privileged contexts; validate source and actor first.") + if issue.startswith("persist-credentials:"): + return Recommendation("Set persist-credentials: false for checkout unless later git pushes are required.") + if issue.startswith("remote-script-exec:"): + return Recommendation("Avoid curl|bash patterns; pin script digests or vendor reviewed scripts in-repo.") + if issue.startswith("pr-target-untrusted-checkout:"): + return Recommendation("Never combine pull_request_target with checkout of 
PR-controlled refs.") + if issue.startswith("potential-injection:"): + return Recommendation("Treat GitHub context data as untrusted input; quote/sanitize before shell execution.") + return Recommendation("Review this workflow finding and apply least-privilege hardening controls.") + + +def recommend_for_osv_vulnerability(action_name: str, action_version: str) -> Recommendation: + """Create a recommendation for a vulnerable GitHub Action version. + + Parameters + ---------- + action_name : str + GitHub Action identifier in the form ``owner/repo``. + action_version : str + Action ref currently used by the workflow. + + Returns + ------- + Recommendation + Recommendation encouraging upgrade to a non-vulnerable release and + subsequent pinning to a commit SHA. + """ + return Recommendation( + message=( + f"Upgrade `{action_name}` from `{action_version}` to a non-vulnerable release, " + "then pin the selected version to a commit SHA." + ), + recommended_ref=None, + ) diff --git a/src/macaron/resources/policies/sql/check-github-actions.sql b/src/macaron/resources/policies/sql/check-github-actions.sql index ab6d3f53e..f0831e5cd 100644 --- a/src/macaron/resources/policies/sql/check-github-actions.sql +++ b/src/macaron/resources/policies/sql/check-github-actions.sql @@ -3,9 +3,13 @@ -- Failed check facts for check-github-actions policy template. 
SELECT + gha_check.finding_type, gha_check.github_actions_id AS third_party_action_name, gha_check.github_actions_version AS third_party_action_version, - gha_check.vulnerability_urls AS vulnerability, + gha_check.vulnerability_urls AS vulnerabilities, + gha_check.finding_message, + gha_check.recommended_ref, + gha_check.is_pinned_sha, gha_check.caller_workflow AS vulnerable_workflow, analysis.analysis_time FROM github_actions_vulnerabilities_check AS gha_check diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py index 34eca8aff..4a57b5082 100644 --- a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py +++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py @@ -5,8 +5,9 @@ import logging import os +import re -from sqlalchemy import ForeignKey, String +from sqlalchemy import Boolean, ForeignKey, String from sqlalchemy.orm import Mapped, mapped_column from macaron.code_analyzer.dataflow_analysis.analysis import get_containing_github_job @@ -16,6 +17,11 @@ GitHubActionsReusableWorkflowCallNode, ) from macaron.code_analyzer.gha_security_analysis.detect_injection import detect_github_actions_security_issues +from macaron.code_analyzer.gha_security_analysis.recommendation import ( + recommend_for_osv_vulnerability, + recommend_for_unpinned_action, + recommend_for_workflow_issue, +) from macaron.database.db_custom_types import DBJsonList from macaron.database.table_definitions import CheckFacts from macaron.errors import APIAccessError @@ -23,11 +29,13 @@ from macaron.slsa_analyzer.analyze_context import AnalyzeContext from macaron.slsa_analyzer.checks.base_check import BaseCheck, CheckResultType from macaron.slsa_analyzer.checks.check_result import CheckResultData, Confidence, JustificationType +from macaron.slsa_analyzer.git_service.api_client import GhAPIClient from macaron.slsa_analyzer.package_registry.osv_dev import 
OSVDevService from macaron.slsa_analyzer.registry import registry from macaron.slsa_analyzer.slsa_req import ReqName logger: logging.Logger = logging.getLogger(__name__) +UNPINNED_ACTION_RE = re.compile(r"^(?P[^@\s]+)@(?P[^\s]+)$") class GitHubActionsVulnsFacts(CheckFacts): @@ -43,6 +51,12 @@ class GitHubActionsVulnsFacts(CheckFacts): DBJsonList, nullable=False, info={"justification": JustificationType.TEXT} ) + #: The finding category. + #: - ``osv_vulnerability`` for OSV-backed vulnerability findings. + #: - ``unpinned_third_party_action`` for third-party actions not pinned to a commit SHA. + #: - ``workflow_security_issue`` for other workflow security issues. + finding_type: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT}) + #: The GitHub Action Identifier. github_actions_id: Mapped[str] = mapped_column( String, nullable=False, info={"justification": JustificationType.TEXT} @@ -56,6 +70,19 @@ class GitHubActionsVulnsFacts(CheckFacts): #: The GitHub Action workflow that calls the vulnerable GitHub Action. caller_workflow: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.HREF}) + #: Whether the action reference is pinned to a full commit SHA. + is_pinned_sha: Mapped[bool | None] = mapped_column(Boolean, nullable=True) + + #: Human-readable finding details. + finding_message: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} + ) + + #: Recommended immutable action reference, if applicable. 
+ recommended_ref: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} + ) + __mapper_args__ = { "polymorphic_identity": "_github_actions_vulnerabilities_check", } @@ -95,15 +122,40 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: for ci_info in ci_services: callgraph = ci_info["callgraph"] + gh_api_client = ci_info["service"].api_client if hasattr(ci_info["service"], "api_client") else None if findings := detect_github_actions_security_issues(callgraph): for finding in findings: for issue in finding["issues"]: + if parsed_issue := _parse_unpinned_action_issue(str(issue)): + action_name, action_version = parsed_issue + resolved_sha = _resolve_action_ref_to_sha(gh_api_client, action_name, action_version) + recommendation = recommend_for_unpinned_action(action_name, resolved_sha) + result_tables.append( + GitHubActionsVulnsFacts( + vulnerability_urls=[], + finding_type="unpinned_third_party_action", + github_actions_id=action_name, + github_actions_version=action_version, + caller_workflow=finding["workflow_name"], + is_pinned_sha=False, + finding_message=recommendation.message, + recommended_ref=recommendation.recommended_ref, + confidence=Confidence.HIGH, + ) + ) + continue + + recommendation = recommend_for_workflow_issue(str(issue)) result_tables.append( GitHubActionsVulnsFacts( - vulnerability_urls=[issue], + vulnerability_urls=[], + finding_type="workflow_security_issue", github_actions_id=finding["workflow_name"], - github_actions_version="None", - caller_workflow="None", + github_actions_version="", + caller_workflow=finding["workflow_name"], + is_pinned_sha=None, + finding_message=f"{issue} Recommendation: {recommendation.message}", + recommended_ref=recommendation.recommended_ref, confidence=Confidence.HIGH, ) ) @@ -189,12 +241,17 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: except APIAccessError as error: logger.debug(error) if vuln_mapping: + recommendation = 
recommend_for_osv_vulnerability(workflow_name, workflow_inv["version"]) result_tables.append( GitHubActionsVulnsFacts( vulnerability_urls=vuln_mapping, + finding_type="osv_vulnerability", github_actions_id=workflow_name, github_actions_version=workflow_inv["version"], caller_workflow=workflow_inv["caller_path"], + is_pinned_sha=bool(re.fullmatch(r"[0-9a-f]{40}", workflow_inv["version"])), + finding_message=recommendation.message, + recommended_ref=recommendation.recommended_ref, confidence=Confidence.HIGH, ) ) @@ -212,3 +269,26 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: registry.register(GitHubActionsVulnsCheck()) + + +def _parse_unpinned_action_issue(issue: str) -> tuple[str, str] | None: + """Parse an unpinned third-party action reference from issue text.""" + match = UNPINNED_ACTION_RE.fullmatch(issue.strip()) + if not match: + return None + action = match.group("action") + version = match.group("version") + if action.startswith("./"): + return None + if "/" not in action: + return None + return action, version + + +def _resolve_action_ref_to_sha(api_client: object, action_name: str, action_version: str) -> str | None: + """Resolve an action ref to an immutable commit SHA using GitHub API.""" + if not isinstance(api_client, GhAPIClient): + return None + if not action_name or not action_version: + return None + return api_client.get_commit_sha_from_ref(action_name, action_version) diff --git a/src/macaron/slsa_analyzer/git_service/api_client.py b/src/macaron/slsa_analyzer/git_service/api_client.py index 9921c2dc9..f49beda7c 100644 --- a/src/macaron/slsa_analyzer/git_service/api_client.py +++ b/src/macaron/slsa_analyzer/git_service/api_client.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""The module provides API clients for VCS services, such as GitHub.""" @@ -11,6 +11,7 @@ from typing import NamedTuple from macaron.config.defaults import defaults +from macaron.json_tools import json_extract from macaron.slsa_analyzer.asset import AssetLocator from macaron.util import ( construct_query, @@ -388,6 +389,28 @@ def get_commit_data_from_hash(self, full_name: str, commit_hash: str) -> dict: return response_data + def get_commit_sha_from_ref(self, full_name: str, ref: str) -> str | None: + """Resolve a Git reference (tag/branch/sha) to a 40-character commit SHA. + + Parameters + ---------- + full_name : str + The full name of the repository in the format ``owner/name``. + ref : str + The git reference to resolve (e.g. ``v5``, ``main``, ``v1.2.3``). + + Returns + ------- + str | None + The resolved commit SHA, or ``None`` if resolution fails. + """ + if not full_name or not ref: + return None + + response_data = self.get_commit_data_from_hash(full_name, ref) + sha = json_extract(response_data, ["sha"], str) + return sha if sha and len(sha) == 40 else None + def search(self, target: str, query: str) -> dict: """Perform a search using GitHub REST API. 
diff --git a/tests/integration/cases/oracle_coherence-js-client/policy.dl b/tests/integration/cases/oracle_coherence-js-client/policy.dl index 5b814eb39..5c1c8b870 100644 --- a/tests/integration/cases/oracle_coherence-js-client/policy.dl +++ b/tests/integration/cases/oracle_coherence-js-client/policy.dl @@ -8,9 +8,13 @@ Policy("check-github-actions-vulnerabilities", component_id, "Check GitHub Actio github_actions_vulnerabilities_check( _, "[\"https://osv.dev/vulnerability/GHSA-69fq-xp46-6x23\", \"https://osv.dev/vulnerability/GHSA-9p44-j4g5-cfx5\"]", + "osv_vulnerability", "aquasecurity/trivy-action", "0.32.0", - "https://github.com/oracle/coherence-js-client/blob/39166341bc31f75b663ff439dae36170fb3e99a9/.github/workflows/trivy-scan.yml" + "https://github.com/oracle/coherence-js-client/blob/39166341bc31f75b663ff439dae36170fb3e99a9/.github/workflows/trivy-scan.yml", + _, + _, + _ ). apply_policy_to("check-github-actions-vulnerabilities", component_id) :- From 9ae13054bbcb28f155e5b15b88b502e2e4e6c743 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 21:06:49 +1000 Subject: [PATCH 17/30] test: test priorities Signed-off-by: behnazh-w --- scripts/actions/write_job_summary.py | 165 ++++++++- .../gha_security_analysis/detect_injection.py | 348 +++++++++++------- .../gha_security_analysis/recommendation.py | 55 +++ .../policies/sql/check-github-actions.sql | 2 + .../github_actions_vulnerability_check.py | 97 +++-- .../oracle_coherence-js-client/policy.dl | 3 +- 6 files changed, 500 insertions(+), 170 deletions(-) diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py index 93717e758..405a2c7a5 100644 --- a/scripts/actions/write_job_summary.py +++ b/scripts/actions/write_job_summary.py @@ -20,6 +20,8 @@ "passed", ] +MAX_TABLE_CELL_LEN = 72 + def _env(name: str, default: str = "") -> str: return os.environ.get(name, default) @@ -207,7 +209,9 @@ def _format_table_cell(value: object) -> str: segments = [part for part in 
parsed.path.split("/") if part] label = segments[-1] if segments else parsed.netloc return f"[`{label}`]({text})" - return f"`{text}`" + if len(text) > MAX_TABLE_CELL_LEN: + text = f"{text[: MAX_TABLE_CELL_LEN - 3]}..." + return f"`{_sanitize_for_markdown_table_code(text)}`" def _parse_list_cell(text: str) -> list[object] | None: @@ -228,7 +232,158 @@ def _format_list_item(value: object) -> str: segments = [part for part in parsed.path.split("/") if part] label = segments[-1] if segments else parsed.netloc return f"[`{label}`]({text})" - return f"`{text}`" + if len(text) > MAX_TABLE_CELL_LEN: + text = f"{text[: MAX_TABLE_CELL_LEN - 3]}..." + return f"`{_sanitize_for_markdown_table_code(text)}`" + + +def _sanitize_for_markdown_table_code(text: str) -> str: + """Sanitize inline-code content for markdown table cells.""" + return text.replace("`", "'").replace("|", "\\|").replace("\n", " ") + + +def _priority_label(priority: object) -> str: + """Map numeric priority to a concise severity-like label.""" + try: + value = int(priority) + except (TypeError, ValueError): + return str(priority) + + if value >= 90: + return "critical" + if value >= 70: + return "high" + if value >= 50: + return "medium" + return "low" + + +def _gha_group_label(group: str) -> str: + # finding_group is the top-level section key; finding_type is rendered per-row as the subtype. 
+ if group == "third_party_action_risk": + return "Third-party action risks" + if group == "workflow_security_issue": + return "Workflow security issues" + return group + + +def _write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], rows: list[tuple]) -> bool: + """Write compact diagnostics for check-github-actions policy failures.""" + if not columns or not rows: + return False + + col_index = {name: idx for idx, name in enumerate(columns)} + required = [ + "finding_priority", + "finding_type", + "third_party_action_name", + "third_party_action_version", + "vulnerable_workflow", + ] + if any(name not in col_index for name in required): + return _write_markdown_table(summary_path, columns, rows) + + sorted_rows = sorted( + rows, + key=lambda row: ( + int(row[col_index["finding_priority"]]) if str(row[col_index["finding_priority"]]).isdigit() else 0 + ), + reverse=True, + ) + display_rows = sorted_rows[:10] + group_idx = col_index.get("finding_group") + + _append_line( + summary_path, + "_Showing top 10 findings by priority. 
Expand details below for full diagnostics._", + ) + preferred_groups = ["third_party_action_risk", "workflow_security_issue"] + groups_in_rows: list[str] = [] + if group_idx is not None: + discovered_groups = [str(row[group_idx]) for row in display_rows] + groups_in_rows.extend([group for group in preferred_groups if group in discovered_groups]) + groups_in_rows.extend([group for group in discovered_groups if group not in groups_in_rows]) + else: + groups_in_rows = ["all_findings"] + + for group in groups_in_rows: + if group_idx is None: + group_rows = display_rows + title = "Findings" + else: + group_rows = [row for row in display_rows if str(row[group_idx]) == group] + if not group_rows: + continue + title = _gha_group_label(group) + _append_line(summary_path) + _append_line(summary_path, f"#### {title}") + _append_line(summary_path) + _append_line(summary_path, "| priority | type | action | version | workflow |") + _append_line(summary_path, "|---|---|---|---|---|") + for row in group_rows: + priority_raw = row[col_index["finding_priority"]] + priority = f"`{_priority_label(priority_raw)} ({priority_raw})`" + finding_type = _format_table_cell(row[col_index["finding_type"]]) + action_name = _format_table_cell(row[col_index["third_party_action_name"]]) + action_version = _format_table_cell(row[col_index["third_party_action_version"]]) + workflow = _format_table_cell(row[col_index["vulnerable_workflow"]]) + _append_line( + summary_path, + f"| {priority} | {finding_type} | {action_name} | {action_version} | {workflow} |", + ) + + _append_line(summary_path) + _append_line(summary_path, "
") + _append_line(summary_path, "Detailed findings") + _append_line(summary_path) + detail_groups = groups_in_rows if groups_in_rows else ["all_findings"] + row_counter = 1 + for group in detail_groups: + if group_idx is None: + group_rows = sorted_rows + title = "Findings" + else: + group_rows = [row for row in sorted_rows if str(row[group_idx]) == group] + if not group_rows: + continue + title = _gha_group_label(group) + _append_line(summary_path, f"**{title}**") + for row in group_rows: + action = str(row[col_index["third_party_action_name"]]) + version = str(row[col_index["third_party_action_version"]]) + priority = row[col_index["finding_priority"]] + finding_type = str(row[col_index["finding_type"]]) + workflow = str(row[col_index["vulnerable_workflow"]]) + _append_line( + summary_path, f"{row_counter}. **`{action}@{version}`** (`{finding_type}`, priority `{priority}`)" + ) + _append_line(summary_path, f"- Workflow: `{workflow}`") + + pin_idx = col_index.get("is_pinned_sha") + row_group = str(row[group_idx]) if group_idx is not None else "" + if pin_idx is not None and row_group == "third_party_action_risk" and row[pin_idx] is not None: + pin_state = "yes" if bool(row[pin_idx]) else "no" + _append_line(summary_path, f"- Pinned to full commit SHA: `{pin_state}`") + + vul_idx = col_index.get("vulnerabilities") + if vul_idx is not None and row[vul_idx]: + parsed = _parse_list_cell(str(row[vul_idx])) + if parsed: + _append_line(summary_path, "- Vulnerabilities:") + for item in parsed: + _append_line(summary_path, f" - {_format_list_item(item)}") + + rec_idx = col_index.get("recommended_ref") + if rec_idx is not None and row[rec_idx]: + _append_line(summary_path, f"- Recommended ref: {_format_table_cell(row[rec_idx])}") + + msg_idx = col_index.get("finding_message") + if msg_idx is not None and row[msg_idx]: + _append_line(summary_path, f"- Details: {_format_table_cell(row[msg_idx])}") + _append_line(summary_path) + row_counter += 1 + _append_line(summary_path, "
") + return True def _write_policy_check_lists(summary_path: Path, policy_check_ids: list[str]) -> None: @@ -299,7 +454,11 @@ def _write_existing_policy_failure_diagnostics( if cols and rows: _append_line(summary_path) _append_line(summary_path, f"#### Results") - if _write_markdown_table(summary_path, cols, rows): + if policy_name == "check-github-actions": + rendered = _write_compact_gha_vuln_diagnostics(summary_path, cols, rows) + else: + rendered = _write_markdown_table(summary_path, cols, rows) + if rendered: has_details = True if not has_details: diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index f070406f8..22e33fc6e 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -4,16 +4,20 @@ """Detect security issues and injection risks in GitHub Actions workflows.""" import re -from typing import cast +from typing import TypedDict, cast -from macaron.code_analyzer.dataflow_analysis import bash, core +from macaron.code_analyzer.dataflow_analysis import bash, core, facts from macaron.code_analyzer.dataflow_analysis.core import NodeForest, traverse_bfs -from macaron.code_analyzer.dataflow_analysis.github import GitHubActionsWorkflowNode +from macaron.code_analyzer.dataflow_analysis.github import ( + GitHubActionsActionStepNode, + GitHubActionsNormalJobNode, + GitHubActionsRunStepNode, + GitHubActionsWorkflowNode, +) from macaron.parsers.bashparser_model import CallExpr, is_call_expr, is_lit, is_param_exp -from macaron.parsers.github_workflow_model import is_normal_job +from macaron.parsers.github_workflow_model import Workflow REMOTE_SCRIPT_RE = re.compile(r"(curl|wget)\s+.*\|\s*(bash|sh|tar)", re.IGNORECASE) -SHA_PINNED_USES_RE = re.compile(r".+@([0-9a-f]{40})$") # commit SHA pinning UNTRUSTED_PR_REFS = { "${{ github.event.pull_request.head.ref }}", @@ -30,7 
+34,27 @@ } -def detect_github_actions_security_issues(nodes: NodeForest) -> list[dict[str, str | list[str]]]: +PRIORITY_CRITICAL = 100 +PRIORITY_HIGH = 80 +PRIORITY_MEDIUM = 60 +PRIORITY_LOW = 40 + + +class PrioritizedIssue(TypedDict): + """A workflow security finding with priority metadata.""" + + issue: str + priority: int + + +class WorkflowFinding(TypedDict): + """Workflow-level security findings.""" + + workflow_name: str + issues: list[PrioritizedIssue] + + +def detect_github_actions_security_issues(nodes: NodeForest) -> list[WorkflowFinding]: """Detect security issues across GitHub Actions workflow nodes. Parameters @@ -40,10 +64,10 @@ def detect_github_actions_security_issues(nodes: NodeForest) -> list[dict[str, s Returns ------- - list[dict[str, str | list[str]]] + list[WorkflowFinding] A list of workflow-level findings. Each item contains: - ``workflow_name``: workflow file path. - - ``issues``: list of detected security issue messages. + - ``issues``: list of detected security issue messages with priorities. """ findings = [] for root in nodes.root_nodes: @@ -56,7 +80,7 @@ def detect_github_actions_security_issues(nodes: NodeForest) -> list[dict[str, s def analyze_workflow( workflow_node: GitHubActionsWorkflowNode, -) -> dict[str, str | list[str]] | None: +) -> WorkflowFinding | None: """Analyze a GitHub Actions workflow for security issues. Parameters @@ -78,141 +102,207 @@ def analyze_workflow( checkout risks, remote-script execution heuristics, self-hosted runner usage, and dataflow-based expression injection patterns. 
""" - wf = workflow_node.definition - findings: list[str] = [] + findings: list[PrioritizedIssue] = [] + on_keys = _extract_on_keys(workflow_node.definition) + seen_jobs: set[str] = set() + + for node in core.traverse_bfs(workflow_node): + if isinstance(node, GitHubActionsWorkflowNode): + _append_workflow_level_findings(findings, on_keys, node.definition) + continue + + if isinstance(node, GitHubActionsNormalJobNode): + if node.job_id in seen_jobs: + continue + seen_jobs.add(node.job_id) + _append_job_level_findings(findings, node) + continue + + if isinstance(node, GitHubActionsActionStepNode): + _append_action_step_findings(findings, node, on_keys) + continue + + if isinstance(node, GitHubActionsRunStepNode): + _append_run_step_findings(findings, node) + continue + + if isinstance(node, bash.BashSingleCommandNode): + _append_injection_findings(findings, node) + + if "pull_request_target" in on_keys and _has_privileged_trigger_risk_combo(findings): + _add_finding( + findings, + ( + "privileged-trigger: Workflow uses `pull_request_target` with additional risky patterns; " + "treat this workflow as high risk and harden immediately." + ), + PRIORITY_HIGH, + ) - on_section = wf.get("on") - on_keys = set() + if findings: + findings_sorted = sorted(findings, key=lambda finding: (-finding["priority"], finding["issue"])) + return {"workflow_name": workflow_node.context.ref.source_filepath, "issues": findings_sorted} + + return None + + +def _extract_on_keys(workflow: Workflow) -> set[str]: + """Extract the set of event names from a workflow ``on`` section.""" + on_section = workflow.get("on") if isinstance(on_section, dict): - on_keys = set(on_section.keys()) - elif isinstance(on_section, list): - on_keys = set(on_section) - elif isinstance(on_section, str): - on_keys = {on_section} + return set(on_section.keys()) + if isinstance(on_section, list): + return set(on_section) + return {on_section} - # --- A. 
Triggers that often need extra hardening / gating --- + +def _append_workflow_level_findings(findings: list[PrioritizedIssue], on_keys: set[str], workflow: Workflow) -> None: + """Append workflow-level hardening findings.""" sensitive = sorted(on_keys.intersection(DANGEROUS_TRIGGERS)) if sensitive: - findings.append( + _add_finding( + findings, f"sensitive-trigger: Workflow uses {sensitive}. Ensure strict gating (e.g., actor allowlist, " - "branch protection, and minimal permissions)." + "branch protection, and minimal permissions).", + PRIORITY_LOW, ) - # --- B. Privileged trigger check (existing) --- - if "pull_request_target" in on_keys: - findings.append( - "privileged-trigger: Workflow uses `pull_request_target`, which runs with elevated permissions." + if "permissions" not in workflow: + _add_finding( + findings, + "missing-permissions: No explicit workflow permissions defined; defaults may be overly broad.", + PRIORITY_MEDIUM, ) + return + + permissions = workflow.get("permissions") + if isinstance(permissions, str) and permissions.lower() == "write-all": + _add_finding(findings, "overbroad-permissions: Workflow uses `permissions: write-all`.", PRIORITY_HIGH) + if isinstance(permissions, dict) and "pull_request_target" in on_keys: + for scope, level in permissions.items(): + if isinstance(level, str) and "write" in level.lower(): + _add_finding( + findings, + f"overbroad-permissions: PR-triggered workflow requests `{scope}: {level}`.", + PRIORITY_HIGH, + ) - # --- C. Missing workflow permissions (existing) --- - if "permissions" not in wf: - findings.append("missing-permissions: No explicit workflow permissions defined; defaults may be overly broad.") - else: - # --- C2. 
Overly broad workflow permissions (new heuristic) --- - perms = wf.get("permissions") - if isinstance(perms, str) and perms.lower() == "write-all": - findings.append("overbroad-permissions: Workflow uses `permissions: write-all`.") - if isinstance(perms, dict): - # Example policy: flag any write permissions on PR-triggered workflows - if "pull_request_target" in on_keys: - for scope, level in perms.items(): - if isinstance(level, str) and "write" in level.lower(): - findings.append( - f"overbroad-permissions: PR-triggered workflow requests " f"`{scope}: {level}`." - ) - - # Walk jobs/steps for step-level checks. - jobs = wf.get("jobs", {}) if isinstance(wf.get("jobs"), dict) else {} - for job_name, job in jobs.items(): - if not is_normal_job(job): - continue - # --- D. Self-hosted runners (new) --- - runs_on = job.get("runs-on") - if runs_on: - runs_on_str = str(runs_on) - if "self-hosted" in runs_on_str: - findings.append( - f"self-hosted-runner: Job `{job_name}` runs on self-hosted runners; " - "ensure isolation and never run untrusted PR code there." - ) +def _append_job_level_findings(findings: list[PrioritizedIssue], job_node: GitHubActionsNormalJobNode) -> None: + """Append findings derived from a single job node.""" + runs_on = job_node.definition.get("runs-on") + if runs_on and "self-hosted" in str(runs_on): + _add_finding( + findings, + f"self-hosted-runner: Job `{job_node.job_id}` runs on self-hosted runners; " + "ensure isolation and never run untrusted PR code there.", + PRIORITY_MEDIUM, + ) - steps = job.get("steps", []) if isinstance(job.get("steps"), list) else [] - - for step in steps: - uses = step.get("uses", "") if isinstance(step, dict) else "" - run = step.get("run", "") if isinstance(step, dict) else "" - - # --- E. Action SHA pinning (new) --- - if uses: - # Ignore local actions "./.github/actions/..." 
- if not uses.startswith("./") and not SHA_PINNED_USES_RE.match(uses): - # findings.append(f"unpinned-action: Job `{job_name}` uses `{uses}` not pinned to a commit SHA.") - findings.append(uses) - - # --- F. Checkout untrusted fork refs on PR event (existing, expanded) --- - if uses and "actions/checkout" in uses: - with_section = step.get("with", {}) if isinstance(step.get("with"), dict) else {} - ref = with_section.get("ref", "") - if ref in UNTRUSTED_PR_REFS and "pull_request" in on_keys: - findings.append( - f"untrusted-fork-code: Job `{job_name}` checks out " - f"untrusted fork code (`ref: {ref}`) on PR event." - ) - - # --- G. persist-credentials (new) --- - # Default is true for checkout; many orgs prefer setting false explicitly. - persist = with_section.get("persist-credentials", None) - if persist is True or (isinstance(persist, str) and persist.lower() == "true"): - findings.append( - f"persist-credentials: Job `{job_name}` uses checkout " - "with `persist-credentials: true`; may expose " - "GITHUB_TOKEN to subsequent git commands." - ) - - # --- H. Remote script execution: curl|bash (new heuristic) --- - if isinstance(run, str) and REMOTE_SCRIPT_RE.search(run): - findings.append( - f"remote-script-exec: Job `{job_name}` step appears to " "download and pipe to shell (`curl|bash`)." - ) - # --- I. Extra dangerous combo: pull_request_target + checkout PR head ref (new) --- - if "pull_request_target" in on_keys and uses and "actions/checkout" in uses: - with_section = step.get("with", {}) if isinstance(step.get("with"), dict) else {} - ref = with_section.get("ref", "") - if ref in UNTRUSTED_PR_REFS: - findings.append( - f"pr-target-untrusted-checkout: Job `{job_name}` uses " - f"pull_request_target and checks out PR-controlled " - f"ref `{ref}`." - ) - - # --- J. 
Your existing dataflow-based injection heuristic (kept) --- - for node in core.traverse_bfs(workflow_node): - if isinstance(node, bash.BashSingleCommandNode): - # step_node = get_containing_github_step(node, nodes.parents) - if is_call_expr(node.definition.get("Cmd")): - call_exp = cast(CallExpr, node.definition["Cmd"]) - for arg in call_exp.get("Args", []): - expansion = False - pr_head_ref = False - for part in arg.get("Parts", []): - if is_param_exp(part) and part.get("Param", {}).get("Value") == "github": - expansion = True - if is_lit(part) and part.get("Value") in { - ".event.pull_request.head.ref", - ".head_ref", - ".event.issue.body", - ".event.comment.body", - }: - pr_head_ref = True - if expansion and pr_head_ref: - findings.append(f"potential-injection: {arg.get('Parts')}") +def _append_action_step_findings( + findings: list[PrioritizedIssue], + action_node: GitHubActionsActionStepNode, + on_keys: set[str], +) -> None: + """Append findings derived from an action step node.""" + uses_name = action_node.uses_name + uses_version = action_node.uses_version + if ( + uses_name + and not uses_name.startswith("./") + and uses_version + and not re.fullmatch(r"[0-9a-f]{40}", uses_version) + ): + _add_finding(findings, f"{uses_name}@{uses_version}", PRIORITY_HIGH) + + if uses_name == "actions/checkout": + ref = _literal_value(action_node.with_parameters.get("ref")) + if ref in UNTRUSTED_PR_REFS and "pull_request" in on_keys: + _add_finding( + findings, + f"untrusted-fork-code: A checkout step uses untrusted fork code (`ref: {ref}`) on PR event.", + PRIORITY_CRITICAL, + ) + + persist = _literal_value(action_node.with_parameters.get("persist-credentials")) + if persist.lower() == "true": + _add_finding( + findings, + "persist-credentials: Checkout uses `persist-credentials: true`; " + "this may expose GITHUB_TOKEN to subsequent git commands.", + PRIORITY_MEDIUM, + ) + + if "pull_request_target" in on_keys and ref in UNTRUSTED_PR_REFS: + _add_finding( + findings, + 
f"pr-target-untrusted-checkout: Workflow uses pull_request_target and checks out PR-controlled ref `{ref}`.", + PRIORITY_CRITICAL, + ) + + +def _append_run_step_findings(findings: list[PrioritizedIssue], run_step_node: GitHubActionsRunStepNode) -> None: + """Append findings derived from a run step node.""" + run_script = run_step_node.definition.get("run", "") + if isinstance(run_script, str) and REMOTE_SCRIPT_RE.search(run_script): + _add_finding( + findings, + "remote-script-exec: A step appears to download and pipe to shell (`curl|bash`).", + PRIORITY_HIGH, + ) - if findings: - return {"workflow_name": workflow_node.context.ref.source_filepath, "issues": findings} - return None +def _append_injection_findings( + findings: list[PrioritizedIssue], + bash_node: bash.BashSingleCommandNode, +) -> None: + """Append potential injection findings discovered from parsed bash command nodes.""" + if not is_call_expr(bash_node.definition.get("Cmd")): + return + + call_exp = cast(CallExpr, bash_node.definition["Cmd"]) + for arg in call_exp.get("Args", []): + expansion = False + pr_head_ref = False + for part in arg.get("Parts", []): + if is_param_exp(part) and part.get("Param", {}).get("Value") == "github": + expansion = True + if is_lit(part) and part.get("Value") in { + ".event.pull_request.head.ref", + ".head_ref", + ".event.issue.body", + ".event.comment.body", + }: + pr_head_ref = True + if expansion and pr_head_ref: + _add_finding(findings, f"potential-injection: {arg.get('Parts')}", PRIORITY_CRITICAL) + + +def _has_privileged_trigger_risk_combo(findings: list[PrioritizedIssue]) -> bool: + """Return whether findings contain risky patterns that amplify pull_request_target risk.""" + risky_prefixes = ( + "overbroad-permissions:", + "untrusted-fork-code:", + "persist-credentials:", + "remote-script-exec:", + "pr-target-untrusted-checkout:", + "potential-injection:", + "self-hosted-runner:", + ) + return any(any(finding["issue"].startswith(prefix) for prefix in 
risky_prefixes) for finding in findings)
+
+
+def _literal_value(value: facts.Value | None) -> str:
+    """Return literal string value from a facts expression when available."""
+    if isinstance(value, facts.StringLiteral):
+        return value.literal
+    return ""
+
+
+def _add_finding(findings: list[PrioritizedIssue], issue: str, priority: int) -> None:
+    """Append a finding with priority metadata."""
+    findings.append({"issue": issue, "priority": priority})
 
 
 # def analyze_workflow(workflow_node: GitHubActionsWorkflowNode, nodes: NodeForest) -> list[dict[str, str]]:
diff --git a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py
index 5202856c1..190d42536 100644
--- a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py
+++ b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py
@@ -9,8 +9,13 @@
 
 from __future__ import annotations
 
+import re
 from dataclasses import dataclass
 
+from macaron.slsa_analyzer.git_service.api_client import GhAPIClient
+
+UNPINNED_ACTION_RE = re.compile(r"^(?P<action>[^@\s]+)@(?P<version>[^\s]+)$")
+
 
 @dataclass(frozen=True)
 class Recommendation:
@@ -51,6 +56,56 @@ def recommend_for_unpinned_action(action_name: str, resolved_sha: str | None = N
     )
 
 
+def parse_unpinned_action_issue(issue: str) -> tuple[str, str] | None:
+    """Parse an unpinned third-party action reference from issue text.
+
+    Parameters
+    ----------
+    issue : str
+        Raw issue text emitted by workflow security analysis.
+
+    Returns
+    -------
+    tuple[str, str] | None
+        Parsed ``(action_name, action_version)`` when the issue matches
+        ``owner/repo@ref`` format for a third-party action. ``None`` otherwise.
+ """ + match = UNPINNED_ACTION_RE.fullmatch(issue.strip()) + if not match: + return None + action = match.group("action") + version = match.group("version") + if action.startswith("./"): + return None + if "/" not in action: + return None + return action, version + + +def resolve_action_ref_to_sha(api_client: object, action_name: str, action_version: str) -> str | None: + """Resolve an action reference to an immutable commit SHA. + + Parameters + ---------- + api_client : object + API client instance used for GitHub API calls. + action_name : str + GitHub Action identifier in the form ``owner/repo``. + action_version : str + Action ref currently used by the workflow. + + Returns + ------- + str | None + The resolved commit SHA if resolution succeeds; otherwise ``None``. + """ + if not isinstance(api_client, GhAPIClient): + return None + if not action_name or not action_version: + return None + return api_client.get_commit_sha_from_ref(action_name, action_version) + + def recommend_for_workflow_issue(issue: str) -> Recommendation: """Map a workflow issue string to a remediation recommendation. diff --git a/src/macaron/resources/policies/sql/check-github-actions.sql b/src/macaron/resources/policies/sql/check-github-actions.sql index f0831e5cd..9d4f0b9fb 100644 --- a/src/macaron/resources/policies/sql/check-github-actions.sql +++ b/src/macaron/resources/policies/sql/check-github-actions.sql @@ -3,6 +3,8 @@ -- Failed check facts for check-github-actions policy template. 
SELECT
+    gha_check.finding_group,
+    gha_check.finding_priority,
     gha_check.finding_type,
     gha_check.github_actions_id AS third_party_action_name,
     gha_check.github_actions_version AS third_party_action_version,
diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py
index 4a57b5082..d8919dada 100644
--- a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py
+++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py
@@ -6,8 +6,9 @@
 import logging
 import os
 import re
+from enum import Enum
 
-from sqlalchemy import Boolean, ForeignKey, String
+from sqlalchemy import Boolean, ForeignKey, Integer, String
 from sqlalchemy.orm import Mapped, mapped_column
 
 from macaron.code_analyzer.dataflow_analysis.analysis import get_containing_github_job
@@ -18,9 +19,11 @@
 )
 from macaron.code_analyzer.gha_security_analysis.detect_injection import detect_github_actions_security_issues
 from macaron.code_analyzer.gha_security_analysis.recommendation import (
+    parse_unpinned_action_issue,
     recommend_for_osv_vulnerability,
     recommend_for_unpinned_action,
     recommend_for_workflow_issue,
+    resolve_action_ref_to_sha,
 )
 from macaron.database.db_custom_types import DBJsonList
 from macaron.database.table_definitions import CheckFacts
@@ -29,13 +32,29 @@
 from macaron.slsa_analyzer.analyze_context import AnalyzeContext
 from macaron.slsa_analyzer.checks.base_check import BaseCheck, CheckResultType
 from macaron.slsa_analyzer.checks.check_result import CheckResultData, Confidence, JustificationType
-from macaron.slsa_analyzer.git_service.api_client import GhAPIClient
 from macaron.slsa_analyzer.package_registry.osv_dev import OSVDevService
 from macaron.slsa_analyzer.registry import registry
 from macaron.slsa_analyzer.slsa_req import ReqName
 
 logger: logging.Logger = logging.getLogger(__name__)
-UNPINNED_ACTION_RE = re.compile(r"^(?P<action>[^@\s]+)@(?P<version>[^\s]+)$")
+
+
+class 
GitHubActionsFindingType(str, Enum): + """Enumeration of finding categories for GitHub Actions vulnerability check facts.""" + + # Note: finding_type is the subtype within a top-level finding_group. + # It intentionally carries more granular detail than finding_group. + KNOWN_VULNERABILITY = "known-vulnerability" + UNPINNED_THIRD_PARTY_ACTION = "unpinned-third-party-action" + + +class GitHubActionsFindingGroup(str, Enum): + """Top-level finding groups for GitHub Actions vulnerability check facts.""" + + # Note: finding_group is the high-level bucket used for reporting sections. + # finding_type refines the exact issue inside one of these groups. + THIRD_PARTY_ACTION_RISK = "third_party_action_risk" + WORKFLOW_SECURITY_ISSUE = "workflow_security_issue" class GitHubActionsVulnsFacts(CheckFacts): @@ -51,12 +70,19 @@ class GitHubActionsVulnsFacts(CheckFacts): DBJsonList, nullable=False, info={"justification": JustificationType.TEXT} ) - #: The finding category. - #: - ``osv_vulnerability`` for OSV-backed vulnerability findings. - #: - ``unpinned_third_party_action`` for third-party actions not pinned to a commit SHA. - #: - ``workflow_security_issue`` for other workflow security issues. + #: The finding category (subtype). + #: - ``known-vulnerability`` for known vulnerability findings. + #: - ``unpinned-third-party-action`` for third-party actions not pinned to a commit SHA. + #: - workflow issue subtype names (for example ``overbroad-permissions``). + #: This complements ``finding_group`` instead of replacing it. finding_type: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT}) + #: The top-level finding group. + #: - ``third_party_action_risk`` for action dependency risks. + #: - ``workflow_security_issue`` for workflow implementation security issues. + #: Use this to group rows in summaries; use ``finding_type`` for specific issue filtering. 
+ finding_group: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT}) + #: The GitHub Action Identifier. github_actions_id: Mapped[str] = mapped_column( String, nullable=False, info={"justification": JustificationType.TEXT} @@ -78,6 +104,11 @@ class GitHubActionsVulnsFacts(CheckFacts): String, nullable=True, info={"justification": JustificationType.TEXT} ) + #: Priority score for sorting and triaging findings in summary outputs. + finding_priority: Mapped[int] = mapped_column( + Integer, nullable=False, info={"justification": JustificationType.TEXT} + ) + #: Recommended immutable action reference, if applicable. recommended_ref: Mapped[str | None] = mapped_column( String, nullable=True, info={"justification": JustificationType.TEXT} @@ -123,38 +154,44 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: for ci_info in ci_services: callgraph = ci_info["callgraph"] gh_api_client = ci_info["service"].api_client if hasattr(ci_info["service"], "api_client") else None - if findings := detect_github_actions_security_issues(callgraph): - for finding in findings: - for issue in finding["issues"]: - if parsed_issue := _parse_unpinned_action_issue(str(issue)): + if workflow_findings := detect_github_actions_security_issues(callgraph): + for finding in workflow_findings: + for prioritized_issue in finding["issues"]: + issue = prioritized_issue["issue"] + issue_priority = int(prioritized_issue["priority"]) + if parsed_issue := parse_unpinned_action_issue(issue): action_name, action_version = parsed_issue - resolved_sha = _resolve_action_ref_to_sha(gh_api_client, action_name, action_version) + resolved_sha = resolve_action_ref_to_sha(gh_api_client, action_name, action_version) recommendation = recommend_for_unpinned_action(action_name, resolved_sha) result_tables.append( GitHubActionsVulnsFacts( vulnerability_urls=[], - finding_type="unpinned_third_party_action", + 
finding_type=GitHubActionsFindingType.UNPINNED_THIRD_PARTY_ACTION.value, + finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, github_actions_id=action_name, github_actions_version=action_version, caller_workflow=finding["workflow_name"], is_pinned_sha=False, finding_message=recommendation.message, + finding_priority=issue_priority, recommended_ref=recommendation.recommended_ref, confidence=Confidence.HIGH, ) ) continue - recommendation = recommend_for_workflow_issue(str(issue)) + recommendation = recommend_for_workflow_issue(issue) result_tables.append( GitHubActionsVulnsFacts( vulnerability_urls=[], - finding_type="workflow_security_issue", + finding_type=_extract_workflow_issue_type(issue), + finding_group=GitHubActionsFindingGroup.WORKFLOW_SECURITY_ISSUE.value, github_actions_id=finding["workflow_name"], github_actions_version="", caller_workflow=finding["workflow_name"], is_pinned_sha=None, finding_message=f"{issue} Recommendation: {recommendation.message}", + finding_priority=issue_priority, recommended_ref=recommendation.recommended_ref, confidence=Confidence.HIGH, ) @@ -245,12 +282,14 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: result_tables.append( GitHubActionsVulnsFacts( vulnerability_urls=vuln_mapping, - finding_type="osv_vulnerability", + finding_type=GitHubActionsFindingType.KNOWN_VULNERABILITY.value, + finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, github_actions_id=workflow_name, github_actions_version=workflow_inv["version"], caller_workflow=workflow_inv["caller_path"], is_pinned_sha=bool(re.fullmatch(r"[0-9a-f]{40}", workflow_inv["version"])), finding_message=recommendation.message, + finding_priority=100, recommended_ref=recommendation.recommended_ref, confidence=Confidence.HIGH, ) @@ -271,24 +310,8 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: registry.register(GitHubActionsVulnsCheck()) -def _parse_unpinned_action_issue(issue: str) -> tuple[str, str] | None: - 
"""Parse an unpinned third-party action reference from issue text.""" - match = UNPINNED_ACTION_RE.fullmatch(issue.strip()) - if not match: - return None - action = match.group("action") - version = match.group("version") - if action.startswith("./"): - return None - if "/" not in action: - return None - return action, version - - -def _resolve_action_ref_to_sha(api_client: object, action_name: str, action_version: str) -> str | None: - """Resolve an action ref to an immutable commit SHA using GitHub API.""" - if not isinstance(api_client, GhAPIClient): - return None - if not action_name or not action_version: - return None - return api_client.get_commit_sha_from_ref(action_name, action_version) +def _extract_workflow_issue_type(issue: str) -> str: + """Extract a normalized workflow issue subtype from issue text.""" + prefix, _, _ = issue.partition(":") + normalized = prefix.strip().replace("_", "-") + return normalized or "workflow-security-issue" diff --git a/tests/integration/cases/oracle_coherence-js-client/policy.dl b/tests/integration/cases/oracle_coherence-js-client/policy.dl index 5c1c8b870..4016e02b4 100644 --- a/tests/integration/cases/oracle_coherence-js-client/policy.dl +++ b/tests/integration/cases/oracle_coherence-js-client/policy.dl @@ -8,12 +8,13 @@ Policy("check-github-actions-vulnerabilities", component_id, "Check GitHub Actio github_actions_vulnerabilities_check( _, "[\"https://osv.dev/vulnerability/GHSA-69fq-xp46-6x23\", \"https://osv.dev/vulnerability/GHSA-9p44-j4g5-cfx5\"]", - "osv_vulnerability", + "known-vulnerability", "aquasecurity/trivy-action", "0.32.0", "https://github.com/oracle/coherence-js-client/blob/39166341bc31f75b663ff439dae36170fb3e99a9/.github/workflows/trivy-scan.yml", _, _, + _, _ ). 
From 0a454902fc3cab50f746bcdf3084817b71c23c3a Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 29 Mar 2026 21:37:47 +1000 Subject: [PATCH 18/30] chore: improve table summary Signed-off-by: behnazh-w --- scripts/actions/write_job_summary.py | 32 +++++------ .../gha_security_analysis/recommendation.py | 57 ++++++++++++++++++- .../github_actions_vulnerability_check.py | 44 +++++++++----- 3 files changed, 100 insertions(+), 33 deletions(-) diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py index 405a2c7a5..af9f02c6b 100644 --- a/scripts/actions/write_job_summary.py +++ b/scripts/actions/write_job_summary.py @@ -20,8 +20,6 @@ "passed", ] -MAX_TABLE_CELL_LEN = 72 - def _env(name: str, default: str = "") -> str: return os.environ.get(name, default) @@ -209,8 +207,6 @@ def _format_table_cell(value: object) -> str: segments = [part for part in parsed.path.split("/") if part] label = segments[-1] if segments else parsed.netloc return f"[`{label}`]({text})" - if len(text) > MAX_TABLE_CELL_LEN: - text = f"{text[: MAX_TABLE_CELL_LEN - 3]}..." return f"`{_sanitize_for_markdown_table_code(text)}`" @@ -232,8 +228,6 @@ def _format_list_item(value: object) -> str: segments = [part for part in parsed.path.split("/") if part] label = segments[-1] if segments else parsed.netloc return f"[`{label}`]({text})" - if len(text) > MAX_TABLE_CELL_LEN: - text = f"{text[: MAX_TABLE_CELL_LEN - 3]}..." return f"`{_sanitize_for_markdown_table_code(text)}`" @@ -297,7 +291,7 @@ def _write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], summary_path, "_Showing top 10 findings by priority. 
Expand details below for full diagnostics._", ) - preferred_groups = ["third_party_action_risk", "workflow_security_issue"] + preferred_groups = ["workflow_security_issue", "third_party_action_risk"] groups_in_rows: list[str] = [] if group_idx is not None: discovered_groups = [str(row[group_idx]) for row in display_rows] @@ -318,8 +312,12 @@ def _write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], _append_line(summary_path) _append_line(summary_path, f"#### {title}") _append_line(summary_path) - _append_line(summary_path, "| priority | type | action | version | workflow |") - _append_line(summary_path, "|---|---|---|---|---|") + if group == "workflow_security_issue": + _append_line(summary_path, "| priority | type | action | workflow |") + _append_line(summary_path, "|---|---|---|---|") + else: + _append_line(summary_path, "| priority | type | action | version | workflow |") + _append_line(summary_path, "|---|---|---|---|---|") for row in group_rows: priority_raw = row[col_index["finding_priority"]] priority = f"`{_priority_label(priority_raw)} ({priority_raw})`" @@ -327,10 +325,13 @@ def _write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], action_name = _format_table_cell(row[col_index["third_party_action_name"]]) action_version = _format_table_cell(row[col_index["third_party_action_version"]]) workflow = _format_table_cell(row[col_index["vulnerable_workflow"]]) - _append_line( - summary_path, - f"| {priority} | {finding_type} | {action_name} | {action_version} | {workflow} |", - ) + if group == "workflow_security_issue": + _append_line(summary_path, f"| {priority} | {finding_type} | {action_name} | {workflow} |") + else: + _append_line( + summary_path, + f"| {priority} | {finding_type} | {action_name} | {action_version} | {workflow} |", + ) _append_line(summary_path) _append_line(summary_path, "
") @@ -354,9 +355,8 @@ def _write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], priority = row[col_index["finding_priority"]] finding_type = str(row[col_index["finding_type"]]) workflow = str(row[col_index["vulnerable_workflow"]]) - _append_line( - summary_path, f"{row_counter}. **`{action}@{version}`** (`{finding_type}`, priority `{priority}`)" - ) + subject = f"{action}@{version}" if version else action + _append_line(summary_path, f"{row_counter}. **`{subject}`** (`{finding_type}`, priority `{priority}`)") _append_line(summary_path, f"- Workflow: `{workflow}`") pin_idx = col_index.get("is_pinned_sha") diff --git a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py index 190d42536..2daa7f602 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py +++ b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py @@ -12,7 +12,9 @@ import re from dataclasses import dataclass +from macaron.errors import GitTagError from macaron.slsa_analyzer.git_service.api_client import GhAPIClient +from macaron.slsa_analyzer.git_url import find_highest_git_tag, get_tags_via_git_remote, is_commit_hash UNPINNED_ACTION_RE = re.compile(r"^(?P[^@\s]+)@(?P[^\s]+)$") @@ -33,7 +35,9 @@ class Recommendation: recommended_ref: str | None = None -def recommend_for_unpinned_action(action_name: str, resolved_sha: str | None = None) -> Recommendation: +def recommend_for_unpinned_action( + action_name: str, resolved_sha: str | None = None, resolved_tag: str | None = None +) -> Recommendation: """Create a recommendation for an unpinned third-party action. Parameters @@ -42,6 +46,8 @@ def recommend_for_unpinned_action(action_name: str, resolved_sha: str | None = N GitHub Action identifier in the form ``owner/repo``. resolved_sha : str | None, optional Resolved commit SHA for the action ref if available. 
+ resolved_tag : str | None, optional + Tag corresponding to ``resolved_sha`` when available. Returns ------- @@ -49,7 +55,12 @@ def recommend_for_unpinned_action(action_name: str, resolved_sha: str | None = N Recommendation containing pinning guidance and a suggested immutable action reference. """ - recommended_ref = f"{action_name}@{resolved_sha}" if resolved_sha else f"{action_name}@<40-char-commit-sha>" + if resolved_sha and resolved_tag: + recommended_ref = f"{action_name}@{resolved_sha} # {resolved_tag}" + elif resolved_sha: + recommended_ref = f"{action_name}@{resolved_sha}" + else: + recommended_ref = f"{action_name}@<40-char-commit-sha>" return Recommendation( message="Pin this third-party action to a full 40-character commit SHA to prevent tag drift or takeover risk.", recommended_ref=recommended_ref, @@ -103,9 +114,51 @@ def resolve_action_ref_to_sha(api_client: object, action_name: str, action_versi return None if not action_name or not action_version: return None + if is_commit_hash(action_version): + # Normalize short SHAs by resolving them through the API. + return ( + action_version + if len(action_version) == 40 + else api_client.get_commit_sha_from_ref(action_name, action_version) + ) return api_client.get_commit_sha_from_ref(action_name, action_version) +def resolve_action_ref_to_tag(action_name: str, resolved_sha: str | None, action_version: str = "") -> str | None: + """Resolve a commit SHA to a corresponding Git tag for an action repository. + + Parameters + ---------- + action_name : str + GitHub Action identifier in the form ``owner/repo``. + resolved_sha : str | None, optional + Resolved commit SHA for the action ref. + action_version : str, optional + Original action version/ref. If this exact ref is a tag on the same + commit, it is preferred. + + Returns + ------- + str | None + The corresponding tag name if found; otherwise ``None``. 
+ """ + if not action_name or not resolved_sha: + return None + tags = get_tags_via_git_remote(f"https://github.com/{action_name}") + if not tags: + return None + + matching_tags = [tag for tag, tag_sha in tags.items() if tag_sha == resolved_sha] + if not matching_tags: + return None + if action_version and action_version in matching_tags: + return action_version + try: + return find_highest_git_tag(set(matching_tags)) + except GitTagError: + return matching_tags[0] + + def recommend_for_workflow_issue(issue: str) -> Recommendation: """Map a workflow issue string to a remediation recommendation. diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py index d8919dada..da7a0716c 100644 --- a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py +++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py @@ -24,6 +24,7 @@ recommend_for_unpinned_action, recommend_for_workflow_issue, resolve_action_ref_to_sha, + resolve_action_ref_to_tag, ) from macaron.database.db_custom_types import DBJsonList from macaron.database.table_definitions import CheckFacts @@ -156,13 +157,25 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: gh_api_client = ci_info["service"].api_client if hasattr(ci_info["service"], "api_client") else None if workflow_findings := detect_github_actions_security_issues(callgraph): for finding in workflow_findings: + caller_workflow_link = "" + if gh_api_client: + caller_workflow_link = gh_api_client.get_file_link( + ctx.component.repository.full_name, + ctx.component.repository.commit_sha, + file_path=( + gh_api_client.get_relative_path_of_workflow(os.path.basename(finding["workflow_name"])) + if finding["workflow_name"] + else "" + ), + ) for prioritized_issue in finding["issues"]: issue = prioritized_issue["issue"] issue_priority = int(prioritized_issue["priority"]) if parsed_issue := 
parse_unpinned_action_issue(issue): action_name, action_version = parsed_issue resolved_sha = resolve_action_ref_to_sha(gh_api_client, action_name, action_version) - recommendation = recommend_for_unpinned_action(action_name, resolved_sha) + resolved_tag = resolve_action_ref_to_tag(action_name, resolved_sha, action_version) + recommendation = recommend_for_unpinned_action(action_name, resolved_sha, resolved_tag) result_tables.append( GitHubActionsVulnsFacts( vulnerability_urls=[], @@ -170,7 +183,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, github_actions_id=action_name, github_actions_version=action_version, - caller_workflow=finding["workflow_name"], + caller_workflow=caller_workflow_link, is_pinned_sha=False, finding_message=recommendation.message, finding_priority=issue_priority, @@ -186,9 +199,9 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: vulnerability_urls=[], finding_type=_extract_workflow_issue_type(issue), finding_group=GitHubActionsFindingGroup.WORKFLOW_SECURITY_ISSUE.value, - github_actions_id=finding["workflow_name"], + github_actions_id=caller_workflow_link, github_actions_version="", - caller_workflow=finding["workflow_name"], + caller_workflow=caller_workflow_link, is_pinned_sha=None, finding_message=f"{issue} Recommendation: {recommendation.message}", finding_priority=issue_priority, @@ -217,22 +230,23 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: continue caller_path = job.context.ref.workflow_context.ref.source_filepath + caller_path_link = caller_path + if gh_api_client: + caller_path_link = gh_api_client.get_file_link( + ctx.component.repository.full_name, + ctx.component.repository.commit_sha, + file_path=( + gh_api_client.get_relative_path_of_workflow(os.path.basename(caller_path)) + if caller_path + else "" + ), + ) ext_workflow: list = external_workflows.get(workflow_name, []) ext_workflow.append( { "version": 
workflow_version, - "caller_path": ci_info["service"].api_client.get_file_link( - ctx.component.repository.full_name, - ctx.component.repository.commit_sha, - file_path=( - ci_info["service"].api_client.get_relative_path_of_workflow( - os.path.basename(caller_path) - ) - if caller_path - else "" - ), - ), + "caller_path": caller_path_link, } ) external_workflows[workflow_name] = ext_workflow From 25a336393f6e6c72c301a515f08aea618667a047 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 30 Mar 2026 07:38:47 +1000 Subject: [PATCH 19/30] chore: improve the summary and result details Signed-off-by: behnazh-w --- .../gha_security_analysis/detect_injection.py | 3 +- .../gha_security_analysis/recommendation.py | 4 +- .../github_actions_vulnerability_check.py | 44 ++++++++++++--- .../test_gha_security_analysis.ambr | 31 +++++++++-- .../test_recommendation.py | 54 +++++++++++++++++++ 5 files changed, 124 insertions(+), 12 deletions(-) create mode 100644 tests/code_analyzer/gha_security_analysis/test_recommendation.py diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index 22e33fc6e..2603ec0f6 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -38,6 +38,7 @@ PRIORITY_HIGH = 80 PRIORITY_MEDIUM = 60 PRIORITY_LOW = 40 +PRIORITY_MIN = 20 class PrioritizedIssue(TypedDict): @@ -214,7 +215,7 @@ def _append_action_step_findings( and uses_version and not re.fullmatch(r"[0-9a-f]{40}", uses_version) ): - _add_finding(findings, f"{uses_name}@{uses_version}", PRIORITY_HIGH) + _add_finding(findings, f"{uses_name}@{uses_version}", PRIORITY_MIN) if uses_name == "actions/checkout": ref = _literal_value(action_node.with_parameters.get("ref")) diff --git a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py 
index 2daa7f602..3856fdf41 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py +++ b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py @@ -60,9 +60,9 @@ def recommend_for_unpinned_action( elif resolved_sha: recommended_ref = f"{action_name}@{resolved_sha}" else: - recommended_ref = f"{action_name}@<40-char-commit-sha>" + recommended_ref = "cannot be found" return Recommendation( - message="Pin this third-party action to a full 40-character commit SHA to prevent tag drift or takeover risk.", + message="Pin this third-party action to a 40-character commit SHA.", recommended_ref=recommended_ref, ) diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py index da7a0716c..4090c3971 100644 --- a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py +++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py @@ -176,16 +176,20 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: resolved_sha = resolve_action_ref_to_sha(gh_api_client, action_name, action_version) resolved_tag = resolve_action_ref_to_tag(action_name, resolved_sha, action_version) recommendation = recommend_for_unpinned_action(action_name, resolved_sha, resolved_tag) + finding_type = GitHubActionsFindingType.UNPINNED_THIRD_PARTY_ACTION.value result_tables.append( GitHubActionsVulnsFacts( vulnerability_urls=[], - finding_type=GitHubActionsFindingType.UNPINNED_THIRD_PARTY_ACTION.value, + finding_type=finding_type, finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, github_actions_id=action_name, github_actions_version=action_version, caller_workflow=caller_workflow_link, is_pinned_sha=False, - finding_message=recommendation.message, + finding_message=( + f"Summary: {_short_description_for_finding_type(finding_type)} " + f"Recommendation: {recommendation.message}" + ), finding_priority=issue_priority, 
recommended_ref=recommendation.recommended_ref, confidence=Confidence.HIGH, @@ -194,16 +198,20 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: continue recommendation = recommend_for_workflow_issue(issue) + finding_type = _extract_workflow_issue_type(issue) result_tables.append( GitHubActionsVulnsFacts( vulnerability_urls=[], - finding_type=_extract_workflow_issue_type(issue), + finding_type=finding_type, finding_group=GitHubActionsFindingGroup.WORKFLOW_SECURITY_ISSUE.value, github_actions_id=caller_workflow_link, github_actions_version="", caller_workflow=caller_workflow_link, is_pinned_sha=None, - finding_message=f"{issue} Recommendation: {recommendation.message}", + finding_message=( + f"Summary: {_short_description_for_finding_type(finding_type)} " + f"Details: {issue} Recommendation: {recommendation.message}" + ), finding_priority=issue_priority, recommended_ref=recommendation.recommended_ref, confidence=Confidence.HIGH, @@ -293,16 +301,20 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: logger.debug(error) if vuln_mapping: recommendation = recommend_for_osv_vulnerability(workflow_name, workflow_inv["version"]) + finding_type = GitHubActionsFindingType.KNOWN_VULNERABILITY.value result_tables.append( GitHubActionsVulnsFacts( vulnerability_urls=vuln_mapping, - finding_type=GitHubActionsFindingType.KNOWN_VULNERABILITY.value, + finding_type=finding_type, finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, github_actions_id=workflow_name, github_actions_version=workflow_inv["version"], caller_workflow=workflow_inv["caller_path"], is_pinned_sha=bool(re.fullmatch(r"[0-9a-f]{40}", workflow_inv["version"])), - finding_message=recommendation.message, + finding_message=( + f"Summary: {_short_description_for_finding_type(finding_type)} " + f"Recommendation: {recommendation.message}" + ), finding_priority=100, recommended_ref=recommendation.recommended_ref, confidence=Confidence.HIGH, @@ -329,3 +341,23 @@ def 
_extract_workflow_issue_type(issue: str) -> str: prefix, _, _ = issue.partition(":") normalized = prefix.strip().replace("_", "-") return normalized or "workflow-security-issue" + + +def _short_description_for_finding_type(finding_type: str) -> str: + """Return a concise, human-readable summary for a finding subtype.""" + finding_summaries = { + GitHubActionsFindingType.KNOWN_VULNERABILITY.value: "Action version is known to be vulnerable.", + GitHubActionsFindingType.UNPINNED_THIRD_PARTY_ACTION.value: "Third-party action is not pinned to an immutable SHA.", + "sensitive-trigger": "Workflow uses a sensitive trigger and needs strict gating.", + "privileged-trigger": "Privileged trigger can expose elevated token scope to untrusted input.", + "missing-permissions": "Workflow omits explicit permissions and may inherit broad defaults.", + "overbroad-permissions": "Workflow requests permissions broader than required.", + "untrusted-fork-code": "Workflow can execute code controlled by an untrusted fork.", + "persist-credentials": "Persisted checkout credentials can leak token access to later steps.", + "remote-script-exec": "Workflow downloads and executes remote scripts inline.", + "pr-target-untrusted-checkout": "pull_request_target is combined with checkout of PR-controlled refs.", + "potential-injection": "Untrusted GitHub context data may flow into shell execution.", + "self-hosted-runner": "Job uses self-hosted runners, increasing blast radius for untrusted code.", + "workflow-security-issue": "Workflow includes a security issue that requires hardening.", + } + return finding_summaries.get(finding_type, "Workflow security finding detected.") diff --git a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr index c01a5a96f..80bea0779 100644 --- a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr +++ 
b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr @@ -3,9 +3,34 @@ list([ dict({ 'issues': list([ - "sensitive-trigger: Workflow uses ['pull_request_target']. Ensure strict gating (e.g., actor allowlist, branch protection, and minimal permissions).", - 'privileged-trigger: Workflow uses `pull_request_target`, which runs with elevated permissions.', - 'missing-permissions: No explicit workflow permissions defined; defaults may be overly broad.', + dict({ + 'issue': "potential-injection: [{'Type': 'Lit', 'Pos': {'Offset': 184, 'Line': 7, 'Col': 17}, 'End': {'Offset': 189, 'Line': 7, 'Col': 22}, 'ValuePos': {'Offset': 184, 'Line': 7, 'Col': 17}, 'ValueEnd': {'Offset': 189, 'Line': 7, 'Col': 22}, 'Value': 'HEAD:'}, {'Type': 'ParamExp', 'Pos': {'Offset': 189, 'Line': 7, 'Col': 22}, 'End': {'Offset': 196, 'Line': 7, 'Col': 29}, 'Dollar': {'Offset': 189, 'Line': 7, 'Col': 22}, 'Short': True, 'Param': {'Pos': {'Offset': 190, 'Line': 7, 'Col': 23}, 'End': {'Offset': 196, 'Line': 7, 'Col': 29}, 'ValuePos': {'Offset': 190, 'Line': 7, 'Col': 23}, 'ValueEnd': {'Offset': 196, 'Line': 7, 'Col': 29}, 'Value': 'github'}}, {'Type': 'Lit', 'Pos': {'Offset': 196, 'Line': 7, 'Col': 29}, 'End': {'Offset': 224, 'Line': 7, 'Col': 57}, 'ValuePos': {'Offset': 196, 'Line': 7, 'Col': 29}, 'ValueEnd': {'Offset': 224, 'Line': 7, 'Col': 57}, 'Value': '.event.pull_request.head.ref'}]", + 'priority': 100, + }), + dict({ + 'issue': 'privileged-trigger: Workflow uses `pull_request_target` with additional risky patterns; treat this workflow as high risk and harden immediately.', + 'priority': 80, + }), + dict({ + 'issue': 'missing-permissions: No explicit workflow permissions defined; defaults may be overly broad.', + 'priority': 60, + }), + dict({ + 'issue': "sensitive-trigger: Workflow uses ['pull_request_target']. 
Ensure strict gating (e.g., actor allowlist, branch protection, and minimal permissions).", + 'priority': 40, + }), + dict({ + 'issue': 'actions/checkout@v5', + 'priority': 20, + }), + dict({ + 'issue': 'dtolnay/rust-toolchain@stable', + 'priority': 20, + }), + dict({ + 'issue': 'poseidon/wait-for-status-checks@v0.6.0', + 'priority': 20, + }), ]), 'workflow_name': '/home/behnaz/research/github/macaron/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml', }), diff --git a/tests/code_analyzer/gha_security_analysis/test_recommendation.py b/tests/code_analyzer/gha_security_analysis/test_recommendation.py new file mode 100644 index 000000000..c9b2aeea0 --- /dev/null +++ b/tests/code_analyzer/gha_security_analysis/test_recommendation.py @@ -0,0 +1,54 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Tests for GitHub Actions security recommendation helpers.""" + +import pytest + +from macaron.code_analyzer.gha_security_analysis.recommendation import ( + recommend_for_unpinned_action, + resolve_action_ref_to_tag, +) + + +def test_recommend_for_unpinned_action_with_tag_hint() -> None: + """Return pinned action recommendation with tag hint when SHA and tag are resolved.""" + recommendation = recommend_for_unpinned_action( + "actions/checkout", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "v4.2.2", + ) + + assert recommendation.recommended_ref == "actions/checkout@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa # v4.2.2" + + +def test_recommend_for_unpinned_action_when_sha_not_resolved() -> None: + """Return fallback recommendation text when action SHA cannot be resolved.""" + recommendation = recommend_for_unpinned_action("actions/checkout") + + assert recommendation.recommended_ref == "cannot be found" + assert recommendation.message == "Pin this third-party action to a 40-character commit 
SHA." + + +def test_resolve_action_ref_to_tag_found(monkeypatch: pytest.MonkeyPatch) -> None: + """Resolve the matching tag when a tag points to the resolved action SHA.""" + monkeypatch.setattr( + "macaron.code_analyzer.gha_security_analysis.recommendation.get_tags_via_git_remote", + lambda repo: {"v4.2.2": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"}, + ) + + tag = resolve_action_ref_to_tag("actions/checkout", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", "v4") + + assert tag == "v4.2.2" + + +def test_resolve_action_ref_to_tag_none_when_no_match(monkeypatch: pytest.MonkeyPatch) -> None: + """Return None when no fetched tag points to the resolved action SHA.""" + monkeypatch.setattr( + "macaron.code_analyzer.gha_security_analysis.recommendation.get_tags_via_git_remote", + lambda repo: {"v4.2.2": "dddddddddddddddddddddddddddddddddddddddd"}, + ) + + tag = resolve_action_ref_to_tag("actions/checkout", "cccccccccccccccccccccccccccccccccccccccc", "v4") + + assert tag is None From 9acf3d694077510d9e2a6a0a74d128c63650c8ec Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 30 Mar 2026 07:54:25 +1000 Subject: [PATCH 20/30] chore: add summary column for workflow security issues Signed-off-by: behnazh-w --- scripts/actions/write_job_summary.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py index af9f02c6b..76bf613e4 100644 --- a/scripts/actions/write_job_summary.py +++ b/scripts/actions/write_job_summary.py @@ -261,6 +261,20 @@ def _gha_group_label(group: str) -> str: return group +def _extract_finding_summary(message: object) -> str: + """Extract a compact summary from a finding message.""" + text = str(message).strip() + if not text: + return "" + + # Expected format: "Summary: ... Details: ... Recommendation: ..." 
+ match = re.search(r"Summary:\s*(.*?)(?:\s+Details:\s*|\s+Recommendation:\s*|$)", text, flags=re.IGNORECASE) + if match: + return match.group(1).strip() + + return text + + def _write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], rows: list[tuple]) -> bool: """Write compact diagnostics for check-github-actions policy failures.""" if not columns or not rows: @@ -313,8 +327,8 @@ def _write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], _append_line(summary_path, f"#### {title}") _append_line(summary_path) if group == "workflow_security_issue": - _append_line(summary_path, "| priority | type | action | workflow |") - _append_line(summary_path, "|---|---|---|---|") + _append_line(summary_path, "| priority | type | summary | action | workflow |") + _append_line(summary_path, "|---|---|---|---|---|") else: _append_line(summary_path, "| priority | type | action | version | workflow |") _append_line(summary_path, "|---|---|---|---|---|") @@ -322,11 +336,17 @@ def _write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], priority_raw = row[col_index["finding_priority"]] priority = f"`{_priority_label(priority_raw)} ({priority_raw})`" finding_type = _format_table_cell(row[col_index["finding_type"]]) + finding_summary = _format_table_cell( + _extract_finding_summary(row[col_index["finding_message"]]) if "finding_message" in col_index else "" + ) action_name = _format_table_cell(row[col_index["third_party_action_name"]]) action_version = _format_table_cell(row[col_index["third_party_action_version"]]) workflow = _format_table_cell(row[col_index["vulnerable_workflow"]]) if group == "workflow_security_issue": - _append_line(summary_path, f"| {priority} | {finding_type} | {action_name} | {workflow} |") + _append_line( + summary_path, + f"| {priority} | {finding_type} | {finding_summary} | {action_name} | {workflow} |", + ) else: _append_line( summary_path, From ba380961f67afa85bb923e15e1ffbf65dc4cd3a5 Mon Sep 17 
00:00:00 2001 From: behnazh-w Date: Mon, 30 Mar 2026 08:54:28 +1000 Subject: [PATCH 21/30] chore: change the table schema Signed-off-by: behnazh-w --- scripts/actions/write_job_summary.py | 48 ++++++--- .../gha_security_analysis/detect_injection.py | 55 ++++++++++ .../gha_security_analysis/recommendation.py | 2 +- .../policies/sql/check-github-actions.sql | 8 +- .../github_actions_vulnerability_check.py | 100 ++++++++---------- .../test_recommendation.py | 2 +- .../oracle_coherence-js-client/policy.dl | 11 +- .../output_reporter/test_write_job_summary.py | 59 +++++++++++ 8 files changed, 203 insertions(+), 82 deletions(-) create mode 100644 tests/output_reporter/test_write_job_summary.py diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py index 76bf613e4..63b1e12ec 100644 --- a/scripts/actions/write_job_summary.py +++ b/scripts/actions/write_job_summary.py @@ -275,8 +275,23 @@ def _extract_finding_summary(message: object) -> str: return text -def _write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], rows: list[tuple]) -> bool: - """Write compact diagnostics for check-github-actions policy failures.""" +def write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], rows: list[tuple]) -> bool: + """Write compact GitHub Actions vulnerability diagnostics to the job summary. + + Parameters + ---------- + summary_path : Path + Path to the GitHub job summary markdown file. + columns : list[str] + Ordered column names from the SQL diagnostics query result. + rows : list[tuple] + Row values matching ``columns`` order. + + Returns + ------- + bool + ``True`` if content was rendered; ``False`` when inputs are empty. 
+ """ if not columns or not rows: return False @@ -284,8 +299,8 @@ def _write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], required = [ "finding_priority", "finding_type", - "third_party_action_name", - "third_party_action_version", + "action_name", + "action_ref", "vulnerable_workflow", ] if any(name not in col_index for name in required): @@ -327,8 +342,8 @@ def _write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], _append_line(summary_path, f"#### {title}") _append_line(summary_path) if group == "workflow_security_issue": - _append_line(summary_path, "| priority | type | summary | action | workflow |") - _append_line(summary_path, "|---|---|---|---|---|") + _append_line(summary_path, "| priority | type | summary | workflow |") + _append_line(summary_path, "|---|---|---|---|") else: _append_line(summary_path, "| priority | type | action | version | workflow |") _append_line(summary_path, "|---|---|---|---|---|") @@ -339,13 +354,13 @@ def _write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], finding_summary = _format_table_cell( _extract_finding_summary(row[col_index["finding_message"]]) if "finding_message" in col_index else "" ) - action_name = _format_table_cell(row[col_index["third_party_action_name"]]) - action_version = _format_table_cell(row[col_index["third_party_action_version"]]) + action_name = _format_table_cell(row[col_index["action_name"]]) + action_version = _format_table_cell(row[col_index["action_ref"]]) workflow = _format_table_cell(row[col_index["vulnerable_workflow"]]) if group == "workflow_security_issue": _append_line( summary_path, - f"| {priority} | {finding_type} | {finding_summary} | {action_name} | {workflow} |", + f"| {priority} | {finding_type} | {finding_summary} | {workflow} |", ) else: _append_line( @@ -370,22 +385,25 @@ def _write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], title = _gha_group_label(group) _append_line(summary_path, 
f"**{title}**") for row in group_rows: - action = str(row[col_index["third_party_action_name"]]) - version = str(row[col_index["third_party_action_version"]]) + action = str(row[col_index["action_name"]]) + version = str(row[col_index["action_ref"]]) priority = row[col_index["finding_priority"]] finding_type = str(row[col_index["finding_type"]]) workflow = str(row[col_index["vulnerable_workflow"]]) - subject = f"{action}@{version}" if version else action + if group == "workflow_security_issue": + subject = workflow + else: + subject = f"{action}@{version}" if version else action _append_line(summary_path, f"{row_counter}. **`{subject}`** (`{finding_type}`, priority `{priority}`)") _append_line(summary_path, f"- Workflow: `{workflow}`") - pin_idx = col_index.get("is_pinned_sha") + pin_idx = col_index.get("sha_pinned") row_group = str(row[group_idx]) if group_idx is not None else "" if pin_idx is not None and row_group == "third_party_action_risk" and row[pin_idx] is not None: pin_state = "yes" if bool(row[pin_idx]) else "no" _append_line(summary_path, f"- Pinned to full commit SHA: `{pin_state}`") - vul_idx = col_index.get("vulnerabilities") + vul_idx = col_index.get("vuln_urls") if vul_idx is not None and row[vul_idx]: parsed = _parse_list_cell(str(row[vul_idx])) if parsed: @@ -475,7 +493,7 @@ def _write_existing_policy_failure_diagnostics( _append_line(summary_path) _append_line(summary_path, f"#### Results") if policy_name == "check-github-actions": - rendered = _write_compact_gha_vuln_diagnostics(summary_path, cols, rows) + rendered = write_compact_gha_vuln_diagnostics(summary_path, cols, rows) else: rendered = _write_markdown_table(summary_path, cols, rows) if rendered: diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index 2603ec0f6..6d45fb6be 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ 
b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -14,6 +14,14 @@ GitHubActionsRunStepNode, GitHubActionsWorkflowNode, ) +from macaron.code_analyzer.gha_security_analysis.recommendation import ( + Recommendation, + parse_unpinned_action_issue, + recommend_for_unpinned_action, + recommend_for_workflow_issue, + resolve_action_ref_to_sha, + resolve_action_ref_to_tag, +) from macaron.parsers.bashparser_model import CallExpr, is_call_expr, is_lit, is_param_exp from macaron.parsers.github_workflow_model import Workflow @@ -306,6 +314,53 @@ def _add_finding(findings: list[PrioritizedIssue], issue: str, priority: int) -> findings.append({"issue": issue, "priority": priority}) +def get_workflow_issue_type(issue: str) -> str: + """Extract a normalized workflow issue subtype from issue text.""" + prefix, _, _ = issue.partition(":") + normalized = prefix.strip().replace("_", "-") + return normalized or "workflow-security-issue" + + +def get_workflow_issue_summary(finding_type: str) -> str: + """Return a concise summary for a workflow issue subtype.""" + finding_summaries = { + "sensitive-trigger": "Workflow uses a sensitive trigger and needs strict gating.", + "privileged-trigger": "Privileged trigger can expose elevated token scope to untrusted input.", + "missing-permissions": "Workflow omits explicit permissions and may inherit broad defaults.", + "overbroad-permissions": "Workflow requests permissions broader than required.", + "untrusted-fork-code": "Workflow can execute code controlled by an untrusted fork.", + "persist-credentials": "Persisted checkout credentials can leak token access to later steps.", + "remote-script-exec": "Workflow downloads and executes remote scripts inline.", + "pr-target-untrusted-checkout": "pull_request_target is combined with checkout of PR-controlled refs.", + "potential-injection": "Untrusted GitHub context data may flow into shell execution.", + "self-hosted-runner": "Job uses self-hosted runners, increasing blast 
radius for untrusted code.", + "workflow-security-issue": "Workflow includes a security issue that requires hardening.", + } + return finding_summaries.get(finding_type, "Workflow security finding detected.") + + +def build_workflow_issue_recommendation(issue: str) -> tuple[str, Recommendation, str]: + """Build normalized workflow issue recommendation metadata.""" + finding_type = get_workflow_issue_type(issue) + summary = get_workflow_issue_summary(finding_type) + recommendation = recommend_for_workflow_issue(issue) + finding_message = f"Summary: {summary} Details: {issue} Recommendation: {recommendation.message}" + return finding_type, recommendation, finding_message + + +def build_unpinned_action_recommendation(issue: str, api_client: object) -> tuple[str, str, Recommendation] | None: + """Build normalized recommendation metadata for an unpinned third-party action finding.""" + parsed_issue = parse_unpinned_action_issue(issue) + if not parsed_issue: + return None + + action_name, action_ref = parsed_issue + resolved_sha = resolve_action_ref_to_sha(api_client, action_name, action_ref) + resolved_tag = resolve_action_ref_to_tag(action_name, resolved_sha, action_ref) + recommendation = recommend_for_unpinned_action(action_name, resolved_sha, resolved_tag) + return action_name, action_ref, recommendation + + # def analyze_workflow(workflow_node: GitHubActionsWorkflowNode, nodes: NodeForest) -> list[dict[str, str]]: # """ # Analyze a GitHub Actions workflow for common security misconfigurations. 
diff --git a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py index 3856fdf41..1e2f65435 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py +++ b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py @@ -60,7 +60,7 @@ def recommend_for_unpinned_action( elif resolved_sha: recommended_ref = f"{action_name}@{resolved_sha}" else: - recommended_ref = "cannot be found" + recommended_ref = "Unable to resolve automatically" return Recommendation( message="Pin this third-party action to a 40-character commit SHA.", recommended_ref=recommended_ref, diff --git a/src/macaron/resources/policies/sql/check-github-actions.sql b/src/macaron/resources/policies/sql/check-github-actions.sql index 9d4f0b9fb..09ba2555b 100644 --- a/src/macaron/resources/policies/sql/check-github-actions.sql +++ b/src/macaron/resources/policies/sql/check-github-actions.sql @@ -6,12 +6,12 @@ SELECT gha_check.finding_group, gha_check.finding_priority, gha_check.finding_type, - gha_check.github_actions_id AS third_party_action_name, - gha_check.github_actions_version AS third_party_action_version, - gha_check.vulnerability_urls AS vulnerabilities, + gha_check.action_name, + gha_check.action_ref, + gha_check.vuln_urls, gha_check.finding_message, gha_check.recommended_ref, - gha_check.is_pinned_sha, + gha_check.sha_pinned, gha_check.caller_workflow AS vulnerable_workflow, analysis.analysis_time FROM github_actions_vulnerabilities_check AS gha_check diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py index 4090c3971..3466d6752 100644 --- a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py +++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py @@ -17,14 +17,13 @@ GitHubActionsActionStepNode, GitHubActionsReusableWorkflowCallNode, ) -from 
macaron.code_analyzer.gha_security_analysis.detect_injection import detect_github_actions_security_issues +from macaron.code_analyzer.gha_security_analysis.detect_injection import ( + build_unpinned_action_recommendation, + build_workflow_issue_recommendation, + detect_github_actions_security_issues, +) from macaron.code_analyzer.gha_security_analysis.recommendation import ( - parse_unpinned_action_issue, recommend_for_osv_vulnerability, - recommend_for_unpinned_action, - recommend_for_workflow_issue, - resolve_action_ref_to_sha, - resolve_action_ref_to_tag, ) from macaron.database.db_custom_types import DBJsonList from macaron.database.table_definitions import CheckFacts @@ -66,10 +65,8 @@ class GitHubActionsVulnsFacts(CheckFacts): #: The primary key. id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003 - #: The list of vulnerability URLs. - vulnerability_urls: Mapped[list[str]] = mapped_column( - DBJsonList, nullable=False, info={"justification": JustificationType.TEXT} - ) + #: The GitHub Action workflow that may have various security issues. + caller_workflow: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.HREF}) #: The finding category (subtype). #: - ``known-vulnerability`` for known vulnerability findings. @@ -84,22 +81,6 @@ class GitHubActionsVulnsFacts(CheckFacts): #: Use this to group rows in summaries; use ``finding_type`` for specific issue filtering. finding_group: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT}) - #: The GitHub Action Identifier. - github_actions_id: Mapped[str] = mapped_column( - String, nullable=False, info={"justification": JustificationType.TEXT} - ) - - #: The GitHub Action version. - github_actions_version: Mapped[str] = mapped_column( - String, nullable=False, info={"justification": JustificationType.TEXT} - ) - - #: The GitHub Action workflow that calls the vulnerable GitHub Action. 
- caller_workflow: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.HREF}) - - #: Whether the action reference is pinned to a full commit SHA. - is_pinned_sha: Mapped[bool | None] = mapped_column(Boolean, nullable=True) - #: Human-readable finding details. finding_message: Mapped[str | None] = mapped_column( String, nullable=True, info={"justification": JustificationType.TEXT} @@ -115,6 +96,27 @@ class GitHubActionsVulnsFacts(CheckFacts): String, nullable=True, info={"justification": JustificationType.TEXT} ) + #: Third-party action identifier (for ``third_party_action_risk`` findings only). + #: For workflow-security findings, this field is ``None``. + action_name: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} + ) + + #: Third-party action version/ref (for ``third_party_action_risk`` findings only). + #: For workflow-security findings, this field is ``None``. + action_ref: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} + ) + + #: Whether the action reference is pinned to a full commit SHA. + sha_pinned: Mapped[bool | None] = mapped_column(Boolean, nullable=True) + + #: Related vulnerability URLs (for ``third_party_action_risk`` findings only). + #: For workflow-security findings, this field is an empty list. 
+ vuln_urls: Mapped[list[str]] = mapped_column( + DBJsonList, nullable=False, info={"justification": JustificationType.TEXT} + ) + __mapper_args__ = { "polymorphic_identity": "_github_actions_vulnerabilities_check", } @@ -171,21 +173,18 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: for prioritized_issue in finding["issues"]: issue = prioritized_issue["issue"] issue_priority = int(prioritized_issue["priority"]) - if parsed_issue := parse_unpinned_action_issue(issue): - action_name, action_version = parsed_issue - resolved_sha = resolve_action_ref_to_sha(gh_api_client, action_name, action_version) - resolved_tag = resolve_action_ref_to_tag(action_name, resolved_sha, action_version) - recommendation = recommend_for_unpinned_action(action_name, resolved_sha, resolved_tag) + if unpinned_action_info := build_unpinned_action_recommendation(issue, gh_api_client): + action_name, action_version, recommendation = unpinned_action_info finding_type = GitHubActionsFindingType.UNPINNED_THIRD_PARTY_ACTION.value result_tables.append( GitHubActionsVulnsFacts( - vulnerability_urls=[], + vuln_urls=[], finding_type=finding_type, finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, - github_actions_id=action_name, - github_actions_version=action_version, + action_name=action_name, + action_ref=action_version, caller_workflow=caller_workflow_link, - is_pinned_sha=False, + sha_pinned=False, finding_message=( f"Summary: {_short_description_for_finding_type(finding_type)} " f"Recommendation: {recommendation.message}" @@ -197,21 +196,17 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: ) continue - recommendation = recommend_for_workflow_issue(issue) - finding_type = _extract_workflow_issue_type(issue) + finding_type, recommendation, finding_message = build_workflow_issue_recommendation(issue) result_tables.append( GitHubActionsVulnsFacts( - vulnerability_urls=[], + vuln_urls=[], finding_type=finding_type, 
finding_group=GitHubActionsFindingGroup.WORKFLOW_SECURITY_ISSUE.value, - github_actions_id=caller_workflow_link, - github_actions_version="", + action_name=None, + action_ref=None, caller_workflow=caller_workflow_link, - is_pinned_sha=None, - finding_message=( - f"Summary: {_short_description_for_finding_type(finding_type)} " - f"Details: {issue} Recommendation: {recommendation.message}" - ), + sha_pinned=None, + finding_message=finding_message, finding_priority=issue_priority, recommended_ref=recommendation.recommended_ref, confidence=Confidence.HIGH, @@ -304,13 +299,13 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: finding_type = GitHubActionsFindingType.KNOWN_VULNERABILITY.value result_tables.append( GitHubActionsVulnsFacts( - vulnerability_urls=vuln_mapping, + vuln_urls=vuln_mapping, finding_type=finding_type, finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, - github_actions_id=workflow_name, - github_actions_version=workflow_inv["version"], + action_name=workflow_name, + action_ref=workflow_inv["version"], caller_workflow=workflow_inv["caller_path"], - is_pinned_sha=bool(re.fullmatch(r"[0-9a-f]{40}", workflow_inv["version"])), + sha_pinned=bool(re.fullmatch(r"[0-9a-f]{40}", workflow_inv["version"])), finding_message=( f"Summary: {_short_description_for_finding_type(finding_type)} " f"Recommendation: {recommendation.message}" @@ -336,13 +331,6 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: registry.register(GitHubActionsVulnsCheck()) -def _extract_workflow_issue_type(issue: str) -> str: - """Extract a normalized workflow issue subtype from issue text.""" - prefix, _, _ = issue.partition(":") - normalized = prefix.strip().replace("_", "-") - return normalized or "workflow-security-issue" - - def _short_description_for_finding_type(finding_type: str) -> str: """Return a concise, human-readable summary for a finding subtype.""" finding_summaries = { diff --git 
a/tests/code_analyzer/gha_security_analysis/test_recommendation.py b/tests/code_analyzer/gha_security_analysis/test_recommendation.py index c9b2aeea0..8ebd86e90 100644 --- a/tests/code_analyzer/gha_security_analysis/test_recommendation.py +++ b/tests/code_analyzer/gha_security_analysis/test_recommendation.py @@ -26,7 +26,7 @@ def test_recommend_for_unpinned_action_when_sha_not_resolved() -> None: """Return fallback recommendation text when action SHA cannot be resolved.""" recommendation = recommend_for_unpinned_action("actions/checkout") - assert recommendation.recommended_ref == "cannot be found" + assert recommendation.recommended_ref == "Unable to resolve automatically" assert recommendation.message == "Pin this third-party action to a 40-character commit SHA." diff --git a/tests/integration/cases/oracle_coherence-js-client/policy.dl b/tests/integration/cases/oracle_coherence-js-client/policy.dl index 4016e02b4..4406970d8 100644 --- a/tests/integration/cases/oracle_coherence-js-client/policy.dl +++ b/tests/integration/cases/oracle_coherence-js-client/policy.dl @@ -7,15 +7,16 @@ Policy("check-github-actions-vulnerabilities", component_id, "Check GitHub Actio check_failed(component_id, "mcn_githubactions_vulnerabilities_1"), github_actions_vulnerabilities_check( _, - "[\"https://osv.dev/vulnerability/GHSA-69fq-xp46-6x23\", \"https://osv.dev/vulnerability/GHSA-9p44-j4g5-cfx5\"]", - "known-vulnerability", - "aquasecurity/trivy-action", - "0.32.0", "https://github.com/oracle/coherence-js-client/blob/39166341bc31f75b663ff439dae36170fb3e99a9/.github/workflows/trivy-scan.yml", + "known-vulnerability", + "third_party_action_risk", _, _, _, - _ + "aquasecurity/trivy-action", + "0.32.0", + _, + "[\"https://osv.dev/vulnerability/GHSA-69fq-xp46-6x23\", \"https://osv.dev/vulnerability/GHSA-9p44-j4g5-cfx5\"]" ). 
apply_policy_to("check-github-actions-vulnerabilities", component_id) :- diff --git a/tests/output_reporter/test_write_job_summary.py b/tests/output_reporter/test_write_job_summary.py new file mode 100644 index 000000000..c0b0b6955 --- /dev/null +++ b/tests/output_reporter/test_write_job_summary.py @@ -0,0 +1,59 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Tests for GitHub Actions job summary rendering helpers.""" + +from __future__ import annotations + +import importlib.util +from pathlib import Path +from types import ModuleType + + +def _load_write_job_summary_module() -> ModuleType: + """Load the write_job_summary script as a Python module for testing.""" + script_path = Path(__file__).parents[2] / "scripts" / "actions" / "write_job_summary.py" + spec = importlib.util.spec_from_file_location("write_job_summary", script_path) + if spec is None or spec.loader is None: + raise RuntimeError("Unable to load write_job_summary.py module.") + + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_workflow_security_table_includes_summary_column(tmp_path: Path) -> None: + """Render workflow security findings with the short summary column in compact table output.""" + module = _load_write_job_summary_module() + summary_path = tmp_path / "summary.md" + columns = [ + "finding_group", + "finding_priority", + "finding_type", + "action_name", + "action_ref", + "vulnerable_workflow", + "finding_message", + ] + rows = [ + ( + "workflow_security_issue", + 80, + "remote-script-exec", + "https://github.com/org/repo/.github/workflows/build.yml", + "", + "https://github.com/org/repo/.github/workflows/build.yml", + ( + "Summary: Workflow downloads and executes remote scripts inline. " + "Details: remote-script-exec: A step appears to download and pipe to shell (`curl|bash`). 
" + "Recommendation: Avoid curl|bash patterns." + ), + ), + ] + + rendered = module.write_compact_gha_vuln_diagnostics(summary_path, columns, rows) + output = summary_path.read_text(encoding="utf-8") + + assert rendered is True + assert "| priority | type | summary | workflow |" in output + assert "Workflow downloads and executes remote scripts inline." in output From 2caf5bf981258607a7f4e99b9b02d70798d75883 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 30 Mar 2026 10:26:15 +1000 Subject: [PATCH 22/30] chore: test other bug types Signed-off-by: behnazh-w --- .github/workflows/test_macaron_action.yaml | 41 +++ scripts/actions/write_job_summary.py | 26 +- .../gha_security_analysis/detect_injection.py | 321 +++++++++++++++++- .../github_actions_vulnerability_check.py | 9 +- .../test_gha_security_analysis.py | 57 +++- .../output_reporter/test_write_job_summary.py | 4 +- 6 files changed, 428 insertions(+), 30 deletions(-) diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index 652c15dec..64f47d5e4 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -408,3 +408,44 @@ jobs: echo "Expected verify step to fail, but it did not." 
exit 1 fi + + test-detect-potential-injection: + name: How to detect vulnerable GitHub Actions + runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar + + - name: Run Macaron (analyze github_actions_vulns for https://github.com/oracle/graalpython) + id: verify_github_actions_vulns_repo_test + # This integration target is intentionally vulnerable; failure is expected. + continue-on-error: true + uses: ./ + with: + repo_path: https://github.com/oracle/graalpython + digest: f5f7e67823a699213ab06c86440da94ead672467 + policy_file: check-github-actions + policy_purl: pkg:github.com/oracle/graalpython@.* + output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'false' + reports_artifact_name: macaron-injection-actions-fail-diagnosis + write_job_summary: 'true' + - name: Assert expected failure (github_actions_vulns for repo test) + if: ${{ always() }} + run: | + # Explicitly assert failure so regressions are visible in CI results. + if [ "${{ steps.verify_github_actions_vulns_repo_test.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." 
+ exit 1 + fi diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py index 63b1e12ec..85f01c46d 100644 --- a/scripts/actions/write_job_summary.py +++ b/scripts/actions/write_job_summary.py @@ -41,14 +41,16 @@ def _resolve_policy_source(policy_input: str) -> tuple[Path | None, str]: action_path = _env("GITHUB_ACTION_PATH", "") if action_path: - template_path = ( - Path(action_path) - / "src" - / "macaron" - / "resources" - / "policies" - / "datalog" - / (f"{policy_input}.dl.template") + template_path = Path( + os.path.join( + action_path, + "src", + "macaron", + "resources", + "policies", + "datalog", + f"{policy_input}.dl.template", + ) ) if template_path.is_file(): return template_path, "predefined" @@ -61,7 +63,7 @@ def _resolve_existing_policy_sql(policy_name: str) -> Path | None: action_path = _env("GITHUB_ACTION_PATH", "") if not action_path: return None - sql_path = Path(action_path) / "src" / "macaron" / "resources" / "policies" / "sql" / f"{policy_name}.sql" + sql_path = Path(os.path.join(action_path, "src", "macaron", "resources", "policies", "sql", f"{policy_name}.sql")) return sql_path if sql_path.is_file() else None @@ -505,8 +507,8 @@ def _write_existing_policy_failure_diagnostics( def main() -> None: output_dir = Path(_env("OUTPUT_DIR", "output")) - db_path = Path(_env("DB_PATH", str(output_dir / "macaron.db"))) - policy_report = _env("POLICY_REPORT", str(output_dir / "policy_report.json")) + db_path = Path(_env("DB_PATH", os.path.join(str(output_dir), "macaron.db"))) + policy_report = _env("POLICY_REPORT", os.path.join(str(output_dir), "policy_report.json")) policy_file_value = _env("POLICY_FILE", "") resolved_policy_file, policy_mode = _resolve_policy_source(policy_file_value) policy_label = "" @@ -517,7 +519,7 @@ def main() -> None: elif policy_mode == "unresolved": policy_label = f"{policy_file_value} (unresolved)" html_report = _env("HTML_REPORT_PATH", "") - vsa_path_value = _env("VSA_PATH", str(output_dir / 
"vsa.intoto.jsonl")) + vsa_path_value = _env("VSA_PATH", os.path.join(str(output_dir), "vsa.intoto.jsonl")) vsa_path = Path(vsa_path_value) if vsa_path_value else None summary_output = _env("GITHUB_STEP_SUMMARY") diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index 6d45fb6be..b4f766c71 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -3,10 +3,13 @@ """Detect security issues and injection risks in GitHub Actions workflows.""" +import ast +import os import re from typing import TypedDict, cast from macaron.code_analyzer.dataflow_analysis import bash, core, facts +from macaron.code_analyzer.dataflow_analysis.analysis import get_containing_github_job, get_containing_github_step from macaron.code_analyzer.dataflow_analysis.core import NodeForest, traverse_bfs from macaron.code_analyzer.dataflow_analysis.github import ( GitHubActionsActionStepNode, @@ -82,27 +85,27 @@ def detect_github_actions_security_issues(nodes: NodeForest) -> list[WorkflowFin for root in nodes.root_nodes: for callee in traverse_bfs(root): if isinstance(callee, GitHubActionsWorkflowNode): - if result := analyze_workflow(callee): + if result := analyze_workflow(callee, nodes=nodes): findings.append(result) return findings -def analyze_workflow( - workflow_node: GitHubActionsWorkflowNode, -) -> WorkflowFinding | None: +def analyze_workflow(workflow_node: GitHubActionsWorkflowNode, nodes: NodeForest) -> WorkflowFinding | None: """Analyze a GitHub Actions workflow for security issues. Parameters ---------- workflow_node : GitHubActionsWorkflowNode The workflow node to analyze. + nodes : NodeForest + The full node forest used to resolve parent relationships while analyzing findings. 
Returns ------- - dict[str, object] | None + WorkflowFinding | None A finding dictionary with: - ``workflow_name``: source filepath of the workflow. - - ``issues``: list of issue messages. + - ``issues``: list of issue messages with associated priorities. Returns ``None`` when no issues are detected. Notes @@ -136,7 +139,7 @@ def analyze_workflow( continue if isinstance(node, bash.BashSingleCommandNode): - _append_injection_findings(findings, node) + _append_injection_findings(findings, node, nodes) if "pull_request_target" in on_keys and _has_privileged_trigger_risk_combo(findings): _add_finding( @@ -263,8 +266,7 @@ def _append_run_step_findings(findings: list[PrioritizedIssue], run_step_node: G def _append_injection_findings( - findings: list[PrioritizedIssue], - bash_node: bash.BashSingleCommandNode, + findings: list[PrioritizedIssue], bash_node: bash.BashSingleCommandNode, nodes: NodeForest ) -> None: """Append potential injection findings discovered from parsed bash command nodes.""" if not is_call_expr(bash_node.definition.get("Cmd")): @@ -285,7 +287,213 @@ def _append_injection_findings( }: pr_head_ref = True if expansion and pr_head_ref: - _add_finding(findings, f"potential-injection: {arg.get('Parts')}", PRIORITY_CRITICAL) + job_node = get_containing_github_job(bash_node, nodes.parents) + step_node = get_containing_github_step(bash_node, nodes.parents) + script_line = _extract_script_line_from_parts(arg.get("Parts")) + workflow_line = _map_script_line_to_workflow_line(step_node, script_line) + if workflow_line is None: + workflow_line = _extract_run_step_line(step_node) + issue_payload = { + "step_line": workflow_line, + "script_line": script_line, + "job": job_node.job_id if job_node else "", + "step": _extract_step_name(step_node), + "command": _extract_command_text(step_node, script_line), + "parts": arg.get("Parts"), + } + _add_finding(findings, f"potential-injection: {issue_payload}", PRIORITY_CRITICAL) + + +def _extract_step_name(step_node: 
GitHubActionsRunStepNode | None) -> str: + """Extract a display name for a workflow run step.""" + if step_node is None: + return "" + step_name = step_node.definition.get("name") + if isinstance(step_name, str): + return step_name + step_id = step_node.definition.get("id") + if isinstance(step_id, str): + return step_id + return "" + + +def _extract_command_text(step_node: GitHubActionsRunStepNode | None, script_line: int | None) -> str: + """Extract a compact command snippet from the run script for display in diagnostics.""" + if step_node is None: + return "" + + run_script = step_node.definition["run"] + script_lines = run_script.splitlines() + if script_line and 1 <= script_line <= len(script_lines): + return script_lines[script_line - 1].strip() + + for line in script_lines: + if line.strip(): + return line.strip() + return "" + + +def _extract_run_step_line(step_node: GitHubActionsRunStepNode | None) -> int | None: + """Extract a 1-based workflow line number for a run step when metadata is available.""" + if step_node is None: + return None + + definition = step_node.definition + line_container = getattr(definition, "lc", None) + if line_container is None: + return _infer_run_step_line_from_source(step_node) + + line = getattr(line_container, "line", None) + if isinstance(line, int) and line >= 0: + # ruamel stores line numbers as 0-based. 
+ return line + 1 + + return _infer_run_step_line_from_source(step_node) + + +def _extract_script_line_from_parts(parts: object) -> int | None: + """Extract the 1-based script line number from parsed shell argument parts.""" + if not isinstance(parts, list): + return None + + for part in parts: + if not isinstance(part, dict): + continue + pos = part.get("Pos") + if not isinstance(pos, dict): + continue + line = pos.get("Line") + if isinstance(line, int) and line > 0: + return line + + return None + + +def _map_script_line_to_workflow_line( + step_node: GitHubActionsRunStepNode | None, script_line: int | None +) -> int | None: + """Map a line number inside a run script to the corresponding workflow source line.""" + if step_node is None or script_line is None or script_line < 1: + return None + + workflow_path = step_node.context.ref.job_context.ref.workflow_context.ref.source_filepath + run_script = step_node.definition.get("run") + if not workflow_path or not isinstance(run_script, str) or not os.path.isfile(workflow_path): + return None + + try: + with open(workflow_path, encoding="utf-8") as workflow_file: + workflow_lines = workflow_file.readlines() + except OSError: + return None + + for block_start, block_lines in _iter_run_blocks(workflow_lines): + if _normalize_multiline_text("\n".join(block_lines)) != _normalize_multiline_text(run_script): + continue + if script_line > len(block_lines): + return None + return block_start + script_line - 1 + + return None + + +def _iter_run_blocks(workflow_lines: list[str]) -> list[tuple[int, list[str]]]: + """Collect run-step script blocks as (1-based start line, content lines).""" + run_key_re = re.compile(r"^(\s*)(?:-\s*)?run\s*:\s*(.*)$") + blocks: list[tuple[int, list[str]]] = [] + i = 0 + while i < len(workflow_lines): + line = workflow_lines[i] + match = run_key_re.match(line) + if not match: + i += 1 + continue + + indent = len(match.group(1)) + run_value = match.group(2).rstrip("\n") + + if 
run_value.strip().startswith(("|", ">")): + block_start = i + 2 + block_buffer: list[str] = [] + j = i + 1 + min_indent: int | None = None + while j < len(workflow_lines): + candidate = workflow_lines[j] + if candidate.strip(): + candidate_indent = len(candidate) - len(candidate.lstrip(" ")) + if candidate_indent <= indent: + break + if min_indent is None or candidate_indent < min_indent: + min_indent = candidate_indent + block_buffer.append(candidate.rstrip("\n")) + j += 1 + + if min_indent is None: + blocks.append((block_start, [])) + else: + dedented = [b[min_indent:] if len(b) >= min_indent else b for b in block_buffer] + blocks.append((block_start, dedented)) + i = j + continue + + inline_value = run_value.strip().strip("\"'") + blocks.append((i + 1, [inline_value])) + i += 1 + + return blocks + + +def _normalize_multiline_text(text: str) -> str: + """Normalize text for robust matching between YAML-extracted and parsed run scripts.""" + return "\n".join(line.rstrip() for line in text.strip("\n").splitlines()) + + +def _infer_run_step_line_from_source(step_node: GitHubActionsRunStepNode) -> int | None: + """Infer a run step line by matching its script against the workflow source file.""" + workflow_path = step_node.context.ref.job_context.ref.workflow_context.ref.source_filepath + if not workflow_path or not os.path.isfile(workflow_path): + return None + + run_script = step_node.definition["run"] + first_script_line = "" + for line in run_script.splitlines(): + stripped = line.strip() + if stripped: + first_script_line = stripped + break + if not first_script_line: + return None + + try: + with open(workflow_path, encoding="utf-8") as workflow_file: + workflow_lines = workflow_file.readlines() + except OSError: + return None + + run_key_re = re.compile(r"^\s*(?:-\s*)?run\s*:\s*(.*)$") + for index, line in enumerate(workflow_lines): + match = run_key_re.match(line) + if not match: + continue + + run_value = match.group(1).strip() + if run_value and not 
run_value.startswith("|") and not run_value.startswith(">"): + inline_value = run_value.strip("\"'") + if first_script_line in inline_value or inline_value in first_script_line: + return index + 1 + continue + + run_indent = len(line) - len(line.lstrip(" ")) + for nested_line in workflow_lines[index + 1 :]: + if not nested_line.strip(): + continue + nested_indent = len(nested_line) - len(nested_line.lstrip(" ")) + if nested_indent <= run_indent: + break + if first_script_line in nested_line.strip(): + return index + 1 + + return None def _has_privileged_trigger_risk_combo(findings: list[PrioritizedIssue]) -> bool: @@ -332,7 +540,7 @@ def get_workflow_issue_summary(finding_type: str) -> str: "persist-credentials": "Persisted checkout credentials can leak token access to later steps.", "remote-script-exec": "Workflow downloads and executes remote scripts inline.", "pr-target-untrusted-checkout": "pull_request_target is combined with checkout of PR-controlled refs.", - "potential-injection": "Untrusted GitHub context data may flow into shell execution.", + "potential-injection": "Unsafe expansion of attacker-controllable GitHub context can enable command injection.", "self-hosted-runner": "Job uses self-hosted runners, increasing blast radius for untrusted code.", "workflow-security-issue": "Workflow includes a security issue that requires hardening.", } @@ -344,10 +552,41 @@ def build_workflow_issue_recommendation(issue: str) -> tuple[str, Recommendation finding_type = get_workflow_issue_type(issue) summary = get_workflow_issue_summary(finding_type) recommendation = recommend_for_workflow_issue(issue) - finding_message = f"Summary: {summary} Details: {issue} Recommendation: {recommendation.message}" + details = _format_issue_details(finding_type, issue) + finding_message = f"Summary: {summary} Details: {details} Recommendation: {recommendation.message}" return finding_type, recommendation, finding_message +def _format_issue_details(finding_type: str, issue: str) -> 
str: + """Format human-readable issue details for job summaries.""" + if finding_type != "potential-injection": + return issue + + payload = _parse_issue_payload(issue) + if not isinstance(payload, dict): + return issue + + job_name = str(payload.get("job") or "unknown") + step_name = str(payload.get("step") or "unknown") + command_text = str(payload.get("command") or "unknown") + command_text = command_text.replace("`", "'") + return f"Job: {job_name} Step: {step_name} Command: `{command_text}`" + + +def _parse_issue_payload(issue: str) -> object | None: + """Parse the serialized issue payload after the finding type prefix.""" + _, _, payload = issue.partition(":") + payload = payload.strip() + if not payload: + return None + + try: + parsed: object = ast.literal_eval(payload) + return parsed + except (SyntaxError, ValueError): + return None + + def build_unpinned_action_recommendation(issue: str, api_client: object) -> tuple[str, str, Recommendation] | None: """Build normalized recommendation metadata for an unpinned third-party action finding.""" parsed_issue = parse_unpinned_action_issue(issue) @@ -361,6 +600,64 @@ def build_unpinned_action_recommendation(issue: str, api_client: object) -> tupl return action_name, action_ref, recommendation +def extract_workflow_issue_line(issue: str) -> int | None: + """Extract a 1-based workflow source line number from an issue payload. + + Parameters + ---------- + issue : str + Serialized workflow issue string produced by the detector. + + Returns + ------- + int | None + The 1-based line number when available; otherwise ``None``. 
+ """ + if not issue.startswith("potential-injection:"): + return None + + _, _, payload = issue.partition(":") + if not payload.strip(): + return None + + parsed_payload = _parse_issue_payload(issue) + if isinstance(parsed_payload, dict): + step_line = parsed_payload.get("step_line") + if isinstance(step_line, int) and step_line > 0: + return step_line + + step_line_match = re.search(r"\[step-line=(\d+)\]", payload) + if step_line_match: + step_line = int(step_line_match.group(1)) + if step_line > 0: + return step_line + + parts: object | None + if isinstance(parsed_payload, list): + parts = parsed_payload + elif isinstance(parsed_payload, dict): + parts = parsed_payload.get("parts") + else: + parts = None + + if isinstance(parts, list): + for part in parts: + if not isinstance(part, dict): + continue + pos = part.get("Pos") + if not isinstance(pos, dict): + continue + line = pos.get("Line") + if isinstance(line, int) and line > 0: + return line + + match = re.search(r"'Line':\s*(\d+)", payload) + if not match: + return None + line = int(match.group(1)) + return line if line > 0 else None + + # def analyze_workflow(workflow_node: GitHubActionsWorkflowNode, nodes: NodeForest) -> list[dict[str, str]]: # """ # Analyze a GitHub Actions workflow for common security misconfigurations. 
diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py index 3466d6752..4fb2e92ec 100644 --- a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py +++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py @@ -21,6 +21,7 @@ build_unpinned_action_recommendation, build_workflow_issue_recommendation, detect_github_actions_security_issues, + extract_workflow_issue_line, ) from macaron.code_analyzer.gha_security_analysis.recommendation import ( recommend_for_osv_vulnerability, @@ -173,6 +174,10 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: for prioritized_issue in finding["issues"]: issue = prioritized_issue["issue"] issue_priority = int(prioritized_issue["priority"]) + issue_line = extract_workflow_issue_line(issue) + finding_workflow_link = caller_workflow_link + if issue_line and finding_workflow_link: + finding_workflow_link = f"{finding_workflow_link}#L{issue_line}" if unpinned_action_info := build_unpinned_action_recommendation(issue, gh_api_client): action_name, action_version, recommendation = unpinned_action_info finding_type = GitHubActionsFindingType.UNPINNED_THIRD_PARTY_ACTION.value @@ -183,7 +188,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: finding_group=GitHubActionsFindingGroup.THIRD_PARTY_ACTION_RISK.value, action_name=action_name, action_ref=action_version, - caller_workflow=caller_workflow_link, + caller_workflow=finding_workflow_link, sha_pinned=False, finding_message=( f"Summary: {_short_description_for_finding_type(finding_type)} " @@ -204,7 +209,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: finding_group=GitHubActionsFindingGroup.WORKFLOW_SECURITY_ISSUE.value, action_name=None, action_ref=None, - caller_workflow=caller_workflow_link, + caller_workflow=finding_workflow_link, sha_pinned=None, finding_message=finding_message, 
finding_priority=issue_priority, diff --git a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py index 658a6ebf0..9b59ed85a 100644 --- a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py +++ b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py @@ -8,7 +8,12 @@ import pytest -from macaron.code_analyzer.gha_security_analysis.detect_injection import detect_github_actions_security_issues +from macaron.code_analyzer.gha_security_analysis.detect_injection import ( + WorkflowFinding, + build_workflow_issue_recommendation, + detect_github_actions_security_issues, + extract_workflow_issue_line, +) from macaron.slsa_analyzer.ci_service.github_actions.github_actions_ci import GitHubActions RESOURCES_DIR = Path(__file__).parent.joinpath("resources") @@ -21,7 +26,7 @@ ], ) def test_detect_github_actions_security_issues( - snapshot: list[dict[str, str | list[str]]], workflow_path: str, github_actions_service: GitHubActions + snapshot: list[WorkflowFinding], workflow_path: str, github_actions_service: GitHubActions ) -> None: """Test GH Actions workflows injection patterns.""" callgraph = github_actions_service.build_call_graph_for_files( @@ -29,3 +34,51 @@ def test_detect_github_actions_security_issues( repo_path=os.path.join(RESOURCES_DIR, "workflow_files"), ) assert detect_github_actions_security_issues(callgraph) == snapshot + + +def test_extract_workflow_issue_line_from_potential_injection() -> None: + """Extract the source line from a potential-injection issue payload.""" + issue = ( + "potential-injection: " + "[{'Type': 'Lit', 'Pos': {'Offset': 269, 'Line': 6, 'Col': 48}, 'Value': 'origin/'}, " + "{'Type': 'ParamExp', 'Pos': {'Offset': 276, 'Line': 6, 'Col': 55}}]" + ) + + assert extract_workflow_issue_line(issue) == 6 + + +def test_extract_workflow_issue_line_prefers_step_line_marker() -> None: + """Extract the workflow line from an 
explicit step-line marker.""" + issue = ( + "potential-injection: " + "[step-line=14] " + "[{'Type': 'Lit', 'Pos': {'Offset': 269, 'Line': 6, 'Col': 48}, 'Value': 'origin/'}]" + ) + + assert extract_workflow_issue_line(issue) == 14 + + +def test_extract_workflow_issue_line_from_structured_payload() -> None: + """Extract workflow line from structured potential-injection payload.""" + issue = ( + "potential-injection: " + "{'step_line': 62, 'script_line': 6, 'job': 'retag', 'step': 'Retag', 'command': " + "'git push origin/${github.head_ref}', 'parts': []}" + ) + + assert extract_workflow_issue_line(issue) == 62 + + +def test_build_workflow_issue_recommendation_formats_potential_injection_details() -> None: + """Format concise user-facing details for potential-injection findings.""" + issue = ( + "potential-injection: " + "{'step_line': 62, 'script_line': 6, 'job': 'retag', 'step': 'Retag', 'command': " + "'git push origin/${github.head_ref}', 'parts': []}" + ) + + finding_type, _, finding_message = build_workflow_issue_recommendation(issue) + + assert finding_type == "potential-injection" + assert "Unsafe expansion of attacker-controllable GitHub context can enable command injection." 
in finding_message + assert "Details: Job: retag Step: Retag Command: `git push origin/${github.head_ref}`" in finding_message diff --git a/tests/output_reporter/test_write_job_summary.py b/tests/output_reporter/test_write_job_summary.py index c0b0b6955..db970468d 100644 --- a/tests/output_reporter/test_write_job_summary.py +++ b/tests/output_reporter/test_write_job_summary.py @@ -12,7 +12,7 @@ def _load_write_job_summary_module() -> ModuleType: """Load the write_job_summary script as a Python module for testing.""" - script_path = Path(__file__).parents[2] / "scripts" / "actions" / "write_job_summary.py" + script_path = Path(Path(__file__).parents[2], "scripts", "actions", "write_job_summary.py") spec = importlib.util.spec_from_file_location("write_job_summary", script_path) if spec is None or spec.loader is None: raise RuntimeError("Unable to load write_job_summary.py module.") @@ -25,7 +25,7 @@ def _load_write_job_summary_module() -> ModuleType: def test_workflow_security_table_includes_summary_column(tmp_path: Path) -> None: """Render workflow security findings with the short summary column in compact table output.""" module = _load_write_job_summary_module() - summary_path = tmp_path / "summary.md" + summary_path = Path(tmp_path, "summary.md") columns = [ "finding_group", "finding_priority", From add0412c4f058e8eaaba3f8a7ec27e891bfa845b Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 30 Mar 2026 10:44:34 +1000 Subject: [PATCH 23/30] fix: fix issue in report summary Signed-off-by: behnazh-w --- scripts/actions/write_job_summary.py | 4 +- .../gha_security_analysis/detect_injection.py | 11 +++-- .../test_gha_security_analysis.py | 8 ++-- .../output_reporter/test_write_job_summary.py | 45 +++++++++++++++++++ 4 files changed, 56 insertions(+), 12 deletions(-) diff --git a/scripts/actions/write_job_summary.py b/scripts/actions/write_job_summary.py index 85f01c46d..bec011787 100644 --- a/scripts/actions/write_job_summary.py +++ 
b/scripts/actions/write_job_summary.py @@ -325,7 +325,7 @@ def write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], r preferred_groups = ["workflow_security_issue", "third_party_action_risk"] groups_in_rows: list[str] = [] if group_idx is not None: - discovered_groups = [str(row[group_idx]) for row in display_rows] + discovered_groups = [str(row[group_idx]) for row in sorted_rows] groups_in_rows.extend([group for group in preferred_groups if group in discovered_groups]) groups_in_rows.extend([group for group in discovered_groups if group not in groups_in_rows]) else: @@ -336,7 +336,7 @@ def write_compact_gha_vuln_diagnostics(summary_path: Path, columns: list[str], r group_rows = display_rows title = "Findings" else: - group_rows = [row for row in display_rows if str(row[group_idx]) == group] + group_rows = [row for row in sorted_rows if str(row[group_idx]) == group][:10] if not group_rows: continue title = _gha_group_label(group) diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index b4f766c71..f1574b3c9 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -3,7 +3,7 @@ """Detect security issues and injection risks in GitHub Actions workflows.""" -import ast +import json import os import re from typing import TypedDict, cast @@ -301,7 +301,7 @@ def _append_injection_findings( "command": _extract_command_text(step_node, script_line), "parts": arg.get("Parts"), } - _add_finding(findings, f"potential-injection: {issue_payload}", PRIORITY_CRITICAL) + _add_finding(findings, f"potential-injection: {json.dumps(issue_payload)}", PRIORITY_CRITICAL) def _extract_step_name(step_node: GitHubActionsRunStepNode | None) -> str: @@ -581,9 +581,8 @@ def _parse_issue_payload(issue: str) -> object | None: return None try: - parsed: object = 
ast.literal_eval(payload) - return parsed - except (SyntaxError, ValueError): + return cast(object, json.loads(payload)) + except json.JSONDecodeError: return None @@ -651,7 +650,7 @@ def extract_workflow_issue_line(issue: str) -> int | None: if isinstance(line, int) and line > 0: return line - match = re.search(r"'Line':\s*(\d+)", payload) + match = re.search(r"""["']Line["']:\s*(\d+)""", payload) if not match: return None line = int(match.group(1)) diff --git a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py index 9b59ed85a..6d94cd436 100644 --- a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py +++ b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py @@ -62,8 +62,8 @@ def test_extract_workflow_issue_line_from_structured_payload() -> None: """Extract workflow line from structured potential-injection payload.""" issue = ( "potential-injection: " - "{'step_line': 62, 'script_line': 6, 'job': 'retag', 'step': 'Retag', 'command': " - "'git push origin/${github.head_ref}', 'parts': []}" + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", "parts": []}' ) assert extract_workflow_issue_line(issue) == 62 @@ -73,8 +73,8 @@ def test_build_workflow_issue_recommendation_formats_potential_injection_details """Format concise user-facing details for potential-injection findings.""" issue = ( "potential-injection: " - "{'step_line': 62, 'script_line': 6, 'job': 'retag', 'step': 'Retag', 'command': " - "'git push origin/${github.head_ref}', 'parts': []}" + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", "parts": []}' ) finding_type, _, finding_message = build_workflow_issue_recommendation(issue) diff --git a/tests/output_reporter/test_write_job_summary.py 
b/tests/output_reporter/test_write_job_summary.py index db970468d..210dc7ac8 100644 --- a/tests/output_reporter/test_write_job_summary.py +++ b/tests/output_reporter/test_write_job_summary.py @@ -57,3 +57,48 @@ def test_workflow_security_table_includes_summary_column(tmp_path: Path) -> None assert rendered is True assert "| priority | type | summary | workflow |" in output assert "Workflow downloads and executes remote scripts inline." in output + + +def test_compact_summary_keeps_all_groups_in_detailed_section(tmp_path: Path) -> None: + """Render detailed section with both finding groups even when top priorities are workflow-only.""" + module = _load_write_job_summary_module() + summary_path = Path(tmp_path, "summary.md") + columns = [ + "finding_group", + "finding_priority", + "finding_type", + "action_name", + "action_ref", + "vulnerable_workflow", + "finding_message", + ] + rows = [ + ( + "workflow_security_issue", + 100, + "potential-injection", + "", + "", + "https://github.com/org/repo/.github/workflows/ci.yml", + "Summary: Injection risk. Details: ... Recommendation: ...", + ), + ( + "third_party_action_risk", + 20, + "unpinned-third-party-action", + "actions/checkout", + "v4", + "https://github.com/org/repo/.github/workflows/ci.yml", + "Summary: Unpinned action. 
Recommendation: ...", + ), + ] + + rendered = module.write_compact_gha_vuln_diagnostics(summary_path, columns, rows) + output = summary_path.read_text(encoding="utf-8") + + assert rendered is True + assert "#### Workflow security issues" in output + assert "#### Third-party action risks" in output + assert "**Workflow security issues**" in output + assert "**Third-party action risks**" in output + assert "`actions/checkout@v4`" in output From c3763be2457ac8556b5196dc4f4d403eb9574a5b Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 30 Mar 2026 11:17:54 +1000 Subject: [PATCH 24/30] chore: add line info for script exec bug type Signed-off-by: behnazh-w --- .../gha_security_analysis/detect_injection.py | 200 +++++++++--------- .../test_gha_security_analysis.py | 26 +++ 2 files changed, 128 insertions(+), 98 deletions(-) diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index f1574b3c9..6463e7ffe 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -27,8 +27,7 @@ ) from macaron.parsers.bashparser_model import CallExpr, is_call_expr, is_lit, is_param_exp from macaron.parsers.github_workflow_model import Workflow - -REMOTE_SCRIPT_RE = re.compile(r"(curl|wget)\s+.*\|\s*(bash|sh|tar)", re.IGNORECASE) +from macaron.slsa_analyzer.git_url import is_commit_hash UNTRUSTED_PR_REFS = { "${{ github.event.pull_request.head.ref }}", @@ -135,12 +134,9 @@ def analyze_workflow(workflow_node: GitHubActionsWorkflowNode, nodes: NodeForest continue if isinstance(node, GitHubActionsRunStepNode): - _append_run_step_findings(findings, node) + _append_run_step_findings(findings, node, nodes) continue - if isinstance(node, bash.BashSingleCommandNode): - _append_injection_findings(findings, node, nodes) - if "pull_request_target" in on_keys and _has_privileged_trigger_risk_combo(findings): 
_add_finding( findings, @@ -220,12 +216,7 @@ def _append_action_step_findings( """Append findings derived from an action step node.""" uses_name = action_node.uses_name uses_version = action_node.uses_version - if ( - uses_name - and not uses_name.startswith("./") - and uses_version - and not re.fullmatch(r"[0-9a-f]{40}", uses_version) - ): + if uses_name and not uses_name.startswith("./") and uses_version and not is_commit_hash(uses_version): _add_finding(findings, f"{uses_name}@{uses_version}", PRIORITY_MIN) if uses_name == "actions/checkout": @@ -254,15 +245,104 @@ def _append_action_step_findings( ) -def _append_run_step_findings(findings: list[PrioritizedIssue], run_step_node: GitHubActionsRunStepNode) -> None: +def _append_run_step_findings( + findings: list[PrioritizedIssue], run_step_node: GitHubActionsRunStepNode, nodes: NodeForest +) -> None: """Append findings derived from a run step node.""" - run_script = run_step_node.definition.get("run", "") - if isinstance(run_script, str) and REMOTE_SCRIPT_RE.search(run_script): - _add_finding( - findings, - "remote-script-exec: A step appears to download and pipe to shell (`curl|bash`).", - PRIORITY_HIGH, - ) + for node in core.traverse_bfs(run_step_node): + # Command-level injection checks rely on parsed call argument parts from single-command nodes. + if isinstance(node, bash.BashSingleCommandNode): + _append_injection_findings(findings, node, nodes) + continue + + # Remote script execution risk is structural: downloader output piped into an executor. 
+ if isinstance(node, bash.BashPipeNode): + _append_remote_script_exec_findings(findings, node, run_step_node, nodes) + + +def _append_remote_script_exec_findings( + findings: list[PrioritizedIssue], + pipe_node: bash.BashPipeNode, + run_step_node: GitHubActionsRunStepNode, + nodes: NodeForest, +) -> None: + """Append remote-script-exec findings discovered from parsed bash pipe nodes.""" + if not _is_remote_script_exec_pipe(pipe_node): + return + + # Map the pipe's script-relative line to workflow source line so summary links jump to YAML. + script_line = pipe_node.definition["Pos"]["Line"] + workflow_line = _map_script_line_to_workflow_line(run_step_node, script_line) + if workflow_line is None: + workflow_line = _extract_run_step_line(run_step_node) + job_node = get_containing_github_job(pipe_node, nodes.parents) + issue_payload = { + "step_line": workflow_line, + "script_line": script_line, + "job": job_node.job_id if job_node else "", + "step": _extract_step_name(run_step_node), + "command": _extract_command_text(run_step_node, script_line), + } + _add_finding( + findings, + f"remote-script-exec: {json.dumps(issue_payload)}", + PRIORITY_HIGH, + ) + + +def _is_remote_script_exec_pipe(pipe_node: bash.BashPipeNode) -> bool: + """Return whether a pipe node matches downloader-to-executor behavior.""" + lhs_words = _extract_statement_words(pipe_node.lhs) + rhs_words = _extract_statement_words(pipe_node.rhs) + if not lhs_words or not rhs_words: + return False + + downloader_cmd = lhs_words[0] + if downloader_cmd not in {"curl", "wget"}: + return False + + return _is_executor_invocation(rhs_words) + + +def _extract_statement_words(statement_node: bash.BashStatementNode) -> list[str]: + """Extract normalized literal command words from a Bash statement when available.""" + cmd = statement_node.definition.get("Cmd") + if not is_call_expr(cmd): + return [] + return _extract_call_words(cmd) + + +def _extract_call_words(call_expr: CallExpr) -> list[str]: + """Extract literal 
word values from a call expression.""" + args = call_expr["Args"] + words: list[str] = [] + for arg in args: + parts = arg["Parts"] + word = "".join(part.get("Value", "") for part in parts if is_lit(part)).strip() + if not word: + return [] + words.append(word) + if not words: + return [] + + normalized = [os.path.basename(word).lower() if idx == 0 else word for idx, word in enumerate(words)] + return normalized + + +def _is_executor_invocation(words: list[str]) -> bool: + """Return whether extracted words represent shell/archive execution.""" + if not words: + return False + direct_executors = {"bash", "sh", "tar"} + wrapper_cmds = {"sudo", "env", "command"} + + command = words[0] + if command in direct_executors: + return True + if command in wrapper_cmds and len(words) > 1: + wrapped = os.path.basename(words[1]).lower() + return wrapped in direct_executors + return False def _append_injection_findings( @@ -559,7 +639,7 @@ def build_workflow_issue_recommendation(issue: str) -> tuple[str, Recommendation def _format_issue_details(finding_type: str, issue: str) -> str: """Format human-readable issue details for job summaries.""" - if finding_type != "potential-injection": + if finding_type not in {"potential-injection", "remote-script-exec"}: return issue payload = _parse_issue_payload(issue) @@ -612,7 +692,7 @@ def extract_workflow_issue_line(issue: str) -> int | None: int | None The 1-based line number when available; otherwise ``None``. 
""" - if not issue.startswith("potential-injection:"): + if not issue.startswith("potential-injection:") and not issue.startswith("remote-script-exec:"): return None _, _, payload = issue.partition(":") @@ -655,79 +735,3 @@ def extract_workflow_issue_line(issue: str) -> int | None: return None line = int(match.group(1)) return line if line > 0 else None - - -# def analyze_workflow(workflow_node: GitHubActionsWorkflowNode, nodes: NodeForest) -> list[dict[str, str]]: -# """ -# Analyze a GitHub Actions workflow for common security misconfigurations. - -# Issues Detected: -# - Privileged triggers such as pull_request_target -# - Execution of untrusted code from forked PRs -# - Inline shell scripts or unvalidated input usage -# - Missing permissions or authorization checks -# """ -# wf = workflow_node.definition -# findings = [] - -# for node in core.traverse_bfs(workflow_node): -# if isinstance(node, bash.BashSingleCommandNode): -# # The step in GitHub Actions job that triggers the path in the callgraph. -# step_node = get_containing_github_step(node, nodes.parents) -# if is_call_expr(node.definition["Cmd"]): -# call_exp = cast(CallExpr, node.definition["Cmd"]) -# for arg in call_exp["Args"]: -# expansion = False -# pr_head_ref = False -# for part in arg["Parts"]: -# if is_param_exp(part) and part["Param"]["Value"] == "github": -# expansion = True -# if is_lit(part) and part["Value"] == ".event.pull_request.head.ref": -# pr_head_ref = True -# if expansion and pr_head_ref: -# findings.append( -# f"Potential injection: {arg['Parts']}" -# ) - -# # --- 1. Privileged trigger check --- -# if isinstance(wf.get("on"), dict) and "pull_request_target" in wf["on"]: -# findings.append( -# "privileged-trigger: Workflow uses `pull_request_target`, which runs with elevated permissions." -# ) - -# # --- 2. 
Untrusted code execution --- -# if isinstance(wf.get("on"), dict) and "pull_request" in wf["on"]: -# for job_name, job in wf["jobs"].items(): -# if is_normal_job(job) and "steps" in job: -# for step in job["steps"]: -# uses = step.get("uses", "") -# if "actions/checkout" in uses: -# ref = step.get("with", {}).get("ref", "") -# if ref in ["${{ github.event.pull_request.head.ref }}", "${{ github.head_ref }}"]: -# findings.append( -# f"untrusted-fork-code Job `{job_name}` checks out untrusted fork code on PR event." -# ) - -# # --- 3. Inline shell or unvalidated inputs --- -# # for job_name, job in wf["jobs"].items(): -# # if is_normal_job(job) and "steps" in job: -# # for step in job["steps"]: -# # script = get_run_step(step) -# # if script and ("${{ github" in script or "${{ inputs" in script): -# # findings.append( -# # f"unvalidated-input-script: Step `{step.get('name', job_name)}` runs inline shell with expressions." -# # ) -# # elif script and re.search(r"(curl|wget|bash\s+-c)", script): -# # findings.append( -# # f"inline-shell-risk Step `{step.get('name', job_name)}` runs shell commands directly." -# # ) - -# # --- 4. 
Authorization check --- -# if "permissions" not in wf: -# findings.append("missing-permissions: No explicit workflow permissions defined; defaults may be overly broad.") - -# if findings: -# result: dict[str, list[str]] = {"workflow_name": wf.get("name"), "issues": findings} -# return result - -# return None diff --git a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py index 6d94cd436..2181a18a0 100644 --- a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py +++ b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py @@ -82,3 +82,29 @@ def test_build_workflow_issue_recommendation_formats_potential_injection_details assert finding_type == "potential-injection" assert "Unsafe expansion of attacker-controllable GitHub context can enable command injection." in finding_message assert "Details: Job: retag Step: Retag Command: `git push origin/${github.head_ref}`" in finding_message + + +def test_build_workflow_issue_recommendation_formats_remote_script_exec_details() -> None: + """Format concise user-facing details for remote-script-exec findings.""" + issue = ( + "remote-script-exec: " + '{"step_line": 24, "script_line": 3, "job": "build", "step": "Setup", ' + '"command": "curl -fsSL https://x | bash"}' + ) + + finding_type, _, finding_message = build_workflow_issue_recommendation(issue) + + assert finding_type == "remote-script-exec" + assert "Workflow downloads and executes remote scripts inline." 
in finding_message + assert "Details: Job: build Step: Setup Command: `curl -fsSL https://x | bash`" in finding_message + + +def test_extract_workflow_issue_line_from_remote_script_exec_payload() -> None: + """Extract workflow line from structured remote-script-exec payload.""" + issue = ( + "remote-script-exec: " + '{"step_line": 24, "script_line": 3, "job": "build", "step": "Setup", ' + '"command": "curl -fsSL https://x | bash"}' + ) + + assert extract_workflow_issue_line(issue) == 24 From c1bf4b1dd0f763db6e79fef3e8ccd1a194d857c5 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 30 Mar 2026 12:29:37 +1000 Subject: [PATCH 25/30] test: test mapping gh variables in bashparser Signed-off-by: behnazh-w --- golang/cmd/bashparser/bashparser.go | 15 +- golang/internal/bashparser/bashparser.go | 111 +++++++--- golang/internal/bashparser/bashparser_test.go | 30 +++ .../code_analyzer/dataflow_analysis/bash.py | 45 +++- .../gha_security_analysis/detect_injection.py | 204 ++++++++++++++++-- src/macaron/parsers/bashparser.py | 70 +++++- .../test_gha_security_analysis.py | 27 +++ tests/parsers/bashparser/test_bashparser.py | 14 +- 8 files changed, 455 insertions(+), 61 deletions(-) diff --git a/golang/cmd/bashparser/bashparser.go b/golang/cmd/bashparser/bashparser.go index 50cc6fec2..530bed89e 100644 --- a/golang/cmd/bashparser/bashparser.go +++ b/golang/cmd/bashparser/bashparser.go @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ package main @@ -30,13 +30,18 @@ func main() { input := flag.String("input", "", "The bash script content to be parsed. 
Input is prioritized over file option.") out_path := flag.String("output", "", "The output file path to store the JSON content.") raw := flag.Bool("raw", false, "Return raw parse-tree") + rawGHAMap := flag.Bool("raw-gha-map", false, "Return raw parse-tree with GitHub expression mapping") flag.Parse() var json_content string var parse_err error if len(*input) > 0 { // Read the bash script from command line argument. - json_content, parse_err = bashparser.Parse(*input, *raw) + if *rawGHAMap { + json_content, parse_err = bashparser.ParseRawWithGitHubExprMap(*input) + } else { + json_content, parse_err = bashparser.Parse(*input, *raw) + } } else if len(*file_path) <= 0 { fmt.Fprintln(os.Stderr, "Missing bash script input or file path.") flag.PrintDefaults() @@ -48,7 +53,11 @@ func main() { fmt.Fprintln(os.Stderr, read_err.Error()) os.Exit(1) } - json_content, parse_err = bashparser.Parse(string(data), *raw) + if *rawGHAMap { + json_content, parse_err = bashparser.ParseRawWithGitHubExprMap(string(data)) + } else { + json_content, parse_err = bashparser.Parse(string(data), *raw) + } } if parse_err != nil { diff --git a/golang/internal/bashparser/bashparser.go b/golang/internal/bashparser/bashparser.go index cbd105df8..ee7b178b5 100644 --- a/golang/internal/bashparser/bashparser.go +++ b/golang/internal/bashparser/bashparser.go @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ // Package bashparser parses the bash scripts and provides parsed objects in JSON. @@ -7,6 +7,7 @@ package bashparser import ( "bytes" "encoding/json" + "fmt" "regexp" "strings" @@ -19,28 +20,54 @@ type CMDResult struct { Commands [][]string `json:"commands"` } -// ParseCommands parses the bash script to find bash commands. 
-// It returns the parsed commands in JSON format. -func ParseCommands(data string) (string, error) { +// RawWithGHAMapResult is used to export the raw bash AST with a GitHub-expression mapping. +type RawWithGHAMapResult struct { + AST any `json:"ast"` + GHAExprMap map[string]string `json:"gha_expr_map"` +} + +func preprocessGitHubActionsExpr(data string) (string, error) { // Replace GitHub Actions's expressions with ``$MACARON_UNKNOWN``` variable because the bash parser // doesn't recognize such expressions. For example: ``${{ foo }}`` will be replaced by ``$MACARON_UNKNOWN``. // Note that we don't use greedy matching, so if we have `${{ ${{ foo }} }}`, it will not be replaced by // `$MACARON_UNKNOWN`. // See: https://docs.github.com/en/actions/learn-github-actions/expressions. - var re, reg_error = regexp.Compile(`\$\{\{.*?\}\}`) + re, reg_error := regexp.Compile(`\$\{\{.*?\}\}`) if reg_error != nil { return "", reg_error } + // We replace the GH Actions variables with "$MACARON_UNKNOWN". + return string(re.ReplaceAll([]byte(data), []byte("$$MACARON_UNKNOWN"))), nil +} + +func preprocessGitHubActionsExprWithMap(data string) (string, map[string]string, error) { + re, reg_error := regexp.Compile(`\$\{\{.*?\}\}`) + if reg_error != nil { + return "", nil, reg_error + } - // Remove '{', '}', and spaces from each matched value. 
- data = string(re.ReplaceAllFunc([]byte(data), func(m []byte) []byte { - s := string(m) - s = strings.ReplaceAll(s, "{", "") - s = strings.ReplaceAll(s, "}", "") - s = strings.ReplaceAll(s, " ", "") - return []byte(s) - })) - data_str := strings.NewReader(data) + index := 0 + ghaMap := make(map[string]string) + processed := re.ReplaceAllStringFunc(data, func(match string) string { + index += 1 + key := fmt.Sprintf("MACARON_GHA_%04d", index) + expr := strings.TrimSpace(strings.TrimSuffix(strings.TrimPrefix(match, "${{"), "}}")) + ghaMap[key] = expr + return "$$" + key + }) + + return processed, ghaMap, nil +} + +// ParseCommands parses the bash script to find bash commands. +// It returns the parsed commands in JSON format. +func ParseCommands(data string) (string, error) { + processed, preprocessErr := preprocessGitHubActionsExpr(data) + if preprocessErr != nil { + return "", preprocessErr + } + + data_str := strings.NewReader(processed) data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") if parse_err != nil { return "", parse_err @@ -77,25 +104,12 @@ func ParseCommands(data string) (string, error) { } func ParseRaw(data string) (string, error) { - // Replace GitHub Actions's expressions with ``$MACARON_UNKNOWN``` variable because the bash parser - // doesn't recognize such expressions. For example: ``${{ foo }}`` will be replaced by ``$MACARON_UNKNOWN``. - // Note that we don't use greedy matching, so if we have `${{ ${{ foo }} }}`, it will not be replaced by - // `$MACARON_UNKNOWN`. - // See: https://docs.github.com/en/actions/learn-github-actions/expressions. - var re, reg_error = regexp.Compile(`\$\{\{.*?\}\}`) - if reg_error != nil { - return "", reg_error + processed, preprocessErr := preprocessGitHubActionsExpr(data) + if preprocessErr != nil { + return "", preprocessErr } - // Remove '{', '}', and spaces from each matched value. 
- data = string(re.ReplaceAllFunc([]byte(data), func(m []byte) []byte { - s := string(m) - s = strings.ReplaceAll(s, "{", "") - s = strings.ReplaceAll(s, "}", "") - s = strings.ReplaceAll(s, " ", "") - return []byte(s) - })) - data_str := strings.NewReader(data) + data_str := strings.NewReader(processed) data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") if parse_err != nil { return "", parse_err @@ -110,6 +124,41 @@ func ParseRaw(data string) (string, error) { return b.String(), nil } +// ParseRawWithGitHubExprMap parses raw bash AST and returns it with a GitHub-expression placeholder mapping. +func ParseRawWithGitHubExprMap(data string) (string, error) { + processed, ghaMap, preprocessErr := preprocessGitHubActionsExprWithMap(data) + if preprocessErr != nil { + return "", preprocessErr + } + + data_str := strings.NewReader(processed) + data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") + if parse_err != nil { + return "", parse_err + } + + b := new(strings.Builder) + encode_err := typedjson.Encode(b, data_parsed) + if encode_err != nil { + return "", encode_err + } + + var astObj any + if unmarshalErr := json.Unmarshal([]byte(b.String()), &astObj); unmarshalErr != nil { + return "", unmarshalErr + } + + result := RawWithGHAMapResult{ + AST: astObj, + GHAExprMap: ghaMap, + } + resultBytes, marshalErr := json.MarshalIndent(result, "", " ") + if marshalErr != nil { + return "", marshalErr + } + return string(resultBytes), nil +} + func Parse(data string, raw bool) (string, error) { if raw { return ParseRaw(data) diff --git a/golang/internal/bashparser/bashparser_test.go b/golang/internal/bashparser/bashparser_test.go index 3825f459b..82f861fe4 100644 --- a/golang/internal/bashparser/bashparser_test.go +++ b/golang/internal/bashparser/bashparser_test.go @@ -32,3 +32,33 @@ func Test_parse_valid_bash_script(t *testing.T) { t.Errorf("Cannot unmarshal the returned JSON content from parsing %s: %v.", json_content, err) } } + +func 
Test_parse_raw_with_gha_expr_map(t *testing.T) { + input := `echo "${{ github.head_ref }}" && echo "${{ needs.prepare.outputs.fullVersion }}"` + json_content, parse_err := ParseRawWithGitHubExprMap(input) + if parse_err != nil || json_content == "" { + t.Fatalf("expected successful parse with mapping, got error: %v", parse_err) + } + + var result map[string]any + if err := json.Unmarshal([]byte(json_content), &result); err != nil { + t.Fatalf("cannot unmarshal parser output: %v", err) + } + + ast, astOK := result["ast"] + if !astOK || ast == nil { + t.Fatalf("expected non-empty ast field") + } + + mapRaw, mapOK := result["gha_expr_map"] + if !mapOK { + t.Fatalf("expected gha_expr_map field") + } + ghaMap, ok := mapRaw.(map[string]any) + if !ok { + t.Fatalf("expected gha_expr_map to be an object") + } + if len(ghaMap) != 2 { + t.Fatalf("expected 2 mapped expressions, got %d", len(ghaMap)) + } +} diff --git a/src/macaron/code_analyzer/dataflow_analysis/bash.py b/src/macaron/code_analyzer/dataflow_analysis/bash.py index 4a4903c86..516ba2fd3 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/bash.py +++ b/src/macaron/code_analyzer/dataflow_analysis/bash.py @@ -75,6 +75,8 @@ class BashScriptContext(core.Context): stdout_loc: facts.LocationSpecifier #: Filepath for Bash script file. source_filepath: str + #: Mapping of parser placeholder vars to original GitHub expression bodies. + gha_expr_map_items: tuple[tuple[str, str], ...] 
= () @staticmethod def create_from_run_step( @@ -106,6 +108,7 @@ def create_from_run_step( stdout_scope=context.ref.job_context.ref.workflow_context.ref.console.get_non_owned(), stdout_loc=facts.Console(), source_filepath=source_filepath, + gha_expr_map_items=(), ) @staticmethod @@ -136,6 +139,7 @@ def create_from_bash_script(context: core.ContextRef[BashScriptContext], source_ stdout_scope=context.ref.stdout_scope.get_non_owned(), stdout_loc=facts.Console(), source_filepath=source_filepath, + gha_expr_map_items=(), ) @staticmethod @@ -164,6 +168,7 @@ def create_in_isolation(context: core.ContextRef[core.AnalysisContext], source_f stdout_scope=core.OwningContextRef(facts.Scope("stdout")), stdout_loc=facts.Console(), source_filepath=source_filepath, + gha_expr_map_items=(), ) def with_stdin( @@ -180,6 +185,7 @@ def with_stdin( self.stdout_scope, self.stdout_loc, self.source_filepath, + self.gha_expr_map_items, ) def with_stdout( @@ -196,6 +202,33 @@ def with_stdout( stdout_scope, stdout_loc, self.source_filepath, + self.gha_expr_map_items, + ) + + def with_gha_expr_map(self, gha_expr_map: dict[str, str]) -> BashScriptContext: + """Return a modified bash script context with GitHub-expression placeholder mappings. + + Parameters + ---------- + gha_expr_map : dict[str, str] + Mapping from parser placeholder variable names to original GitHub expression bodies. + + Returns + ------- + BashScriptContext + A context copy with updated GitHub-expression mapping metadata. 
+ """ + return BashScriptContext( + self.outer_context, + self.filesystem, + self.env, + self.func_decls, + self.stdin_scope, + self.stdin_loc, + self.stdout_scope, + self.stdout_loc, + self.source_filepath, + tuple(sorted(gha_expr_map.items())), ) def get_containing_github_context(self) -> github.GitHubActionsStepContext | None: @@ -261,10 +294,16 @@ def identify_interpretations(self, state: core.State) -> dict[core.Interpretatio def build_bash_script() -> core.Node: try: - parsed_bash = bashparser.parse_raw(script_str, MACARON_PATH) - return BashScriptNode.create(parsed_bash, self.context.get_non_owned()) + parsed_bash, gha_expr_map = bashparser.parse_raw_with_gha_mapping(script_str, MACARON_PATH) + context_with_map = self.context.ref.with_gha_expr_map(gha_expr_map) + return BashScriptNode.create(parsed_bash, core.NonOwningContextRef(context_with_map)) except ParseError: - return core.NoOpStatementNode() + try: + # Backward-compatible fallback when parser mapping mode is unavailable. + parsed_bash = bashparser.parse_raw(script_str, MACARON_PATH) + return BashScriptNode.create(parsed_bash, self.context.get_non_owned()) + except ParseError: + return core.NoOpStatementNode() return {"default": build_bash_script} diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index 6463e7ffe..aaeb3a51a 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -249,10 +249,14 @@ def _append_run_step_findings( findings: list[PrioritizedIssue], run_step_node: GitHubActionsRunStepNode, nodes: NodeForest ) -> None: """Append findings derived from a run step node.""" + # Traversing a run-step subgraph can reach semantically identical command nodes through + # multiple CFG/AST paths (for example nested/compound command structures). 
Track emitted + # injection findings by stable metadata to avoid duplicate reports for the same command line. + seen_injection_keys: set[tuple[int | None, str, str, str]] = set() for node in core.traverse_bfs(run_step_node): # Command-level injection checks rely on parsed call argument parts from single-command nodes. if isinstance(node, bash.BashSingleCommandNode): - _append_injection_findings(findings, node, nodes) + _append_injection_findings(findings, node, nodes, seen_injection_keys) continue # Remote script execution risk is structural: downloader output piped into an executor. @@ -346,7 +350,10 @@ def _is_executor_invocation(words: list[str]) -> bool: def _append_injection_findings( - findings: list[PrioritizedIssue], bash_node: bash.BashSingleCommandNode, nodes: NodeForest + findings: list[PrioritizedIssue], + bash_node: bash.BashSingleCommandNode, + nodes: NodeForest, + seen_injection_keys: set[tuple[int | None, str, str, str]] | None = None, ) -> None: """Append potential injection findings discovered from parsed bash command nodes.""" if not is_call_expr(bash_node.definition.get("Cmd")): @@ -354,36 +361,185 @@ def _append_injection_findings( call_exp = cast(CallExpr, bash_node.definition["Cmd"]) for arg in call_exp.get("Args", []): - expansion = False - pr_head_ref = False - for part in arg.get("Parts", []): - if is_param_exp(part) and part.get("Param", {}).get("Value") == "github": - expansion = True - if is_lit(part) and part.get("Value") in { - ".event.pull_request.head.ref", - ".head_ref", - ".event.issue.body", - ".event.comment.body", - }: - pr_head_ref = True - if expansion and pr_head_ref: + parts = arg.get("Parts") + step_node = get_containing_github_step(bash_node, nodes.parents) + script_line = _extract_script_line_from_parts(parts) + expanded_refs = _extract_expanded_github_refs(bash_node, step_node, script_line, parts) + if _arg_has_attacker_controlled_github_ref(parts) or _has_attacker_controlled_expanded_ref(expanded_refs): job_node = 
get_containing_github_job(bash_node, nodes.parents) - step_node = get_containing_github_step(bash_node, nodes.parents) - script_line = _extract_script_line_from_parts(arg.get("Parts")) workflow_line = _map_script_line_to_workflow_line(step_node, script_line) if workflow_line is None: workflow_line = _extract_run_step_line(step_node) + job_name = job_node.job_id if job_node else "" + step_name = _extract_step_name(step_node) + command_text = _extract_command_text(step_node, script_line) + dedupe_key = (workflow_line, job_name, step_name, command_text) + if seen_injection_keys is not None: + # Prevent duplicate findings when the same risky command is visited via + # different traversal paths in the run-step subgraph. + if dedupe_key in seen_injection_keys: + continue + seen_injection_keys.add(dedupe_key) issue_payload = { "step_line": workflow_line, "script_line": script_line, - "job": job_node.job_id if job_node else "", - "step": _extract_step_name(step_node), - "command": _extract_command_text(step_node, script_line), + "job": job_name, + "step": step_name, + "command": command_text, + "expanded_refs": expanded_refs, "parts": arg.get("Parts"), } _add_finding(findings, f"potential-injection: {json.dumps(issue_payload)}", PRIORITY_CRITICAL) +def _arg_has_attacker_controlled_github_ref(parts: object) -> bool: + """Return whether argument parts contain attacker-controlled GitHub context expansion. + + Parameters + ---------- + parts : object + Parsed argument ``Parts`` payload from the Bash call expression. + + Returns + ------- + bool + ``True`` when an attacker-controlled GitHub context reference is detected. 
+ """ + if not isinstance(parts, list): + return False + + expansion = False + pr_head_ref = False + for part in parts: + if is_param_exp(part) and part.get("Param", {}).get("Value") == "github": + expansion = True + if is_lit(part) and part.get("Value") in { + ".event.pull_request.head.ref", + ".head_ref", + ".event.issue.body", + ".event.comment.body", + }: + pr_head_ref = True + if expansion and pr_head_ref: + return True + return False + + +def _has_attacker_controlled_expanded_ref(refs: list[str]) -> bool: + """Return whether extracted refs include attacker-controlled GitHub context values. + + Parameters + ---------- + refs : list[str] + Extracted GitHub expression references. + + Returns + ------- + bool + ``True`` if a known attacker-controlled ref is present. + """ + attacker_controlled = { + "github.event.pull_request.head.ref", + "github.head_ref", + "github.event.issue.body", + "github.event.comment.body", + } + return any(ref in attacker_controlled for ref in refs) + + +def _extract_expanded_github_refs( + bash_node: bash.BashSingleCommandNode, + step_node: GitHubActionsRunStepNode | None, + script_line: int | None, + parts: object, +) -> list[str]: + """Extract normalized expanded GitHub refs from parser mapping or fallback line scanning. + + Parameters + ---------- + bash_node : bash.BashSingleCommandNode + The Bash command node used to resolve parser placeholder mappings. + step_node : GitHubActionsRunStepNode | None + The containing run step node, used for fallback extraction from raw run script text. + script_line : int | None + 1-based line number within the inlined run script for line-targeted fallback extraction. + parts : object + Parsed argument ``Parts`` payload from the Bash call expression. + + Returns + ------- + list[str] + Ordered list of normalized GitHub expression references. 
+ """ + refs: list[str] = [] + placeholder_map = dict(bash_node.context.ref.gha_expr_map_items) + if isinstance(parts, list): + for part in parts: + if not is_param_exp(part): + continue + placeholder = part.get("Param", {}).get("Value") + if isinstance(placeholder, str): + mapped = placeholder_map.get(placeholder) + if mapped: + refs.extend(_extract_github_refs_from_expression(mapped)) + if refs: + return _deduplicate_preserve_order(refs) + + if step_node is None: + return [] + run_script = step_node.definition["run"] + script_lines = run_script.splitlines() + if script_line is not None and 1 <= script_line <= len(script_lines): + line_text = script_lines[script_line - 1] + else: + line_text = run_script + + matches = re.findall(r"\$\{\{\s*(.*?)\s*\}\}", line_text) + normalized: list[str] = [] + for expr in matches: + normalized.extend(_extract_github_refs_from_expression(expr)) + return _deduplicate_preserve_order(normalized) + + +def _extract_github_refs_from_expression(expression: str) -> list[str]: + """Extract github-context reference paths from a GitHub Actions expression body. + + Parameters + ---------- + expression : str + Expression text inside ``${{ ... }}``. + + Returns + ------- + list[str] + Matched GitHub reference paths (for example ``github.head_ref``). + """ + return re.findall(r"github(?:\.[A-Za-z0-9_-]+)+", expression) + + +def _deduplicate_preserve_order(values: list[str]) -> list[str]: + """Deduplicate string values while preserving insertion order. + + Parameters + ---------- + values : list[str] + Input values that may contain duplicates. + + Returns + ------- + list[str] + Values in original order with duplicates removed. 
+ """ + seen: set[str] = set() + result: list[str] = [] + for value in values: + if value in seen: + continue + seen.add(value) + result.append(value) + return result + + def _extract_step_name(step_node: GitHubActionsRunStepNode | None) -> str: """Extract a display name for a workflow run step.""" if step_node is None: @@ -650,7 +806,13 @@ def _format_issue_details(finding_type: str, issue: str) -> str: step_name = str(payload.get("step") or "unknown") command_text = str(payload.get("command") or "unknown") command_text = command_text.replace("`", "'") - return f"Job: {job_name} Step: {step_name} Command: `{command_text}`" + refs = payload.get("expanded_refs") + refs_display = "" + if isinstance(refs, list): + refs_clean = [str(ref) for ref in refs if str(ref)] + if refs_clean: + refs_display = f" Expanded refs: `{', '.join(refs_clean)}`" + return f"Job: {job_name} Step: {step_name} Command: `{command_text}`{refs_display}" def _parse_issue_payload(issue: str) -> object | None: diff --git a/src/macaron/parsers/bashparser.py b/src/macaron/parsers/bashparser.py index ac2ceed68..2b8de426a 100644 --- a/src/macaron/parsers/bashparser.py +++ b/src/macaron/parsers/bashparser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module is a Python wrapper for the compiled bashparser binary. @@ -162,6 +162,74 @@ def parse_raw(bash_content: str, macaron_path: str | None = None) -> File: raise ParseError("Error while loading the parsed bash script.") from error +def parse_raw_with_gha_mapping(bash_content: str, macaron_path: str | None = None) -> tuple[File, dict[str, str]]: + """Parse bash content and return raw AST plus GitHub expression mapping. + + Parameters + ---------- + bash_content : str + Bash script content. 
+ macaron_path : str | None + Macaron's root path (optional). + + Returns + ------- + tuple[bashparser_model.File, dict[str, str]] + A tuple of: + - The parsed raw bash AST. + - Mapping from parser placeholder variable names to original GitHub expression bodies. + + Raises + ------ + ParseError + When parsing fails with errors or output cannot be decoded. + """ + if not macaron_path: + macaron_path = global_config.macaron_path + cmd = [ + os.path.join(macaron_path, "bin", "bashparser"), + "-input", + bash_content, + "-raw-gha-map", + ] + + try: + result = subprocess.run( # nosec B603 + cmd, + capture_output=True, + check=True, + cwd=macaron_path, + timeout=defaults.getint("bashparser", "timeout", fallback=30), + ) + except ( + subprocess.CalledProcessError, + subprocess.TimeoutExpired, + FileNotFoundError, + ) as error: + raise ParseError("Error while parsing bash script.") from error + + try: + if result.returncode != 0: + raise ParseError(f"Bash script parser failed: {result.stderr.decode('utf-8')}") + + payload = cast(dict[str, object], json.loads(result.stdout.decode("utf-8"))) + ast_data = payload.get("ast") + gha_map = payload.get("gha_expr_map") + if not isinstance(ast_data, dict): + raise ParseError("Error while loading the parsed bash script.") + if not isinstance(gha_map, dict): + raise ParseError("Error while loading the parsed bash script.") + gha_map_clean: dict[str, str] = {} + for key, value in gha_map.items(): + if isinstance(key, str) and isinstance(value, str): + gha_map_clean[key] = value + + return cast(File, ast_data), gha_map_clean + + except json.JSONDecodeError as error: + raise ParseError("Error while loading the parsed bash script.") from error + + def parse_expr(bash_expr_content: str, macaron_path: str | None = None) -> list[Word]: """Parse a bash script's content. 
diff --git a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py index 2181a18a0..93166ef49 100644 --- a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py +++ b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py @@ -84,6 +84,33 @@ def test_build_workflow_issue_recommendation_formats_potential_injection_details assert "Details: Job: retag Step: Retag Command: `git push origin/${github.head_ref}`" in finding_message +def test_build_workflow_issue_recommendation_includes_expanded_refs() -> None: + """Render expanded GitHub refs in potential-injection details when present.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", "expanded_refs": ["github.head_ref"]}' + ) + + _, _, finding_message = build_workflow_issue_recommendation(issue) + + assert "Expanded refs: `github.head_ref`" in finding_message + + +def test_build_workflow_issue_recommendation_includes_refs_from_compound_expression() -> None: + """Render extracted github refs when original expression contains operators.""" + issue = ( + "potential-injection: " + '{"step_line": 62, "script_line": 6, "job": "retag", "step": "Retag", ' + '"command": "git push origin/${github.head_ref}", ' + '"expanded_refs": ["github.head_ref", "github.ref_name"]}' + ) + + _, _, finding_message = build_workflow_issue_recommendation(issue) + + assert "Expanded refs: `github.head_ref, github.ref_name`" in finding_message + + def test_build_workflow_issue_recommendation_formats_remote_script_exec_details() -> None: """Format concise user-facing details for remote-script-exec findings.""" issue = ( diff --git a/tests/parsers/bashparser/test_bashparser.py b/tests/parsers/bashparser/test_bashparser.py index 97c431034..a489330ac 100644 --- a/tests/parsers/bashparser/test_bashparser.py +++ 
b/tests/parsers/bashparser/test_bashparser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ @@ -13,7 +13,7 @@ from macaron import MACARON_PATH from macaron.errors import ParseError -from macaron.parsers.bashparser import parse, parse_file +from macaron.parsers.bashparser import parse, parse_file, parse_raw_with_gha_mapping @pytest.mark.parametrize( @@ -46,3 +46,13 @@ def test_bashparser_parse_invalid() -> None: # Parse the bash script file. with pytest.raises(ParseError): parse_file(file_path=file_path, macaron_path=MACARON_PATH) + + +def test_bashparser_parse_raw_with_gha_mapping() -> None: + """Test parsing raw bash script with GitHub expression mapping.""" + bash_content = 'echo "${{ github.head_ref }}"\n' + parsed_ast, gha_map = parse_raw_with_gha_mapping(bash_content, MACARON_PATH) + + assert "Stmts" in parsed_ast + assert gha_map + assert "github.head_ref" in gha_map.values() From a1950eac877285bee620d75e7127428f3713a70e Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 30 Mar 2026 13:42:20 +1000 Subject: [PATCH 26/30] test: turn all tests on Signed-off-by: behnazh-w --- .github/workflows/_build_docker.yaml | 14 +++++++------- .github/workflows/macaron-analysis.yaml | 4 ---- Makefile | 4 ++-- golang/internal/bashparser/bashparser.go | 9 +++++++++ .../__snapshots__/test_gha_security_analysis.ambr | 2 +- 5 files changed, 19 insertions(+), 14 deletions(-) diff --git a/.github/workflows/_build_docker.yaml b/.github/workflows/_build_docker.yaml index 36c4ae6ea..d503bfff9 100644 --- a/.github/workflows/_build_docker.yaml +++ b/.github/workflows/_build_docker.yaml @@ -83,13 +83,13 @@ jobs: run: make setup-integration-test-utility-for-docker # Run the integration tests against the built Docker image. 
- # - name: Test the Docker image - # env: - # # This environment variable will be picked up by run_macaron.sh. - # MACARON_IMAGE_TAG: test - # DOCKER_PULL: never - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # run: make integration-test-docker + - name: Test the Docker image + env: + # This environment variable will be picked up by run_macaron.sh. + MACARON_IMAGE_TAG: test + DOCKER_PULL: never + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: make integration-test-docker test-macaron-action: # Reuse the action test workflow against the exact Docker image built above. diff --git a/.github/workflows/macaron-analysis.yaml b/.github/workflows/macaron-analysis.yaml index ff0f875dd..e904b987d 100644 --- a/.github/workflows/macaron-analysis.yaml +++ b/.github/workflows/macaron-analysis.yaml @@ -35,13 +35,9 @@ jobs: # Note: adjust the policy_purl to refer to your repository URL. - name: Run Macaron action id: run_macaron - continue-on-error: true uses: ./ with: repo_path: ./ policy_file: check-github-actions policy_purl: pkg:github.com/oracle/macaron@.* - upload_reports: true - reports_artifact_name: macaron-reports reports_retention_days: 90 - write_job_summary: true diff --git a/Makefile b/Makefile index 35b1fef30..13394ddb3 100644 --- a/Makefile +++ b/Makefile @@ -386,10 +386,10 @@ integration-test-update: # set to the build date/epoch. 
For more details, see: https://flit.pypa.io/en/latest/reproducible.html .PHONY: dist dist: dist/$(PACKAGE_WHEEL_DIST_NAME).whl dist/$(PACKAGE_SDIST_NAME).tar.gz dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-docs-html.zip dist/$(PACKAGE_WHEEL_DIST_NAME)-build-epoch.txt -dist/$(PACKAGE_WHEEL_DIST_NAME).whl: +dist/$(PACKAGE_WHEEL_DIST_NAME).whl: check test integration-test SOURCE_DATE_EPOCH=$(SOURCE_DATE_EPOCH) flit build --setup-py --format wheel mv dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-py3-none-any.whl dist/$(PACKAGE_WHEEL_DIST_NAME).whl -dist/$(PACKAGE_SDIST_NAME).tar.gz: +dist/$(PACKAGE_SDIST_NAME).tar.gz: check test integration-test SOURCE_DATE_EPOCH=$(SOURCE_DATE_EPOCH) flit build --setup-py --format sdist dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-docs-html.zip: docs python -m zipfile -c dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-docs-html.zip docs/_build/html diff --git a/golang/internal/bashparser/bashparser.go b/golang/internal/bashparser/bashparser.go index ee7b178b5..e144a0b1a 100644 --- a/golang/internal/bashparser/bashparser.go +++ b/golang/internal/bashparser/bashparser.go @@ -41,6 +41,15 @@ func preprocessGitHubActionsExpr(data string) (string, error) { } func preprocessGitHubActionsExprWithMap(data string) (string, map[string]string, error) { + // Replace GitHub Actions expressions with unique bash-safe placeholders and return + // a mapping from placeholder variable names to the original expression body. + // + // Example: + // input: echo "${{ github.head_ref }}" + // output: echo "$MACARON_GHA_0001", {"MACARON_GHA_0001": "github.head_ref"} + // + // This preserves expression identity for downstream analysis while keeping the + // transformed script parseable by the bash parser. 
re, reg_error := regexp.Compile(`\$\{\{.*?\}\}`) if reg_error != nil { return "", nil, reg_error diff --git a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr index 80bea0779..1a56cd3a1 100644 --- a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr +++ b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr @@ -4,7 +4,7 @@ dict({ 'issues': list([ dict({ - 'issue': "potential-injection: [{'Type': 'Lit', 'Pos': {'Offset': 184, 'Line': 7, 'Col': 17}, 'End': {'Offset': 189, 'Line': 7, 'Col': 22}, 'ValuePos': {'Offset': 184, 'Line': 7, 'Col': 17}, 'ValueEnd': {'Offset': 189, 'Line': 7, 'Col': 22}, 'Value': 'HEAD:'}, {'Type': 'ParamExp', 'Pos': {'Offset': 189, 'Line': 7, 'Col': 22}, 'End': {'Offset': 196, 'Line': 7, 'Col': 29}, 'Dollar': {'Offset': 189, 'Line': 7, 'Col': 22}, 'Short': True, 'Param': {'Pos': {'Offset': 190, 'Line': 7, 'Col': 23}, 'End': {'Offset': 196, 'Line': 7, 'Col': 29}, 'ValuePos': {'Offset': 190, 'Line': 7, 'Col': 23}, 'ValueEnd': {'Offset': 196, 'Line': 7, 'Col': 29}, 'Value': 'github'}}, {'Type': 'Lit', 'Pos': {'Offset': 196, 'Line': 7, 'Col': 29}, 'End': {'Offset': 224, 'Line': 7, 'Col': 57}, 'ValuePos': {'Offset': 196, 'Line': 7, 'Col': 29}, 'ValueEnd': {'Offset': 224, 'Line': 7, 'Col': 57}, 'Value': '.event.pull_request.head.ref'}]", + 'issue': 'potential-injection: {"step_line": 75, "script_line": 7, "job": "auto_format", "step": "Commit and push formatting changes", "command": "git push origin HEAD:${{ github.event.pull_request.head.ref }}", "expanded_refs": ["github.event.pull_request.head.ref"], "parts": [{"End": {"Col": 4, "Line": 7, "Offset": 171}, "Pos": {"Col": 1, "Line": 7, "Offset": 168}, "Type": "Lit", "Value": "git", "ValueEnd": {"Col": 4, "Line": 7, "Offset": 171}, "ValuePos": {"Col": 1, "Line": 7, "Offset": 168}}]}', 'priority': 100, 
}), dict({ From 3027e558b91b916c60cc2107eba9603fa611f8d6 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 30 Mar 2026 14:14:16 +1000 Subject: [PATCH 27/30] test: fix the snapshot content Signed-off-by: behnazh-w --- docs/source/pages/macaron_action.rst | 42 ++++++++++++++----- .../gha_security_analysis/detect_injection.py | 5 ++- .../test_gha_security_analysis.ambr | 2 +- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/docs/source/pages/macaron_action.rst b/docs/source/pages/macaron_action.rst index 55ce483ae..8deb62842 100644 --- a/docs/source/pages/macaron_action.rst +++ b/docs/source/pages/macaron_action.rst @@ -24,16 +24,18 @@ When you use this action, you can reference it directly in your workflow. For a repo_path: 'https://github.com/example/project' policy_file: check-github-actions policy_purl: 'pkg:github.com/example/project' - output_dir: 'macaron-output' + reports_retention_days: 90 -If you upload the results like in this `workflow `_ check this :ref:`documentation ` to see how to read and understand them. +By default, the action posts a human-friendly results summary to the GitHub Actions run page (job summary). If you upload the results like in this `workflow `_, check this :ref:`documentation ` to see how to read and understand them. Example: policy verification only ---------------------------------- To run only the policy verification step (when you already have an output -database), call the action with ``policy_file`` and set ``output_dir`` to the -directory containing ``macaron.db``: +database), call the action with ``policy_file``. If the previous analysis step +used the default output path, you can omit ``output_dir`` here. If you set a +custom ``output_dir`` in the previous step, use the same value here so policy +verification reads the matching ``macaron.db``. .. 
code-block:: yaml @@ -41,7 +43,6 @@ directory containing ``macaron.db``: uses: oracle/macaron@fda4dda04aa7228fcaba162804891806cf5a1375 # v0.22.0 with: policy_file: policy.dl - output_dir: macaron-output Inputs ------ @@ -101,6 +102,18 @@ options. Key inputs are listed below (see ``action.yaml`` for the full list): * - ``output_dir`` - Directory where Macaron writes results (database, reports, artifacts). - ``output`` + * - ``upload_reports`` + - When ``true``, upload generated Macaron reports as a workflow artifact. + - ``true`` + * - ``reports_artifact_name`` + - Name of the uploaded reports artifact. + - ``macaron-reports`` + * - ``reports_retention_days`` + - Retention period in days for uploaded reports artifacts. + - ``90`` + * - ``write_job_summary`` + - When ``true``, write a human-friendly summary to the workflow run page. + - ``true`` * - ``upload_attestation`` - When ``true``, the action will attempt to upload a generated verification attestation (VSA) after policy verification. The attestation will be available @@ -114,8 +127,9 @@ options. Key inputs are listed below (see ``action.yaml`` for the full list): Outputs ------- -The composite action exposes the following outputs (set by the -``run_macaron_policy_verification.sh`` script when applicable): +The composite action exposes the following outputs (set by the action steps, +primarily ``Collect report paths``, with some values populated only when +analysis/policy verification generated them): .. list-table:: :header-rows: 1 @@ -123,6 +137,12 @@ The composite action exposes the following outputs (set by the * - Output - Description + * - ``html_report_path`` + - Path to the generated HTML analysis report (when available). + * - ``report_dir`` + - Directory containing generated HTML/JSON reports. + * - ``db_path`` + - Path to the generated Macaron SQLite database (typically ``/macaron.db``). * - ``policy_report`` - Path to the generated policy report JSON file produced by ``macaron verify-policy``. 
This file contains the policy evaluation @@ -133,6 +153,8 @@ The composite action exposes the following outputs (set by the during verification, the action emits the string ``"VSA Not Generated."`` instead of a path. The attestation will be available under the ``Actions/management`` tab. + * - ``vsa_generated`` + - ``true`` when a VSA was generated; otherwise ``false``. Default Policies ---------------- @@ -172,7 +194,7 @@ How the action works which assembles the ``macaron analyze`` command from the inputs and runs it. Results are written into ``output_dir``. -3. ``Run Macaron Policy Verification``: if a policy file or PURL is supplied, +3. ``Run Macaron Policy Verification``: if ``policy_file`` is supplied, the corresponding script runs ``macaron verify-policy`` against the - analysis database and writes ``policy_report`` and ``vsa_report`` to - ``$GITHUB_OUTPUT`` when produced. + analysis database (using ``policy_purl`` when provided) and writes + policy-related outputs when produced. 
diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index aaeb3a51a..43f9f0706 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -149,7 +149,10 @@ def analyze_workflow(workflow_node: GitHubActionsWorkflowNode, nodes: NodeForest if findings: findings_sorted = sorted(findings, key=lambda finding: (-finding["priority"], finding["issue"])) - return {"workflow_name": workflow_node.context.ref.source_filepath, "issues": findings_sorted} + return { + "workflow_name": os.path.relpath(workflow_node.context.ref.source_filepath, os.getcwd()), + "issues": findings_sorted, + } return None diff --git a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr index 1a56cd3a1..016474999 100644 --- a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr +++ b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr @@ -32,7 +32,7 @@ 'priority': 20, }), ]), - 'workflow_name': '/home/behnaz/research/github/macaron/tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml', + 'workflow_name': 'tests/code_analyzer/gha_security_analysis/resources/workflow_files/injection_pattern_1.yaml', }), ]) # --- From 82e7d720194702b6717dccbedb0a42528dd40620 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 30 Mar 2026 14:46:13 +1000 Subject: [PATCH 28/30] chore: improve the fallback solution for ref expansion Signed-off-by: behnazh-w --- golang/internal/bashparser/bashparser.go | 2 +- golang/internal/bashparser/bashparser_test.go | 18 ++++++++++++++++++ .../code_analyzer/dataflow_analysis/bash.py | 7 +------ .../gha_security_analysis/detect_injection.py | 11 +++++++---- 4 files 
changed, 27 insertions(+), 11 deletions(-) diff --git a/golang/internal/bashparser/bashparser.go b/golang/internal/bashparser/bashparser.go index e144a0b1a..fdfc63c2f 100644 --- a/golang/internal/bashparser/bashparser.go +++ b/golang/internal/bashparser/bashparser.go @@ -62,7 +62,7 @@ func preprocessGitHubActionsExprWithMap(data string) (string, map[string]string, key := fmt.Sprintf("MACARON_GHA_%04d", index) expr := strings.TrimSpace(strings.TrimSuffix(strings.TrimPrefix(match, "${{"), "}}")) ghaMap[key] = expr - return "$$" + key + return "$" + key }) return processed, ghaMap, nil diff --git a/golang/internal/bashparser/bashparser_test.go b/golang/internal/bashparser/bashparser_test.go index 82f861fe4..abf02055a 100644 --- a/golang/internal/bashparser/bashparser_test.go +++ b/golang/internal/bashparser/bashparser_test.go @@ -6,6 +6,7 @@ package bashparser import ( "encoding/json" "os" + "strings" "testing" ) @@ -62,3 +63,20 @@ func Test_parse_raw_with_gha_expr_map(t *testing.T) { t.Fatalf("expected 2 mapped expressions, got %d", len(ghaMap)) } } + +func Test_preprocess_github_actions_expr_with_map_replaces_with_single_dollar_var(t *testing.T) { + input := `echo "${{ github.head_ref }}"` + processed, ghaMap, err := preprocessGitHubActionsExprWithMap(input) + if err != nil { + t.Fatalf("unexpected preprocess error: %v", err) + } + if strings.Contains(processed, "$$MACARON_GHA_") { + t.Fatalf("expected single-dollar placeholder, got %q", processed) + } + if !strings.Contains(processed, "$MACARON_GHA_0001") { + t.Fatalf("expected placeholder var in processed script, got %q", processed) + } + if ghaMap["MACARON_GHA_0001"] != "github.head_ref" { + t.Fatalf("unexpected gha mapping: %#v", ghaMap) + } +} diff --git a/src/macaron/code_analyzer/dataflow_analysis/bash.py b/src/macaron/code_analyzer/dataflow_analysis/bash.py index 516ba2fd3..6b0f05813 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/bash.py +++ b/src/macaron/code_analyzer/dataflow_analysis/bash.py @@ 
-298,12 +298,7 @@ def build_bash_script() -> core.Node: context_with_map = self.context.ref.with_gha_expr_map(gha_expr_map) return BashScriptNode.create(parsed_bash, core.NonOwningContextRef(context_with_map)) except ParseError: - try: - # Backward-compatible fallback when parser mapping mode is unavailable. - parsed_bash = bashparser.parse_raw(script_str, MACARON_PATH) - return BashScriptNode.create(parsed_bash, self.context.get_non_owned()) - except ParseError: - return core.NoOpStatementNode() + return core.NoOpStatementNode() return {"default": build_bash_script} diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index 43f9f0706..61ee8bfab 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -456,7 +456,7 @@ def _extract_expanded_github_refs( script_line: int | None, parts: object, ) -> list[str]: - """Extract normalized expanded GitHub refs from parser mapping or fallback line scanning. + """Extract normalized expanded GitHub refs from mapping with a line-text fallback. Parameters ---------- @@ -490,6 +490,9 @@ def _extract_expanded_github_refs( if step_node is None: return [] + # Fallback: some complex shell constructs (for example command substitution in compound + # test/boolean commands) may not expose mapped placeholders on the current arg parts. + # In those cases, recover refs directly from the original run-script line text. 
run_script = step_node.definition["run"] script_lines = run_script.splitlines() if script_line is not None and 1 <= script_line <= len(script_lines): @@ -498,10 +501,10 @@ def _extract_expanded_github_refs( line_text = run_script matches = re.findall(r"\$\{\{\s*(.*?)\s*\}\}", line_text) - normalized: list[str] = [] + fallback_refs: list[str] = [] for expr in matches: - normalized.extend(_extract_github_refs_from_expression(expr)) - return _deduplicate_preserve_order(normalized) + fallback_refs.extend(_extract_github_refs_from_expression(expr)) + return _deduplicate_preserve_order(fallback_refs) def _extract_github_refs_from_expression(expression: str) -> list[str]: From 52da55ea178e2f66d47bb3c9aaaf891d2ceb115a Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 30 Mar 2026 14:51:36 +1000 Subject: [PATCH 29/30] test: update log4j integration tests Signed-off-by: behnazh-w --- .../integration/cases/org_apache_logging_log4j/policy_purl.dl | 2 +- .../cases/org_apache_logging_log4j/policy_repo_url.dl | 4 ++-- tests/integration/cases/org_apache_logging_log4j/test.yaml | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl b/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl index 49df9eba0..3a03cdca5 100644 --- a/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl +++ b/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl @@ -10,7 +10,7 @@ Policy("test_policy", component_id, "") :- check_passed_with_confidence(component_id, "mcn_find_artifact_pipeline_1", confidence), confidence = 0.7, // Medium confidence because the pipeline was not found from a provenance. 
check_passed(component_id, "mcn_version_control_system_1"), - check_passed(component_id, "mcn_githubactions_vulnerabilities_1"), + check_failed(component_id, "mcn_githubactions_vulnerabilities_1"), check_failed(component_id, "mcn_provenance_available_1"), check_failed(component_id, "mcn_provenance_derived_commit_1"), check_failed(component_id, "mcn_provenance_derived_repo_1"), diff --git a/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl b/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl index 00b141481..8d9367255 100644 --- a/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl +++ b/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl @@ -1,10 +1,10 @@ -/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ #include "prelude.dl" Policy("test_policy", component_id, "") :- - check_passed(component_id, "mcn_githubactions_vulnerabilities_1"). + check_failed(component_id, "mcn_githubactions_vulnerabilities_1"). apply_policy_to("test_policy", component_id) :- is_repo_url(component_id, "https://github.com/apache/logging-log4j2"). 
diff --git a/tests/integration/cases/org_apache_logging_log4j/test.yaml b/tests/integration/cases/org_apache_logging_log4j/test.yaml index 7871c7a5a..0dec38f7d 100644 --- a/tests/integration/cases/org_apache_logging_log4j/test.yaml +++ b/tests/integration/cases/org_apache_logging_log4j/test.yaml @@ -27,6 +27,8 @@ steps: command_args: - -rp - https://github.com/apache/logging-log4j2 + - d + - 028e9fad03ae7bcbf2e49ab8d32d8cfb900f3587 - name: Run macaron verify-policy to verify passed/failed checks kind: verify options: From 1a933530c7cae9f6fddc44162779e0d30b4888c5 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 30 Mar 2026 16:56:00 +1000 Subject: [PATCH 30/30] fix: fix duplicate repo issues Signed-off-by: behnazh-w --- .github/workflows/macaron-analysis.yaml | 2 +- .github/workflows/test_macaron_action.yaml | 38 ----- .../gha_security_analysis/detect_injection.py | 156 ++++++++++++------ .../gha_security_analysis/recommendation.py | 4 +- .../test_gha_security_analysis.ambr | 14 +- .../test_gha_security_analysis.py | 19 +++ .../test_recommendation.py | 15 ++ .../org_apache_logging_log4j/policy_purl.dl | 2 +- .../policy_repo_url.dl | 2 +- .../cases/org_apache_logging_log4j/test.yaml | 2 +- 10 files changed, 154 insertions(+), 100 deletions(-) diff --git a/.github/workflows/macaron-analysis.yaml b/.github/workflows/macaron-analysis.yaml index e904b987d..d0da663d1 100644 --- a/.github/workflows/macaron-analysis.yaml +++ b/.github/workflows/macaron-analysis.yaml @@ -35,7 +35,7 @@ jobs: # Note: adjust the policy_purl to refer to your repository URL. 
- name: Run Macaron action id: run_macaron - uses: ./ + uses: oracle/macaron@fda4dda04aa7228fcaba162804891806cf5a1375 # v0.22.0 with: repo_path: ./ policy_file: check-github-actions diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index 64f47d5e4..4c72dffda 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -164,8 +164,6 @@ jobs: - name: Run Macaron (verify policy - github_actions_vulns for repo) id: verify_github_actions_vulns_repo_tutorial - # This verification is expected to fail and surface vulnerable actions. - continue-on-error: true uses: ./ with: policy_file: ./tests/tutorial_resources/detect_vulnerable_github_actions/check_github_actions_vuln_repo.dl @@ -173,14 +171,6 @@ jobs: upload_reports: 'true' reports_artifact_name: macaron-reports-vulnerable-actions-repo write_job_summary: 'true' - - name: Assert expected failure (github_actions_vulns for repo tutorial) - if: ${{ always() }} - run: | - # Ensure this tutorial case remains a failing policy verification. - if [ "${{ steps.verify_github_actions_vulns_repo_tutorial.outcome }}" != "failure" ]; then - echo "Expected verify step to fail, but it did not." - exit 1 - fi - name: Run Macaron (analyze purl - log4j-core example) uses: ./ @@ -192,8 +182,6 @@ jobs: - name: Run Macaron (verify policy - github_actions_vulns for purl) id: verify_github_actions_vulns_purl_tutorial - # This verification is expected to fail and detect vulnerable actions. - continue-on-error: true uses: ./ with: policy_file: ./tests/tutorial_resources/detect_vulnerable_github_actions/check_github_actions_vuln_purl.dl @@ -201,14 +189,6 @@ jobs: upload_reports: 'true' reports_artifact_name: macaron-reports-vulnerable-actions-purl write_job_summary: 'true' - - name: Assert expected failure (github_actions_vulns for purl tutorial) - if: ${{ always() }} - run: | - # Ensure this tutorial case remains a failing policy verification. 
- if [ "${{ steps.verify_github_actions_vulns_purl_tutorial.outcome }}" != "failure" ]; then - echo "Expected verify step to fail, but it did not." - exit 1 - fi tutorial-provenance: name: Provenance discovery, extraction, and verification @@ -350,24 +330,6 @@ jobs: output_dir: macaron_output/exclude_include_checks/excluded upload_reports: 'false' - # tutorial-upload-attestation: - # name: Upload verification summary attestation - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - # - name: Run Macaron (analyze macaron repo + custom policy + upload attestation) - # uses: ./ - # with: - # repo_path: ./ - # policy_file: ./tests/tutorial_resources/provenance/attest-macaron-supply-chain.dl - # policy_purl: pkg:github.com/oracle/macaron@.* - # output_dir: macaron_output/upload_attestation - # upload_attestation: 'true' - # subject_path: ./macaron_output/upload_attestation/vsa.intoto.jsonl - # upload_reports: 'true' - # reports_artifact_name: macaron-reports-upload-attestation - # write_job_summary: 'true' - test-detect-vulnerable-actions: name: How to detect vulnerable GitHub Actions runs-on: ubuntu-latest diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index 61ee8bfab..80364ea76 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -36,14 +36,6 @@ "${{ github.event.pull_request.head.repo.full_name }}", } -DANGEROUS_TRIGGERS = { - "pull_request_target", # elevated token context - "workflow_run", # can chain privileged workflows - "repository_dispatch", # external event injection risk if misused - "issue_comment", # often used to trigger runs; needs strict gating -} - - PRIORITY_CRITICAL = 100 PRIORITY_HIGH = 80 PRIORITY_MEDIUM = 60 @@ -116,6 +108,8 @@ def analyze_workflow(workflow_node: 
GitHubActionsWorkflowNode, nodes: NodeForest findings: list[PrioritizedIssue] = [] on_keys = _extract_on_keys(workflow_node.definition) seen_jobs: set[str] = set() + workflow_permissions_defined = "permissions" in workflow_node.definition + has_job_without_permissions = False for node in core.traverse_bfs(workflow_node): if isinstance(node, GitHubActionsWorkflowNode): @@ -126,6 +120,8 @@ def analyze_workflow(workflow_node: GitHubActionsWorkflowNode, nodes: NodeForest if node.job_id in seen_jobs: continue seen_jobs.add(node.job_id) + if "permissions" not in node.definition: + has_job_without_permissions = True _append_job_level_findings(findings, node) continue @@ -137,6 +133,16 @@ def analyze_workflow(workflow_node: GitHubActionsWorkflowNode, nodes: NodeForest _append_run_step_findings(findings, node, nodes) continue + if not workflow_permissions_defined and has_job_without_permissions: + _add_finding( + findings, + ( + "missing-permissions: No explicit workflow permissions defined, and one or more jobs also omit " + "permissions; defaults may be overly broad." + ), + PRIORITY_MEDIUM, + ) + if "pull_request_target" in on_keys and _has_privileged_trigger_risk_combo(findings): _add_finding( findings, @@ -169,24 +175,10 @@ def _extract_on_keys(workflow: Workflow) -> set[str]: def _append_workflow_level_findings(findings: list[PrioritizedIssue], on_keys: set[str], workflow: Workflow) -> None: """Append workflow-level hardening findings.""" - sensitive = sorted(on_keys.intersection(DANGEROUS_TRIGGERS)) - if sensitive: - _add_finding( - findings, - f"sensitive-trigger: Workflow uses {sensitive}. 
Ensure strict gating (e.g., actor allowlist, " - "branch protection, and minimal permissions).", - PRIORITY_LOW, - ) - if "permissions" not in workflow: - _add_finding( - findings, - "missing-permissions: No explicit workflow permissions defined; defaults may be overly broad.", - PRIORITY_MEDIUM, - ) return - permissions = workflow.get("permissions") + permissions = workflow["permissions"] if isinstance(permissions, str) and permissions.lower() == "write-all": _add_finding(findings, "overbroad-permissions: Workflow uses `permissions: write-all`.", PRIORITY_HIGH) if isinstance(permissions, dict) and "pull_request_target" in on_keys: @@ -220,7 +212,13 @@ def _append_action_step_findings( uses_name = action_node.uses_name uses_version = action_node.uses_version if uses_name and not uses_name.startswith("./") and uses_version and not is_commit_hash(uses_version): - _add_finding(findings, f"{uses_name}@{uses_version}", PRIORITY_MIN) + step_line = _extract_action_step_line(action_node) + line_marker = f"[step-line={step_line}] " if step_line else "" + _add_finding( + findings, + f"unpinned-third-party-action: {line_marker}{uses_name}@{uses_version}", + PRIORITY_MIN, + ) if uses_name == "actions/checkout": ref = _literal_value(action_node.with_parameters.get("ref")) @@ -231,15 +229,6 @@ def _append_action_step_findings( PRIORITY_CRITICAL, ) - persist = _literal_value(action_node.with_parameters.get("persist-credentials")) - if persist.lower() == "true": - _add_finding( - findings, - "persist-credentials: Checkout uses `persist-credentials: true`; " - "this may expose GITHUB_TOKEN to subsequent git commands.", - PRIORITY_MEDIUM, - ) - if "pull_request_target" in on_keys and ref in UNTRUSTED_PR_REFS: _add_finding( findings, @@ -593,6 +582,70 @@ def _extract_run_step_line(step_node: GitHubActionsRunStepNode | None) -> int | return _infer_run_step_line_from_source(step_node) +def _extract_action_step_line(step_node: GitHubActionsActionStepNode | None) -> int | None: + 
"""Extract a 1-based workflow line number for an action step when metadata is available.""" + if step_node is None: + return None + + definition = step_node.definition + line_container = getattr(definition, "lc", None) + if line_container is None: + return _infer_action_step_line_from_source(step_node) + + line = getattr(line_container, "line", None) + if isinstance(line, int) and line >= 0: + # ruamel stores line numbers as 0-based. + return line + 1 + + return _infer_action_step_line_from_source(step_node) + + +def _infer_action_step_line_from_source(step_node: GitHubActionsActionStepNode) -> int | None: + """Infer an action-step line by matching the ``uses`` value in the workflow source.""" + workflow_path = step_node.context.ref.job_context.ref.workflow_context.ref.source_filepath + if not workflow_path or not os.path.isfile(workflow_path): + return None + + uses_name = step_node.uses_name + uses_version = step_node.uses_version + if not uses_name or not uses_version: + return None + + target_uses = f"{uses_name}@{uses_version}" + step_name = step_node.definition.get("name") + step_id = step_node.definition.get("id") + step_identifier = step_name if isinstance(step_name, str) else step_id if isinstance(step_id, str) else None + + try: + with open(workflow_path, encoding="utf-8") as workflow_file: + workflow_lines = workflow_file.readlines() + except OSError: + return None + + uses_key_re = re.compile(r"^\s*(?:-\s*)?uses\s*:\s*(.*)$") + candidate_lines: list[int] = [] + for index, line in enumerate(workflow_lines): + match = uses_key_re.match(line) + if not match: + continue + uses_value = match.group(1).strip().strip("\"'") + if uses_value == target_uses: + candidate_lines.append(index + 1) + + if not candidate_lines: + return None + if len(candidate_lines) == 1 or not step_identifier: + return candidate_lines[0] + + for candidate_line in candidate_lines: + for lookback_index in range(max(0, candidate_line - 8 - 1), candidate_line - 1): + lookback_line = 
workflow_lines[lookback_index].strip() + if lookback_line in {f"name: {step_identifier}", f"id: {step_identifier}"}: + return candidate_line + + return candidate_lines[0] + + def _extract_script_line_from_parts(parts: object) -> int | None: """Extract the 1-based script line number from parsed shell argument parts.""" if not isinstance(parts, list): @@ -743,7 +796,6 @@ def _has_privileged_trigger_risk_combo(findings: list[PrioritizedIssue]) -> bool risky_prefixes = ( "overbroad-permissions:", "untrusted-fork-code:", - "persist-credentials:", "remote-script-exec:", "pr-target-untrusted-checkout:", "potential-injection:", @@ -760,7 +812,21 @@ def _literal_value(value: facts.Value | None) -> str: def _add_finding(findings: list[PrioritizedIssue], issue: str, priority: int) -> None: - """Append a finding with priority metadata.""" + """Append a finding once and keep the highest priority for duplicate issues. + + Parameters + ---------- + findings : list[PrioritizedIssue] + Mutable finding list for the current workflow. + issue : str + Normalized finding identifier/message. + priority : int + Finding priority score. 
+ """ + for existing in findings: + if existing["issue"] == issue: + existing["priority"] = max(existing["priority"], priority) + return findings.append({"issue": issue, "priority": priority}) @@ -774,12 +840,10 @@ def get_workflow_issue_type(issue: str) -> str: def get_workflow_issue_summary(finding_type: str) -> str: """Return a concise summary for a workflow issue subtype.""" finding_summaries = { - "sensitive-trigger": "Workflow uses a sensitive trigger and needs strict gating.", "privileged-trigger": "Privileged trigger can expose elevated token scope to untrusted input.", "missing-permissions": "Workflow omits explicit permissions and may inherit broad defaults.", "overbroad-permissions": "Workflow requests permissions broader than required.", "untrusted-fork-code": "Workflow can execute code controlled by an untrusted fork.", - "persist-credentials": "Persisted checkout credentials can leak token access to later steps.", "remote-script-exec": "Workflow downloads and executes remote scripts inline.", "pr-target-untrusted-checkout": "pull_request_target is combined with checkout of PR-controlled refs.", "potential-injection": "Unsafe expansion of attacker-controllable GitHub context can enable command injection.", @@ -860,6 +924,12 @@ def extract_workflow_issue_line(issue: str) -> int | None: int | None The 1-based line number when available; otherwise ``None``. 
""" + step_line_match = re.search(r"\[step-line=(\d+)\]", issue) + if step_line_match: + step_line = int(step_line_match.group(1)) + if step_line > 0: + return step_line + if not issue.startswith("potential-injection:") and not issue.startswith("remote-script-exec:"): return None @@ -869,15 +939,9 @@ def extract_workflow_issue_line(issue: str) -> int | None: parsed_payload = _parse_issue_payload(issue) if isinstance(parsed_payload, dict): - step_line = parsed_payload.get("step_line") - if isinstance(step_line, int) and step_line > 0: - return step_line - - step_line_match = re.search(r"\[step-line=(\d+)\]", payload) - if step_line_match: - step_line = int(step_line_match.group(1)) - if step_line > 0: - return step_line + payload_step_line = parsed_payload.get("step_line") + if isinstance(payload_step_line, int) and payload_step_line > 0: + return payload_step_line parts: object | None if isinstance(parsed_payload, list): diff --git a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py index 1e2f65435..332add599 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py +++ b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py @@ -16,7 +16,9 @@ from macaron.slsa_analyzer.git_service.api_client import GhAPIClient from macaron.slsa_analyzer.git_url import find_highest_git_tag, get_tags_via_git_remote, is_commit_hash -UNPINNED_ACTION_RE = re.compile(r"^(?P[^@\s]+)@(?P[^\s]+)$") +UNPINNED_ACTION_RE = re.compile( + r"^(?:unpinned-third-party-action:\s*)?(?:\[step-line=(?P\d+)\]\s*)?(?P[^@\s]+)@(?P[^\s]+)$" +) @dataclass(frozen=True) diff --git a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr index 016474999..984d2d208 100644 --- a/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr +++ 
b/tests/code_analyzer/gha_security_analysis/__snapshots__/test_gha_security_analysis.ambr @@ -12,23 +12,15 @@ 'priority': 80, }), dict({ - 'issue': 'missing-permissions: No explicit workflow permissions defined; defaults may be overly broad.', - 'priority': 60, - }), - dict({ - 'issue': "sensitive-trigger: Workflow uses ['pull_request_target']. Ensure strict gating (e.g., actor allowlist, branch protection, and minimal permissions).", - 'priority': 40, - }), - dict({ - 'issue': 'actions/checkout@v5', + 'issue': 'unpinned-third-party-action: [step-line=28] actions/checkout@v5', 'priority': 20, }), dict({ - 'issue': 'dtolnay/rust-toolchain@stable', + 'issue': 'unpinned-third-party-action: [step-line=37] poseidon/wait-for-status-checks@v0.6.0', 'priority': 20, }), dict({ - 'issue': 'poseidon/wait-for-status-checks@v0.6.0', + 'issue': 'unpinned-third-party-action: [step-line=48] dtolnay/rust-toolchain@stable', 'priority': 20, }), ]), diff --git a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py index 93166ef49..cf4990a16 100644 --- a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py +++ b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py @@ -9,7 +9,9 @@ import pytest from macaron.code_analyzer.gha_security_analysis.detect_injection import ( + PrioritizedIssue, WorkflowFinding, + _add_finding, build_workflow_issue_recommendation, detect_github_actions_security_issues, extract_workflow_issue_line, @@ -135,3 +137,20 @@ def test_extract_workflow_issue_line_from_remote_script_exec_payload() -> None: ) assert extract_workflow_issue_line(issue) == 24 + + +def test_extract_workflow_issue_line_from_unpinned_action_marker() -> None: + """Extract workflow line from unpinned action issue marker.""" + issue = "unpinned-third-party-action: [step-line=62] actions/checkout@v4.2.2" + + assert extract_workflow_issue_line(issue) == 62 + + +def 
test_add_finding_deduplicates_and_preserves_highest_priority() -> None: + """Keep one finding entry per issue and retain the highest priority.""" + findings: list[PrioritizedIssue] = [] + _add_finding(findings, "remote-script-exec: {}", 80) + _add_finding(findings, "remote-script-exec: {}", 60) + _add_finding(findings, "remote-script-exec: {}", 100) + + assert findings == [{"issue": "remote-script-exec: {}", "priority": 100}] diff --git a/tests/code_analyzer/gha_security_analysis/test_recommendation.py b/tests/code_analyzer/gha_security_analysis/test_recommendation.py index 8ebd86e90..13a5217ed 100644 --- a/tests/code_analyzer/gha_security_analysis/test_recommendation.py +++ b/tests/code_analyzer/gha_security_analysis/test_recommendation.py @@ -6,6 +6,7 @@ import pytest from macaron.code_analyzer.gha_security_analysis.recommendation import ( + parse_unpinned_action_issue, recommend_for_unpinned_action, resolve_action_ref_to_tag, ) @@ -52,3 +53,17 @@ def test_resolve_action_ref_to_tag_none_when_no_match(monkeypatch: pytest.Monkey tag = resolve_action_ref_to_tag("actions/checkout", "cccccccccccccccccccccccccccccccccccccccc", "v4") assert tag is None + + +def test_parse_unpinned_action_issue_with_step_line_prefix() -> None: + """Parse unpinned action issues that include finding type and step-line marker.""" + parsed = parse_unpinned_action_issue("unpinned-third-party-action: [step-line=62] actions/checkout@v4.2.2") + + assert parsed == ("actions/checkout", "v4.2.2") + + +def test_parse_unpinned_action_issue_plain_format() -> None: + """Parse legacy unpinned action issues without metadata prefix.""" + parsed = parse_unpinned_action_issue("actions/setup-python@v5.6.0") + + assert parsed == ("actions/setup-python", "v5.6.0") diff --git a/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl b/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl index 3a03cdca5..49df9eba0 100644 --- a/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl +++ 
b/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl @@ -10,7 +10,7 @@ Policy("test_policy", component_id, "") :- check_passed_with_confidence(component_id, "mcn_find_artifact_pipeline_1", confidence), confidence = 0.7, // Medium confidence because the pipeline was not found from a provenance. check_passed(component_id, "mcn_version_control_system_1"), - check_failed(component_id, "mcn_githubactions_vulnerabilities_1"), + check_passed(component_id, "mcn_githubactions_vulnerabilities_1"), check_failed(component_id, "mcn_provenance_available_1"), check_failed(component_id, "mcn_provenance_derived_commit_1"), check_failed(component_id, "mcn_provenance_derived_repo_1"), diff --git a/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl b/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl index 8d9367255..8f34d5674 100644 --- a/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl +++ b/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl @@ -4,7 +4,7 @@ #include "prelude.dl" Policy("test_policy", component_id, "") :- - check_failed(component_id, "mcn_githubactions_vulnerabilities_1"). + check_passed(component_id, "mcn_githubactions_vulnerabilities_1"). apply_policy_to("test_policy", component_id) :- is_repo_url(component_id, "https://github.com/apache/logging-log4j2"). diff --git a/tests/integration/cases/org_apache_logging_log4j/test.yaml b/tests/integration/cases/org_apache_logging_log4j/test.yaml index 0dec38f7d..b0820223d 100644 --- a/tests/integration/cases/org_apache_logging_log4j/test.yaml +++ b/tests/integration/cases/org_apache_logging_log4j/test.yaml @@ -27,7 +27,7 @@ steps: command_args: - -rp - https://github.com/apache/logging-log4j2 - - d + - -d - 028e9fad03ae7bcbf2e49ab8d32d8cfb900f3587 - name: Run macaron verify-policy to verify passed/failed checks kind: verify