diff --git a/src/macaron/code_analyzer/dataflow_analysis/analysis.py b/src/macaron/code_analyzer/dataflow_analysis/analysis.py index 6f7c3f35f..1fed33070 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/analysis.py +++ b/src/macaron/code_analyzer/dataflow_analysis/analysis.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Entry points to perform and use the dataflow analysis.""" @@ -30,19 +30,22 @@ def analyse_github_workflow_file(workflow_path: str, repo_path: str | None, dump core.Node Graph representation of workflow and analysis results. """ - workflow = actionparser.parse(workflow_path) + try: + workflow = actionparser.parse(workflow_path) - analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) + analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) - core.reset_debug_sequence_number() - raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_path) - core.increment_debug_sequence_number() + core.reset_debug_sequence_number() + raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_path) + core.increment_debug_sequence_number() - raw_workflow_node.analyse() + raw_workflow_node.analyse() - if dump_debug: - with open("analysis." + workflow_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f: - printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True) + if dump_debug: + with open("analysis." + workflow_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f: + printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True) + except Exception as e: + raise CallGraphError("Failed to analyze github workflow '" + workflow_path + "'") from e return raw_workflow_node @@ -68,17 +71,20 @@ def analyse_github_workflow( core.Node Graph representation of workflow and analysis results. """ - analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) + try: + analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) - core.reset_debug_sequence_number() - raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_source_path) - core.increment_debug_sequence_number() + core.reset_debug_sequence_number() + raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_source_path) + core.increment_debug_sequence_number() - raw_workflow_node.analyse() + raw_workflow_node.analyse() - if dump_debug: - with open("analysis." + workflow_source_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f: - printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True) + if dump_debug: + with open("analysis." + workflow_source_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f: + printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True) + except Exception as e: + raise CallGraphError("Failed to analyze github workflow '" + workflow_source_path + "'") from e return raw_workflow_node @@ -104,19 +110,24 @@ def analyse_bash_script( core.Node Graph representation of Bash script and analysis results. """ - analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) - bash_context = core.OwningContextRef(bash.BashScriptContext.create_in_isolation(analysis_context, source_path)) - core.reset_debug_sequence_number() - bash_node = bash.RawBashScriptNode(facts.StringLiteral(bash_content), bash_context) - core.increment_debug_sequence_number() - - bash_node.analyse() - - if dump_debug: - with open( - "analysis." + source_path.replace("/", "_") + "." + str(hash(bash_content)) + ".dot", "w", encoding="utf-8" - ) as f: - printing.print_as_dot_graph(bash_node, f, include_properties=True, include_states=True) + try: + analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) + bash_context = core.OwningContextRef(bash.BashScriptContext.create_in_isolation(analysis_context, source_path)) + core.reset_debug_sequence_number() + bash_node = bash.RawBashScriptNode(facts.StringLiteral(bash_content), bash_context) + core.increment_debug_sequence_number() + + bash_node.analyse() + + if dump_debug: + with open( + "analysis." + source_path.replace("/", "_") + "." + str(hash(bash_content)) + ".dot", + "w", + encoding="utf-8", + ) as f: + printing.print_as_dot_graph(bash_node, f, include_properties=True, include_states=True) + except Exception as e: + raise CallGraphError("Failed to analyze bash script '" + source_path + "'") from e return bash_node diff --git a/src/macaron/code_analyzer/dataflow_analysis/github.py b/src/macaron/code_analyzer/dataflow_analysis/github.py index 222f55fb1..6231c0ea6 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/github.py +++ b/src/macaron/code_analyzer/dataflow_analysis/github.py @@ -363,12 +363,16 @@ def create( for need in needs: actual_need = GitHubActionsWorkflowNode._find_job_id_case_insensitive(jobs, need) if actual_need is None: - raise CallGraphError("needs refers to invalid job") + raise CallGraphError( + "needs refers to invalid job '" + need + "', jobs are " + str(list(jobs.keys())) + ) edges.append(actual_need) elif isinstance(needs, str): actual_need = GitHubActionsWorkflowNode._find_job_id_case_insensitive(jobs, needs) if actual_need is None: - raise CallGraphError("needs refers to invalid job") + raise CallGraphError( + "needs refers to invalid job '" + needs + "', jobs are " + str(list(jobs.keys())) + ) edges.append(actual_need) dependency_graph[job_id] = edges diff --git a/src/macaron/code_analyzer/dataflow_analysis/github_expr.py b/src/macaron/code_analyzer/dataflow_analysis/github_expr.py index 8961750a4..9ecb39426 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/github_expr.py +++ b/src/macaron/code_analyzer/dataflow_analysis/github_expr.py @@ -1,13 +1,14 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Parser for GitHub Actions expression language.""" from typing import cast -from lark import Lark, Token, Tree +from lark import Lark, Token, Tree, UnexpectedInput from macaron.code_analyzer.dataflow_analysis import facts +from macaron.errors import CallGraphError # Parser for GitHub Actions expression language grammar. github_expr_parser = Lark( @@ -67,7 +68,7 @@ function_call: identifier "(" _expr ("," _expr)* ")" %import common.SIGNED_NUMBER - %import common.WS + %import unicode.WS %import common.LETTER %import common.DIGIT %import common._STRING_INNER @@ -111,8 +112,10 @@ def extract_value_from_expr_string(s: str, var_scope: facts.Scope | None) -> fac values.append(facts.StringLiteral(cur_str)) cur_expr_end = s.find("}}", cur_expr_begin) cur_expr = s[cur_expr_begin + 3 : cur_expr_end] - parse_tree = github_expr_parser.parse(cur_expr) - + try: + parse_tree = github_expr_parser.parse(cur_expr) + except UnexpectedInput as e: + raise CallGraphError("Failed to parse github expression '" + cur_expr + "' in string '" + s + "'") from e node = parse_tree.children[0] var_str = extract_expr_variable_name(node) diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py index b24dc5963..4700e5e85 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module analyzes GitHub Actions CI.""" @@ -8,13 +8,14 @@ import glob import logging import os +import traceback from datetime import datetime, timedelta, timezone from macaron.code_analyzer.dataflow_analysis.analysis import analyse_github_workflow_file from macaron.code_analyzer.dataflow_analysis.core import Node, NodeForest from macaron.config.defaults import defaults from macaron.config.global_config import global_config -from macaron.errors import GitHubActionsValueError, ParseError +from macaron.errors import CallGraphError, GitHubActionsValueError, ParseError from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService from macaron.slsa_analyzer.git_service.api_client import GhAPIClient, get_default_gh_client from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService @@ -593,8 +594,9 @@ def build_call_graph(self, repo_path: str, macaron_path: str = "") -> NodeForest try: workflow_node = analyse_github_workflow_file(workflow_path, repo_path) - except ParseError: + except (ParseError, CallGraphError): logger.debug("Skip adding workflow at %s to the callgraph.", workflow_path) + logger.debug("Reason: %s", traceback.format_exc()) continue nodes.append(workflow_node) return NodeForest(nodes)