From c68a1417a9abe134d3b0eeaea41430bd2c2514e1 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Wed, 18 Feb 2026 17:28:07 +1000 Subject: [PATCH 1/7] feat: extend build command information in buildspec Signed-off-by: behnazh-w --- .../build_command_patcher.py | 128 +++++------ .../common_spec/base_spec.py | 36 ++- .../build_spec_generator/common_spec/core.py | 33 +-- .../common_spec/maven_spec.py | 50 +++-- .../common_spec/pypi_spec.py | 33 +-- .../dockerfile/pypi_dockerfile_output.py | 2 +- .../reproducible_central.py | 4 +- .../common_spec/test_core.py | 6 +- .../dockerfile/test_dockerfile_output.py | 6 +- .../dockerfile/test_pypi_dockerfile_output.py | 4 +- .../test_reproducible_central.py | 13 +- .../test_build_command_patcher.py | 209 +++++------------- .../computer-k8s/expected_default.buildspec | 23 +- .../expected_default.buildspec | 17 +- .../expected_default.buildspec | 11 +- .../pypi_toga/expected_default.buildspec | 21 +- 16 files changed, 275 insertions(+), 321 deletions(-) diff --git a/src/macaron/build_spec_generator/build_command_patcher.py b/src/macaron/build_spec_generator/build_command_patcher.py index cbfd32722..0802fd773 100644 --- a/src/macaron/build_spec_generator/build_command_patcher.py +++ b/src/macaron/build_spec_generator/build_command_patcher.py @@ -65,80 +65,74 @@ } -def _patch_commands( - cmds_sequence: Sequence[list[str]], +def _patch_command( + cmd: list[str], cli_parsers: Sequence[CLICommandParser], patches: Mapping[ PatchCommandBuildTool, Mapping[str, PatchValueType | None], ], -) -> list[CLICommand] | None: - """Patch the sequence of build commands, using the provided CLICommandParser instances. +) -> CLICommand | None: + """Patch the build command, using the provided CLICommandParser instances. 
- For each command in `cmds_sequence`, it will be checked against all CLICommandParser instances until there is + The command will be checked against all CLICommandParser instances to find one that can parse it, then a patch from ``patches`` is applied for this command if provided. If a command doesn't have any corresponding ``CLICommandParser`` instance it will be parsed as UnparsedCLICommand, which just holds the original command as a list of string, without any changes. """ - result: list[CLICommand] = [] - for cmd in cmds_sequence: - # Checking if the command is a valid non-empty list. - if not cmd: - continue - effective_cli_parser = None - for cli_parser in cli_parsers: - if cli_parser.is_build_tool(cmd[0]): - effective_cli_parser = cli_parser - break - - if not effective_cli_parser: - result.append(UnparsedCLICommand(original_cmds=cmd)) - continue - - try: - cli_command = effective_cli_parser.parse(cmd) - except CommandLineParseError as error: - logger.error( - "Failed to patch the cli command %s. Error %s.", - " ".join(cmd), - error, - ) - return None - - patch = patches.get(effective_cli_parser.build_tool, None) - if not patch: - result.append(cli_command) - continue - - try: - new_cli_command = effective_cli_parser.apply_patch( - cli_command=cli_command, - patch_options=patch, - ) - except PatchBuildCommandError as error: - logger.error( - "Failed to patch the build command %s. Error %s.", - " ".join(cmd), - error, - ) - return None - - result.append(new_cli_command) - - return result - - -def patch_commands( - cmds_sequence: Sequence[list[str]], + # Checking if the command is a valid non-empty list. 
+ if not cmd: + return None + + effective_cli_parser = None + for cli_parser in cli_parsers: + if cli_parser.is_build_tool(cmd[0]): + effective_cli_parser = cli_parser + break + + if not effective_cli_parser: + return UnparsedCLICommand(original_cmds=cmd) + + try: + cli_command = effective_cli_parser.parse(cmd) + except CommandLineParseError as error: + logger.error( + "Failed to patch the cli command %s. Error %s.", + " ".join(cmd), + error, + ) + return None + + patch = patches.get(effective_cli_parser.build_tool, None) + if not patch: + return cli_command + + try: + patched_command: CLICommand = effective_cli_parser.apply_patch( + cli_command=cli_command, + patch_options=patch, + ) + return patched_command + except PatchBuildCommandError as error: + logger.error( + "Failed to patch the build command %s. Error %s.", + " ".join(cmd), + error, + ) + return None + + +def patch_command( + cmd: list[str], patches: Mapping[ PatchCommandBuildTool, Mapping[str, PatchValueType | None], ], -) -> list[list[str]] | None: - """Patch a sequence of CLI commands. +) -> list[str] | None: + """Patch a CLI command. - For each command in this command sequence: + Possible scenarios: - If the command is not a build command, or it's a tool we do not support, it will be left intact. @@ -158,21 +152,17 @@ def patch_commands( Returns ------- - list[list[str]] | None - The patched command sequence or None if there is an error. The errors that can happen if any command - which we support is invalid in ``cmds_sequence``, or the patch value is valid. + list[str] | None + The patched command or None if there is an error. An error happens if a supported command is invalid, + or the patch value is invalid.
""" - result = [] - patch_cli_commands = _patch_commands( - cmds_sequence=cmds_sequence, + patch_cli_command = _patch_command( + cmd=cmd, cli_parsers=[MVN_CLI_PARSER, GRADLE_CLI_PARSER], patches=patches, ) - if patch_cli_commands is None: + if patch_cli_command is None: return None - for patch_cmd in patch_cli_commands: - result.append(patch_cmd.to_cmds()) - - return result + return patch_cli_command.to_cmds() diff --git a/src/macaron/build_spec_generator/common_spec/base_spec.py b/src/macaron/build_spec_generator/common_spec/base_spec.py index ac954c0a3..db4a94477 100644 --- a/src/macaron/build_spec_generator/common_spec/base_spec.py +++ b/src/macaron/build_spec_generator/common_spec/base_spec.py @@ -9,6 +9,32 @@ from packageurl import PackageURL +class SpecBuildCommandDict(TypedDict, total=False): + """ + Initialize build command section of the build specification. + + It contains helpful information related to a build command. + """ + + #: The build tool. + build_tool: Required[str] + + #: The build tool version. + build_tool_version: NotRequired[str] + + #: The build configuration path + build_tool_path: NotRequired[str] + + #: The build command. + command: Required[list[str]] + + #: The pre-build commands. + pre_build_cmds: NotRequired[list[list[str]]] + + #: The post-build commands. + post_build_cmds: NotRequired[list[list[str]]] + + class BaseBuildSpecDict(TypedDict, total=False): """ Initialize base build specification. @@ -58,8 +84,8 @@ class BaseBuildSpecDict(TypedDict, total=False): #: List of build dependencies, which includes tests. build_dependencies: NotRequired[list[str]] - #: List of shell commands to build the project. - build_commands: NotRequired[list[list[str]]] + #: List of shell commands and related information to build the project. + build_commands: NotRequired[list[SpecBuildCommandDict]] #: List of shell commands to test the project. 
test_commands: NotRequired[list[list[str]]] @@ -106,7 +132,7 @@ def resolve_fields(self, purl: PackageURL) -> None: def get_default_build_commands( self, build_tool_names: list[str], - ) -> list[list[str]]: + ) -> list[SpecBuildCommandDict]: """Return the default build commands for the build tools. Parameters @@ -116,8 +142,8 @@ def get_default_build_commands( Returns ------- - list[list[str]] - The build command as a list[list[str]]. + list[SpecBuildCommandDict] + The build command and relevant information as a list[SpecBuildCommandDict]. Raises ------ diff --git a/src/macaron/build_spec_generator/common_spec/core.py b/src/macaron/build_spec_generator/common_spec/core.py index 4c2cf1ecd..b6e283dcd 100644 --- a/src/macaron/build_spec_generator/common_spec/core.py +++ b/src/macaron/build_spec_generator/common_spec/core.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the logic to generate a build spec in a generic format that can be transformed if needed.""" @@ -13,7 +13,7 @@ import sqlalchemy.orm from packageurl import PackageURL -from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpecDict +from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpecDict, SpecBuildCommandDict from macaron.build_spec_generator.common_spec.maven_spec import MavenBuildSpec from macaron.build_spec_generator.common_spec.pypi_spec import PyPIBuildSpec from macaron.build_spec_generator.macaron_db_extractor import ( @@ -75,8 +75,8 @@ def format_build_command_info(build_command_info: list[GenericBuildCommandInfo]) str The prettified output. 
""" - pretty_formatted_ouput = [pprint.pformat(build_command_info) for build_command_info in build_command_info] - return "\n".join(pretty_formatted_ouput) + pretty_formatted_output = [pprint.pformat(build_command_info) for build_command_info in build_command_info] + return "\n".join(pretty_formatted_output) def remove_shell_quote(cmd: list[str]) -> list[str]: @@ -351,18 +351,22 @@ def gen_generic_build_spec( if build_tools is not None: build_tool_names = [build_tool.value for build_tool in build_tools] - build_command_info = get_build_command_info( + db_build_command_info = get_build_command_info( component_id=latest_component.id, session=session, ) - logger.info( - "Attempted to find build command from the database. Result: %s", - build_command_info or "Cannot find any.", - ) - - selected_build_command = build_command_info.command if build_command_info else [] - lang_version = get_language_version(build_command_info) if build_command_info else "" + lang_version = None + spec_build_commad_info = None + if db_build_command_info: + logger.info( + "Attempted to find build command from the database. 
Result: %s", + db_build_command_info or "Cannot find any.", + ) + lang_version = get_language_version(db_build_command_info) if db_build_command_info else "" + spec_build_commad_info = SpecBuildCommandDict( + build_tool=db_build_command_info.build_tool_name, command=db_build_command_info.command + ) base_build_spec_dict = BaseBuildSpecDict( { @@ -378,8 +382,11 @@ def gen_generic_build_spec( "purl": str(purl), "language": target_language, "build_tools": build_tool_names, - "build_commands": [selected_build_command] if selected_build_command else [], + "build_commands": ( + [spec_build_commad_info] if spec_build_commad_info and spec_build_commad_info["command"] else [] + ), } ) + ECOSYSTEMS[purl.type.upper()].value(base_build_spec_dict).resolve_fields(purl) return base_build_spec_dict diff --git a/src/macaron/build_spec_generator/common_spec/maven_spec.py b/src/macaron/build_spec_generator/common_spec/maven_spec.py index de0b4c5df..2741f0923 100644 --- a/src/macaron/build_spec_generator/common_spec/maven_spec.py +++ b/src/macaron/build_spec_generator/common_spec/maven_spec.py @@ -8,8 +8,8 @@ from packageurl import PackageURL -from macaron.build_spec_generator.build_command_patcher import CLI_COMMAND_PATCHES, patch_commands -from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpec, BaseBuildSpecDict +from macaron.build_spec_generator.build_command_patcher import CLI_COMMAND_PATCHES, patch_command +from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpec, BaseBuildSpecDict, SpecBuildCommandDict from macaron.build_spec_generator.common_spec.jdk_finder import find_jdk_version_from_central_maven_repo from macaron.build_spec_generator.common_spec.jdk_version_normalizer import normalize_jdk_version @@ -33,7 +33,7 @@ def __init__(self, data: BaseBuildSpecDict): def get_default_build_commands( self, build_tool_names: list[str], - ) -> list[list[str]]: + ) -> list[SpecBuildCommandDict]: """Return the default build commands for the build 
tools. Parameters @@ -43,28 +43,34 @@ def get_default_build_commands( Returns ------- - list[list[str]] - The build command as a list[list[str]]. + list[SpecBuildCommandDict] + The build command as a list[SpecBuildCommandDict]. """ - default_build_commands = [] + default_build_cmd_list = [] for build_tool_name in build_tool_names: match build_tool_name: case "maven": - default_build_commands.append("mvn clean package".split()) + default_build_cmd_list.append( + SpecBuildCommandDict(build_tool=build_tool_name, command="mvn clean package".split()) + ) case "gradle": - default_build_commands.append("./gradlew clean assemble publishToMavenLocal".split()) + default_build_cmd_list.append( + SpecBuildCommandDict( + build_tool=build_tool_name, command="./gradlew clean assemble publishToMavenLocal".split() + ) + ) case _: pass - if not default_build_commands: + if not default_build_cmd_list: logger.debug( "There is no default build command available for the build tools %s.", build_tool_names, ) - return default_build_commands + return default_build_cmd_list def resolve_fields(self, purl: PackageURL) -> None: """ @@ -108,16 +114,14 @@ def resolve_fields(self, purl: PackageURL) -> None: self.data["language_version"] = [major_jdk_version] # Resolve and patch build commands. 
- selected_build_commands = self.data["build_commands"] or self.get_default_build_commands( - self.data["build_tools"] - ) - patched_build_commands = patch_commands( - cmds_sequence=selected_build_commands, - patches=CLI_COMMAND_PATCHES, - ) - if not patched_build_commands: - logger.debug("Failed to patch build command sequences %s", selected_build_commands) - self.data["build_commands"] = [] - return - - self.data["build_commands"] = patched_build_commands + if not self.data["build_commands"]: + self.data["build_commands"] = self.get_default_build_commands(self.data["build_tools"]) + + for build_command_info in self.data["build_commands"]: + if build_command_info["command"] and ( + patched_cmd := patch_command( + cmd=build_command_info["command"], + patches=CLI_COMMAND_PATCHES, + ) + ): + build_command_info["command"] = patched_cmd diff --git a/src/macaron/build_spec_generator/common_spec/pypi_spec.py b/src/macaron/build_spec_generator/common_spec/pypi_spec.py index ee67578c9..0726e75f6 100644 --- a/src/macaron/build_spec_generator/common_spec/pypi_spec.py +++ b/src/macaron/build_spec_generator/common_spec/pypi_spec.py @@ -14,7 +14,7 @@ from packaging.specifiers import InvalidSpecifier from packaging.utils import InvalidWheelFilename, parse_wheel_filename -from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpec, BaseBuildSpecDict +from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpec, BaseBuildSpecDict, SpecBuildCommandDict from macaron.config.defaults import defaults from macaron.errors import SourceCodeError, WheelTagError from macaron.json_tools import json_extract @@ -43,7 +43,7 @@ def __init__(self, data: BaseBuildSpecDict): def get_default_build_commands( self, build_tool_names: list[str], - ) -> list[list[str]]: + ) -> list[SpecBuildCommandDict]: """Return the default build commands for the build tools. 
Parameters @@ -53,37 +53,44 @@ def get_default_build_commands( Returns ------- - list[list[str]] - The build command as a list[list[str]]. + list[SpecBuildCommandDict] + The build command as a list[SpecBuildCommandDict]. """ - default_build_commands = [] - + default_build_cmd_list = [] for build_tool_name in build_tool_names: match build_tool_name: case "pip": - default_build_commands.append("python -m build --wheel -n".split()) + default_build_cmd_list.append( + SpecBuildCommandDict(build_tool=build_tool_name, command="python -m build --wheel -n".split()) + ) case "poetry": - default_build_commands.append("poetry build".split()) + default_build_cmd_list.append( + SpecBuildCommandDict(build_tool=build_tool_name, command="poetry build".split()) + ) case "flit": # We might also want to deal with existence flit.ini, we can do so via # "python -m flit.tomlify" - default_build_commands.append("flit build".split()) + default_build_cmd_list.append( + SpecBuildCommandDict(build_tool=build_tool_name, command="flit build".split()) + ) case "hatch": - default_build_commands.append("hatch build".split()) + default_build_cmd_list.append( + SpecBuildCommandDict(build_tool=build_tool_name, command="hatch build".split()) + ) case "conda": # TODO: update this if a build command can be used for conda. 
pass case _: pass - if not default_build_commands: + if not default_build_cmd_list: logger.debug( "There is no default build command available for the build tools %s.", build_tool_names, ) - return default_build_commands + return default_build_cmd_list def resolve_fields(self, purl: PackageURL) -> None: """ @@ -108,7 +115,7 @@ def resolve_fields(self, purl: PackageURL) -> None: upstream_artifacts: dict[str, list[str]] = {} pypi_package_json = pypi_registry.find_or_create_pypi_asset(purl.name, purl.version, registry_info) - patched_build_commands: list[list[str]] = [] + patched_build_commands: list[SpecBuildCommandDict] = [] build_backends_set: set[str] = set() parsed_build_requires: dict[str, str] = {} sdist_build_requires: dict[str, str] = {} diff --git a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py index 67d1c6308..24b9e406d 100644 --- a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py +++ b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py @@ -63,7 +63,7 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str: f"pip install {buildspec['build_tools'][0]} && if test -f \"flit.ini\"; then python -m flit.tomlify; fi && " ) - modern_build_command = build_tool_install + " ".join(x for x in buildspec["build_commands"][0]) + modern_build_command = build_tool_install + " ".join(x for x in buildspec["build_commands"][0]["command"]) legacy_build_command = ( 'if test -f "setup.py"; then pip install wheel && python setup.py bdist_wheel; ' "else python -m build --wheel -n; fi" diff --git a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py index c5f861c90..38fb1dec7 100644 --- a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py +++ 
b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py @@ -104,7 +104,9 @@ def gen_reproducible_central_build_spec(build_spec: BaseBuildSpecDict) -> str | "newline": build_spec["newline"], "buildinfo": f"target/{build_spec['artifact_id']}-{build_spec['version']}.buildinfo", "jdk": build_spec["language_version"][0], - "command": compose_shell_commands(adapted_build_commands), + "command": compose_shell_commands( + [b_info["command"] for b_info in adapted_build_commands["build_commands"] if b_info["command"]] + ), } return STRING_TEMPLATE.format_map(template_format_values) diff --git a/tests/build_spec_generator/common_spec/test_core.py b/tests/build_spec_generator/common_spec/test_core.py index a0620c869..538d13695 100644 --- a/tests/build_spec_generator/common_spec/test_core.py +++ b/tests/build_spec_generator/common_spec/test_core.py @@ -6,7 +6,7 @@ import pytest from packageurl import PackageURL -from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpecDict +from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpecDict, SpecBuildCommandDict from macaron.build_spec_generator.common_spec.core import ( ECOSYSTEMS, LANGUAGES, @@ -185,7 +185,7 @@ def test_get_language_version( "purl": "pkg:maven/foo/bar@1.0.0", "language": LANGUAGES.MAVEN.value, "build_tools": ["ant"], - "build_commands": [["ant", "dist"]], + "build_commands": [SpecBuildCommandDict(build_tool="ant", command=["ant", "dist"])], } ), id="unsupported build tool for maven", @@ -225,7 +225,7 @@ def test_get_language_version( "purl": "pkg:pypi/bar@1.0.0", "language": LANGUAGES.PYPI.value, "build_tools": ["uv"], - "build_commands": [["python", "-m", "build"]], + "build_commands": [SpecBuildCommandDict(build_tool="uv", command=["python", "-m", "build"])], } ), id="unsupported build tool for pypi", diff --git a/tests/build_spec_generator/dockerfile/test_dockerfile_output.py b/tests/build_spec_generator/dockerfile/test_dockerfile_output.py index 
f78b566f2..c8c55be29 100644 --- a/tests/build_spec_generator/dockerfile/test_dockerfile_output.py +++ b/tests/build_spec_generator/dockerfile/test_dockerfile_output.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ @@ -7,7 +7,7 @@ import pytest -from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpecDict +from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpecDict, SpecBuildCommandDict from macaron.build_spec_generator.dockerfile import dockerfile_output from macaron.errors import GenerateBuildSpecError @@ -28,7 +28,7 @@ def fixture_base_build_spec() -> BaseBuildSpecDict: "build_tools": ["maven"], "newline": "lf", "language_version": ["17"], - "build_commands": [["mvn", "package"]], + "build_commands": [SpecBuildCommandDict(build_tool="maven", command=["mvn", "package"])], "purl": "pkg:maven/com.oracle/example-artifact@1.2.3", } ) diff --git a/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py b/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py index 4c8902325..0c3523474 100644 --- a/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py +++ b/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py @@ -7,7 +7,7 @@ import pytest -from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpecDict +from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpecDict, SpecBuildCommandDict from macaron.build_spec_generator.dockerfile.pypi_dockerfile_output import gen_dockerfile @@ -29,7 +29,7 @@ def fixture_base_build_spec() -> BaseBuildSpecDict: "language": "python", "has_binaries": False, "build_tools": ["pip"], - "build_commands": [["python", "-m", "build"]], + "build_commands": 
[SpecBuildCommandDict(build_tool="pip", command=["python", "-m", "build"])], "build_requires": {"setuptools": "==80.9.0", "wheel": ""}, "build_backends": ["setuptools.build_meta"], "upstream_artifacts": { diff --git a/tests/build_spec_generator/reproducible_central/test_reproducible_central.py b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py index f95fefeb7..9b8b47379 100644 --- a/tests/build_spec_generator/reproducible_central/test_reproducible_central.py +++ b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py @@ -5,7 +5,7 @@ import pytest -from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpecDict +from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpecDict, SpecBuildCommandDict from macaron.build_spec_generator.common_spec.core import compose_shell_commands from macaron.build_spec_generator.reproducible_central.reproducible_central import gen_reproducible_central_build_spec from macaron.errors import GenerateBuildSpecError @@ -27,7 +27,7 @@ def fixture_base_build_spec() -> BaseBuildSpecDict: "build_tools": ["maven"], "newline": "lf", "language_version": ["17"], - "build_commands": [["mvn", "package"]], + "build_commands": [SpecBuildCommandDict(build_tool="maven", command=["mvn", "package"])], "purl": "pkg:maven/com.oracle/example-artifact@1.2.3", } ) @@ -78,10 +78,11 @@ def test_build_tool_name_variants(base_build_spec: BaseBuildSpecDict, build_tool def test_compose_shell_commands_integration(base_build_spec: BaseBuildSpecDict) -> None: """Test that the correct compose_shell_commands function is used.""" - base_build_spec["build_commands"] = [["mvn", "clean", "package"], ["echo", "done"]] + base_build_spec["build_commands"] = [ + SpecBuildCommandDict(build_tool="maven", command=["mvn", "clean", "package"]), + SpecBuildCommandDict(build_tool="maven", command=["mvn", "deploy"]), + ] content = gen_reproducible_central_build_spec(base_build_spec) - expected_commands = 
compose_shell_commands( - [["mvn", "-Dmaven.test.skip=true", "clean", "package"], ["echo", "done"]] - ) + expected_commands = compose_shell_commands([["mvn", "clean", "package"], ["mvn", "deploy"]]) assert content assert f'command="{expected_commands}"' in content diff --git a/tests/build_spec_generator/test_build_command_patcher.py b/tests/build_spec_generator/test_build_command_patcher.py index dad1f04ee..b1efc261b 100644 --- a/tests/build_spec_generator/test_build_command_patcher.py +++ b/tests/build_spec_generator/test_build_command_patcher.py @@ -8,10 +8,8 @@ import pytest from macaron.build_spec_generator.build_command_patcher import ( - CLICommand, - CLICommandParser, PatchValueType, - _patch_commands, + _patch_command, ) from macaron.build_spec_generator.cli_command_parser import PatchCommandBuildTool from macaron.build_spec_generator.cli_command_parser.gradle_cli_parser import ( @@ -22,7 +20,6 @@ MavenCLICommandParser, MavenOptionPatchValueType, ) -from macaron.build_spec_generator.cli_command_parser.unparsed_cli_command import UnparsedCLICommand @pytest.mark.parametrize( @@ -119,15 +116,14 @@ def test_patch_mvn_cli_command( expected: str, ) -> None: """Test the patch maven cli command on valid input.""" - patch_cmds = _patch_commands( - cmds_sequence=[original.split()], + patch_cmd = _patch_command( + cmd=original.split(), cli_parsers=[maven_cli_parser], patches={PatchCommandBuildTool.MAVEN: patch_options}, ) - assert patch_cmds - assert len(patch_cmds) == 1 + assert patch_cmd - patch_mvn_cli_command = maven_cli_parser.parse(patch_cmds.pop().to_cmds()) + patch_mvn_cli_command = maven_cli_parser.parse(patch_cmd.to_cmds()) expected_mvn_cli_command = maven_cli_parser.parse(expected.split()) assert patch_mvn_cli_command == expected_mvn_cli_command @@ -173,11 +169,11 @@ def test_patch_mvn_cli_command_error( invalid_patch: dict[str, MavenOptionPatchValueType | None], ) -> None: """Test patch mvn cli command patching with invalid patch.""" - cmd_list = "mvn -s 
../.github/maven-settings.xml install -Pexamples,noRun".split() + original_cmd = "mvn -s ../.github/maven-settings.xml install -Pexamples,noRun".split() assert ( - _patch_commands( - cmds_sequence=[cmd_list], + _patch_command( + cmd=original_cmd, cli_parsers=[maven_cli_parser], patches={ PatchCommandBuildTool.MAVEN: invalid_patch, @@ -281,15 +277,14 @@ def test_patch_gradle_cli_command( expected: str, ) -> None: """Test the patch gradle cli command on valid input.""" - patch_cmds = _patch_commands( - cmds_sequence=[original.split()], + patch_cmd = _patch_command( + cmd=original.split(), cli_parsers=[gradle_cli_parser], patches={PatchCommandBuildTool.GRADLE: patch_options}, ) - assert patch_cmds - assert len(patch_cmds) == 1 + assert patch_cmd - patch_gradle_cli_command = gradle_cli_parser.parse(patch_cmds.pop().to_cmds()) + patch_gradle_cli_command = gradle_cli_parser.parse(patch_cmd.to_cmds()) expected_gradle_cli_command = gradle_cli_parser.parse(expected.split()) assert patch_gradle_cli_command == expected_gradle_cli_command @@ -353,10 +348,10 @@ def test_patch_gradle_cli_command_error( invalid_patch: dict[str, GradleOptionPatchValueType | None], ) -> None: """Test patch mvn cli command patching with invalid patch.""" - cmd_list = "gradle clean build --no-build-cache --debug --console plain -Dorg.gradle.parallel=true".split() + original_cmd = "gradle clean build --no-build-cache --debug --console plain -Dorg.gradle.parallel=true".split() assert ( - _patch_commands( - cmds_sequence=[cmd_list], + _patch_command( + cmd=original_cmd, cli_parsers=[gradle_cli_parser], patches={ PatchCommandBuildTool.GRADLE: invalid_patch, @@ -367,128 +362,51 @@ def test_patch_gradle_cli_command_error( @pytest.mark.parametrize( - ("cmds_sequence", "patches", "expected"), + ("original", "patch_options", "expected"), [ pytest.param( - [ - "mvn clean package".split(), - "gradle clean build".split(), - ], - { - PatchCommandBuildTool.MAVEN: { - "--debug": True, - }, - 
PatchCommandBuildTool.GRADLE: { - "--debug": True, - }, - }, - [ - "mvn clean package --debug".split(), - "gradle clean build --debug".split(), - ], - id="apply_multiple_types_of_patches", - ), - pytest.param( - [ - "mvn clean package".split(), - "gradle clean build".split(), - ], + "make setup", { - PatchCommandBuildTool.MAVEN: { - "--debug": True, - }, + "--threads": None, + "--no-transfer-progress": None, + "--define": None, }, - [ - "mvn clean package --debug".split(), - "gradle clean build".split(), - ], - id="apply_one_type_of_patch_to_multiple_commands", + "make setup", + id="make_command", ), pytest.param( - [ - "mvn clean package".split(), - "gradle clean build".split(), - ], - {}, - [ - "mvn clean package".split(), - "gradle clean build".split(), - ], - id="apply_no_patch_to_multiple_build_commands", - ), - pytest.param( - [ - "make setup".split(), - "mvn clean package".split(), - "gradle clean build".split(), - "make clean".split(), - ], + "./configure", { - PatchCommandBuildTool.MAVEN: { - "--debug": True, - }, - PatchCommandBuildTool.GRADLE: { - "--debug": True, - }, + "--threads": None, + "--no-transfer-progress": None, + "--define": None, }, - [ - "make setup".split(), - "mvn clean package --debug".split(), - "gradle clean build --debug".split(), - "make clean".split(), - ], - id="command_that_we_cannot_parse_stay_the_same", + "./configure", + id="configure_command", ), ], ) -def test_patching_multiple_commands( +def test_patch_arbitrary_command( maven_cli_parser: MavenCLICommandParser, - gradle_cli_parser: GradleCLICommandParser, - cmds_sequence: list[list[str]], - patches: Mapping[ - PatchCommandBuildTool, - Mapping[str, PatchValueType | None], - ], - expected: list[list[str]], + original: str, + patch_options: Mapping[str, MavenOptionPatchValueType | None], + expected: str, ) -> None: - """Test patching multiple commands.""" - patch_cli_commands = _patch_commands( - cmds_sequence=cmds_sequence, - cli_parsers=[maven_cli_parser, gradle_cli_parser], 
- patches=patches, + """Test the patch function for arbitrary commands.""" + patched_cmd = _patch_command( + cmd=original.split(), + cli_parsers=[maven_cli_parser], + patches={PatchCommandBuildTool.MAVEN: patch_options}, ) - - assert patch_cli_commands - - expected_cli_commands: list[CLICommand] = [] - cli_parsers: list[CLICommandParser] = [maven_cli_parser, gradle_cli_parser] - for cmd in expected: - effective_cli_parser = None - for cli_parser in cli_parsers: - if cli_parser.is_build_tool(cmd[0]): - effective_cli_parser = cli_parser - break - - if effective_cli_parser: - expected_cli_commands.append(effective_cli_parser.parse(cmd)) - else: - expected_cli_commands.append( - UnparsedCLICommand( - original_cmds=cmd, - ) - ) - - assert patch_cli_commands == expected_cli_commands + assert patched_cmd + assert patched_cmd.to_cmds() == expected.split() @pytest.mark.parametrize( - ("cmds_sequence", "patches"), + ("cmd", "patches"), [ pytest.param( - [ - "mvn --this-is-not-a-mvn-option".split(), - "gradle clean build".split(), - ], + "mvn --this-is-not-a-mvn-option".split(), { PatchCommandBuildTool.MAVEN: { "--debug": True, @@ -500,10 +418,7 @@ def test_patching_multiple_commands( id="incorrect_mvn_command", ), pytest.param( - [ - "mvn clean package".split(), - "gradle clean build --not-a-gradle-command".split(), - ], + "gradle clean build --not-a-gradle-command".split(), { PatchCommandBuildTool.MAVEN: { "--debug": True, @@ -515,45 +430,39 @@ def test_patching_multiple_commands( id="incorrect_gradle_command", ), pytest.param( - [ - "mvn clean package".split(), - "gradle clean build".split(), - ], + "mvn clean package".split(), { PatchCommandBuildTool.MAVEN: { "--not-a-valid-option": True, }, }, - id="incorrrect_patch_option_long_name", + id="incorrect_patch_option_long_name", ), pytest.param( - [ - "mvn clean package".split(), - "gradle clean build".split(), - ], + "mvn clean package".split(), { PatchCommandBuildTool.MAVEN: { # --debug expects a boolean or a None value. 
"--debug": 10, }, }, - id="incorrrect_patch_value", + id="incorrect_patch_value", ), ], ) -def test_patching_multiple_commands_error( +def test_multiple_patches_error( maven_cli_parser: MavenCLICommandParser, gradle_cli_parser: GradleCLICommandParser, - cmds_sequence: list[list[str]], + cmd: list[str], patches: Mapping[ PatchCommandBuildTool, Mapping[str, PatchValueType | None], ], ) -> None: - """Test error cases for patching multiple commands.""" + """Test error cases for multiple patches and parsers.""" assert ( - _patch_commands( - cmds_sequence=cmds_sequence, + _patch_command( + cmd=cmd, cli_parsers=[maven_cli_parser, gradle_cli_parser], patches=patches, ) @@ -562,23 +471,19 @@ def test_patching_multiple_commands_error( @pytest.mark.parametrize( - ("original_cmd_sequence"), + ("original_cmd"), [ pytest.param( [], - id="empty sequence", - ), - pytest.param( - [[]], id="empty command", ), ], ) -def test_empty_command(maven_cli_parser: MavenCLICommandParser, original_cmd_sequence: list[list[str]]) -> None: - """Test the patch command for empty commands.""" - patch_cmds = _patch_commands( - cmds_sequence=original_cmd_sequence, +def test_empty_command(maven_cli_parser: MavenCLICommandParser, original_cmd: list[str]) -> None: + """Test the patch command for an empty command.""" + patch_cmd = _patch_command( + cmd=original_cmd, cli_parsers=[maven_cli_parser], patches={PatchCommandBuildTool.MAVEN: {}}, ) - assert patch_cmds == [] + assert patch_cmd is None diff --git a/tests/integration/cases/org_apache_hugegraph/computer-k8s/expected_default.buildspec b/tests/integration/cases/org_apache_hugegraph/computer-k8s/expected_default.buildspec index 05dbdb6f2..79d0c8fbb 100644 --- a/tests/integration/cases/org_apache_hugegraph/computer-k8s/expected_default.buildspec +++ b/tests/integration/cases/org_apache_hugegraph/computer-k8s/expected_default.buildspec @@ -1,5 +1,5 @@ { - "macaron_version": "0.18.0", + "macaron_version": "0.20.0", "group_id": "org.apache.hugegraph", 
"artifact_id": "computer-k8s", "version": "1.0.0", @@ -16,14 +16,17 @@ "maven" ], "build_commands": [ - [ - "mvn", - "-DskipTests=true", - "-Dmaven.site.skip=true", - "-Drat.skip=true", - "-Dmaven.javadoc.skip=true", - "clean", - "package" - ] + { + "build_tool": "maven", + "command": [ + "mvn", + "-DskipTests=true", + "-Dmaven.site.skip=true", + "-Drat.skip=true", + "-Dmaven.javadoc.skip=true", + "clean", + "package" + ] + } ] } diff --git a/tests/integration/cases/pypi_cachetools/expected_default.buildspec b/tests/integration/cases/pypi_cachetools/expected_default.buildspec index 87859fbd4..53ae6d8f4 100644 --- a/tests/integration/cases/pypi_cachetools/expected_default.buildspec +++ b/tests/integration/cases/pypi_cachetools/expected_default.buildspec @@ -16,13 +16,16 @@ "pip" ], "build_commands": [ - [ - "python", - "-m", - "build", - "--wheel", - "-n" - ] + { + "build_tool": "pip", + "command": [ + "python", + "-m", + "build", + "--wheel", + "-n" + ] + } ], "has_binaries": false, "build_requires": { diff --git a/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec b/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec index de0634640..79071d6c3 100644 --- a/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec +++ b/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec @@ -17,10 +17,13 @@ "flit" ], "build_commands": [ - [ - "flit", - "build" - ] + { + "build_tool": "flit", + "command": [ + "flit", + "build" + ] + } ], "has_binaries": false, "build_requires": { diff --git a/tests/integration/cases/pypi_toga/expected_default.buildspec b/tests/integration/cases/pypi_toga/expected_default.buildspec index ac873e87f..29e2f4cf8 100644 --- a/tests/integration/cases/pypi_toga/expected_default.buildspec +++ b/tests/integration/cases/pypi_toga/expected_default.buildspec @@ -17,19 +17,22 @@ "pip" ], "build_commands": [ - [ - "python", - "-m", - "build", - "--wheel", - "-n" - ] + { + "build_tool": "pip", + 
"command": [ + "python", + "-m", + "build", + "--wheel", + "-n" + ] + } ], "has_binaries": false, "build_requires": { "setuptools": "==80.3.1", - "setuptools_dynamic_dependencies": "==1.0.0", - "setuptools_scm": "==8.3.1" + "setuptools_scm": "==8.3.1", + "setuptools_dynamic_dependencies": "==1.0.0" }, "build_backends": [ "setuptools.build_meta" From 1f47efda5af76c887e1d24c05382cc194ae45905 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 16 Mar 2026 17:03:54 +1000 Subject: [PATCH 2/7] feat: validate the build config and report the parent Signed-off-by: behnazh-w --- .../common_spec/base_spec.py | 32 +-- .../build_spec_generator/common_spec/core.py | 80 ++++-- .../common_spec/maven_spec.py | 56 ++-- .../common_spec/pypi_spec.py | 71 ++--- src/macaron/parsers/pomparser.py | 242 ++++++++++++++++++ .../repo_verifier/repo_verifier_maven.py | 10 +- src/macaron/slsa_analyzer/analyzer.py | 11 +- .../build_tool/base_build_tool.py | 95 +++++-- src/macaron/slsa_analyzer/build_tool/conda.py | 19 +- .../slsa_analyzer/build_tool/docker.py | 19 +- src/macaron/slsa_analyzer/build_tool/flit.py | 19 +- src/macaron/slsa_analyzer/build_tool/go.py | 19 +- .../slsa_analyzer/build_tool/gradle.py | 19 +- src/macaron/slsa_analyzer/build_tool/hatch.py | 19 +- src/macaron/slsa_analyzer/build_tool/maven.py | 80 +++++- src/macaron/slsa_analyzer/build_tool/npm.py | 19 +- src/macaron/slsa_analyzer/build_tool/pip.py | 35 ++- .../slsa_analyzer/build_tool/poetry.py | 19 +- src/macaron/slsa_analyzer/build_tool/yarn.py | 19 +- .../slsa_analyzer/checks/build_tool_check.py | 63 ++++- .../slsa_analyzer/checks/check_result.py | 19 +- .../git_service/base_git_service.py | 9 +- .../slsa_analyzer/git_service/github.py | 4 +- 23 files changed, 753 insertions(+), 225 deletions(-) diff --git a/src/macaron/build_spec_generator/common_spec/base_spec.py b/src/macaron/build_spec_generator/common_spec/base_spec.py index db4a94477..4d99025c5 100644 --- a/src/macaron/build_spec_generator/common_spec/base_spec.py 
+++ b/src/macaron/build_spec_generator/common_spec/base_spec.py @@ -23,16 +23,16 @@ class SpecBuildCommandDict(TypedDict, total=False): build_tool_version: NotRequired[str] #: The build configuration path - build_tool_path: NotRequired[str] + build_config_path: Required[str] + + #: The root build configuration path if present + root_build_config_path: NotRequired[str] #: The build command. command: Required[list[str]] - #: The pre-build commands. - pre_build_cmds: NotRequired[list[list[str]]] - - #: The post-build commands. - post_build_cmds: NotRequired[list[list[str]]] + #: The confidence score for the analysis result that has inferred the build tool information. + confidence_score: Required[float] class BaseBuildSpecDict(TypedDict, total=False): @@ -129,24 +129,14 @@ def resolve_fields(self, purl: PackageURL) -> None: """ @abstractmethod - def get_default_build_commands( + def set_default_build_commands( self, - build_tool_names: list[str], - ) -> list[SpecBuildCommandDict]: + build_cmd_spec: SpecBuildCommandDict, + ) -> None: """Return the default build commands for the build tools. Parameters ---------- - build_tool_names: list[str] - The build tools to get the default build command. - - Returns - ------- - list[SpecBuildCommandDict] - The build command and relevant information as a list[SpecBuildCommandDict]. - - Raises - ------ - GenerateBuildSpecError - If there is no default build command available for the specified build tool. + build_cmd_spec: SpecBuildCommandDict + The build command and related information. 
""" diff --git a/src/macaron/build_spec_generator/common_spec/core.py b/src/macaron/build_spec_generator/common_spec/core.py index b6e283dcd..3967ca151 100644 --- a/src/macaron/build_spec_generator/common_spec/core.py +++ b/src/macaron/build_spec_generator/common_spec/core.py @@ -120,9 +120,9 @@ def compose_shell_commands(cmds_sequence: list[list[str]]) -> str: return result -def get_macaron_build_tool_names( +def get_macaron_build_tools( build_tool_facts: Sequence[BuildToolFacts], target_language: str -) -> list[MacaronBuildToolName] | None: +) -> dict[str, dict[str, str | None]] | None: """ Retrieve the Macaron build tool names for supported projects from the database facts. @@ -138,23 +138,28 @@ def get_macaron_build_tool_names( Returns ------- - list[MacaronBuildToolName] None - The corresponding Macaron build tool names, or None otherwise. + dict[str, dict[str, float, str | None]]| None: + The corresponding Macaron build tool name, config_path, confidence score, optional build tool version, + and optional root config path if present. """ - build_tool_names = [] + build_tools = {} for fact in build_tool_facts: if fact.language.lower() == target_language: try: - build_tool_names.append(MacaronBuildToolName(fact.build_tool_name)) + build_tools[MacaronBuildToolName(fact.build_tool_name).value] = { + "build_config_path": fact.build_config_path, + "confidence_score": fact.confidence, + "build_tool_version": fact.build_tool_version, + "root_build_config_path": fact.root_build_config_path, + } except ValueError: continue - - return build_tool_names or None + return build_tools or None -def get_build_tool_names( +def get_build_tools( component_id: int, session: sqlalchemy.orm.Session, target_language: str -) -> list[MacaronBuildToolName] | None: +) -> dict[str, dict[str, float, str | None]] | None: """Retrieve the Macaron build tool names for a given component. Queries the database for build tool facts associated with the specified component ID. 
@@ -171,8 +176,9 @@ def get_build_tool_names( Returns ------- - list[MacaronBuildToolName] | None - The corresponding build tool name for the component if available, otherwise None. + dict[str, dict[str, float, str | None]]| None: + The corresponding Macaron build tool name, config_path, confidence score, optional build tool version, + and optional root config path if present. """ try: build_tool_facts = lookup_build_tools_check( @@ -195,10 +201,19 @@ def get_build_tool_names( logger.info( "Build tools discovered from the %s table: %s", BuildToolFacts.__tablename__, - [(fact.build_tool_name, fact.language) for fact in build_tool_facts], + [ + ( + fact.build_tool_name, + fact.language, + fact.build_config_path, + fact.root_build_config_path, + fact.build_tool_version, + ) + for fact in build_tool_facts + ], ) - return get_macaron_build_tool_names(build_tool_facts, target_language) + return get_macaron_build_tools(build_tool_facts, target_language) def get_build_command_info( @@ -341,15 +356,13 @@ def gen_generic_build_spec( ) build_tool_names = [] - build_tools = get_build_tool_names( - component_id=latest_component.id, session=session, target_language=target_language - ) + build_tools = get_build_tools(component_id=latest_component.id, session=session, target_language=target_language) if not build_tools: raise GenerateBuildSpecError(f"Failed to determine build tool for {purl}.") # This check is for Pylint, which is not able to iterate over build_tools, even though it cannot be None. if build_tools is not None: - build_tool_names = [build_tool.value for build_tool in build_tools] + build_tool_names = list(build_tools.keys()) db_build_command_info = get_build_command_info( component_id=latest_component.id, @@ -357,16 +370,35 @@ def gen_generic_build_spec( ) lang_version = None - spec_build_commad_info = None + spec_build_commad_info_list = [] if db_build_command_info: logger.info( "Attempted to find build command from the database. 
Result: %s", db_build_command_info or "Cannot find any.", ) lang_version = get_language_version(db_build_command_info) if db_build_command_info else "" - spec_build_commad_info = SpecBuildCommandDict( - build_tool=db_build_command_info.build_tool_name, command=db_build_command_info.command - ) + spec_build_commad_info_list = [ + SpecBuildCommandDict( + build_tool=db_build_command_info.build_tool_name, + command=db_build_command_info.command, + build_config_path=build_tools[db_build_command_info.build_tool_name]["build_config_path"], + root_build_config_path=build_tools[db_build_command_info.build_tool_name]["root_build_config_path"], + build_config_version=build_tools[db_build_command_info.build_tool_name]["build_tool_version"], + confidence_score=build_tools[db_build_command_info.build_tool_name]["confidence_score"], + ) + ] + else: + for build_tool_name in build_tool_names: + spec_build_commad_info_list.append( + SpecBuildCommandDict( + build_tool=build_tool_name, + command=[], + build_config_path=build_tools[build_tool_name]["build_config_path"], + root_build_config_path=build_tools[build_tool_name]["root_build_config_path"], + build_config_version=build_tools[build_tool_name]["build_tool_version"], + confidence_score=build_tools[build_tool_name]["confidence_score"], + ) + ) base_build_spec_dict = BaseBuildSpecDict( { @@ -382,9 +414,7 @@ def gen_generic_build_spec( "purl": str(purl), "language": target_language, "build_tools": build_tool_names, - "build_commands": ( - [spec_build_commad_info] if spec_build_commad_info and spec_build_commad_info["command"] else [] - ), + "build_commands": spec_build_commad_info_list, } ) diff --git a/src/macaron/build_spec_generator/common_spec/maven_spec.py b/src/macaron/build_spec_generator/common_spec/maven_spec.py index 2741f0923..26c2395b5 100644 --- a/src/macaron/build_spec_generator/common_spec/maven_spec.py +++ b/src/macaron/build_spec_generator/common_spec/maven_spec.py @@ -30,47 +30,28 @@ def __init__(self, data: 
BaseBuildSpecDict): """ self.data = data - def get_default_build_commands( + def set_default_build_commands( self, - build_tool_names: list[str], - ) -> list[SpecBuildCommandDict]: + build_cmd_spec: SpecBuildCommandDict, + ) -> None: """Return the default build commands for the build tools. Parameters ---------- - build_tool_names: list[str] - The build tools to get the default build command. - - Returns - ------- - list[SpecBuildCommandDict] - The build command as a list[SpecBuildCommandDict]. + build_cmd_spec: SpecBuildCommandDict + The build command and related information. """ - default_build_cmd_list = [] - - for build_tool_name in build_tool_names: - - match build_tool_name: - case "maven": - default_build_cmd_list.append( - SpecBuildCommandDict(build_tool=build_tool_name, command="mvn clean package".split()) - ) - case "gradle": - default_build_cmd_list.append( - SpecBuildCommandDict( - build_tool=build_tool_name, command="./gradlew clean assemble publishToMavenLocal".split() - ) - ) - case _: - pass - - if not default_build_cmd_list: - logger.debug( - "There is no default build command available for the build tools %s.", - build_tool_names, - ) - - return default_build_cmd_list + match build_cmd_spec["build_tool"]: + case "maven": + build_cmd_spec["command"] = "mvn clean package".split() + + case "gradle": + build_cmd_spec["command"] = "./gradlew clean assemble publishToMavenLocal".split() + case _: + logger.debug( + "There is no default build command available for the build tools %s.", + build_cmd_spec["build_tool"], + ) def resolve_fields(self, purl: PackageURL) -> None: """ @@ -114,8 +95,9 @@ def resolve_fields(self, purl: PackageURL) -> None: self.data["language_version"] = [major_jdk_version] # Resolve and patch build commands. 
- if not self.data["build_commands"]: - self.data["build_commands"] = self.get_default_build_commands(self.data["build_tools"]) + for build_cmd_spec in self.data["build_commands"]: + if build_cmd_spec["command"] == None: + self.set_default_build_commands(build_cmd_spec) for build_command_info in self.data["build_commands"]: if build_command_info["command"] and ( diff --git a/src/macaron/build_spec_generator/common_spec/pypi_spec.py b/src/macaron/build_spec_generator/common_spec/pypi_spec.py index 0726e75f6..c6f4c8ba1 100644 --- a/src/macaron/build_spec_generator/common_spec/pypi_spec.py +++ b/src/macaron/build_spec_generator/common_spec/pypi_spec.py @@ -40,57 +40,34 @@ def __init__(self, data: BaseBuildSpecDict): """ self.data = data - def get_default_build_commands( + def set_default_build_commands( self, - build_tool_names: list[str], - ) -> list[SpecBuildCommandDict]: + build_cmd_spec: SpecBuildCommandDict, + ) -> None: """Return the default build commands for the build tools. Parameters ---------- - build_tool_names: list[str] - The build tools to get the default build command. - - Returns - ------- - list[SpecBuildCommandDict] - The build command as a list[SpecBuildCommandDict]. + build_cmd_spec: SpecBuildCommandDict + The build command and related information. 
""" - default_build_cmd_list = [] - for build_tool_name in build_tool_names: - - match build_tool_name: - case "pip": - default_build_cmd_list.append( - SpecBuildCommandDict(build_tool=build_tool_name, command="python -m build --wheel -n".split()) - ) - case "poetry": - default_build_cmd_list.append( - SpecBuildCommandDict(build_tool=build_tool_name, command="poetry build".split()) - ) - case "flit": - # We might also want to deal with existence flit.ini, we can do so via - # "python -m flit.tomlify" - default_build_cmd_list.append( - SpecBuildCommandDict(build_tool=build_tool_name, command="flit build".split()) - ) - case "hatch": - default_build_cmd_list.append( - SpecBuildCommandDict(build_tool=build_tool_name, command="hatch build".split()) - ) - case "conda": - # TODO: update this if a build command can be used for conda. - pass - case _: - pass - - if not default_build_cmd_list: - logger.debug( - "There is no default build command available for the build tools %s.", - build_tool_names, - ) - - return default_build_cmd_list + match build_cmd_spec["build_tool"]: + case "pip": + build_cmd_spec["command"] = "python -m build --wheel -n".split() + case "poetry": + build_cmd_spec["command"] = "poetry build".split() + + case "flit": + # We might also want to deal with existence flit.ini, we can do so via + # "python -m flit.tomlify" + build_cmd_spec["command"] = "flit build".split() + case "hatch": + build_cmd_spec["command"] = command = "hatch build".split() + case _: + logger.debug( + "There is no default build command available for the build tools %s.", + build_cmd_spec["build_tool"], + ) def resolve_fields(self, purl: PackageURL) -> None: """ @@ -278,8 +255,8 @@ def resolve_fields(self, purl: PackageURL) -> None: self.data["build_backends"] = list(build_backends_set) # We do not generate a build command for non-pure packages if not self.data["has_binaries"]: - patched_build_commands = self.get_default_build_commands(self.data["build_tools"]) - 
self.data["build_commands"] = patched_build_commands + for build_cmd_spec in self.data["build_commands"]: + self.set_default_build_commands(build_cmd_spec) self.data["upstream_artifacts"] = upstream_artifacts def add_parsed_requirement(self, build_requirements: dict[str, str], requirement: str) -> None: diff --git a/src/macaron/parsers/pomparser.py b/src/macaron/parsers/pomparser.py index 2597e1938..4df1c9f3a 100644 --- a/src/macaron/parsers/pomparser.py +++ b/src/macaron/parsers/pomparser.py @@ -3,6 +3,8 @@ """This module contains the parser for POM files.""" import logging +import os +from pathlib import Path from xml.etree.ElementTree import Element # nosec B405 import defusedxml.ElementTree @@ -35,3 +37,243 @@ def parse_pom_string(pom_string: str) -> Element | None: except DefusedXmlException as error: logger.debug("POM rejected due to possible security issues: %s", error) return None + + +def extract_gav_from_pom(pom_file: Path) -> tuple[str | None, str | None, str | None]: + """ + Extract Maven coordinates (groupId, artifactId, version) from a `pom.xml`. + + The function reads and parses the POM and attempts to extract the + ``, ``, and `` values from the root `` + element. If an individual coordinate cannot be found, that field is returned + as ``None``. If the POM cannot be parsed at all, all three values are + returned as ``None``. + + If `` is not present directly under ``, the function falls + back to `//`. + + Parameters + ---------- + pom_file : pathlib.Path + Path to the `pom.xml` file to parse. + + Returns + ------- + group_id : str | None + The Maven `groupId` if found; otherwise ``None``. + artifact_id : str | None + The Maven `artifactId` if found; otherwise ``None``. + version : str | None + The Maven `version` if found; otherwise ``None``. + + Notes + ----- + * This function does not resolve property-substituted values (e.g., + ``${project.version}``). + * XML namespaces are handled by matching tag suffixes (e.g., ``...}groupId``). 
+ """ + pom_content = pom_file.read_text(encoding="utf-8") + pom_root = parse_pom_string(pom_content) + + if pom_root is None: + logger.debug("Could not parse pom.xml: %s", pom_file.as_posix()) + return None, None, None + + def _find_child_text(parent, local_name: str) -> str | None: + # The closing curly brace represents the end of the XML namespace. + elem = next((ch for ch in parent if ch.tag.endswith("}" + local_name)), None) + if elem is None or not elem.text: + return None + return elem.text.strip() + + # Direct project coordinates + group_id = _find_child_text(pom_root, "groupId") + artifact_id = _find_child_text(pom_root, "artifactId") + version = _find_child_text(pom_root, "version") + + # Fallback: groupId may be inherited from parent + if group_id is None: + parent_elem = next((ch for ch in pom_root if ch.tag.endswith("}parent")), None) + if parent_elem is not None: + group_id = _find_child_text(parent_elem, "groupId") + + if group_id is None: + logger.debug("Could not find groupId in pom.xml (project or parent): %s", pom_file.as_posix()) + if artifact_id is None: + logger.debug("Could not find artifactId in pom.xml: %s", pom_file.as_posix()) + if version is None: + logger.debug("Could not find version in pom.xml: %s", pom_file.as_posix()) + + return group_id, artifact_id, version + + +def detect_parent_pom(pom_path: Path, repo_root: str | Path) -> str | None: + """Detect a parent POM file for a given `pom.xml` if it exists in the repo. + + This inspects the `` section of the POM and resolves the parent POM + file path using Maven semantics: + + * If `//` is present and non-empty, that path + (relative to the directory containing `pom.xml`) is used. + * Otherwise Maven defaults to ``../pom.xml``. + see https://maven.apache.org/ref/3.0/maven-model/maven.html#class_parent + + If the resolved parent POM exists on disk and is within `repo_root`, this + returns its path relative to `repo_root`. Otherwise returns ``None``. 
+ + Parameters + ---------- + pom_path : Path + Path to the child `pom.xml`. + repo_root : str | Path + Repository root path used to produce a repo-relative return value. + + Returns + ------- + parent_pom : str | None + Repo-relative path to the parent `pom.xml` if found; otherwise ``None``. + """ + repo_root = Path(repo_root) + + try: + pom_content = pom_path.read_text(encoding="utf-8") + except OSError as error: + logger.debug(error) + return None + + pom_root = parse_pom_string(pom_content) + if pom_root is None: + return None + + def _find_child(elem, local_name: str): + return next((ch for ch in elem if ch.tag.endswith("}" + local_name)), None) + + parent_elem = _find_child(pom_root, "parent") + if parent_elem is None: + return None + + rel_path_elem = _find_child(parent_elem, "relativePath") + # Maven default is ../pom.xml if relativePath is absent or empty + relative_path = ( + rel_path_elem.text.strip() + if (rel_path_elem is not None and rel_path_elem.text and rel_path_elem.text.strip()) + else os.path.join("../") + ) + + parent_candidate = Path(pom_path.parent, relative_path, "pom.xml").resolve() + if not parent_candidate.is_file(): + return None + + # Ensure it is inside the repo (avoid returning paths outside repo_root) + try: + return str(parent_candidate.relative_to(repo_root)) + except ValueError: + return None + + +def pom_has_modules(pom_path: Path) -> bool: + """Check whether a POM contains a non-empty ```` section. + + This function parses the POM and returns ``True`` if it finds at least one + ```` entry under ```` (i.e., the POM is an aggregator/reactor + POM). + + Parameters + ---------- + pom_path : Path + Path to the ``pom.xml`` to inspect. + + Returns + ------- + bool + ``True`` if the POM has a ``...`` entry; otherwise + ``False``. 
+ """ + try: + pom_content = pom_path.read_text(encoding="utf-8") + except OSError as error: + logger.debug(error) + return False + + pom_root = parse_pom_string(pom_content) + if pom_root is None: + return False + + def _find_child(elem, local_name: str): + return next((ch for ch in elem if ch.tag.endswith("}" + local_name)), None) + + modules_elem = _find_child(pom_root, "modules") + if modules_elem is None: + return False + + for ch in modules_elem: + if ch.tag.endswith("}module") and ch.text and ch.text.strip(): + return True + + return False + + +def find_nearest_modules_pom( + pom_path: Path, + repo_root: str | Path, + *, + max_depth: int = 50, +) -> str | None: + """Find the nearest POM (self or Maven parent chain) that defines modules. + + Starting from ``pom_path``, this function checks whether the current POM is + an aggregator (i.e., contains a non-empty ```` section). If not, it + resolves the Maven parent POM and repeats recursively until: + + * a POM with modules is found (returned), or + * there is no parent POM resolvable within ``repo_root`` (returns ``None``), + or + * a cycle is detected (returns ``None``), or + * ``max_depth`` is exceeded (returns ``None``). + + Parameters + ---------- + pom_path : Path + Path to the starting (child) ``pom.xml``. + repo_root : str or pathlib.Path + Repository root path used to validate parent POMs are inside the repo and + to produce a repo-relative return value. + max_depth : int, optional + Maximum number of parent hops to attempt before aborting. Default is 50. + + Returns + ------- + str | None + Repo-relative path to the nearest POM that contains a non-empty + ```` section. If none is found, returns ``None``. 
+ """ + repo_root = Path(repo_root).resolve() + current = pom_path.resolve() + + visited: set[Path] = set() + depth = 0 + + while True: + if current in visited: + return None + visited.add(current) + + if pom_has_modules(current): + try: + return str(current.relative_to(repo_root)) + except ValueError: + return None + + if depth >= max_depth: + return None + depth += 1 + + parent_rel = detect_parent_pom(current, repo_root) + if not parent_rel: + return None + + parent_abs = Path(repo_root, parent_rel).resolve() + if not parent_abs.is_file(): + return None + + current = parent_abs diff --git a/src/macaron/repo_verifier/repo_verifier_maven.py b/src/macaron/repo_verifier/repo_verifier_maven.py index f09d9ad3b..545087ba2 100644 --- a/src/macaron/repo_verifier/repo_verifier_maven.py +++ b/src/macaron/repo_verifier/repo_verifier_maven.py @@ -5,7 +5,7 @@ import logging from urllib.parse import urlparse -from macaron.parsers.pomparser import parse_pom_string +from macaron.parsers.pomparser import extract_gav_from_pom, parse_pom_string from macaron.repo_verifier.repo_verifier_base import ( RepositoryVerificationResult, RepositoryVerificationStatus, @@ -80,13 +80,11 @@ def extract_group_id_from_pom(self) -> str | None: return None # Find the group id in the pom (project/groupId). - # The closing curly brace represents the end of the XML namespace. - pom_group_id_elem = next((ch for ch in pom_root if ch.tag.endswith("}groupId")), None) - if pom_group_id_elem is None or not pom_group_id_elem.text: - logger.debug("Could not find groupId in pom.xml: %s", pom_file) + pom_group_id_elem, _, _ = extract_gav_from_pom(pom_file) + if pom_group_id_elem is None: return None - return pom_group_id_elem.text.strip() + return pom_group_id_elem def verify_domains_from_recognized_code_hosting_services(self) -> RepositoryVerificationResult: """Verify repository link by comparing the maven domain name and the account on code hosting services. 
diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index a76e45e1b..a9e43dfd8 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module handles the cloning and analyzing a Git repo.""" @@ -1050,14 +1050,21 @@ def _determine_build_tools(self, analyze_ctx: AnalyzeContext, git_service: BaseG continue if build_tool.match_purl_type(analyze_ctx.component.type): + if build_tool.name not in ["pip", "maven"]: + continue logger.info( "Checking if the repo %s uses build tool %s", analyze_ctx.component.repository.complete_name, build_tool.name, ) - if build_tool.is_detected(analyze_ctx.component.repository.fs_path): + if build_tool_configs := build_tool.is_detected( + analyze_ctx.component.repository.fs_path, + groupID=analyze_ctx.component.namespace, + artifactID=analyze_ctx.component.name, + ): logger.info("The repo uses %s build tool.", build_tool.name) + build_tool.set_build_tool_configurations(build_tool_configs) analyze_ctx.dynamic_data["build_spec"]["tools"].append(build_tool) if not analyze_ctx.dynamic_data["build_spec"]["tools"]: diff --git a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py index d6f7f9d99..febd1f090 100644 --- a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py +++ b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the BaseBuildTool class to be inherited by other specific Build Tools.""" @@ -12,11 +12,11 @@ import os from abc import ABC, abstractmethod from collections import deque -from collections.abc import Iterable +from collections.abc import Callable, Iterable from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import TYPE_CHECKING, TypedDict +from typing import TYPE_CHECKING, Any, TypedDict from macaron.config.defaults import defaults from macaron.dependency_analyzer.cyclonedx import DependencyAnalyzer, NoneDependencyAnalyzer @@ -91,32 +91,57 @@ def find_first_matching_file(directory: Path, pattern: str) -> Path | None: return None -def file_exists(path: str, file_name: str, filters: list[str] | None = None) -> Path | None: - """Search recursively for the first matching file in a directory, skipping directories containing filter keywords. +def file_exists( + path: str, + file_name: str, + filters: list[str] | None = None, + predicate: Callable[[Path, Any], bool] | None = None, + *predicate_args: Any, + **predicate_kwargs: Any, +) -> Path | None: + """Search recursively for the first matching file, optionally validating it with a predicate. - To disable filtering, pass an empty list or `None` to the `filters` parameter. + The search performs a breadth-first traversal (closest directories first) and + skips directories whose names contain any of the provided filter keywords. + + To disable filtering, pass an empty list or ``None`` to `filters`. Parameters ---------- path : str - The path to search for the file. + Root directory to search. file_name : str - The name of the file to search or a glob pattern (e.g., "Dockerfile.*"). - filters: list[str] | None - The list of keywords that should be filtered. + File name to search for, or a glob pattern (e.g., ``"Dockerfile.*"``). + filters : list[str] or None, optional + Directory-name keywords to skip (case-insensitive). 
If ``None`` or empty, + no directories are skipped. + predicate : callable or None, optional + Optional callable used to validate a matched file. If provided, a file is + accepted only if ``predicate(candidate_path, *predicate_args, **predicate_kwargs)`` + returns ``True``. + *predicate_args : Any + Positional arguments forwarded to `predicate`. + **predicate_kwargs : Any + Keyword arguments forwarded to `predicate`. Returns ------- Path | None - The path to the file if it exists, otherwise + The path to the first matching (and predicate-accepted) file, or ``None`` + if no match is found. """ if not os.path.isdir(path): return None - # Check for file directly at root. root_dir = Path(path) + + def _accepted(p: Path) -> bool: + return True if predicate is None else bool(predicate(p, *predicate_args, **predicate_kwargs)) + + # Check for file directly at root. if target_path := find_first_matching_file(root_dir, file_name): - return target_path + if _accepted(target_path): + return target_path def _enqueue_subdirs(directory: Path, queue: deque[Path]) -> None: """Add non-symlink subdirectories to the search queue.""" @@ -128,7 +153,6 @@ def _enqueue_subdirs(directory: Path, queue: deque[Path]) -> None: _enqueue_subdirs(root_dir, search_queue) while search_queue: - current_dir = search_queue.popleft() # Skip filtered directories. 
@@ -136,7 +160,8 @@ def _enqueue_subdirs(directory: Path, queue: deque[Path]) -> None: continue if candidate_path := find_first_matching_file(current_dir, file_name): - return candidate_path + if _accepted(candidate_path): + return candidate_path _enqueue_subdirs(current_dir, search_queue) @@ -202,23 +227,35 @@ def __init__(self, name: str, language: BuildLanguage, purl_type: str) -> None: self.wrapper_files: list[str] = [] self.runtime_options = RuntimeOptions() self.path_filters: list[str] = [] + self.build_tool_configs: list[tuple[str, float, str | None, str | None]] = [] def __str__(self) -> str: return self.name @abstractmethod - def is_detected(self, repo_path: str) -> bool: - """Return True if this build tool is used in the target repo. + def is_detected( + self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + ) -> list[tuple[str, float, str | None, str | None]]: + """ + Return the list of build tools and their information used in the target repo. Parameters ---------- repo_path : str The path to the target repo. + groupID : str | None + Optional Maven `groupId` used to refine detection (e.g., selecting the + correct `pom.xml` when multiple are present). If ``None``, no filtering + is applied. + artifactID : str | None + Optional Maven `artifactId` used to refine detection. If ``None``, no + filtering is applied. Returns ------- - bool - True if this build tool is detected, else False. + list[tuple[str, float, str | None, str | None]] + Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, + where paths are relative to `repo_path` and `parent_pom` may be ``None``. """ @abstractmethod @@ -261,6 +298,26 @@ def get_dep_analyzer(self) -> DependencyAnalyzer: """ return NoneDependencyAnalyzer() + def set_build_tool_configurations( + self, build_tool_configs: list[tuple[str, float, str | None, str | None]] + ) -> None: + """Set the build tool configurations for the instance. 
+ + Parameters + ---------- + build_tool_configs : list[tuple[str, float, str | None, str | None]] + A list containing configuration tuples for each build tool. Each tuple consists of: + - str: The path to the build tool configuration file. + - float: The confidence score between 0 and 1 for identifying the correct build tool configuration. + - str | None: An optional build tool version. + - str | None: An optional path to the parent configuration file. + + Returns + ------- + None + """ + self.build_tool_configs = build_tool_configs + def get_build_dirs(self, repo_path: str) -> Iterable[Path]: """Find directories in the repository that have their own build scripts. diff --git a/src/macaron/slsa_analyzer/build_tool/conda.py b/src/macaron/slsa_analyzer/build_tool/conda.py index af72dff05..75a48d4c5 100644 --- a/src/macaron/slsa_analyzer/build_tool/conda.py +++ b/src/macaron/slsa_analyzer/build_tool/conda.py @@ -42,18 +42,29 @@ def load_defaults(self) -> None: if item in self.ci_deploy_kws: self.ci_deploy_kws[item] = defaults.get_list("builder.conda.ci.deploy", item) - def is_detected(self, repo_path: str) -> bool: - """Return True if this build tool is used in the target repo. + def is_detected( + self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + ) -> list[tuple[str, float, str | None, str | None]]: + """ + Return the list of build tools and their information used in the target repo. Parameters ---------- repo_path : str The path to the target repo. + groupID : str | None + Optional Maven `groupId` used to refine detection (e.g., selecting the + correct `pom.xml` when multiple are present). If ``None``, no filtering + is applied. + artifactID : str | None + Optional Maven `artifactId` used to refine detection. If ``None``, no + filtering is applied. Returns ------- - bool - True if this build tool is detected, else False. 
+ list[tuple[str, float, str | None, str | None]] + Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, + where paths are relative to `repo_path` and `parent_pom` may be ``None``. """ return any(file_exists(repo_path, file, filters=self.path_filters) for file in self.build_configs) diff --git a/src/macaron/slsa_analyzer/build_tool/docker.py b/src/macaron/slsa_analyzer/build_tool/docker.py index fc9c909d8..e6178942f 100644 --- a/src/macaron/slsa_analyzer/build_tool/docker.py +++ b/src/macaron/slsa_analyzer/build_tool/docker.py @@ -31,17 +31,28 @@ def load_defaults(self) -> None: if item in self.ci_deploy_kws: self.ci_deploy_kws[item] = defaults.get_list("builder.docker.ci.deploy", item) - def is_detected(self, repo_path: str) -> bool: - """Return True if this build tool is used in the target repo. + def is_detected( + self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + ) -> list[tuple[str, float, str | None, str | None]]: + """ + Return the list of build tools and their information used in the target repo. Parameters ---------- repo_path : str The path to the target repo. + groupID : str | None + Optional Maven `groupId` used to refine detection (e.g., selecting the + correct `pom.xml` when multiple are present). If ``None``, no filtering + is applied. + artifactID : str | None + Optional Maven `artifactId` used to refine detection. If ``None``, no + filtering is applied. Returns ------- - bool - True if this build tool is detected, else False. + list[tuple[str, float, str | None, str | None]] + Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, + where paths are relative to `repo_path` and `parent_pom` may be ``None``. 
""" return any(file_exists(repo_path, file, filters=self.path_filters) for file in self.build_configs) diff --git a/src/macaron/slsa_analyzer/build_tool/flit.py b/src/macaron/slsa_analyzer/build_tool/flit.py index e03eb477b..9f1408ae2 100644 --- a/src/macaron/slsa_analyzer/build_tool/flit.py +++ b/src/macaron/slsa_analyzer/build_tool/flit.py @@ -43,18 +43,29 @@ def load_defaults(self) -> None: if item in self.ci_deploy_kws: self.ci_deploy_kws[item] = defaults.get_list("builder.flit.ci.deploy", item) - def is_detected(self, repo_path: str) -> bool: - """Return True if this build tool is used in the target repo. + def is_detected( + self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + ) -> list[tuple[str, float, str | None, str | None]]: + """ + Return the list of build tools and their information used in the target repo. Parameters ---------- repo_path : str The path to the target repo. + groupID : str | None + Optional Maven `groupId` used to refine detection (e.g., selecting the + correct `pom.xml` when multiple are present). If ``None``, no filtering + is applied. + artifactID : str | None + Optional Maven `artifactId` used to refine detection. If ``None``, no + filtering is applied. Returns ------- - bool - True if this build tool is detected, else False. + list[tuple[str, float, str | None, str | None]] + Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, + where paths are relative to `repo_path` and `parent_pom` may be ``None``. 
""" for config_name in self.build_configs: if config_path := file_exists(repo_path, config_name, filters=self.path_filters): diff --git a/src/macaron/slsa_analyzer/build_tool/go.py b/src/macaron/slsa_analyzer/build_tool/go.py index 5610a3f81..ae5c7997c 100644 --- a/src/macaron/slsa_analyzer/build_tool/go.py +++ b/src/macaron/slsa_analyzer/build_tool/go.py @@ -30,18 +30,29 @@ def load_defaults(self) -> None: if item in self.ci_deploy_kws: self.ci_deploy_kws[item] = defaults.get_list("builder.go.ci.deploy", item) - def is_detected(self, repo_path: str) -> bool: - """Return True if this build tool is used in the target repo. + def is_detected( + self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + ) -> list[tuple[str, float, str | None, str | None]]: + """ + Return the list of build tools and their information used in the target repo. Parameters ---------- repo_path : str The path to the target repo. + groupID : str | None + Optional Maven `groupId` used to refine detection (e.g., selecting the + correct `pom.xml` when multiple are present). If ``None``, no filtering + is applied. + artifactID : str | None + Optional Maven `artifactId` used to refine detection. If ``None``, no + filtering is applied. Returns ------- - bool - True if this build tool is detected, else False. + list[tuple[str, float, str | None, str | None]] + Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, + where paths are relative to `repo_path` and `parent_pom` may be ``None``. 
""" go_config_files = self.build_configs + self.entry_conf return any(file_exists(repo_path, file, filters=self.path_filters) for file in go_config_files) diff --git a/src/macaron/slsa_analyzer/build_tool/gradle.py b/src/macaron/slsa_analyzer/build_tool/gradle.py index c1e4d991f..dcc00ce81 100644 --- a/src/macaron/slsa_analyzer/build_tool/gradle.py +++ b/src/macaron/slsa_analyzer/build_tool/gradle.py @@ -54,18 +54,29 @@ def load_defaults(self) -> None: error, ) - def is_detected(self, repo_path: str) -> bool: - """Return True if this build tool is used in the target repo. + def is_detected( + self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + ) -> list[tuple[str, float, str | None, str | None]]: + """ + Return the list of build tools and their information used in the target repo. Parameters ---------- repo_path : str The path to the target repo. + groupID : str | None + Optional Maven `groupId` used to refine detection (e.g., selecting the + correct `pom.xml` when multiple are present). If ``None``, no filtering + is applied. + artifactID : str | None + Optional Maven `artifactId` used to refine detection. If ``None``, no + filtering is applied. Returns ------- - bool - True if this build tool is detected, else False. + list[tuple[str, float, str | None, str | None]] + Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, + where paths are relative to `repo_path` and `parent_pom` may be ``None``. 
""" gradle_config_files = self.build_configs + self.entry_conf return any(file_exists(repo_path, file, filters=self.path_filters) for file in gradle_config_files) diff --git a/src/macaron/slsa_analyzer/build_tool/hatch.py b/src/macaron/slsa_analyzer/build_tool/hatch.py index 22e2c2e0a..df0108cad 100644 --- a/src/macaron/slsa_analyzer/build_tool/hatch.py +++ b/src/macaron/slsa_analyzer/build_tool/hatch.py @@ -43,18 +43,29 @@ def load_defaults(self) -> None: if item in self.ci_deploy_kws: self.ci_deploy_kws[item] = defaults.get_list("builder.hatch.ci.deploy", item) - def is_detected(self, repo_path: str) -> bool: - """Return True if this build tool is used in the target repo. + def is_detected( + self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + ) -> list[tuple[str, float, str | None, str | None]]: + """ + Return the list of build tools and their information used in the target repo. Parameters ---------- repo_path : str The path to the target repo. + groupID : str | None + Optional Maven `groupId` used to refine detection (e.g., selecting the + correct `pom.xml` when multiple are present). If ``None``, no filtering + is applied. + artifactID : str | None + Optional Maven `artifactId` used to refine detection. If ``None``, no + filtering is applied. Returns ------- - bool - True if this build tool is detected, else False. + list[tuple[str, float, str | None, str | None]] + Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, + where paths are relative to `repo_path` and `parent_pom` may be ``None``. 
""" for config_name in self.build_configs: if config_path := file_exists(repo_path, config_name, filters=self.path_filters): diff --git a/src/macaron/slsa_analyzer/build_tool/maven.py b/src/macaron/slsa_analyzer/build_tool/maven.py index d6fcd1c51..b350b0c85 100644 --- a/src/macaron/slsa_analyzer/build_tool/maven.py +++ b/src/macaron/slsa_analyzer/build_tool/maven.py @@ -11,6 +11,7 @@ from macaron.config.defaults import defaults from macaron.config.global_config import global_config +from macaron.parsers.pomparser import extract_gav_from_pom, find_nearest_modules_pom from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, file_exists from macaron.slsa_analyzer.build_tool.language import BuildLanguage @@ -42,26 +43,81 @@ def load_defaults(self) -> None: if item in self.ci_deploy_kws: self.ci_deploy_kws[item] = defaults.get_list("builder.maven.ci.deploy", item) - def is_detected(self, repo_path: str) -> bool: - """Return True if this build tool is used in the target repo. + def is_detected( + self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + ) -> list[tuple[str, float, str | None, str | None]]: + """ + Return the list of build tools and their information used in the target repo. Parameters ---------- repo_path : str The path to the target repo. + groupID : str | None + Optional Maven `groupId` used to refine detection (e.g., selecting the + correct `pom.xml` when multiple are present). If ``None``, no filtering + is applied. + artifactID : str | None + Optional Maven `artifactId` used to refine detection. If ``None``, no + filtering is applied. Returns ------- - bool - True if this build tool is detected, else False. + list[tuple[str, float, str | None, str | None]] + Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, + where paths are relative to `repo_path` and `parent_pom` may be ``None``. """ - # The repo path can be pointed to the same directory as the macaron root path. 
- # However, there shouldn't be any pom.xml in the macaron root path. + results: list[tuple[str, float, str | None, str | None]] = [] + confidence_score = 1.0 + if os.path.isfile(os.path.join(global_config.macaron_path, "pom.xml")): - logger.error( - "Please remove pom.xml file in %s.", - global_config.macaron_path, + logger.error("Please remove pom.xml file in %s.", global_config.macaron_path) + return [] + + for config_name in self.build_configs: + config_path = file_exists( + repo_path, + config_name, + filters=self.path_filters, + predicate=self.validate_pom_file if groupID and artifactID else None, + groupID=groupID, + artifactID=artifactID, ) - return False - maven_config_files = self.build_configs - return any(file_exists(repo_path, file, filters=self.path_filters) for file in maven_config_files) + if config_path: + entrypoint_pom = find_nearest_modules_pom(config_path, repo_path) + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, entrypoint_pom)) + confidence_score = confidence_score / 2 + + return results + + def validate_pom_file(self, config_path: str, groupID: str | None = None, artifactID: str | None = None) -> bool: + """Validate a pom.xml file against an expected Maven G/A. + + This method is intended to be used as a lightweight filter when multiple + candidate configuration files (e.g., `pom.xml`) are discovered. If both + `groupID` and `artifactID` are provided, the method extracts the + ``(groupId, artifactId, version)`` from the POM at `config_path` and returns + ``True`` only when the extracted group/artifact match the expected values. + If either `groupID` or `artifactID` is not provided, the method returns + ``False``. + + Parameters + ---------- + config_path : str + Path to the candidate configuration file (typically a `pom.xml`). + groupID : str or None, optional + Expected Maven `groupId`. If ``None``, no match can be performed. + artifactID : str or None, optional + Expected Maven `artifactId`. 
If ``None``, no match can be performed. 
+ + Returns + ------- + is_valid : bool + ``True`` if `groupID` and `artifactID` are provided and the POM at + `config_path` contains matching values; otherwise ``False``. + """ + if groupID and artifactID: + ex_groupId, ex_artifactId, _ = extract_gav_from_pom(config_path) + if groupID == ex_groupId and artifactID == ex_artifactId: + return True + return False diff --git a/src/macaron/slsa_analyzer/build_tool/npm.py b/src/macaron/slsa_analyzer/build_tool/npm.py index cae93d257..daad8248d 100644 --- a/src/macaron/slsa_analyzer/build_tool/npm.py +++ b/src/macaron/slsa_analyzer/build_tool/npm.py @@ -40,18 +40,29 @@ def load_defaults(self) -> None: if item in self.ci_deploy_kws: self.ci_deploy_kws[item] = defaults.get_list("builder.npm.ci.deploy", item) - def is_detected(self, repo_path: str) -> bool: - """Return True if this build tool is used in the target repo. + def is_detected( + self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + ) -> list[tuple[str, float, str | None, str | None]]: + """ + Return the list of build tools and their information used in the target repo. Parameters ---------- repo_path : str The path to the target repo. + groupID : str | None + Optional Maven `groupId` used to refine detection (e.g., selecting the + correct `pom.xml` when multiple are present). If ``None``, no filtering + is applied. + artifactID : str | None + Optional Maven `artifactId` used to refine detection. If ``None``, no + filtering is applied. Returns ------- - bool - True if this build tool is detected, else False. + list[tuple[str, float, str | None, str | None]] + Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, + where paths are relative to `repo_path` and `parent_pom` may be ``None``. 
""" # TODO: When more complex build detection is being implemented, consider # cases like .npmrc existing but not package-lock.json and whether diff --git a/src/macaron/slsa_analyzer/build_tool/pip.py b/src/macaron/slsa_analyzer/build_tool/pip.py index 2ee2752c7..2ccbbd8ed 100644 --- a/src/macaron/slsa_analyzer/build_tool/pip.py +++ b/src/macaron/slsa_analyzer/build_tool/pip.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Pip class which inherits BaseBuildTool. @@ -43,32 +43,49 @@ def load_defaults(self) -> None: if item in self.ci_deploy_kws: self.ci_deploy_kws[item] = defaults.get_list("builder.pip.ci.deploy", item) - def is_detected(self, repo_path: str) -> bool: - """Return True if this build tool is used in the target repo. + def is_detected( + self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + ) -> list[tuple[str, float, str | None, str | None]]: + """ + Return the list of build tools and their information used in the target repo. Parameters ---------- repo_path : str The path to the target repo. + groupID : str | None + Optional Maven `groupId` used to refine detection (e.g., selecting the + correct `pom.xml` when multiple are present). If ``None``, no filtering + is applied. + artifactID : str | None + Optional Maven `artifactId` used to refine detection. If ``None``, no + filtering is applied. Returns ------- - bool - True if this build tool is detected, else False. + list[tuple[str, float, str | None, str | None]] + Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, + where paths are relative to `repo_path` and `parent_pom` may be ``None``. 
""" + results: list[tuple[str, float, str | None, str | None]] = ( + [] + ) # (config_path, confidence_score, build_tool_version) + confidence_score = 1.0 for config_name in self.build_configs: if config_path := file_exists(repo_path, config_name, filters=self.path_filters): if os.path.basename(config_path) == "pyproject.toml": # Check the build-system section. If it doesn't exist, by default setuptools should be used. if pyproject.get_build_system(config_path) is None: - return True + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) for tool in self.build_requires + self.build_backend: if pyproject.build_system_contains_tool(tool, config_path): - return True + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) + break else: # TODO: For other build configuration files, like setup.py, we need to improve the logic. - return True - return False + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) + confidence_score = confidence_score / 2 * 100 + return results def get_dep_analyzer(self) -> DependencyAnalyzer: """Create a DependencyAnalyzer for the build tool. diff --git a/src/macaron/slsa_analyzer/build_tool/poetry.py b/src/macaron/slsa_analyzer/build_tool/poetry.py index dde2bfa28..a67360d51 100644 --- a/src/macaron/slsa_analyzer/build_tool/poetry.py +++ b/src/macaron/slsa_analyzer/build_tool/poetry.py @@ -43,18 +43,29 @@ def load_defaults(self) -> None: if item in self.ci_deploy_kws: self.ci_deploy_kws[item] = defaults.get_list("builder.poetry.ci.deploy", item) - def is_detected(self, repo_path: str) -> bool: - """Return True if this build tool is used in the target repo. + def is_detected( + self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + ) -> list[tuple[str, float, str | None, str | None]]: + """ + Return the list of build tools and their information used in the target repo. 
Parameters ---------- repo_path : str The path to the target repo. + groupID : str | None + Optional Maven `groupId` used to refine detection (e.g., selecting the + correct `pom.xml` when multiple are present). If ``None``, no filtering + is applied. + artifactID : str | None + Optional Maven `artifactId` used to refine detection. If ``None``, no + filtering is applied. Returns ------- - bool - True if this build tool is detected, else False. + list[tuple[str, float, str | None, str | None]] + Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, + where paths are relative to `repo_path` and `parent_pom` may be ``None``. """ package_lock_exists = "" for file in self.package_lock: diff --git a/src/macaron/slsa_analyzer/build_tool/yarn.py b/src/macaron/slsa_analyzer/build_tool/yarn.py index 4660faf12..5bcfc5a07 100644 --- a/src/macaron/slsa_analyzer/build_tool/yarn.py +++ b/src/macaron/slsa_analyzer/build_tool/yarn.py @@ -38,18 +38,29 @@ def load_defaults(self) -> None: # if item in self.ci_deploy_kws: # self.ci_deploy_kws[item] = defaults.get_list("builder.yarn.ci.deploy", item) - def is_detected(self, repo_path: str) -> bool: - """Return True if this build tool is used in the target repo. + def is_detected( + self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + ) -> list[tuple[str, float, str | None, str | None]]: + """ + Return the list of build tools and their information used in the target repo. Parameters ---------- repo_path : str The path to the target repo. + groupID : str | None + Optional Maven `groupId` used to refine detection (e.g., selecting the + correct `pom.xml` when multiple are present). If ``None``, no filtering + is applied. + artifactID : str | None + Optional Maven `artifactId` used to refine detection. If ``None``, no + filtering is applied. Returns ------- - bool - True if this build tool is detected, else False. 
+ list[tuple[str, float, str | None, str | None]] + Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, + where paths are relative to `repo_path` and `parent_pom` may be ``None``. """ # TODO: When more complex build detection is being implemented, consider # cases like .yarnrc existing but not package-lock.json and whether diff --git a/src/macaron/slsa_analyzer/checks/build_tool_check.py b/src/macaron/slsa_analyzer/checks/build_tool_check.py index 8432b014e..c1d3c07ef 100644 --- a/src/macaron/slsa_analyzer/checks/build_tool_check.py +++ b/src/macaron/slsa_analyzer/checks/build_tool_check.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the implementation of the build tool detection check.""" @@ -27,11 +27,32 @@ class BuildToolFacts(CheckFacts): #: The primary key. id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003 + #: The language of the artifact built by build tool. + language: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT}) + #: The build tool name. build_tool_name: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT}) - #: The language of the artifact built by build tool. - language: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT}) + #: The build tool version. + build_tool_version: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.TEXT} + ) + + #: The build tool configuration path. + build_config_path: Mapped[str] = mapped_column(String, nullable=False) + + #: The build tool configuration path link. 
+ build_config_path_link: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.HREF} + ) + + #: The entry point build tool configuration path if it exists. + root_build_config_path: Mapped[str | None] = mapped_column(String, nullable=True) + + #: The entry point build tool configuration path link if it exists. + root_build_config_path_link: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.HREF} + ) __mapper_args__ = { "polymorphic_identity": "_build_tool_check", @@ -70,11 +91,41 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: if not build_tools: return CheckResultData(result_tables=[], result_type=CheckResultType.FAILED) + # Find the Git service to create links for build tool configuration files. + git_service = ctx.dynamic_data["git_service"] + result_tables: list[CheckFacts] = [] for tool in build_tools: - result_tables.append( - BuildToolFacts(build_tool_name=tool.name, language=tool.language.value, confidence=Confidence.HIGH) - ) + for build_tool_path, score, _, root_build_conf_file in tool.build_tool_configs: + file_link = ( + git_service.api_client.get_file_link( + ctx.component.repository.full_name, + ctx.component.repository.commit_sha, + file_path=build_tool_path, + ) + if git_service.api_client + else None + ) + root_build_config_path_link = ( + git_service.api_client.get_file_link( + ctx.component.repository.full_name, + ctx.component.repository.commit_sha, + file_path=root_build_conf_file, + ) + if git_service.api_client and root_build_conf_file + else None + ) + result_tables.append( + BuildToolFacts( + build_tool_name=tool.name, + build_config_path=build_tool_path, + build_config_path_link=file_link, + root_build_config_path=root_build_conf_file, + root_build_config_path_link=root_build_config_path_link, + language=tool.language.value, + confidence=Confidence.get_confidence_level(score), + ) + ) return CheckResultData( 
result_tables=result_tables, diff --git a/src/macaron/slsa_analyzer/checks/check_result.py b/src/macaron/slsa_analyzer/checks/check_result.py index f9d5c1ad0..089d933e5 100644 --- a/src/macaron/slsa_analyzer/checks/check_result.py +++ b/src/macaron/slsa_analyzer/checks/check_result.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the CheckResult class for storing the result of a check.""" @@ -137,7 +137,22 @@ def normalize(cls, evidence_weight_map: EvidenceWeightMap) -> "Confidence": normalized_score = score / max_score - # Return the confidence level that is closest to the normalized score. + return cls.get_confidence_level(normalized_score) + + @classmethod + def get_confidence_level(cls, normalized_score: float) -> "Confidence": + """Return the Confidence level closest to a given normalized score. + + Parameters + ---------- + normalized_score : float + A score normalized to the range expected by the Confidence values. + + Returns + ------- + Confidence + The Confidence enum member whose value is closest to the given normalized score. + """ return min(cls, key=lambda c: abs(c.value - normalized_score)) diff --git a/src/macaron/slsa_analyzer/git_service/base_git_service.py b/src/macaron/slsa_analyzer/git_service/base_git_service.py index 840d9f493..4fd891a45 100644 --- a/src/macaron/slsa_analyzer/git_service/base_git_service.py +++ b/src/macaron/slsa_analyzer/git_service/base_git_service.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the BaseGitService class to be inherited by a git service.""" @@ -10,6 +10,7 @@ from macaron.config.defaults import defaults from macaron.errors import CloneError, ConfigurationError, RepoCheckOutError from macaron.slsa_analyzer import git_url +from macaron.slsa_analyzer.git_service.api_client import BaseAPIClient class BaseGitService: @@ -25,6 +26,12 @@ def __init__(self, name: str) -> None: """ self.name = name self.hostname: str | None = None + self._api_client: BaseAPIClient | None = None + + @property + def api_client(self) -> BaseAPIClient | None: + """Get the API client used for querying the Git service.""" + return self._api_client @abstractmethod def load_defaults(self) -> None: diff --git a/src/macaron/slsa_analyzer/git_service/github.py b/src/macaron/slsa_analyzer/git_service/github.py index d5e1c8548..bccd74ddd 100644 --- a/src/macaron/slsa_analyzer/git_service/github.py +++ b/src/macaron/slsa_analyzer/git_service/github.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the spec for the GitHub service.""" @@ -42,7 +42,7 @@ def load_defaults(self) -> None: @property def api_client(self) -> GhAPIClient: - """Return the API client used for querying GitHub API. + """Get the API client used for querying GitHub API. This API is used to check if a GitHub repo can be cloned. 
""" From f786edbac63529b9b0d45273f6fcfea2c5107fdf Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Wed, 18 Mar 2026 17:03:44 +1000 Subject: [PATCH 3/7] fix: always return a command for an identified build tool Signed-off-by: behnazh-w --- .../build_spec_generator/common_spec/core.py | 24 +++++---- .../macaron_db_extractor.py | 52 +++++++++++-------- src/macaron/config/defaults.ini | 1 - src/macaron/slsa_analyzer/analyzer.py | 2 +- src/macaron/slsa_analyzer/build_tool/hatch.py | 18 ++++--- .../common_spec/test_core.py | 2 + 6 files changed, 58 insertions(+), 41 deletions(-) diff --git a/src/macaron/build_spec_generator/common_spec/core.py b/src/macaron/build_spec_generator/common_spec/core.py index 3967ca151..6a25e76fc 100644 --- a/src/macaron/build_spec_generator/common_spec/core.py +++ b/src/macaron/build_spec_generator/common_spec/core.py @@ -219,7 +219,7 @@ def get_build_tools( def get_build_command_info( component_id: int, session: sqlalchemy.orm.Session, -) -> GenericBuildCommandInfo | None: +) -> list[GenericBuildCommandInfo]: """Return the highest confidence build command information from the database for a component. The build command is found by looking up CheckFacts for build-related checks. @@ -233,9 +233,9 @@ def get_build_command_info( Returns ------- - GenericBuildCommandInfo | None - The GenericBuildCommandInfo object for the highest confidence build command; or None if there was - an error, or no build command is found from the database. + list[GenericBuildCommandInfo] + The list of GenericBuildCommandInfo objects with the highest confidence build command as the first element; + or [] if there was an error, or no build command is found from the database. 
""" try: lookup_build_command_info = lookup_any_build_command(component_id, session) @@ -245,13 +245,13 @@ def get_build_command_info( component_id, lookup_build_command_error, ) - return None + return [] logger.debug( "Build command information discovered\n%s", format_build_command_info(lookup_build_command_info), ) - return lookup_build_command_info[0] if lookup_build_command_info else None + return lookup_build_command_info or [] def get_language_version( @@ -364,20 +364,20 @@ def gen_generic_build_spec( if build_tools is not None: build_tool_names = list(build_tools.keys()) - db_build_command_info = get_build_command_info( + db_build_command_info_list = get_build_command_info( component_id=latest_component.id, session=session, ) lang_version = None spec_build_commad_info_list = [] - if db_build_command_info: + for db_build_command_info in db_build_command_info_list: logger.info( "Attempted to find build command from the database. Result: %s", db_build_command_info or "Cannot find any.", ) lang_version = get_language_version(db_build_command_info) if db_build_command_info else "" - spec_build_commad_info_list = [ + spec_build_commad_info_list.append( SpecBuildCommandDict( build_tool=db_build_command_info.build_tool_name, command=db_build_command_info.command, @@ -386,8 +386,10 @@ def gen_generic_build_spec( build_config_version=build_tools[db_build_command_info.build_tool_name]["build_tool_version"], confidence_score=build_tools[db_build_command_info.build_tool_name]["confidence_score"], ) - ] - else: + ) + + # If no build commands were found from the analyze phase, add default commands for the identified build tools. 
+ if not db_build_command_info_list: for build_tool_name in build_tool_names: spec_build_commad_info_list.append( SpecBuildCommandDict( diff --git a/src/macaron/build_spec_generator/macaron_db_extractor.py b/src/macaron/build_spec_generator/macaron_db_extractor.py index 883f7e750..660dfe208 100644 --- a/src/macaron/build_spec_generator/macaron_db_extractor.py +++ b/src/macaron/build_spec_generator/macaron_db_extractor.py @@ -33,6 +33,7 @@ class GenericBuildCommandInfo: language: str language_versions: list[str] build_tool_name: str + confidence_score: float T = TypeVar("T") @@ -562,37 +563,44 @@ def extract_generic_build_command_info( json.decoder.JSONDecodeError If we failed to decode the JSON-serialized values stored in the Build*Facts instances. """ - result = [] + best_by_tool: dict[str, GenericBuildCommandInfo] = {} for fact in check_facts: match fact: case BuildAsCodeFacts(): - result.append( - GenericBuildCommandInfo( - command=json.loads(fact.deploy_command), - language=fact.language, - language_versions=json.loads(fact.language_versions) if fact.language_versions else [], - build_tool_name=fact.build_tool_name, - ) + info = GenericBuildCommandInfo( + command=json.loads(fact.deploy_command), + language=fact.language, + language_versions=json.loads(fact.language_versions) if fact.language_versions else [], + build_tool_name=fact.build_tool_name, + confidence_score=fact.confidence, ) + case BuildServiceFacts(): - result.append( - GenericBuildCommandInfo( - command=json.loads(fact.build_command), - language=fact.language, - language_versions=json.loads(fact.language_versions) if fact.language_versions else [], - build_tool_name=fact.build_tool_name, - ) + info = GenericBuildCommandInfo( + command=json.loads(fact.build_command), + language=fact.language, + language_versions=json.loads(fact.language_versions) if fact.language_versions else [], + build_tool_name=fact.build_tool_name, + confidence_score=fact.confidence, ) + case BuildScriptFacts(): - 
result.append( - GenericBuildCommandInfo( - command=json.loads(fact.build_tool_command), - language=fact.language, - language_versions=json.loads(fact.language_versions) if fact.language_versions else [], - build_tool_name=fact.build_tool_name, - ) + info = GenericBuildCommandInfo( + command=json.loads(fact.build_tool_command), + language=fact.language, + language_versions=json.loads(fact.language_versions) if fact.language_versions else [], + build_tool_name=fact.build_tool_name, + confidence_score=fact.confidence, ) + existing = best_by_tool.get(info.build_tool_name) + if existing is None or info.confidence_score > existing.confidence_score: + best_by_tool[info.build_tool_name] = info + + result = list(best_by_tool.values()) + + # Highest confidence first. + result.sort(key=lambda x: x.confidence_score, reverse=True) return result diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index 96b702b8e..d03d6b43e 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -289,7 +289,6 @@ build_configs = setup.cfg pyproject.toml packager = - pip build publisher = twine diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index a9e43dfd8..9b98776e2 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -1050,7 +1050,7 @@ def _determine_build_tools(self, analyze_ctx: AnalyzeContext, git_service: BaseG continue if build_tool.match_purl_type(analyze_ctx.component.type): - if build_tool.name not in ["pip", "maven"]: + if build_tool.name not in ["pip", "maven", "hatch"]: continue logger.info( "Checking if the repo %s uses build tool %s", diff --git a/src/macaron/slsa_analyzer/build_tool/hatch.py b/src/macaron/slsa_analyzer/build_tool/hatch.py index df0108cad..641927e48 100644 --- a/src/macaron/slsa_analyzer/build_tool/hatch.py +++ b/src/macaron/slsa_analyzer/build_tool/hatch.py @@ -67,19 +67,25 @@ def is_detected( Tuples of ``(config_path, 
confidence_score, build_tool_version, parent_pom)``, where paths are relative to `repo_path` and `parent_pom` may be ``None``. """ + results: list[tuple[str, float, str | None, str | None]] = ( + [] + ) # (config_path, confidence_score, build_tool_version) + confidence_score = 1.0 for config_name in self.build_configs: if config_path := file_exists(repo_path, config_name, filters=self.path_filters): if os.path.basename(config_path) == "pyproject.toml": if pyproject.contains_build_tool("hatch", config_path): - return True + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) # Check the build-system section. - for tool in self.build_requires + self.build_backend: - if pyproject.build_system_contains_tool(tool, config_path): - return True + else: + for tool in self.build_requires + self.build_backend: + if pyproject.build_system_contains_tool(tool, config_path): + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) + break else: # For other build configuration files, the presence of the file alone is sufficient. - return True - return False + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) + return results def get_dep_analyzer(self) -> DependencyAnalyzer: """Create a DependencyAnalyzer for the build tool. 
diff --git a/tests/build_spec_generator/common_spec/test_core.py b/tests/build_spec_generator/common_spec/test_core.py index 538d13695..b40da16df 100644 --- a/tests/build_spec_generator/common_spec/test_core.py +++ b/tests/build_spec_generator/common_spec/test_core.py @@ -123,6 +123,7 @@ def test_get_build_tool_name( language="java", language_versions=["8"], build_tool_name="maven", + confidence_score=1.0, ), "8", id="has_language_version", @@ -133,6 +134,7 @@ def test_get_build_tool_name( language="java", language_versions=[], build_tool_name="maven", + confidence_score=1.0, ), None, id="no_language_version", From 9b053953faaaacf880c33d7ba8d7962670715d50 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Thu, 19 Mar 2026 16:45:50 +1000 Subject: [PATCH 4/7] feat: handle Gradle build files and validate them Signed-off-by: behnazh-w --- .../build_spec_generator/common_spec/core.py | 9 +- src/macaron/parsers/gradleparser.py | 254 ++++++++++++++++++ src/macaron/parsers/pomparser.py | 15 +- src/macaron/slsa_analyzer/analyzer.py | 6 +- .../build_tool/base_build_tool.py | 6 +- src/macaron/slsa_analyzer/build_tool/conda.py | 6 +- .../slsa_analyzer/build_tool/docker.py | 6 +- src/macaron/slsa_analyzer/build_tool/flit.py | 6 +- src/macaron/slsa_analyzer/build_tool/go.py | 6 +- .../slsa_analyzer/build_tool/gradle.py | 140 +++++++++- src/macaron/slsa_analyzer/build_tool/hatch.py | 6 +- src/macaron/slsa_analyzer/build_tool/maven.py | 32 +-- src/macaron/slsa_analyzer/build_tool/npm.py | 6 +- src/macaron/slsa_analyzer/build_tool/pip.py | 8 +- .../slsa_analyzer/build_tool/poetry.py | 6 +- src/macaron/slsa_analyzer/build_tool/yarn.py | 6 +- .../parsers/gradleparser/test_gradleparser.py | 108 ++++++++ tests/slsa_analyzer/build_tool/test_gradle.py | 97 ++++++- tests/slsa_analyzer/build_tool/test_maven.py | 72 ++++- 19 files changed, 714 insertions(+), 81 deletions(-) create mode 100644 src/macaron/parsers/gradleparser.py create mode 100644 tests/parsers/gradleparser/test_gradleparser.py 
diff --git a/src/macaron/build_spec_generator/common_spec/core.py b/src/macaron/build_spec_generator/common_spec/core.py index 6a25e76fc..7cb83a7cf 100644 --- a/src/macaron/build_spec_generator/common_spec/core.py +++ b/src/macaron/build_spec_generator/common_spec/core.py @@ -146,12 +146,19 @@ def get_macaron_build_tools( for fact in build_tool_facts: if fact.language.lower() == target_language: try: - build_tools[MacaronBuildToolName(fact.build_tool_name).value] = { + tool_name = MacaronBuildToolName(fact.build_tool_name).value + build_tool_info = { "build_config_path": fact.build_config_path, "confidence_score": fact.confidence, "build_tool_version": fact.build_tool_version, "root_build_config_path": fact.root_build_config_path, } + existing_build_tool_info = build_tools.get(tool_name) + if ( + existing_build_tool_info is None + or build_tool_info["confidence_score"] > existing_build_tool_info["confidence_score"] + ): + build_tools[tool_name] = build_tool_info except ValueError: continue return build_tools or None diff --git a/src/macaron/parsers/gradleparser.py b/src/macaron/parsers/gradleparser.py new file mode 100644 index 000000000..29931e077 --- /dev/null +++ b/src/macaron/parsers/gradleparser.py @@ -0,0 +1,254 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains helpers for parsing Gradle build configuration files.""" + +import logging +import re +from pathlib import Path + +logger: logging.Logger = logging.getLogger(__name__) + + +def _extract_assignment_value(file_path: Path, keys: set[str]) -> str | None: + """Extract an assignment value for a supported key from a Gradle-like file. + + Parameters + ---------- + file_path : Path + The file to inspect. + keys : set[str] + Accepted key names (for example ``{"group"}`` or ``{"rootProject.name"}``). 
+ + Returns + ------- + str | None + The extracted value if a matching ``key = value`` assignment is found; + otherwise ``None``. + """ + if not file_path.is_file(): + return None + + try: + lines = file_path.read_text(encoding="utf-8", errors="ignore").splitlines() + except OSError as error: + logger.debug("Failed to read Gradle file %s: %s", str(file_path), error) + return None + + assignment_re = re.compile(r"^\s*([A-Za-z0-9_.]+)\s*=\s*(.+?)\s*$") + for line in lines: + try: + match = assignment_re.match(line) + except re.error as error: + logger.debug("Failed to apply assignment regex on %s: %s", str(file_path), error) + continue + if not match: + continue + + key = match.group(1).strip() + if key not in keys: + continue + + raw_value = match.group(2).strip() + if len(raw_value) >= 2 and raw_value[0] == raw_value[-1] and raw_value[0] in {"'", '"'}: + raw_value = raw_value[1:-1] + return raw_value + + return None + + +def extract_gav_from_gradle_project(project_path: Path) -> tuple[str | None, str | None, str | None]: + """Extract Gradle coordinates (group, artifact, version) from project files. + + Parameters + ---------- + project_path : Path + Path to the root directory of a Gradle project. + + Returns + ------- + tuple[str | None, str | None, str | None] + A tuple of ``(group_id, artifact_id, version)`` extracted from common + Gradle configuration files. Any missing value is returned as ``None``. + + Notes + ----- + This parser is intentionally lightweight and matches direct ``key = value`` + assignments only. It does not evaluate expressions or variable references. 
+ """ + group_id = ( + _extract_assignment_value( + project_path.joinpath("gradle.properties"), {"group", "projectGroup", "projectGroupId"} + ) + or _extract_assignment_value(project_path.joinpath("build.gradle"), {"group"}) + or _extract_assignment_value(project_path.joinpath("build.gradle.kts"), {"group"}) + ) + artifact_id = ( + _extract_assignment_value(project_path.joinpath("settings.gradle"), {"rootProject.name"}) + or _extract_assignment_value(project_path.joinpath("settings.gradle.kts"), {"rootProject.name"}) + or _extract_assignment_value(project_path.joinpath("gradle.properties"), {"name"}) + ) + version = ( + _extract_assignment_value(project_path.joinpath("gradle.properties"), {"version", "projectVersion"}) + or _extract_assignment_value(project_path.joinpath("build.gradle"), {"version"}) + or _extract_assignment_value(project_path.joinpath("build.gradle.kts"), {"version"}) + ) + + if group_id is None: + logger.debug("Could not find group id in Gradle project: %s", str(project_path)) + if artifact_id is None: + logger.debug("Could not find artifact id in Gradle project: %s", str(project_path)) + if version is None: + logger.debug("Could not find version in Gradle project: %s", str(project_path)) + + return group_id, artifact_id, version + + +def gradle_settings_has_modules(settings_path: Path) -> bool: + """Check whether a Gradle settings file declares one or more modules. + + Parameters + ---------- + settings_path : Path + Path to a ``settings.gradle`` or ``settings.gradle.kts`` file. + + Returns + ------- + bool + ``True`` when the file contains an ``include`` declaration; otherwise + ``False``. 
+ """ + if not settings_path.is_file(): + return False + + try: + lines = settings_path.read_text(encoding="utf-8", errors="ignore").splitlines() + except OSError as error: + logger.debug("Failed to read Gradle settings file %s: %s", str(settings_path), error) + return False + + for line in lines: + stripped = line.strip() + if re.match(r"^include\s+.+", stripped) or re.match(r"^include\s*\(.+\)", stripped): + return True + + return False + + +def extract_included_gradle_modules(settings_path: Path) -> list[str]: + """Extract module include entries from a Gradle settings file. + + Parameters + ---------- + settings_path : Path + Path to a ``settings.gradle`` or ``settings.gradle.kts`` file. + + Returns + ------- + list[str] + Ordered list of module paths declared by ``include`` statements. + """ + if not settings_path.is_file(): + return [] + + try: + lines = settings_path.read_text(encoding="utf-8", errors="ignore").splitlines() + except OSError as error: + logger.debug("Failed to read Gradle settings file %s: %s", str(settings_path), error) + return [] + + modules: list[str] = [] + quoted_value_re = re.compile(r"""['"]([^'"]+)['"]""") + for line in lines: + stripped = line.strip() + if not stripped.startswith("include"): + continue + modules.extend(match.group(1).strip() for match in quoted_value_re.finditer(stripped) if match.group(1).strip()) + return modules + + +def find_matching_gradle_module_build_configs(repo_root: Path, artifact_id: str) -> list[Path]: + """Find module build config files likely associated with the given artifact id. + + Parameters + ---------- + repo_root : Path + Root directory of the Gradle repository. + artifact_id : str + Expected artifact id. + + Returns + ------- + list[Path] + Candidate module build files (for example ``module/build.gradle``) + associated with the artifact id. 
+ """ + candidates: list[Path] = [] + seen: set[Path] = set() + for settings_name in ("settings.gradle", "settings.gradle.kts"): + settings_path = repo_root.joinpath(settings_name) + for module in extract_included_gradle_modules(settings_path): + module_path = module.strip().strip(":") + if not module_path: + continue + module_name = module_path.split(":")[-1] + if artifact_id != module_name and not artifact_id.endswith(f"-{module_name}"): + continue + module_dir = repo_root.joinpath(*module_path.split(":")) + for build_name in ("build.gradle", "build.gradle.kts"): + config_path = module_dir.joinpath(build_name) + if config_path.is_file() and config_path not in seen: + seen.add(config_path) + candidates.append(config_path) + + return candidates + + +def find_nearest_modules_gradle_config( + config_path: Path, + repo_root: str | Path, + *, + max_depth: int = 50, +) -> str | None: + """Find the nearest ancestor Gradle settings file that defines modules. + + Parameters + ---------- + config_path : Path + Path to the starting Gradle configuration file. + repo_root : str | Path + Repository root used to bound parent traversal and return a relative path. + max_depth : int, optional + Maximum number of parent-directory hops. Defaults to ``50``. + + Returns + ------- + str | None + Path to the nearest settings file relative to ``repo_root`` if it + contains ``include`` declarations. Returns ``None`` otherwise. 
+ """ + repo_root = Path(repo_root).resolve() + current_dir = config_path.parent.resolve() + depth = 0 + + while True: + for settings_name in ("settings.gradle", "settings.gradle.kts"): + settings_path = current_dir.joinpath(settings_name) + if gradle_settings_has_modules(settings_path): + try: + return str(settings_path.relative_to(repo_root)) + except ValueError: + return None + + if current_dir == repo_root: + return None + + depth += 1 + if depth > max_depth: + return None + + parent_dir = current_dir.parent + if parent_dir == current_dir: + return None + + current_dir = parent_dir diff --git a/src/macaron/parsers/pomparser.py b/src/macaron/parsers/pomparser.py index 4df1c9f3a..8c19768c9 100644 --- a/src/macaron/parsers/pomparser.py +++ b/src/macaron/parsers/pomparser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the parser for POM files.""" @@ -76,7 +76,7 @@ def extract_gav_from_pom(pom_file: Path) -> tuple[str | None, str | None, str | pom_root = parse_pom_string(pom_content) if pom_root is None: - logger.debug("Could not parse pom.xml: %s", pom_file.as_posix()) + logger.debug("Could not parse pom.xml: %s", str(pom_file)) return None, None, None def _find_child_text(parent, local_name: str) -> str | None: @@ -98,11 +98,11 @@ def _find_child_text(parent, local_name: str) -> str | None: group_id = _find_child_text(parent_elem, "groupId") if group_id is None: - logger.debug("Could not find groupId in pom.xml (project or parent): %s", pom_file.as_posix()) + logger.debug("Could not find groupId in pom.xml (project or parent): %s", str(pom_file)) if artifact_id is None: - logger.debug("Could not find artifactId in pom.xml: %s", pom_file.as_posix()) + logger.debug("Could not find artifactId in pom.xml: %s", str(pom_file)) if version is None: - logger.debug("Could not find version in pom.xml: %s", pom_file.as_posix()) + logger.debug("Could not find version in pom.xml: %s", str(pom_file)) return group_id, artifact_id, version @@ -114,9 +114,10 @@ def detect_parent_pom(pom_path: Path, repo_root: str | Path) -> str | None: file path using Maven semantics: * If `//` is present and non-empty, that path - (relative to the directory containing `pom.xml`) is used. + (relative to the directory containing `pom.xml`) is used. * Otherwise Maven defaults to ``../pom.xml``. - see https://maven.apache.org/ref/3.0/maven-model/maven.html#class_parent + + See https://maven.apache.org/ref/3.0/maven-model/maven.html#class_parent. If the resolved parent POM exists on disk and is within `repo_root`, this returns its path relative to `repo_root`. Otherwise returns ``None``. 
diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 9b98776e2..58fe6a9ac 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -1050,7 +1050,7 @@ def _determine_build_tools(self, analyze_ctx: AnalyzeContext, git_service: BaseG continue if build_tool.match_purl_type(analyze_ctx.component.type): - if build_tool.name not in ["pip", "maven", "hatch"]: + if build_tool.name not in ["pip", "maven", "hatch", "gradle"]: continue logger.info( "Checking if the repo %s uses build tool %s", @@ -1060,8 +1060,8 @@ def _determine_build_tools(self, analyze_ctx: AnalyzeContext, git_service: BaseG if build_tool_configs := build_tool.is_detected( analyze_ctx.component.repository.fs_path, - groupID=analyze_ctx.component.namespace, - artifactID=analyze_ctx.component.name, + group_id=analyze_ctx.component.namespace, + artifact_id=analyze_ctx.component.name, ): logger.info("The repo uses %s build tool.", build_tool.name) build_tool.set_build_tool_configurations(build_tool_configs) diff --git a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py index febd1f090..a4434af32 100644 --- a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py +++ b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py @@ -234,7 +234,7 @@ def __str__(self) -> str: @abstractmethod def is_detected( - self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + self, repo_path: str, group_id: str | None = None, artifact_id: str | None = None ) -> list[tuple[str, float, str | None, str | None]]: """ Return the list of build tools and their information used in the target repo. @@ -243,11 +243,11 @@ def is_detected( ---------- repo_path : str The path to the target repo. - groupID : str | None + group_id : str | None Optional Maven `groupId` used to refine detection (e.g., selecting the correct `pom.xml` when multiple are present). 
If ``None``, no filtering is applied. - artifactID : str | None + artifact_id : str | None Optional Maven `artifactId` used to refine detection. If ``None``, no filtering is applied. diff --git a/src/macaron/slsa_analyzer/build_tool/conda.py b/src/macaron/slsa_analyzer/build_tool/conda.py index 75a48d4c5..1418ae7bc 100644 --- a/src/macaron/slsa_analyzer/build_tool/conda.py +++ b/src/macaron/slsa_analyzer/build_tool/conda.py @@ -43,7 +43,7 @@ def load_defaults(self) -> None: self.ci_deploy_kws[item] = defaults.get_list("builder.conda.ci.deploy", item) def is_detected( - self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + self, repo_path: str, group_id: str | None = None, artifact_id: str | None = None ) -> list[tuple[str, float, str | None, str | None]]: """ Return the list of build tools and their information used in the target repo. @@ -52,11 +52,11 @@ def is_detected( ---------- repo_path : str The path to the target repo. - groupID : str | None + group_id : str | None Optional Maven `groupId` used to refine detection (e.g., selecting the correct `pom.xml` when multiple are present). If ``None``, no filtering is applied. - artifactID : str | None + artifact_id : str | None Optional Maven `artifactId` used to refine detection. If ``None``, no filtering is applied. 
diff --git a/src/macaron/slsa_analyzer/build_tool/docker.py b/src/macaron/slsa_analyzer/build_tool/docker.py index e6178942f..ecc4ae255 100644 --- a/src/macaron/slsa_analyzer/build_tool/docker.py +++ b/src/macaron/slsa_analyzer/build_tool/docker.py @@ -32,7 +32,7 @@ def load_defaults(self) -> None: self.ci_deploy_kws[item] = defaults.get_list("builder.docker.ci.deploy", item) def is_detected( - self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + self, repo_path: str, group_id: str | None = None, artifact_id: str | None = None ) -> list[tuple[str, float, str | None, str | None]]: """ Return the list of build tools and their information used in the target repo. @@ -41,11 +41,11 @@ def is_detected( ---------- repo_path : str The path to the target repo. - groupID : str | None + group_id : str | None Optional Maven `groupId` used to refine detection (e.g., selecting the correct `pom.xml` when multiple are present). If ``None``, no filtering is applied. - artifactID : str | None + artifact_id : str | None Optional Maven `artifactId` used to refine detection. If ``None``, no filtering is applied. diff --git a/src/macaron/slsa_analyzer/build_tool/flit.py b/src/macaron/slsa_analyzer/build_tool/flit.py index 9f1408ae2..4285bda8f 100644 --- a/src/macaron/slsa_analyzer/build_tool/flit.py +++ b/src/macaron/slsa_analyzer/build_tool/flit.py @@ -44,7 +44,7 @@ def load_defaults(self) -> None: self.ci_deploy_kws[item] = defaults.get_list("builder.flit.ci.deploy", item) def is_detected( - self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + self, repo_path: str, group_id: str | None = None, artifact_id: str | None = None ) -> list[tuple[str, float, str | None, str | None]]: """ Return the list of build tools and their information used in the target repo. @@ -53,11 +53,11 @@ def is_detected( ---------- repo_path : str The path to the target repo. 
- groupID : str | None + group_id : str | None Optional Maven `groupId` used to refine detection (e.g., selecting the correct `pom.xml` when multiple are present). If ``None``, no filtering is applied. - artifactID : str | None + artifact_id : str | None Optional Maven `artifactId` used to refine detection. If ``None``, no filtering is applied. diff --git a/src/macaron/slsa_analyzer/build_tool/go.py b/src/macaron/slsa_analyzer/build_tool/go.py index ae5c7997c..f304d1d45 100644 --- a/src/macaron/slsa_analyzer/build_tool/go.py +++ b/src/macaron/slsa_analyzer/build_tool/go.py @@ -31,7 +31,7 @@ def load_defaults(self) -> None: self.ci_deploy_kws[item] = defaults.get_list("builder.go.ci.deploy", item) def is_detected( - self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + self, repo_path: str, group_id: str | None = None, artifact_id: str | None = None ) -> list[tuple[str, float, str | None, str | None]]: """ Return the list of build tools and their information used in the target repo. @@ -40,11 +40,11 @@ def is_detected( ---------- repo_path : str The path to the target repo. - groupID : str | None + group_id : str | None Optional Maven `groupId` used to refine detection (e.g., selecting the correct `pom.xml` when multiple are present). If ``None``, no filtering is applied. - artifactID : str | None + artifact_id : str | None Optional Maven `artifactId` used to refine detection. If ``None``, no filtering is applied. diff --git a/src/macaron/slsa_analyzer/build_tool/gradle.py b/src/macaron/slsa_analyzer/build_tool/gradle.py index dcc00ce81..db41fafec 100644 --- a/src/macaron/slsa_analyzer/build_tool/gradle.py +++ b/src/macaron/slsa_analyzer/build_tool/gradle.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the Gradle class which inherits BaseBuildTool. @@ -8,8 +8,15 @@ import logging import subprocess # nosec B404 +from pathlib import Path from macaron.config.defaults import defaults +from macaron.parsers.gradleparser import ( + extract_gav_from_gradle_project, + extract_included_gradle_modules, + find_matching_gradle_module_build_configs, + find_nearest_modules_gradle_config, +) from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, file_exists from macaron.slsa_analyzer.build_tool.language import BuildLanguage @@ -55,7 +62,10 @@ def load_defaults(self) -> None: ) def is_detected( - self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + self, + repo_path: str, + group_id: str | None = None, + artifact_id: str | None = None, ) -> list[tuple[str, float, str | None, str | None]]: """ Return the list of build tools and their information used in the target repo. @@ -64,11 +74,11 @@ def is_detected( ---------- repo_path : str The path to the target repo. - groupID : str | None + group_id : str | None Optional Maven `groupId` used to refine detection (e.g., selecting the correct `pom.xml` when multiple are present). If ``None``, no filtering is applied. - artifactID : str | None + artifact_id : str | None Optional Maven `artifactId` used to refine detection. If ``None``, no filtering is applied. @@ -78,8 +88,128 @@ def is_detected( Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, where paths are relative to `repo_path` and `parent_pom` may be ``None``. """ + results: list[tuple[str, float, str | None, str | None]] = [] + confidence_score = 1.0 gradle_config_files = self.build_configs + self.entry_conf - return any(file_exists(repo_path, file, filters=self.path_filters) for file in gradle_config_files) + seen_paths: set[Path] = set() + + # Prioritize module-level build configs for multi-module artifacts. 
+ if artifact_id: + for module_config_path in find_matching_gradle_module_build_configs(Path(repo_path), artifact_id): + if module_config_path in seen_paths: + continue + if self.validate_gradle_file( + module_config_path, + group_id=group_id, + artifact_id=artifact_id, + repo_path=repo_path, + ): + entrypoint_gradle = find_nearest_modules_gradle_config(module_config_path, repo_path) + results.append( + (str(module_config_path.relative_to(repo_path)), confidence_score, None, entrypoint_gradle) + ) + seen_paths.add(module_config_path) + confidence_score = confidence_score / 2 + + for config_name in gradle_config_files: + config_path = file_exists( + repo_path, + config_name, + filters=self.path_filters, + predicate=self.validate_gradle_file, + group_id=group_id, + artifact_id=artifact_id, + ) + if config_path and config_path not in seen_paths: + entrypoint_gradle = find_nearest_modules_gradle_config(config_path, repo_path) + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, entrypoint_gradle)) + seen_paths.add(config_path) + confidence_score = confidence_score / 2 + + return results + + def validate_gradle_file( + self, + config_path: Path, + group_id: str | None = None, + artifact_id: str | None = None, + **kwargs: str | None, + ) -> bool: + """Validate a Gradle configuration path against expected G/A coordinates. + + Parameters + ---------- + config_path : Path + Path to a candidate Gradle configuration file. + group_id : str | None, optional + Expected group id. If ``None``, a fallback lookup is attempted from + ``kwargs["group_id"]``. + artifact_id : str | None, optional + Expected artifact id. If ``None``, a fallback lookup is attempted from + ``kwargs["artifact_id"]``. + kwargs : dict[str, str | None] + Additional keyword arguments propagated by the caller. + + Returns + ------- + bool + ``True`` if both expected values are present and match the extracted + Gradle group/artifact from the project; otherwise ``False``. 
+ """ + group_id = group_id or kwargs.get("group_id") + artifact_id = artifact_id or kwargs.get("artifact_id") + repo_path = kwargs.get("repo_path") + if group_id and artifact_id: + project_root = Path(repo_path) if repo_path else config_path.parent + ex_group_id, ex_artifact_id, _ = extract_gav_from_gradle_project(project_root) + if group_id != ex_group_id: + return False + return self._validate_artifact_id(project_root, artifact_id, ex_artifact_id) + return False + + def _validate_artifact_id( + self, + project_path: Path, + expected_artifact_id: str, + extracted_artifact_id: str | None, + ) -> bool: + """Validate the artifact id against direct or multi-module Gradle metadata. + + Parameters + ---------- + project_path : Path + Path to the candidate Gradle project directory. + expected_artifact_id : str + Artifact id requested by detection. + extracted_artifact_id : str | None + Directly extracted artifact id, if present. + + Returns + ------- + bool + ``True`` when the expected artifact id matches either a direct + project artifact id or a module name declared in Gradle settings. + """ + if extracted_artifact_id and expected_artifact_id == extracted_artifact_id: + return True + + # Accept common multi-module naming where artifact ids prefix module names + # (for example, micronaut-test-junit5 for module test-junit5). + module_names: set[str] = {project_path.name} + for settings_name in ("settings.gradle", "settings.gradle.kts"): + settings_path = project_path.joinpath(settings_name) + for module in extract_included_gradle_modules(settings_path): + module_names.add(module.strip().strip(":").split(":")[-1]) + + for module_name in module_names: + if not module_name: + continue + if expected_artifact_id == module_name: + return True + if expected_artifact_id.endswith(f"-{module_name}"): + return True + + return False def get_group_id(self, gradle_exec: str, project_path: str) -> str | None: """Get the group id of a Gradle project. 
diff --git a/src/macaron/slsa_analyzer/build_tool/hatch.py b/src/macaron/slsa_analyzer/build_tool/hatch.py index 641927e48..2df36bdda 100644 --- a/src/macaron/slsa_analyzer/build_tool/hatch.py +++ b/src/macaron/slsa_analyzer/build_tool/hatch.py @@ -44,7 +44,7 @@ def load_defaults(self) -> None: self.ci_deploy_kws[item] = defaults.get_list("builder.hatch.ci.deploy", item) def is_detected( - self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + self, repo_path: str, group_id: str | None = None, artifact_id: str | None = None ) -> list[tuple[str, float, str | None, str | None]]: """ Return the list of build tools and their information used in the target repo. @@ -53,11 +53,11 @@ def is_detected( ---------- repo_path : str The path to the target repo. - groupID : str | None + group_id : str | None Optional Maven `groupId` used to refine detection (e.g., selecting the correct `pom.xml` when multiple are present). If ``None``, no filtering is applied. - artifactID : str | None + artifact_id : str | None Optional Maven `artifactId` used to refine detection. If ``None``, no filtering is applied. diff --git a/src/macaron/slsa_analyzer/build_tool/maven.py b/src/macaron/slsa_analyzer/build_tool/maven.py index b350b0c85..e29dfa282 100644 --- a/src/macaron/slsa_analyzer/build_tool/maven.py +++ b/src/macaron/slsa_analyzer/build_tool/maven.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Maven class which inherits BaseBuildTool. 
@@ -44,7 +44,7 @@ def load_defaults(self) -> None: self.ci_deploy_kws[item] = defaults.get_list("builder.maven.ci.deploy", item) def is_detected( - self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + self, repo_path: str, group_id: str | None = None, artifact_id: str | None = None ) -> list[tuple[str, float, str | None, str | None]]: """ Return the list of build tools and their information used in the target repo. @@ -53,11 +53,11 @@ def is_detected( ---------- repo_path : str The path to the target repo. - groupID : str | None + group_id : str | None Optional Maven `groupId` used to refine detection (e.g., selecting the correct `pom.xml` when multiple are present). If ``None``, no filtering is applied. - artifactID : str | None + artifact_id : str | None Optional Maven `artifactId` used to refine detection. If ``None``, no filtering is applied. @@ -80,44 +80,44 @@ def is_detected( config_name, filters=self.path_filters, predicate=self.validate_pom_file, - groupID=groupID, - artifactID=artifactID, + group_id=group_id, + artifact_id=artifact_id, ) if config_path: entrypoint_pom = find_nearest_modules_pom(config_path, repo_path) results.append((str(config_path.relative_to(repo_path)), confidence_score, None, entrypoint_pom)) - confidence_score = confidence_score / 2 * 100 + confidence_score = confidence_score / 2 return results - def validate_pom_file(self, config_path=str, groupID: str | None = None, artifactID: str | None = None) -> bool: + def validate_pom_file(self, config_path=str, group_id: str | None = None, artifact_id: str | None = None) -> bool: """Validate a pom.xml file against an expected Maven G/A. This method is intended to be used as a lightweight filter when multiple candidate configuration files (e.g., `pom.xml`) are discovered. 
If both - `groupID` and `artifactID` are provided, the method extracts the + `group_id` and `artifact_id` are provided, the method extracts the ``(groupId, artifactId, version)`` from the POM at `config_path` and returns ``True`` only when the extracted group/artifact match the expected values. - If either `groupID` or `artifactID` is not provided, the method returns + If either `group_id` or `artifact_id` is not provided, the method returns ``False``. Parameters ---------- config_path : str Path to the candidate configuration file (typically a `pom.xml`). - groupID : str or None, optional + group_id : str or None, optional Expected Maven `groupId`. If ``None``, no match can be performed. - artifactID : str or None, optional + artifact_id : str or None, optional Expected Maven `artifactId`. If ``None``, no match can be performed. Returns ------- is_valid : bool - ``True`` if `groupID` and `artifactID` are provided and the POM at + ``True`` if `group_id` and `artifact_id` are provided and the POM at `config_path` contains matching values; otherwise ``False``. 
""" - if groupID and artifactID: - ex_groupId, ex_artifactId, _ = extract_gav_from_pom(config_path) - if groupID == ex_groupId and artifactID == ex_artifactId: + if group_id and artifact_id: + ex_group_id, ex_artifact_id, _ = extract_gav_from_pom(config_path) + if group_id == ex_group_id and artifact_id == ex_artifact_id: return True return False diff --git a/src/macaron/slsa_analyzer/build_tool/npm.py b/src/macaron/slsa_analyzer/build_tool/npm.py index daad8248d..21e0a5499 100644 --- a/src/macaron/slsa_analyzer/build_tool/npm.py +++ b/src/macaron/slsa_analyzer/build_tool/npm.py @@ -41,7 +41,7 @@ def load_defaults(self) -> None: self.ci_deploy_kws[item] = defaults.get_list("builder.npm.ci.deploy", item) def is_detected( - self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + self, repo_path: str, group_id: str | None = None, artifact_id: str | None = None ) -> list[tuple[str, float, str | None, str | None]]: """ Return the list of build tools and their information used in the target repo. @@ -50,11 +50,11 @@ def is_detected( ---------- repo_path : str The path to the target repo. - groupID : str | None + group_id : str | None Optional Maven `groupId` used to refine detection (e.g., selecting the correct `pom.xml` when multiple are present). If ``None``, no filtering is applied. - artifactID : str | None + artifact_id : str | None Optional Maven `artifactId` used to refine detection. If ``None``, no filtering is applied. 
diff --git a/src/macaron/slsa_analyzer/build_tool/pip.py b/src/macaron/slsa_analyzer/build_tool/pip.py index 2ccbbd8ed..9284e3af5 100644 --- a/src/macaron/slsa_analyzer/build_tool/pip.py +++ b/src/macaron/slsa_analyzer/build_tool/pip.py @@ -44,7 +44,7 @@ def load_defaults(self) -> None: self.ci_deploy_kws[item] = defaults.get_list("builder.pip.ci.deploy", item) def is_detected( - self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + self, repo_path: str, group_id: str | None = None, artifact_id: str | None = None ) -> list[tuple[str, float, str | None, str | None]]: """ Return the list of build tools and their information used in the target repo. @@ -53,11 +53,11 @@ def is_detected( ---------- repo_path : str The path to the target repo. - groupID : str | None + group_id : str | None Optional Maven `groupId` used to refine detection (e.g., selecting the correct `pom.xml` when multiple are present). If ``None``, no filtering is applied. - artifactID : str | None + artifact_id : str | None Optional Maven `artifactId` used to refine detection. If ``None``, no filtering is applied. @@ -84,7 +84,7 @@ def is_detected( else: # TODO: For other build configuration files, like setup.py, we need to improve the logic. 
results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) - confidence_score = confidence_score / 2 * 100 + confidence_score = confidence_score / 2 return results def get_dep_analyzer(self) -> DependencyAnalyzer: diff --git a/src/macaron/slsa_analyzer/build_tool/poetry.py b/src/macaron/slsa_analyzer/build_tool/poetry.py index a67360d51..425eb20fc 100644 --- a/src/macaron/slsa_analyzer/build_tool/poetry.py +++ b/src/macaron/slsa_analyzer/build_tool/poetry.py @@ -44,7 +44,7 @@ def load_defaults(self) -> None: self.ci_deploy_kws[item] = defaults.get_list("builder.poetry.ci.deploy", item) def is_detected( - self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + self, repo_path: str, group_id: str | None = None, artifact_id: str | None = None ) -> list[tuple[str, float, str | None, str | None]]: """ Return the list of build tools and their information used in the target repo. @@ -53,11 +53,11 @@ def is_detected( ---------- repo_path : str The path to the target repo. - groupID : str | None + group_id : str | None Optional Maven `groupId` used to refine detection (e.g., selecting the correct `pom.xml` when multiple are present). If ``None``, no filtering is applied. - artifactID : str | None + artifact_id : str | None Optional Maven `artifactId` used to refine detection. If ``None``, no filtering is applied. 
diff --git a/src/macaron/slsa_analyzer/build_tool/yarn.py b/src/macaron/slsa_analyzer/build_tool/yarn.py index 5bcfc5a07..192586136 100644 --- a/src/macaron/slsa_analyzer/build_tool/yarn.py +++ b/src/macaron/slsa_analyzer/build_tool/yarn.py @@ -39,7 +39,7 @@ def load_defaults(self) -> None: # self.ci_deploy_kws[item] = defaults.get_list("builder.yarn.ci.deploy", item) def is_detected( - self, repo_path: str, groupID: str | None = None, artifactID: str | None = None + self, repo_path: str, group_id: str | None = None, artifact_id: str | None = None ) -> list[tuple[str, float, str | None, str | None]]: """ Return the list of build tools and their information used in the target repo. @@ -48,11 +48,11 @@ def is_detected( ---------- repo_path : str The path to the target repo. - groupID : str | None + group_id : str | None Optional Maven `groupId` used to refine detection (e.g., selecting the correct `pom.xml` when multiple are present). If ``None``, no filtering is applied. - artifactID : str | None + artifact_id : str | None Optional Maven `artifactId` used to refine detection. If ``None``, no filtering is applied. diff --git a/tests/parsers/gradleparser/test_gradleparser.py b/tests/parsers/gradleparser/test_gradleparser.py new file mode 100644 index 000000000..950fb10d7 --- /dev/null +++ b/tests/parsers/gradleparser/test_gradleparser.py @@ -0,0 +1,108 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module tests the Gradle parser.""" + +from pathlib import Path + +from macaron.parsers.gradleparser import ( + extract_gav_from_gradle_project, + extract_included_gradle_modules, + find_matching_gradle_module_build_configs, + find_nearest_modules_gradle_config, +) + + +def test_extract_gav_from_gradle_project(tmp_path: Path) -> None: + """Test extracting Gradle coordinates from project files.""" + repo_path = tmp_path.joinpath("gradle_repo") + repo_path.mkdir() + repo_path.joinpath("gradle.properties").write_text("group=com.example\nversion=1.2.3\n") + repo_path.joinpath("settings.gradle").write_text("rootProject.name = 'demo-app'\n") + + assert extract_gav_from_gradle_project(repo_path) == ("com.example", "demo-app", "1.2.3") + + +def test_extract_gav_from_gradle_project_project_keys(tmp_path: Path) -> None: + """Test extracting Gradle coordinates from projectGroup/projectVersion keys.""" + repo_path = tmp_path.joinpath("gradle_repo") + repo_path.mkdir() + repo_path.joinpath("gradle.properties").write_text("projectGroup=io.micronaut.test\nprojectVersion=4.5.0\n") + repo_path.joinpath("settings.gradle").write_text("rootProject.name = 'test-parent'\n") + + assert extract_gav_from_gradle_project(repo_path) == ("io.micronaut.test", "test-parent", "4.5.0") + + +def test_extract_gav_from_gradle_project_project_group_id_key(tmp_path: Path) -> None: + """Test extracting Gradle coordinates from projectGroupId key.""" + repo_path = tmp_path.joinpath("gradle_repo") + repo_path.mkdir() + repo_path.joinpath("gradle.properties").write_text("projectGroupId=io.micronaut\nprojectVersion=4.2.3\n") + repo_path.joinpath("settings.gradle").write_text("rootProject.name = 'micronaut'\n") + + assert extract_gav_from_gradle_project(repo_path) == ("io.micronaut", "micronaut", "4.2.3") + + +def test_extract_gav_from_gradle_project_not_found(tmp_path: Path) -> None: + """Test extracting Gradle coordinates when no config values exist.""" + repo_path = 
tmp_path.joinpath("gradle_repo_empty") + repo_path.mkdir() + repo_path.joinpath("build.gradle").write_text("plugins { id 'java' }\n") + + assert extract_gav_from_gradle_project(repo_path) == (None, None, None) + + +def test_extract_included_gradle_modules(tmp_path: Path) -> None: + """Test extracting module names from include directives.""" + settings_file = tmp_path.joinpath("settings.gradle") + settings_file.write_text( + "\n".join( + [ + "include 'test-core'", + 'include "test-junit5"', + "include(':feature:service', ':feature:api')", + ] + ) + + "\n" + ) + + assert extract_included_gradle_modules(settings_file) == [ + "test-core", + "test-junit5", + ":feature:service", + ":feature:api", + ] + + +def test_find_matching_gradle_module_build_configs(tmp_path: Path) -> None: + """Test finding module build files based on artifact id suffix matching.""" + repo_path = tmp_path.joinpath("repo") + repo_path.joinpath("test-junit5").mkdir(parents=True) + repo_path.joinpath("settings.gradle").write_text("include 'test-core'\ninclude 'test-junit5'\n") + target_build = repo_path.joinpath("test-junit5", "build.gradle") + target_build.write_text("plugins { id 'java' }\n") + + assert find_matching_gradle_module_build_configs(repo_path, "micronaut-test-junit5") == [target_build] + + +def test_find_nearest_modules_gradle_config(tmp_path: Path) -> None: + """Test finding the nearest module-defining Gradle settings file.""" + repo_path = tmp_path.joinpath("repo") + submodule_path = repo_path.joinpath("project1") + submodule_path.mkdir(parents=True) + repo_path.joinpath("settings.gradle").write_text("include 'project1'\n") + submodule_build = submodule_path.joinpath("build.gradle") + submodule_build.write_text("plugins { id 'java' }\n") + + assert find_nearest_modules_gradle_config(submodule_build, repo_path) == "settings.gradle" + + +def test_find_nearest_modules_gradle_config_no_modules(tmp_path: Path) -> None: + """Test module settings lookup when no include declaration exists.""" 
+ repo_path = tmp_path.joinpath("repo") + repo_path.mkdir() + repo_path.joinpath("settings.gradle").write_text("rootProject.name = 'demo'\n") + build_path = repo_path.joinpath("build.gradle") + build_path.write_text("plugins { id 'java' }\n") + + assert find_nearest_modules_gradle_config(build_path, repo_path) is None diff --git a/tests/slsa_analyzer/build_tool/test_gradle.py b/tests/slsa_analyzer/build_tool/test_gradle.py index 6896159df..70510cff6 100644 --- a/tests/slsa_analyzer/build_tool/test_gradle.py +++ b/tests/slsa_analyzer/build_tool/test_gradle.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the Gradle build functions.""" @@ -21,24 +21,103 @@ Path(__file__).parent.joinpath("mock_repos", "gradle_repos", "no_gradle"), ], ) -def test_get_build_dirs(snapshot: list, gradle_tool: Gradle, mock_repo: Path) -> None: +def test_get_build_dirs(gradle_tool: Gradle, mock_repo: Path) -> None: """Test discovering build directories.""" - assert list(gradle_tool.get_build_dirs(str(mock_repo))) == snapshot + # Gradle detection now relies on group/artifact validation, which is not + # provided by get_build_dirs(). 
+ assert not list(gradle_tool.get_build_dirs(str(mock_repo))) @pytest.mark.parametrize( - ("mock_repo", "expected_value"), + ("mock_repo", "group_id", "artifact_id", "expected_value"), [ - (Path(__file__).parent.joinpath("mock_repos", "gradle_repos", "groovy_gradle"), True), - (Path(__file__).parent.joinpath("mock_repos", "gradle_repos", "kotlin_gradle"), True), - (Path(__file__).parent.joinpath("mock_repos", "gradle_repos", "no_gradle"), False), + ( + Path(__file__).parent.joinpath("mock_repos", "gradle_repos", "groovy_gradle"), + "mock_repos", + "project1", + [ + ("project1/build.gradle", 1.0, None, "settings.gradle"), + ("build.gradle", 50.0, None, "settings.gradle"), + ("settings.gradle", 2500.0, None, "settings.gradle"), + ], + ), + ( + Path(__file__).parent.joinpath("mock_repos", "gradle_repos", "kotlin_gradle"), + "mock_repos", + "project1", + [ + ("project1/build.gradle.kts", 1.0, None, "settings.gradle.kts"), + ("build.gradle.kts", 50.0, None, "settings.gradle.kts"), + ("settings.gradle.kts", 2500.0, None, "settings.gradle.kts"), + ], + ), + ( + Path(__file__).parent.joinpath("mock_repos", "gradle_repos", "no_gradle"), + "mock_repos", + "project1", + [], + ), ], ) -def test_gradle_build_tool(gradle_tool: Gradle, macaron_path: str, mock_repo: str, expected_value: bool) -> None: +def test_gradle_build_tool( + gradle_tool: Gradle, + macaron_path: str, + mock_repo: str, + group_id: str, + artifact_id: str, + expected_value: list[tuple[str, float, str | None, str | None]], +) -> None: """Test the Gradle build tool.""" base_dir = Path(__file__).parent ctx = prepare_repo_for_testing(mock_repo, macaron_path, base_dir) - assert gradle_tool.is_detected(ctx.component.repository.fs_path) == expected_value + assert gradle_tool.is_detected(ctx.component.repository.fs_path, group_id=group_id, artifact_id=artifact_id) == ( + expected_value + ) + + +def test_gradle_build_tool_with_group_artifact_validation(gradle_tool: Gradle, tmp_path: Path) -> None: + """Test Gradle 
detection with explicit group/artifact validation.""" + gradle_repo = tmp_path.joinpath("gradle_repo") + gradle_repo.mkdir() + gradle_repo.joinpath("build.gradle").write_text("group = 'com.example'") + gradle_repo.joinpath("settings.gradle").write_text("rootProject.name = 'sample-app'\ninclude 'project1'\n") + + detected = gradle_tool.is_detected(str(gradle_repo), group_id="com.example", artifact_id="sample-app") + assert detected + assert {item[0] for item in detected} == {"build.gradle", "settings.gradle"} + assert {item[3] for item in detected} == {"settings.gradle"} + + not_detected = gradle_tool.is_detected(str(gradle_repo), group_id="com.example", artifact_id="another-app") + assert not not_detected + + +def test_gradle_build_tool_with_project_group_and_multimodule_name(gradle_tool: Gradle, tmp_path: Path) -> None: + """Test Gradle detection with projectGroup and prefixed multimodule artifact names.""" + gradle_repo = tmp_path.joinpath("gradle_repo") + gradle_repo.joinpath("test-junit5").mkdir(parents=True) + gradle_repo.joinpath("build.gradle").write_text("plugins { id 'java' }\n") + gradle_repo.joinpath("test-junit5", "build.gradle").write_text("plugins { id 'java' }\n") + gradle_repo.joinpath("settings.gradle").write_text("rootProject.name = 'test-parent'\ninclude 'test-junit5'\n") + gradle_repo.joinpath("gradle.properties").write_text("projectGroup=io.micronaut.test\nprojectVersion=4.5.0\n") + + detected = gradle_tool.is_detected( + str(gradle_repo), group_id="io.micronaut.test", artifact_id="micronaut-test-junit5" + ) + assert detected + assert detected[0][0] == "test-junit5/build.gradle" + assert detected[0][3] == "settings.gradle" + + +def test_gradle_build_tool_with_repo_namespace_group(gradle_tool: Gradle, tmp_path: Path) -> None: + """Test Gradle detection when group input is a repository namespace.""" + gradle_repo = tmp_path.joinpath("micronaut-test") + gradle_repo.mkdir(parents=True) + gradle_repo.joinpath("build.gradle").write_text("plugins { id 
'java' }\n") + gradle_repo.joinpath("settings.gradle").write_text("rootProject.name = 'test-parent'\ninclude 'test-junit5'\n") + gradle_repo.joinpath("gradle.properties").write_text("projectGroup=io.micronaut.test\n") + + detected = gradle_tool.is_detected(str(gradle_repo), group_id="micronaut-projects", artifact_id="micronaut-test") + assert detected @pytest.mark.parametrize( diff --git a/tests/slsa_analyzer/build_tool/test_maven.py b/tests/slsa_analyzer/build_tool/test_maven.py index c67f99298..48cb04c08 100644 --- a/tests/slsa_analyzer/build_tool/test_maven.py +++ b/tests/slsa_analyzer/build_tool/test_maven.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the Maven build functions.""" @@ -21,24 +21,78 @@ Path(__file__).parent.joinpath("mock_repos", "maven_repos", "no_pom"), ], ) -def test_get_build_dirs(snapshot: list, maven_tool: Maven, mock_repo: Path) -> None: +def test_get_build_dirs(maven_tool: Maven, mock_repo: Path) -> None: """Test discovering build directories.""" - assert list(maven_tool.get_build_dirs(str(mock_repo))) == snapshot + # Maven detection now relies on group/artifact validation, which is not + # provided by get_build_dirs(). 
+ assert not list(maven_tool.get_build_dirs(str(mock_repo))) @pytest.mark.parametrize( - ("mock_repo", "expected_value"), + ("mock_repo", "group_id", "artifact_id", "expected_value"), [ - (Path(__file__).parent.joinpath("mock_repos", "maven_repos", "has_parent_pom"), True), - (Path(__file__).parent.joinpath("mock_repos", "maven_repos", "no_parent_pom"), True), - (Path(__file__).parent.joinpath("mock_repos", "maven_repos", "no_pom"), False), + ( + Path(__file__).parent.joinpath("mock_repos", "maven_repos", "has_parent_pom"), + "com.mock_repos.has_parent_pom", + "sub_module_1", + [("sub_module_1/pom.xml", 1.0, None, "pom.xml")], + ), + ( + Path(__file__).parent.joinpath("mock_repos", "maven_repos", "no_parent_pom"), + "com.mock_repos.has_parent_pom", + "sub_module_1", + [], + ), + ( + Path(__file__).parent.joinpath("mock_repos", "maven_repos", "no_pom"), + "com.mock_repos.has_parent_pom", + "sub_module_1", + [], + ), ], ) -def test_maven_build_tool(maven_tool: Maven, macaron_path: str, mock_repo: str, expected_value: bool) -> None: +def test_maven_build_tool( + maven_tool: Maven, + macaron_path: str, + mock_repo: str, + group_id: str, + artifact_id: str, + expected_value: list[tuple[str, float, str | None, str | None]], +) -> None: """Test the Maven build tool.""" base_dir = Path(__file__).parent ctx = prepare_repo_for_testing(mock_repo, macaron_path, base_dir) - assert maven_tool.is_detected(ctx.component.repository.fs_path) == expected_value + assert ( + maven_tool.is_detected( + ctx.component.repository.fs_path, + group_id=group_id, + artifact_id=artifact_id, + ) + == expected_value + ) + + +def test_maven_build_tool_with_group_artifact_validation(maven_tool: Maven, macaron_path: str) -> None: + """Test Maven detection with explicit group/artifact validation.""" + base_dir = Path(__file__).parent + mock_repo = Path(__file__).parent.joinpath("mock_repos", "maven_repos", "has_parent_pom") + ctx = prepare_repo_for_testing(str(mock_repo), macaron_path, base_dir) + + 
detected = maven_tool.is_detected( + ctx.component.repository.fs_path, + group_id="com.mock_repos.has_parent_pom", + artifact_id="sub_module_1", + ) + assert detected + assert {item[0] for item in detected} == {"sub_module_1/pom.xml"} + assert {item[3] for item in detected} == {"pom.xml"} + + not_detected = maven_tool.is_detected( + ctx.component.repository.fs_path, + group_id="com.mock_repos.has_parent_pom", + artifact_id="does-not-exist", + ) + assert not not_detected @pytest.mark.parametrize( From f0d5e0a709db9cc64f09405fb918f21ae57658a4 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Sun, 22 Mar 2026 21:21:12 +1000 Subject: [PATCH 5/7] feat: add support for all build tools Signed-off-by: behnazh-w --- pyproject.toml | 4 + .../build_spec_generator/common_spec/core.py | 78 +++++++++++++------ .../common_spec/maven_spec.py | 2 +- .../common_spec/pypi_spec.py | 5 +- .../reproducible_central.py | 15 ++-- src/macaron/parsers/pomparser.py | 12 +-- src/macaron/slsa_analyzer/analyzer.py | 2 - .../build_tool/base_build_tool.py | 9 +-- src/macaron/slsa_analyzer/build_tool/conda.py | 10 ++- .../slsa_analyzer/build_tool/docker.py | 10 ++- src/macaron/slsa_analyzer/build_tool/flit.py | 19 +++-- src/macaron/slsa_analyzer/build_tool/go.py | 10 ++- .../slsa_analyzer/build_tool/gradle.py | 4 +- src/macaron/slsa_analyzer/build_tool/maven.py | 7 +- src/macaron/slsa_analyzer/build_tool/npm.py | 10 ++- .../slsa_analyzer/build_tool/poetry.py | 22 ++++-- src/macaron/slsa_analyzer/build_tool/yarn.py | 10 ++- .../common_spec/test_core.py | 31 ++++++-- .../dockerfile/test_dockerfile_output.py | 9 ++- .../dockerfile/test_pypi_dockerfile_output.py | 9 ++- .../test_reproducible_central.py | 30 ++++++- .../computer-k8s/expected_default.buildspec | 7 +- .../expected_default.buildspec | 6 +- .../expected_default.buildspec | 6 +- .../pypi_toga/expected_default.buildspec | 6 +- .../expected_default.buildspec | 6 +- tests/slsa_analyzer/build_tool/test_conda.py | 13 +++- 
tests/slsa_analyzer/build_tool/test_docker.py | 31 ++++++-- tests/slsa_analyzer/build_tool/test_flit.py | 16 +++- tests/slsa_analyzer/build_tool/test_go.py | 13 +++- tests/slsa_analyzer/build_tool/test_hatch.py | 16 +++- tests/slsa_analyzer/build_tool/test_npm.py | 26 +++++-- tests/slsa_analyzer/build_tool/test_poetry.py | 21 +++-- tests/slsa_analyzer/build_tool/test_yarn.py | 26 +++++-- 34 files changed, 357 insertions(+), 144 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ede72bdb5..67794b851 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -224,6 +224,9 @@ load-plugins = [ "pylint.extensions.set_membership", "pylint.extensions.typing", ] +# Disable unsubscriptable-object because Pylint has false positives and this check +# overlaps with mypy's checks. Enable the check when the related issue is resolved: +# https://github.com/pylint-dev/pylint/issues/9549 disable = [ "fixme", "line-too-long", # Replaced by Flake8 Bugbear B950 check. @@ -242,6 +245,7 @@ disable = [ "too-many-return-statements", "too-many-statements", "duplicate-code", + "unsubscriptable-object", ] [tool.pylint.MISCELLANEOUS] diff --git a/src/macaron/build_spec_generator/common_spec/core.py b/src/macaron/build_spec_generator/common_spec/core.py index 7cb83a7cf..e77afa21c 100644 --- a/src/macaron/build_spec_generator/common_spec/core.py +++ b/src/macaron/build_spec_generator/common_spec/core.py @@ -23,6 +23,7 @@ lookup_latest_component, ) from macaron.errors import GenerateBuildSpecError, QueryMacaronDatabaseError +from macaron.json_tools import json_extract from macaron.slsa_analyzer.checks.build_tool_check import BuildToolFacts logger: logging.Logger = logging.getLogger(__name__) @@ -122,7 +123,7 @@ def compose_shell_commands(cmds_sequence: list[list[str]]) -> str: def get_macaron_build_tools( build_tool_facts: Sequence[BuildToolFacts], target_language: str -) -> dict[str, dict[str, str | None]] | None: +) -> dict[str, dict[str, float | str | None]] | None: """ Retrieve the 
Macaron build tool names for supported projects from the database facts. @@ -142,21 +143,26 @@ def get_macaron_build_tools( The corresponding Macaron build tool name, config_path, confidence score, optional build tool version, and optional root config path if present. """ - build_tools = {} + build_tools: dict[str, dict[str, float | str | None]] = {} for fact in build_tool_facts: if fact.language.lower() == target_language: try: tool_name = MacaronBuildToolName(fact.build_tool_name).value - build_tool_info = { + current_confidence: float = float(fact.confidence) + build_tool_info: dict[str, float | str | None] = { "build_config_path": fact.build_config_path, - "confidence_score": fact.confidence, + "confidence_score": current_confidence, "build_tool_version": fact.build_tool_version, "root_build_config_path": fact.root_build_config_path, } existing_build_tool_info = build_tools.get(tool_name) + existing_confidence = ( + existing_build_tool_info.get("confidence_score") if existing_build_tool_info is not None else None + ) if ( existing_build_tool_info is None - or build_tool_info["confidence_score"] > existing_build_tool_info["confidence_score"] + or not isinstance(existing_confidence, float) + or current_confidence > existing_confidence ): build_tools[tool_name] = build_tool_info except ValueError: @@ -166,7 +172,7 @@ def get_macaron_build_tools( def get_build_tools( component_id: int, session: sqlalchemy.orm.Session, target_language: str -) -> dict[str, dict[str, float, str | None]] | None: +) -> dict[str, dict[str, float | str | None]] | None: """Retrieve the Macaron build tool names for a given component. Queries the database for build tool facts associated with the specified component ID. 
@@ -295,6 +301,36 @@ def get_language_version( return None +def _build_spec_build_command( + build_tools: dict[str, dict[str, float | str | None]], + build_tool_name: str, + command: list[str], +) -> SpecBuildCommandDict | None: + """Build a single SpecBuildCommandDict entry for a given build tool.""" + build_config_path = json_extract(build_tools, [build_tool_name, "build_config_path"], str) + # build_config_path is a required field. + if build_config_path is None: + return None + + root_build_config_path = json_extract(build_tools, [build_tool_name, "root_build_config_path"], str) + build_tool_version = json_extract(build_tools, [build_tool_name, "build_tool_version"], str) + confidence_score = json_extract(build_tools, [build_tool_name, "confidence_score"], float) + if confidence_score is None: + return None + + build_spec = SpecBuildCommandDict( + build_tool=build_tool_name, + command=command, + build_config_path=build_config_path, + confidence_score=confidence_score, + ) + if root_build_config_path is not None: + build_spec["root_build_config_path"] = root_build_config_path + if build_tool_version is not None: + build_spec["build_tool_version"] = build_tool_version + return build_spec + + def gen_generic_build_spec( purl: PackageURL, session: sqlalchemy.orm.Session, @@ -384,30 +420,24 @@ def gen_generic_build_spec( db_build_command_info or "Cannot find any.", ) lang_version = get_language_version(db_build_command_info) if db_build_command_info else "" - spec_build_commad_info_list.append( - SpecBuildCommandDict( - build_tool=db_build_command_info.build_tool_name, - command=db_build_command_info.command, - build_config_path=build_tools[db_build_command_info.build_tool_name]["build_config_path"], - root_build_config_path=build_tools[db_build_command_info.build_tool_name]["root_build_config_path"], - build_config_version=build_tools[db_build_command_info.build_tool_name]["build_tool_version"], - 
confidence_score=build_tools[db_build_command_info.build_tool_name]["confidence_score"], - ) + build_spec_command = _build_spec_build_command( + build_tools=build_tools, + build_tool_name=db_build_command_info.build_tool_name, + command=db_build_command_info.command, ) + if build_spec_command is not None: + spec_build_commad_info_list.append(build_spec_command) # If no build commands were found from the analyze phase, add default commands for the identified build tools. if not db_build_command_info_list: for build_tool_name in build_tool_names: - spec_build_commad_info_list.append( - SpecBuildCommandDict( - build_tool=build_tool_name, - command=[], - build_config_path=build_tools[build_tool_name]["build_config_path"], - root_build_config_path=build_tools[build_tool_name]["root_build_config_path"], - build_config_version=build_tools[build_tool_name]["build_tool_version"], - confidence_score=build_tools[build_tool_name]["confidence_score"], - ) + build_spec_command = _build_spec_build_command( + build_tools=build_tools, + build_tool_name=build_tool_name, + command=[], ) + if build_spec_command is not None: + spec_build_commad_info_list.append(build_spec_command) base_build_spec_dict = BaseBuildSpecDict( { diff --git a/src/macaron/build_spec_generator/common_spec/maven_spec.py b/src/macaron/build_spec_generator/common_spec/maven_spec.py index 26c2395b5..c91f3d7b5 100644 --- a/src/macaron/build_spec_generator/common_spec/maven_spec.py +++ b/src/macaron/build_spec_generator/common_spec/maven_spec.py @@ -96,7 +96,7 @@ def resolve_fields(self, purl: PackageURL) -> None: # Resolve and patch build commands. 
for build_cmd_spec in self.data["build_commands"]: - if build_cmd_spec["command"] == None: + if not build_cmd_spec["command"]: self.set_default_build_commands(build_cmd_spec) for build_command_info in self.data["build_commands"]: diff --git a/src/macaron/build_spec_generator/common_spec/pypi_spec.py b/src/macaron/build_spec_generator/common_spec/pypi_spec.py index c6f4c8ba1..c08bcecc1 100644 --- a/src/macaron/build_spec_generator/common_spec/pypi_spec.py +++ b/src/macaron/build_spec_generator/common_spec/pypi_spec.py @@ -62,7 +62,7 @@ def set_default_build_commands( # "python -m flit.tomlify" build_cmd_spec["command"] = "flit build".split() case "hatch": - build_cmd_spec["command"] = command = "hatch build".split() + build_cmd_spec["command"] = "hatch build".split() case _: logger.debug( "There is no default build command available for the build tools %s.", @@ -92,7 +92,6 @@ def resolve_fields(self, purl: PackageURL) -> None: upstream_artifacts: dict[str, list[str]] = {} pypi_package_json = pypi_registry.find_or_create_pypi_asset(purl.name, purl.version, registry_info) - patched_build_commands: list[SpecBuildCommandDict] = [] build_backends_set: set[str] = set() parsed_build_requires: dict[str, str] = {} sdist_build_requires: dict[str, str] = {} @@ -257,6 +256,8 @@ def resolve_fields(self, purl: PackageURL) -> None: if not self.data["has_binaries"]: for build_cmd_spec in self.data["build_commands"]: self.set_default_build_commands(build_cmd_spec) + else: + self.data["build_commands"] = [] self.data["upstream_artifacts"] = upstream_artifacts def add_parsed_requirement(self, build_requirements: dict[str, str], requirement: str) -> None: diff --git a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py index 38fb1dec7..76d7b0eea 100644 --- a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py +++ 
b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py @@ -88,10 +88,13 @@ def gen_reproducible_central_build_spec(build_spec: BaseBuildSpecDict) -> str | # Add -Dmaven.test.skip for Maven builds. # TODO: Use the build tool associated with the build command once # https://github.com/oracle/macaron/issues/1300 is closed. - adapted_build_commands = [ - cmd[:1] + ["-Dmaven.test.skip=true"] + cmd[1:] if ReproducibleCentralBuildTool.MAVEN in cmd[0] else cmd - for cmd in build_spec["build_commands"] - ] + adapted_build_commands: list[list[str]] = [] + for build_command in build_spec["build_commands"]: + command = build_command["command"] + if command and ReproducibleCentralBuildTool.MAVEN.value in command[0]: + adapted_build_commands.append(command[:1] + ["-Dmaven.test.skip=true"] + command[1:]) + else: + adapted_build_commands.append(command) template_format_values: dict[str, str] = { "macaron_version": importlib_metadata.version("macaron"), @@ -104,9 +107,7 @@ def gen_reproducible_central_build_spec(build_spec: BaseBuildSpecDict) -> str | "newline": build_spec["newline"], "buildinfo": f"target/{build_spec['artifact_id']}-{build_spec['version']}.buildinfo", "jdk": build_spec["language_version"][0], - "command": compose_shell_commands( - [b_info["command"] for b_info in adapted_build_commands["build_commands"] if b_info["command"]] - ), + "command": compose_shell_commands([command for command in adapted_build_commands if command]), } return STRING_TEMPLATE.format_map(template_format_values) diff --git a/src/macaron/parsers/pomparser.py b/src/macaron/parsers/pomparser.py index 8c19768c9..ce98ae92a 100644 --- a/src/macaron/parsers/pomparser.py +++ b/src/macaron/parsers/pomparser.py @@ -79,7 +79,7 @@ def extract_gav_from_pom(pom_file: Path) -> tuple[str | None, str | None, str | logger.debug("Could not parse pom.xml: %s", str(pom_file)) return None, None, None - def _find_child_text(parent, local_name: str) -> str | None: + def 
_find_child_text(parent: Element, local_name: str) -> str | None: # The closing curly brace represents the end of the XML namespace. elem = next((ch for ch in parent if ch.tag.endswith("}" + local_name)), None) if elem is None or not elem.text: @@ -146,7 +146,7 @@ def detect_parent_pom(pom_path: Path, repo_root: str | Path) -> str | None: if pom_root is None: return None - def _find_child(elem, local_name: str): + def _find_child(elem: Element, local_name: str) -> Element | None: return next((ch for ch in elem if ch.tag.endswith("}" + local_name)), None) parent_elem = _find_child(pom_root, "parent") @@ -200,18 +200,14 @@ def pom_has_modules(pom_path: Path) -> bool: if pom_root is None: return False - def _find_child(elem, local_name: str): + def _find_child(elem: Element, local_name: str) -> Element | None: return next((ch for ch in elem if ch.tag.endswith("}" + local_name)), None) modules_elem = _find_child(pom_root, "modules") if modules_elem is None: return False - for ch in modules_elem: - if ch.tag.endswith("}module") and ch.text and ch.text.strip(): - return True - - return False + return any(ch.tag.endswith("}module") and ch.text and ch.text.strip() for ch in modules_elem) def find_nearest_modules_pom( diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 58fe6a9ac..143f4b8ca 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -1050,8 +1050,6 @@ def _determine_build_tools(self, analyze_ctx: AnalyzeContext, git_service: BaseG continue if build_tool.match_purl_type(analyze_ctx.component.type): - if build_tool.name not in ["pip", "maven", "hatch", "gradle"]: - continue logger.info( "Checking if the repo %s uses build tool %s", analyze_ctx.component.repository.complete_name, diff --git a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py index a4434af32..9bb572527 100644 --- 
a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py +++ b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py @@ -95,8 +95,7 @@ def file_exists( path: str, file_name: str, filters: list[str] | None = None, - predicate: Callable[[Path, Any], bool] | None = None, - *predicate_args: Any, + predicate: Callable[..., bool] | None = None, **predicate_kwargs: Any, ) -> Path | None: """Search recursively for the first matching file, optionally validating it with a predicate. @@ -119,9 +118,7 @@ def file_exists( Optional callable used to validate a matched file. If provided, a file is accepted only if ``predicate(candidate_path, *predicate_args, **predicate_kwargs)`` returns ``True``. - *predicate_args : Any - Positional arguments forwarded to `predicate`. - **predicate_kwargs : Any + predicate_kwargs : Any Keyword arguments forwarded to `predicate`. Returns @@ -136,7 +133,7 @@ def file_exists( root_dir = Path(path) def _accepted(p: Path) -> bool: - return True if predicate is None else bool(predicate(p, *predicate_args, **predicate_kwargs)) + return True if predicate is None else bool(predicate(p, **predicate_kwargs)) # Check for file directly at root. if target_path := find_first_matching_file(root_dir, file_name): diff --git a/src/macaron/slsa_analyzer/build_tool/conda.py b/src/macaron/slsa_analyzer/build_tool/conda.py index 1418ae7bc..97ba138e6 100644 --- a/src/macaron/slsa_analyzer/build_tool/conda.py +++ b/src/macaron/slsa_analyzer/build_tool/conda.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Conda class which inherits BaseBuildTool. 
@@ -66,7 +66,13 @@ def is_detected( Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, where paths are relative to `repo_path` and `parent_pom` may be ``None``. """ - return any(file_exists(repo_path, file, filters=self.path_filters) for file in self.build_configs) + results: list[tuple[str, float, str | None, str | None]] = [] + confidence_score = 1.0 + for config_name in self.build_configs: + if config_path := file_exists(repo_path, config_name, filters=self.path_filters): + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) + confidence_score = confidence_score / 2 + return results def get_dep_analyzer(self) -> DependencyAnalyzer: """Create a DependencyAnalyzer for the build tool. diff --git a/src/macaron/slsa_analyzer/build_tool/docker.py b/src/macaron/slsa_analyzer/build_tool/docker.py index ecc4ae255..f8ed46a77 100644 --- a/src/macaron/slsa_analyzer/build_tool/docker.py +++ b/src/macaron/slsa_analyzer/build_tool/docker.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Docker class which inherits BaseBuildTool. @@ -55,4 +55,10 @@ def is_detected( Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, where paths are relative to `repo_path` and `parent_pom` may be ``None``. 
""" - return any(file_exists(repo_path, file, filters=self.path_filters) for file in self.build_configs) + results: list[tuple[str, float, str | None, str | None]] = [] + confidence_score = 1.0 + for config_name in self.build_configs: + if config_path := file_exists(repo_path, config_name, filters=self.path_filters): + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) + confidence_score = confidence_score / 2 + return results diff --git a/src/macaron/slsa_analyzer/build_tool/flit.py b/src/macaron/slsa_analyzer/build_tool/flit.py index 4285bda8f..68531ef93 100644 --- a/src/macaron/slsa_analyzer/build_tool/flit.py +++ b/src/macaron/slsa_analyzer/build_tool/flit.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Flit class which inherits BaseBuildTool. @@ -67,19 +67,24 @@ def is_detected( Tuples of ``(config_path, confidence_score, build_tool_version, parent_pom)``, where paths are relative to `repo_path` and `parent_pom` may be ``None``. """ + results: list[tuple[str, float, str | None, str | None]] = [] + confidence_score = 1.0 for config_name in self.build_configs: if config_path := file_exists(repo_path, config_name, filters=self.path_filters): if os.path.basename(config_path) == "pyproject.toml": if pyproject.contains_build_tool("flit", config_path): - return True + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) # Check the build-system section. 
- for tool in self.build_requires + self.build_backend: - if pyproject.build_system_contains_tool(tool, config_path): - return True + else: + for tool in self.build_requires + self.build_backend: + if pyproject.build_system_contains_tool(tool, config_path): + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) + break else: # For other build configuration files, the presence of the file alone is sufficient. - return True - return False + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) + confidence_score = confidence_score / 2 + return results def get_dep_analyzer(self) -> DependencyAnalyzer: """Create a DependencyAnalyzer for the build tool. diff --git a/src/macaron/slsa_analyzer/build_tool/go.py b/src/macaron/slsa_analyzer/build_tool/go.py index f304d1d45..3a09c36c4 100644 --- a/src/macaron/slsa_analyzer/build_tool/go.py +++ b/src/macaron/slsa_analyzer/build_tool/go.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Go class which inherits BaseBuildTool. @@ -55,4 +55,10 @@ def is_detected( where paths are relative to `repo_path` and `parent_pom` may be ``None``. 
""" go_config_files = self.build_configs + self.entry_conf - return any(file_exists(repo_path, file, filters=self.path_filters) for file in go_config_files) + results: list[tuple[str, float, str | None, str | None]] = [] + confidence_score = 1.0 + for config_name in go_config_files: + if config_path := file_exists(repo_path, config_name, filters=self.path_filters): + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) + confidence_score = confidence_score / 2 + return results diff --git a/src/macaron/slsa_analyzer/build_tool/gradle.py b/src/macaron/slsa_analyzer/build_tool/gradle.py index db41fafec..e34c269b6 100644 --- a/src/macaron/slsa_analyzer/build_tool/gradle.py +++ b/src/macaron/slsa_analyzer/build_tool/gradle.py @@ -112,13 +112,13 @@ def is_detected( confidence_score = confidence_score / 2 for config_name in gradle_config_files: + predicate_kwargs = {"group_id": group_id, "artifact_id": artifact_id} config_path = file_exists( repo_path, config_name, filters=self.path_filters, predicate=self.validate_gradle_file, - group_id=group_id, - artifact_id=artifact_id, + **predicate_kwargs, ) if config_path and config_path not in seen_paths: entrypoint_gradle = find_nearest_modules_gradle_config(config_path, repo_path) diff --git a/src/macaron/slsa_analyzer/build_tool/maven.py b/src/macaron/slsa_analyzer/build_tool/maven.py index e29dfa282..0817dd5f9 100644 --- a/src/macaron/slsa_analyzer/build_tool/maven.py +++ b/src/macaron/slsa_analyzer/build_tool/maven.py @@ -8,6 +8,7 @@ import logging import os +from pathlib import Path from macaron.config.defaults import defaults from macaron.config.global_config import global_config @@ -75,13 +76,13 @@ def is_detected( return [] for config_name in self.build_configs: + predicate_kwargs = {"group_id": group_id, "artifact_id": artifact_id} config_path = file_exists( repo_path, config_name, filters=self.path_filters, predicate=self.validate_pom_file, - group_id=group_id, - 
artifact_id=artifact_id, + **predicate_kwargs, ) if config_path: entrypoint_pom = find_nearest_modules_pom(config_path, repo_path) @@ -90,7 +91,7 @@ def is_detected( return results - def validate_pom_file(self, config_path=str, group_id: str | None = None, artifact_id: str | None = None) -> bool: + def validate_pom_file(self, config_path: Path, group_id: str | None = None, artifact_id: str | None = None) -> bool: """Validate a pom.xml file against an expected Maven G/A. This method is intended to be used as a lightweight filter when multiple diff --git a/src/macaron/slsa_analyzer/build_tool/npm.py b/src/macaron/slsa_analyzer/build_tool/npm.py index 21e0a5499..1f7392729 100644 --- a/src/macaron/slsa_analyzer/build_tool/npm.py +++ b/src/macaron/slsa_analyzer/build_tool/npm.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the NPM class which inherits BaseBuildTool. 
@@ -68,7 +68,13 @@ def is_detected( # cases like .npmrc existing but not package-lock.json and whether # they would still count as "detected" npm_config_files = self.build_configs + self.package_lock + self.entry_conf - return any(file_exists(repo_path, file, filters=self.path_filters) for file in npm_config_files) + results: list[tuple[str, float, str | None, str | None]] = [] + confidence_score = 1.0 + for config_name in npm_config_files: + if config_path := file_exists(repo_path, config_name, filters=self.path_filters): + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) + confidence_score = confidence_score / 2 + return results def is_deploy_command( self, cmd: BuildToolCommand, excluded_configs: list[str] | None = None, provenance_workflow: str | None = None diff --git a/src/macaron/slsa_analyzer/build_tool/poetry.py b/src/macaron/slsa_analyzer/build_tool/poetry.py index 425eb20fc..38f6001fa 100644 --- a/src/macaron/slsa_analyzer/build_tool/poetry.py +++ b/src/macaron/slsa_analyzer/build_tool/poetry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Poetry class which inherits BaseBuildTool. 
@@ -73,19 +73,25 @@ def is_detected( package_lock_exists = file break + results: list[tuple[str, float, str | None, str | None]] = [] + confidence_score = 1.0 file_paths = (file_exists(repo_path, file, filters=self.path_filters) for file in self.build_configs) for config_path in file_paths: if config_path and os.path.basename(config_path) == "pyproject.toml": if package_lock_exists: - return True - if pyproject.contains_build_tool("poetry", config_path): - return True + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) + elif pyproject.contains_build_tool("poetry", config_path): + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) # Check the build-system section. - for tool in self.build_requires + self.build_backend: - if pyproject.build_system_contains_tool(tool, config_path): - return True + else: + for tool in self.build_requires + self.build_backend: + if pyproject.build_system_contains_tool(tool, config_path): + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) + break - return False + confidence_score = confidence_score / 2 + + return results def get_dep_analyzer(self) -> DependencyAnalyzer: """Create a DependencyAnalyzer for the build tool. diff --git a/src/macaron/slsa_analyzer/build_tool/yarn.py b/src/macaron/slsa_analyzer/build_tool/yarn.py index 192586136..478dc8e41 100644 --- a/src/macaron/slsa_analyzer/build_tool/yarn.py +++ b/src/macaron/slsa_analyzer/build_tool/yarn.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Yarn class which inherits BaseBuildTool. 
@@ -66,7 +66,13 @@ def is_detected( # cases like .yarnrc existing but not package-lock.json and whether # they would still count as "detected" yarn_config_files = self.build_configs + self.package_lock + self.entry_conf - return any(file_exists(repo_path, file, filters=self.path_filters) for file in yarn_config_files) + results: list[tuple[str, float, str | None, str | None]] = [] + confidence_score = 1.0 + for config_name in yarn_config_files: + if config_path := file_exists(repo_path, config_name, filters=self.path_filters): + results.append((str(config_path.relative_to(repo_path)), confidence_score, None, None)) + confidence_score = confidence_score / 2 + return results def is_deploy_command( self, cmd: BuildToolCommand, excluded_configs: list[str] | None = None, provenance_workflow: str | None = None diff --git a/tests/build_spec_generator/common_spec/test_core.py b/tests/build_spec_generator/common_spec/test_core.py index b40da16df..c65539786 100644 --- a/tests/build_spec_generator/common_spec/test_core.py +++ b/tests/build_spec_generator/common_spec/test_core.py @@ -13,7 +13,7 @@ MacaronBuildToolName, compose_shell_commands, get_language_version, - get_macaron_build_tool_names, + get_macaron_build_tools, ) from macaron.build_spec_generator.macaron_db_extractor import GenericBuildCommandInfo from macaron.slsa_analyzer.checks.build_tool_check import BuildToolFacts @@ -56,7 +56,7 @@ def test_compose_shell_commands( ) ], "python", - [MacaronBuildToolName.PIP], + ["pip"], id="python_pip_supported", ), pytest.param( @@ -67,7 +67,7 @@ def test_compose_shell_commands( ) ], "java", - [MacaronBuildToolName.GRADLE], + ["gradle"], id="build_tool_gradle", ), pytest.param( @@ -78,7 +78,7 @@ def test_compose_shell_commands( ) ], "java", - [MacaronBuildToolName.MAVEN], + ["maven"], id="build_tool_maven", ), pytest.param( @@ -108,10 +108,11 @@ def test_compose_shell_commands( def test_get_build_tool_name( build_tool_facts: list[BuildToolFacts], language: str, - expected: 
list[MacaronBuildToolName] | None, + expected: list[str] | None, ) -> None: """Test build tool name detection.""" - assert get_macaron_build_tool_names(build_tool_facts, target_language=language) == expected + result = get_macaron_build_tools(build_tool_facts, target_language=language) + assert (list(result.keys()) if result else None) == expected @pytest.mark.parametrize( @@ -187,7 +188,14 @@ def test_get_language_version( "purl": "pkg:maven/foo/bar@1.0.0", "language": LANGUAGES.MAVEN.value, "build_tools": ["ant"], - "build_commands": [SpecBuildCommandDict(build_tool="ant", command=["ant", "dist"])], + "build_commands": [ + SpecBuildCommandDict( + build_tool="ant", + command=["ant", "dist"], + build_config_path="build.xml", + confidence_score=1.0, + ) + ], } ), id="unsupported build tool for maven", @@ -227,7 +235,14 @@ def test_get_language_version( "purl": "pkg:pypi/bar@1.0.0", "language": LANGUAGES.PYPI.value, "build_tools": ["uv"], - "build_commands": [SpecBuildCommandDict(build_tool="uv", command=["python", "-m", "build"])], + "build_commands": [ + SpecBuildCommandDict( + build_tool="uv", + command=["python", "-m", "build"], + build_config_path="pyproject.toml", + confidence_score=1.0, + ) + ], } ), id="unsupported build tool for pypi", diff --git a/tests/build_spec_generator/dockerfile/test_dockerfile_output.py b/tests/build_spec_generator/dockerfile/test_dockerfile_output.py index c8c55be29..2c7d8fab9 100644 --- a/tests/build_spec_generator/dockerfile/test_dockerfile_output.py +++ b/tests/build_spec_generator/dockerfile/test_dockerfile_output.py @@ -28,7 +28,14 @@ def fixture_base_build_spec() -> BaseBuildSpecDict: "build_tools": ["maven"], "newline": "lf", "language_version": ["17"], - "build_commands": [SpecBuildCommandDict(build_tool="maven", command=["mvn", "package"])], + "build_commands": [ + SpecBuildCommandDict( + build_tool="maven", + command=["mvn", "package"], + build_config_path="pom.xml", + confidence_score=1.0, + ) + ], "purl": 
"pkg:maven/com.oracle/example-artifact@1.2.3", } ) diff --git a/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py b/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py index 0c3523474..c2b03fcab 100644 --- a/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py +++ b/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py @@ -29,7 +29,14 @@ def fixture_base_build_spec() -> BaseBuildSpecDict: "language": "python", "has_binaries": False, "build_tools": ["pip"], - "build_commands": [SpecBuildCommandDict(build_tool="pip", command=["python", "-m", "build"])], + "build_commands": [ + SpecBuildCommandDict( + build_tool="pip", + command=["python", "-m", "build"], + build_config_path="pyproject.toml", + confidence_score=1.0, + ) + ], "build_requires": {"setuptools": "==80.9.0", "wheel": ""}, "build_backends": ["setuptools.build_meta"], "upstream_artifacts": { diff --git a/tests/build_spec_generator/reproducible_central/test_reproducible_central.py b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py index 9b8b47379..15c35e44e 100644 --- a/tests/build_spec_generator/reproducible_central/test_reproducible_central.py +++ b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py @@ -27,7 +27,14 @@ def fixture_base_build_spec() -> BaseBuildSpecDict: "build_tools": ["maven"], "newline": "lf", "language_version": ["17"], - "build_commands": [SpecBuildCommandDict(build_tool="maven", command=["mvn", "package"])], + "build_commands": [ + SpecBuildCommandDict( + build_tool="maven", + command=["mvn", "package"], + build_config_path="pom.xml", + confidence_score=1.0, + ) + ], "purl": "pkg:maven/com.oracle/example-artifact@1.2.3", } ) @@ -79,10 +86,25 @@ def test_build_tool_name_variants(base_build_spec: BaseBuildSpecDict, build_tool def test_compose_shell_commands_integration(base_build_spec: BaseBuildSpecDict) -> None: """Test that the correct compose_shell_commands function 
is used.""" base_build_spec["build_commands"] = [ - SpecBuildCommandDict(build_tool="maven", command=["mvn", "clean", "package"]), - SpecBuildCommandDict(build_tool="maven", command=["mvn", "deploy"]), + SpecBuildCommandDict( + build_tool="maven", + command=["mvn", "clean", "package"], + build_config_path="pom.xml", + confidence_score=1.0, + ), + SpecBuildCommandDict( + build_tool="maven", + command=["mvn", "deploy"], + build_config_path="pom.xml", + confidence_score=1.0, + ), ] content = gen_reproducible_central_build_spec(base_build_spec) - expected_commands = compose_shell_commands([["mvn", "clean", "package"], ["mvn", "deploy"]]) + expected_commands = compose_shell_commands( + [ + ["mvn", "-Dmaven.test.skip=true", "clean", "package"], + ["mvn", "-Dmaven.test.skip=true", "deploy"], + ] + ) assert content assert f'command="{expected_commands}"' in content diff --git a/tests/integration/cases/org_apache_hugegraph/computer-k8s/expected_default.buildspec b/tests/integration/cases/org_apache_hugegraph/computer-k8s/expected_default.buildspec index 79d0c8fbb..4eecd76c8 100644 --- a/tests/integration/cases/org_apache_hugegraph/computer-k8s/expected_default.buildspec +++ b/tests/integration/cases/org_apache_hugegraph/computer-k8s/expected_default.buildspec @@ -1,5 +1,5 @@ { - "macaron_version": "0.20.0", + "macaron_version": "0.22.0", "group_id": "org.apache.hugegraph", "artifact_id": "computer-k8s", "version": "1.0.0", @@ -26,7 +26,10 @@ "-Dmaven.javadoc.skip=true", "clean", "package" - ] + ], + "build_config_path": "computer-k8s/pom.xml", + "confidence_score": 1.0, + "root_build_config_path": "pom.xml" } ] } diff --git a/tests/integration/cases/pypi_cachetools/expected_default.buildspec b/tests/integration/cases/pypi_cachetools/expected_default.buildspec index 53ae6d8f4..a8f6f94a4 100644 --- a/tests/integration/cases/pypi_cachetools/expected_default.buildspec +++ b/tests/integration/cases/pypi_cachetools/expected_default.buildspec @@ -1,5 +1,5 @@ { - "macaron_version": 
"0.20.0", + "macaron_version": "0.22.0", "group_id": null, "artifact_id": "cachetools", "version": "6.2.1", @@ -24,7 +24,9 @@ "build", "--wheel", "-n" - ] + ], + "build_config_path": "setup.py", + "confidence_score": 1.0 } ], "has_binaries": false, diff --git a/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec b/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec index 79071d6c3..3f781a96a 100644 --- a/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec +++ b/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec @@ -1,5 +1,5 @@ { - "macaron_version": "0.20.0", + "macaron_version": "0.22.0", "group_id": null, "artifact_id": "markdown-it-py", "version": "4.0.0", @@ -22,7 +22,9 @@ "command": [ "flit", "build" - ] + ], + "build_config_path": "pyproject.toml", + "confidence_score": 1.0 } ], "has_binaries": false, diff --git a/tests/integration/cases/pypi_toga/expected_default.buildspec b/tests/integration/cases/pypi_toga/expected_default.buildspec index 29e2f4cf8..d729267f0 100644 --- a/tests/integration/cases/pypi_toga/expected_default.buildspec +++ b/tests/integration/cases/pypi_toga/expected_default.buildspec @@ -1,5 +1,5 @@ { - "macaron_version": "0.20.0", + "macaron_version": "0.22.0", "group_id": null, "artifact_id": "toga", "version": "0.5.1", @@ -25,7 +25,9 @@ "build", "--wheel", "-n" - ] + ], + "build_config_path": "pyproject.toml", + "confidence_score": 1.0 } ], "has_binaries": false, diff --git a/tests/integration/cases/pypi_tree-sitter/expected_default.buildspec b/tests/integration/cases/pypi_tree-sitter/expected_default.buildspec index 2173ac78b..a67a019b1 100644 --- a/tests/integration/cases/pypi_tree-sitter/expected_default.buildspec +++ b/tests/integration/cases/pypi_tree-sitter/expected_default.buildspec @@ -1,5 +1,5 @@ { - "macaron_version": "0.20.0", + "macaron_version": "0.22.0", "group_id": null, "artifact_id": "tree-sitter", "version": "0.25.2", @@ -24,6 +24,8 @@ 
"setuptools.build_meta" ], "upstream_artifacts": { - "sdist": ["https://files.pythonhosted.org/packages/66/7c/0350cfc47faadc0d3cf7d8237a4e34032b3014ddf4a12ded9933e1648b55/tree-sitter-0.25.2.tar.gz"] + "sdist": [ + "https://files.pythonhosted.org/packages/66/7c/0350cfc47faadc0d3cf7d8237a4e34032b3014ddf4a12ded9933e1648b55/tree-sitter-0.25.2.tar.gz" + ] } } diff --git a/tests/slsa_analyzer/build_tool/test_conda.py b/tests/slsa_analyzer/build_tool/test_conda.py index 5adec7688..3f85fc0e4 100644 --- a/tests/slsa_analyzer/build_tool/test_conda.py +++ b/tests/slsa_analyzer/build_tool/test_conda.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the Conda build functions.""" @@ -28,11 +28,16 @@ def test_get_build_dirs(snapshot: list, conda_tool: Conda, mock_repo: Path) -> N @pytest.mark.parametrize( ("mock_repo", "expected_value"), [ - (Path(__file__).parent.joinpath("mock_repos", "conda_repos", "has_conda"), True), - (Path(__file__).parent.joinpath("mock_repos", "conda_repos", "no_conda"), False), + (Path(__file__).parent.joinpath("mock_repos", "conda_repos", "has_conda"), [("meta.yaml", 1.0, None, None)]), + (Path(__file__).parent.joinpath("mock_repos", "conda_repos", "no_conda"), []), ], ) -def test_conda_build_tool(conda_tool: Conda, macaron_path: str, mock_repo: str, expected_value: bool) -> None: +def test_conda_build_tool( + conda_tool: Conda, + macaron_path: str, + mock_repo: str, + expected_value: list[tuple[str, float, str | None, str | None]], +) -> None: """Test the Conda build tool.""" base_dir = Path(__file__).parent ctx = prepare_repo_for_testing(mock_repo, macaron_path, base_dir) diff --git a/tests/slsa_analyzer/build_tool/test_docker.py b/tests/slsa_analyzer/build_tool/test_docker.py index 
4f256e5c9..7b2e6e5b4 100644 --- a/tests/slsa_analyzer/build_tool/test_docker.py +++ b/tests/slsa_analyzer/build_tool/test_docker.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the Docker build functions.""" @@ -31,14 +31,31 @@ def test_get_build_dirs(snapshot: list, docker_tool: Docker, mock_repo: Path) -> @pytest.mark.parametrize( ("mock_repo", "expected_value"), [ - (Path(__file__).parent.joinpath("mock_repos", "docker_repos", "root_dockerfile"), True), - (Path(__file__).parent.joinpath("mock_repos", "docker_repos", "nested_dockerfile"), True), - (Path(__file__).parent.joinpath("mock_repos", "docker_repos", "root_wildcard_dockerfile"), True), - (Path(__file__).parent.joinpath("mock_repos", "docker_repos", "root_dockerfile_wildcard"), True), - (Path(__file__).parent.joinpath("mock_repos", "docker_repos", "no_docker"), False), + ( + Path(__file__).parent.joinpath("mock_repos", "docker_repos", "root_dockerfile"), + [("Dockerfile", 1.0, None, None)], + ), + ( + Path(__file__).parent.joinpath("mock_repos", "docker_repos", "nested_dockerfile"), + [("project/Dockerfile", 1.0, None, None)], + ), + ( + Path(__file__).parent.joinpath("mock_repos", "docker_repos", "root_wildcard_dockerfile"), + [("final.Dockerfile", 1.0, None, None)], + ), + ( + Path(__file__).parent.joinpath("mock_repos", "docker_repos", "root_dockerfile_wildcard"), + [("Dockerfile.final", 1.0, None, None)], + ), + (Path(__file__).parent.joinpath("mock_repos", "docker_repos", "no_docker"), []), ], ) -def test_docker_build_tool(docker_tool: Docker, macaron_path: str, mock_repo: str, expected_value: bool) -> None: +def test_docker_build_tool( + docker_tool: Docker, + macaron_path: str, + mock_repo: str, + expected_value: list[tuple[str, float, str | 
None, str | None]], +) -> None: """Test the Docker build tool.""" base_dir = Path(__file__).parent ctx = prepare_repo_for_testing(mock_repo, macaron_path, base_dir) diff --git a/tests/slsa_analyzer/build_tool/test_flit.py b/tests/slsa_analyzer/build_tool/test_flit.py index 6ffbed7f0..096985b7d 100644 --- a/tests/slsa_analyzer/build_tool/test_flit.py +++ b/tests/slsa_analyzer/build_tool/test_flit.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the Flit build functions.""" @@ -28,11 +28,19 @@ def test_get_build_dirs(snapshot: list, flit_tool: Flit, mock_repo: Path) -> Non @pytest.mark.parametrize( ("mock_repo", "expected_value"), [ - (Path(__file__).parent.joinpath("mock_repos", "flit_repos", "has_flit_pyproject"), True), - (Path(__file__).parent.joinpath("mock_repos", "flit_repos", "no_flit"), False), + ( + Path(__file__).parent.joinpath("mock_repos", "flit_repos", "has_flit_pyproject"), + [("pyproject.toml", 1.0, None, None)], + ), + (Path(__file__).parent.joinpath("mock_repos", "flit_repos", "no_flit"), []), ], ) -def test_flit_build_tool(flit_tool: Flit, macaron_path: str, mock_repo: str, expected_value: bool) -> None: +def test_flit_build_tool( + flit_tool: Flit, + macaron_path: str, + mock_repo: str, + expected_value: list[tuple[str, float, str | None, str | None]], +) -> None: """Test the Flit build tool.""" base_dir = Path(__file__).parent ctx = prepare_repo_for_testing(mock_repo, macaron_path, base_dir) diff --git a/tests/slsa_analyzer/build_tool/test_go.py b/tests/slsa_analyzer/build_tool/test_go.py index 3f2796326..080a3f30f 100644 --- a/tests/slsa_analyzer/build_tool/test_go.py +++ b/tests/slsa_analyzer/build_tool/test_go.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. 
All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the Go build functions.""" @@ -34,12 +34,17 @@ def test_get_build_dirs(snapshot: list, tmp_path: Path, go_tool: Go, folder: str @pytest.mark.parametrize( ("folder", "file", "expected_value"), [ - ("root_go_mod", "go.mod", True), - ("no_go_mod", "dummyfile.txt", False), + ("root_go_mod", "go.mod", [("go.mod", 1.0, None, None)]), + ("no_go_mod", "dummyfile.txt", []), ], ) def test_go_build_tool( - go_tool: Go, macaron_path: str, tmp_path: Path, folder: str, file: str, expected_value: bool + go_tool: Go, + macaron_path: str, + tmp_path: Path, + folder: str, + file: str, + expected_value: list[tuple[str, float, str | None, str | None]], ) -> None: """Test the Go build tool.""" base_dir = Path(__file__).parent diff --git a/tests/slsa_analyzer/build_tool/test_hatch.py b/tests/slsa_analyzer/build_tool/test_hatch.py index 3fd687476..6992cacf6 100644 --- a/tests/slsa_analyzer/build_tool/test_hatch.py +++ b/tests/slsa_analyzer/build_tool/test_hatch.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module tests the Hatch build functions.""" @@ -28,11 +28,19 @@ def test_get_build_dirs(snapshot: list, hatch_tool: Hatch, mock_repo: Path) -> N @pytest.mark.parametrize( ("mock_repo", "expected_value"), [ - (Path(__file__).parent.joinpath("mock_repos", "hatch_repos", "has_hatch_pyproject"), True), - (Path(__file__).parent.joinpath("mock_repos", "hatch_repos", "no_hatch"), False), + ( + Path(__file__).parent.joinpath("mock_repos", "hatch_repos", "has_hatch_pyproject"), + [("pyproject.toml", 1.0, None, None)], + ), + (Path(__file__).parent.joinpath("mock_repos", "hatch_repos", "no_hatch"), []), ], ) -def test_hatch_build_tool(hatch_tool: Hatch, macaron_path: str, mock_repo: str, expected_value: bool) -> None: +def test_hatch_build_tool( + hatch_tool: Hatch, + macaron_path: str, + mock_repo: str, + expected_value: list[tuple[str, float, str | None, str | None]], +) -> None: """Test the Hatch build tool.""" base_dir = Path(__file__).parent ctx = prepare_repo_for_testing(mock_repo, macaron_path, base_dir) diff --git a/tests/slsa_analyzer/build_tool/test_npm.py b/tests/slsa_analyzer/build_tool/test_npm.py index f27b623f0..190b7982e 100644 --- a/tests/slsa_analyzer/build_tool/test_npm.py +++ b/tests/slsa_analyzer/build_tool/test_npm.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module tests the NPM build functions.""" @@ -30,13 +30,27 @@ def test_get_build_dirs(snapshot: list, npm_tool: NPM, mock_repo: Path) -> None: @pytest.mark.parametrize( ("mock_repo", "expected_value"), [ - (Path(__file__).parent.joinpath("mock_repos", "npm_repos", "root_package"), True), - (Path(__file__).parent.joinpath("mock_repos", "npm_repos", "root_package_packagelock"), True), - (Path(__file__).parent.joinpath("mock_repos", "npm_repos", "nested_package"), True), - (Path(__file__).parent.joinpath("mock_repos", "npm_repos", "no_package"), False), + ( + Path(__file__).parent.joinpath("mock_repos", "npm_repos", "root_package"), + [("package.json", 1.0, None, None)], + ), + ( + Path(__file__).parent.joinpath("mock_repos", "npm_repos", "root_package_packagelock"), + [("package.json", 1.0, None, None), ("package-lock.json", 0.5, None, None)], + ), + ( + Path(__file__).parent.joinpath("mock_repos", "npm_repos", "nested_package"), + [("project/package.json", 1.0, None, None)], + ), + (Path(__file__).parent.joinpath("mock_repos", "npm_repos", "no_package"), []), ], ) -def test_npm_build_tool(npm_tool: NPM, macaron_path: str, mock_repo: str, expected_value: bool) -> None: +def test_npm_build_tool( + npm_tool: NPM, + macaron_path: str, + mock_repo: str, + expected_value: list[tuple[str, float, str | None, str | None]], +) -> None: """Test the NPM build tool.""" base_dir = Path(__file__).parent ctx = prepare_repo_for_testing(mock_repo, macaron_path, base_dir) diff --git a/tests/slsa_analyzer/build_tool/test_poetry.py b/tests/slsa_analyzer/build_tool/test_poetry.py index ae42669af..92d4de7b1 100644 --- a/tests/slsa_analyzer/build_tool/test_poetry.py +++ b/tests/slsa_analyzer/build_tool/test_poetry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the Poetry build functions.""" @@ -29,12 +29,23 @@ def test_get_build_dirs(snapshot: list, poetry_tool: Poetry, mock_repo: Path) -> @pytest.mark.parametrize( ("mock_repo", "expected_value"), [ - (Path(__file__).parent.joinpath("mock_repos", "poetry_repos", "has_poetry_lock"), True), - (Path(__file__).parent.joinpath("mock_repos", "poetry_repos", "no_poetry"), False), - (Path(__file__).parent.joinpath("mock_repos", "poetry_repos", "no_poetry_lock"), True), + ( + Path(__file__).parent.joinpath("mock_repos", "poetry_repos", "has_poetry_lock"), + [("pyproject.toml", 1.0, None, None)], + ), + (Path(__file__).parent.joinpath("mock_repos", "poetry_repos", "no_poetry"), []), + ( + Path(__file__).parent.joinpath("mock_repos", "poetry_repos", "no_poetry_lock"), + [("pyproject.toml", 1.0, None, None)], + ), ], ) -def test_poetry_build_tool(poetry_tool: Poetry, macaron_path: str, mock_repo: str, expected_value: bool) -> None: +def test_poetry_build_tool( + poetry_tool: Poetry, + macaron_path: str, + mock_repo: str, + expected_value: list[tuple[str, float, str | None, str | None]], +) -> None: """Test the Poetry build tool.""" base_dir = Path(__file__).parent ctx = prepare_repo_for_testing(mock_repo, macaron_path, base_dir) diff --git a/tests/slsa_analyzer/build_tool/test_yarn.py b/tests/slsa_analyzer/build_tool/test_yarn.py index 48f49977c..56841bf63 100644 --- a/tests/slsa_analyzer/build_tool/test_yarn.py +++ b/tests/slsa_analyzer/build_tool/test_yarn.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module tests the Yarn build functions.""" @@ -30,13 +30,27 @@ def test_get_build_dirs(snapshot: list, yarn_tool: Yarn, mock_repo: Path) -> Non @pytest.mark.parametrize( ("mock_repo", "expected_value"), [ - (Path(__file__).parent.joinpath("mock_repos", "yarn_repos", "root_package"), True), - (Path(__file__).parent.joinpath("mock_repos", "yarn_repos", "root_package_packagelock"), True), - (Path(__file__).parent.joinpath("mock_repos", "yarn_repos", "nested_package"), True), - (Path(__file__).parent.joinpath("mock_repos", "yarn_repos", "no_package"), False), + ( + Path(__file__).parent.joinpath("mock_repos", "yarn_repos", "root_package"), + [("package.json", 1.0, None, None)], + ), + ( + Path(__file__).parent.joinpath("mock_repos", "yarn_repos", "root_package_packagelock"), + [("package.json", 1.0, None, None), ("package-lock.json", 0.5, None, None)], + ), + ( + Path(__file__).parent.joinpath("mock_repos", "yarn_repos", "nested_package"), + [("project/package.json", 1.0, None, None)], + ), + (Path(__file__).parent.joinpath("mock_repos", "yarn_repos", "no_package"), []), ], ) -def test_yarn_build_tool(yarn_tool: Yarn, macaron_path: str, mock_repo: str, expected_value: bool) -> None: +def test_yarn_build_tool( + yarn_tool: Yarn, + macaron_path: str, + mock_repo: str, + expected_value: list[tuple[str, float, str | None, str | None]], +) -> None: """Test the yarn build tool.""" base_dir = Path(__file__).parent ctx = prepare_repo_for_testing(mock_repo, macaron_path, base_dir) From ae35915f50527e5f0bda271250de071a3145f8d5 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Tue, 24 Mar 2026 17:51:02 +1000 Subject: [PATCH 6/7] test: fix unit tests Signed-off-by: behnazh-w --- src/macaron/config/defaults.ini | 7 +--- .../build_tool/base_build_tool.py | 6 ++- .../common_spec/test_core.py | 5 +++ .../policy.dl | 7 +++- .../facebook_yoga_yarn_classic/policy.dl | 4 +- .../cases/fnproject_fdk-java/policy.dl | 7 +++- .../integration/cases/google_guava/policy.dl | 4 +- 
.../jackson-databind.dl | 4 +- .../micronaut-test.dl | 2 +- .../slsa-framework_slsa-verifier/policy.dl | 4 +- .../cases/timyarkov_docker_test/policy.dl | 4 +- .../timyarkov_multibuild_test_maven/policy.dl | 6 +-- tests/integration/cases/uiv-lib_uiv/policy.dl | 4 +- .../cases/urllib3_expectation_dir/policy.dl | 4 +- tests/slsa_analyzer/build_tool/test_gradle.py | 42 ++++--------------- tests/slsa_analyzer/build_tool/test_maven.py | 6 +-- tests/slsa_analyzer/build_tool/test_pip.py | 15 +------ 17 files changed, 54 insertions(+), 77 deletions(-) diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index d03d6b43e..5e9ea705b 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. [requests] @@ -288,7 +288,7 @@ build_configs = setup.py setup.cfg pyproject.toml -packager = +builder = build publisher = twine @@ -304,9 +304,6 @@ interpreter = python3 interpreter_flag = -m -build_arg = - build - setup.py deploy_arg = publish upload diff --git a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py index 9bb572527..3d9ef58b2 100644 --- a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py +++ b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py @@ -133,7 +133,11 @@ def file_exists( root_dir = Path(path) def _accepted(p: Path) -> bool: - return True if predicate is None else bool(predicate(p, **predicate_kwargs)) + return ( + True + if predicate is None or predicate_kwargs == {"group_id": None, "artifact_id": None} + else bool(predicate(p, **predicate_kwargs)) + ) # Check for file directly at root. 
if target_path := find_first_matching_file(root_dir, file_name): diff --git a/tests/build_spec_generator/common_spec/test_core.py b/tests/build_spec_generator/common_spec/test_core.py index c65539786..aae9da7dc 100644 --- a/tests/build_spec_generator/common_spec/test_core.py +++ b/tests/build_spec_generator/common_spec/test_core.py @@ -53,6 +53,7 @@ def test_compose_shell_commands( BuildToolFacts( language="python", build_tool_name="pip", + confidence=1.0, ) ], "python", @@ -64,6 +65,7 @@ def test_compose_shell_commands( BuildToolFacts( language="java", build_tool_name="gradle", + confidence=1.0, ) ], "java", @@ -75,6 +77,7 @@ def test_compose_shell_commands( BuildToolFacts( language="java", build_tool_name="maven", + confidence=1.0, ) ], "java", @@ -86,6 +89,7 @@ def test_compose_shell_commands( BuildToolFacts( language="not_java", build_tool_name="maven", + confidence=1.0, ) ], "java", @@ -97,6 +101,7 @@ def test_compose_shell_commands( BuildToolFacts( language="java", build_tool_name="some_java_build_tool", + confidence=1.0, ) ], "java", diff --git a/tests/integration/cases/IntellectualSites_Arkitektonika-Client/policy.dl b/tests/integration/cases/IntellectualSites_Arkitektonika-Client/policy.dl index 3faa06d37..a23f1d7a5 100644 --- a/tests/integration/cases/IntellectualSites_Arkitektonika-Client/policy.dl +++ b/tests/integration/cases/IntellectualSites_Arkitektonika-Client/policy.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ #include "prelude.dl" @@ -8,7 +8,12 @@ Policy("test_policy", component_id, "") :- is_repo_url(component_id, "https://github.com/IntellectualSites/Arkitektonika-Client"), build_tool_check( check_id, + _, "gradle", + _, + _, + _, + _, _ ), check_facts(check_id, _, component_id,_,_). 
diff --git a/tests/integration/cases/facebook_yoga_yarn_classic/policy.dl b/tests/integration/cases/facebook_yoga_yarn_classic/policy.dl index 1efa084e6..de91d82ae 100644 --- a/tests/integration/cases/facebook_yoga_yarn_classic/policy.dl +++ b/tests/integration/cases/facebook_yoga_yarn_classic/policy.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ #include "prelude.dl" @@ -9,7 +9,7 @@ Policy("test_policy", component_id, "") :- check_passed(component_id, "mcn_build_service_1"), check_passed(component_id, "mcn_version_control_system_1"), check_passed(component_id, "mcn_build_tool_1"), - build_tool_check(yarn_id, "yarn", "javascript"), + build_tool_check(yarn_id, "javascript", "yarn", _, "yarn.lock", _, _, _), check_facts(yarn_id, _, component_id,_,_), check_failed(component_id, "mcn_find_artifact_pipeline_1"), check_failed(component_id, "mcn_provenance_available_1"), diff --git a/tests/integration/cases/fnproject_fdk-java/policy.dl b/tests/integration/cases/fnproject_fdk-java/policy.dl index f4d71f66e..7d88eb05d 100644 --- a/tests/integration/cases/fnproject_fdk-java/policy.dl +++ b/tests/integration/cases/fnproject_fdk-java/policy.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ #include "prelude.dl" @@ -12,7 +12,12 @@ Policy("test_policy", component_id, "") :- build_tool_exists(component_id, name) :- build_tool_check( check_id, + _, name, + _, + _, + _, + _, _ ), check_facts(check_id, _, component_id,_,_). 
diff --git a/tests/integration/cases/google_guava/policy.dl b/tests/integration/cases/google_guava/policy.dl index 57c9abd30..9682020d7 100644 --- a/tests/integration/cases/google_guava/policy.dl +++ b/tests/integration/cases/google_guava/policy.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ #include "prelude.dl" @@ -9,7 +9,7 @@ Policy("test_policy", component_id, "") :- check_passed(component_id, "mcn_build_service_1"), check_passed(component_id, "mcn_version_control_system_1"), check_passed(component_id, "mcn_build_tool_1"), - build_tool_check(maven_id, "maven", "java"), + build_tool_check(maven_id, "java", "maven", _, _, _, _, _), check_facts(maven_id, _, component_id,_,_), check_failed(component_id, "mcn_provenance_available_1"), check_failed(component_id, "mcn_provenance_derived_commit_1"), diff --git a/tests/integration/cases/jackson_databind_with_purl_and_no_deps/jackson-databind.dl b/tests/integration/cases/jackson_databind_with_purl_and_no_deps/jackson-databind.dl index c2551db50..c034e9229 100644 --- a/tests/integration/cases/jackson_databind_with_purl_and_no_deps/jackson-databind.dl +++ b/tests/integration/cases/jackson_databind_with_purl_and_no_deps/jackson-databind.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
*/ #include "prelude.dl" @@ -9,7 +9,7 @@ Policy("test_policy", component_id, "") :- check_passed(component_id, "mcn_build_service_1"), check_passed(component_id, "mcn_version_control_system_1"), check_passed(component_id, "mcn_build_tool_1"), - build_tool_check(maven_id, "maven", "java"), + build_tool_check(maven_id, "java", "maven", _, _, _, _, _), check_facts(maven_id, _, component_id,_,_), check_failed(component_id, "mcn_find_artifact_pipeline_1"), check_failed(component_id, "mcn_provenance_available_1"), diff --git a/tests/integration/cases/micronaut-projects_micronaut-test/micronaut-test.dl b/tests/integration/cases/micronaut-projects_micronaut-test/micronaut-test.dl index e307a28b8..75d4e6243 100644 --- a/tests/integration/cases/micronaut-projects_micronaut-test/micronaut-test.dl +++ b/tests/integration/cases/micronaut-projects_micronaut-test/micronaut-test.dl @@ -12,7 +12,7 @@ Policy("test_policy", component_id, "") :- check_passed(component_id, "mcn_provenance_verified_1"), check_passed(component_id, "mcn_provenance_derived_repo_1"), check_passed(component_id, "mcn_build_tool_1"), - build_tool_check(gradle_id, "gradle", "java"), + build_tool_check(gradle_id, "java", "gradle", _, _, _, _, _), check_facts(gradle_id, _, component_id,_,_), provenance_verified_check(_, build_level, _), build_level = 3, diff --git a/tests/integration/cases/slsa-framework_slsa-verifier/policy.dl b/tests/integration/cases/slsa-framework_slsa-verifier/policy.dl index 51a2ecb7a..71b46fc4a 100644 --- a/tests/integration/cases/slsa-framework_slsa-verifier/policy.dl +++ b/tests/integration/cases/slsa-framework_slsa-verifier/policy.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
*/ #include "prelude.dl" @@ -10,7 +10,7 @@ Policy("test_policy", component_id, "") :- check_passed(component_id, "mcn_trusted_builder_level_three_1"), check_passed(component_id, "mcn_version_control_system_1"), check_passed(component_id, "mcn_build_tool_1"), - build_tool_check(go_id, "go", "go"), + build_tool_check(go_id, "go", "go", _, _, _, _, _), check_facts(go_id, _, component_id,_,_), check_passed(component_id, "mcn_provenance_available_1"), check_passed(component_id, "mcn_provenance_derived_commit_1"), diff --git a/tests/integration/cases/timyarkov_docker_test/policy.dl b/tests/integration/cases/timyarkov_docker_test/policy.dl index 0c5eceb2d..296875268 100644 --- a/tests/integration/cases/timyarkov_docker_test/policy.dl +++ b/tests/integration/cases/timyarkov_docker_test/policy.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
*/ #include "prelude.dl" @@ -9,7 +9,7 @@ Policy("test_policy", component_id, "") :- check_passed(component_id, "mcn_build_service_1"), check_passed(component_id, "mcn_version_control_system_1"), check_passed(component_id, "mcn_build_tool_1"), - build_tool_check(docker_id, "docker", "docker"), + build_tool_check(docker_id, "docker", "docker", _, _, _, _, _), check_facts(docker_id, _, component_id,_,_), check_failed(component_id, "mcn_find_artifact_pipeline_1"), check_failed(component_id, "mcn_provenance_available_1"), diff --git a/tests/integration/cases/timyarkov_multibuild_test_maven/policy.dl b/tests/integration/cases/timyarkov_multibuild_test_maven/policy.dl index 3b93d8ab1..9edaf84f1 100644 --- a/tests/integration/cases/timyarkov_multibuild_test_maven/policy.dl +++ b/tests/integration/cases/timyarkov_multibuild_test_maven/policy.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
*/ #include "prelude.dl" @@ -9,9 +9,9 @@ Policy("test_policy", component_id, "") :- check_passed(component_id, "mcn_build_service_1"), check_passed(component_id, "mcn_version_control_system_1"), check_passed(component_id, "mcn_build_tool_1"), - build_tool_check(gradle_id, "gradle", "java"), + build_tool_check(gradle_id, "java", "gradle", _, _, _, _, _), check_facts(gradle_id, _, component_id,_,_), - build_tool_check(maven_id, "maven", "java"), + build_tool_check(maven_id, "java", "maven", _, _, _, _, _), check_facts(maven_id, _, component_id,_,_), check_failed(component_id, "mcn_find_artifact_pipeline_1"), check_failed(component_id, "mcn_provenance_available_1"), diff --git a/tests/integration/cases/uiv-lib_uiv/policy.dl b/tests/integration/cases/uiv-lib_uiv/policy.dl index 35e17f423..fa93cf03f 100644 --- a/tests/integration/cases/uiv-lib_uiv/policy.dl +++ b/tests/integration/cases/uiv-lib_uiv/policy.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
*/ #include "prelude.dl" @@ -9,7 +9,7 @@ Policy("test_policy", component_id, "") :- check_passed(component_id, "mcn_build_service_1"), check_passed(component_id, "mcn_version_control_system_1"), check_passed(component_id, "mcn_build_tool_1"), - build_tool_check(npm_id, "npm", "javascript"), + build_tool_check(npm_id, "javascript", "npm", _, _, _, _, _), check_facts(npm_id, _, component_id,_,_), check_failed(component_id, "mcn_find_artifact_pipeline_1"), check_failed(component_id, "mcn_provenance_available_1"), diff --git a/tests/integration/cases/urllib3_expectation_dir/policy.dl b/tests/integration/cases/urllib3_expectation_dir/policy.dl index 1ce99eca8..4b00d900c 100644 --- a/tests/integration/cases/urllib3_expectation_dir/policy.dl +++ b/tests/integration/cases/urllib3_expectation_dir/policy.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
*/ #include "prelude.dl" @@ -13,7 +13,7 @@ Policy("test_policy", component_id, "") :- check_passed(component_id, "mcn_provenance_derived_repo_1"), check_passed(component_id, "mcn_provenance_expectation_1"), check_passed(component_id, "mcn_build_tool_1"), - build_tool_check(pip_id, "pip", "python"), + build_tool_check(pip_id, "python", "pip", _, _, _, _, _), check_facts(pip_id, _, component_id,_,_), check_failed(component_id, "mcn_find_artifact_pipeline_1"), check_passed(component_id, "mcn_provenance_verified_1"), diff --git a/tests/slsa_analyzer/build_tool/test_gradle.py b/tests/slsa_analyzer/build_tool/test_gradle.py index 70510cff6..cb6db64ab 100644 --- a/tests/slsa_analyzer/build_tool/test_gradle.py +++ b/tests/slsa_analyzer/build_tool/test_gradle.py @@ -21,40 +21,30 @@ Path(__file__).parent.joinpath("mock_repos", "gradle_repos", "no_gradle"), ], ) -def test_get_build_dirs(gradle_tool: Gradle, mock_repo: Path) -> None: +def test_get_build_dirs(snapshot: list, gradle_tool: Gradle, mock_repo: Path) -> None: """Test discovering build directories.""" - # Gradle detection now relies on group/artifact validation, which is not - # provided by get_build_dirs(). 
- assert not list(gradle_tool.get_build_dirs(str(mock_repo))) + assert list(gradle_tool.get_build_dirs(str(mock_repo))) == snapshot @pytest.mark.parametrize( - ("mock_repo", "group_id", "artifact_id", "expected_value"), + ("mock_repo", "expected_value"), [ ( Path(__file__).parent.joinpath("mock_repos", "gradle_repos", "groovy_gradle"), - "mock_repos", - "project1", [ - ("project1/build.gradle", 1.0, None, "settings.gradle"), - ("build.gradle", 50.0, None, "settings.gradle"), - ("settings.gradle", 2500.0, None, "settings.gradle"), + ("build.gradle", 1.0, None, "settings.gradle"), + ("settings.gradle", 0.5, None, "settings.gradle"), ], ), ( Path(__file__).parent.joinpath("mock_repos", "gradle_repos", "kotlin_gradle"), - "mock_repos", - "project1", [ - ("project1/build.gradle.kts", 1.0, None, "settings.gradle.kts"), - ("build.gradle.kts", 50.0, None, "settings.gradle.kts"), - ("settings.gradle.kts", 2500.0, None, "settings.gradle.kts"), + ("build.gradle.kts", 1.0, None, "settings.gradle.kts"), + ("settings.gradle.kts", 0.5, None, "settings.gradle.kts"), ], ), ( Path(__file__).parent.joinpath("mock_repos", "gradle_repos", "no_gradle"), - "mock_repos", - "project1", [], ), ], @@ -63,16 +53,12 @@ def test_gradle_build_tool( gradle_tool: Gradle, macaron_path: str, mock_repo: str, - group_id: str, - artifact_id: str, expected_value: list[tuple[str, float, str | None, str | None]], ) -> None: """Test the Gradle build tool.""" base_dir = Path(__file__).parent ctx = prepare_repo_for_testing(mock_repo, macaron_path, base_dir) - assert gradle_tool.is_detected(ctx.component.repository.fs_path, group_id=group_id, artifact_id=artifact_id) == ( - expected_value - ) + assert gradle_tool.is_detected(ctx.component.repository.fs_path) == (expected_value) def test_gradle_build_tool_with_group_artifact_validation(gradle_tool: Gradle, tmp_path: Path) -> None: @@ -108,18 +94,6 @@ def test_gradle_build_tool_with_project_group_and_multimodule_name(gradle_tool: assert detected[0][3] == 
"settings.gradle" -def test_gradle_build_tool_with_repo_namespace_group(gradle_tool: Gradle, tmp_path: Path) -> None: - """Test Gradle detection when group input is a repository namespace.""" - gradle_repo = tmp_path.joinpath("micronaut-test") - gradle_repo.mkdir(parents=True) - gradle_repo.joinpath("build.gradle").write_text("plugins { id 'java' }\n") - gradle_repo.joinpath("settings.gradle").write_text("rootProject.name = 'test-parent'\ninclude 'test-junit5'\n") - gradle_repo.joinpath("gradle.properties").write_text("projectGroup=io.micronaut.test\n") - - detected = gradle_tool.is_detected(str(gradle_repo), group_id="micronaut-projects", artifact_id="micronaut-test") - assert detected - - @pytest.mark.parametrize( ( "command", diff --git a/tests/slsa_analyzer/build_tool/test_maven.py b/tests/slsa_analyzer/build_tool/test_maven.py index 48cb04c08..b34dfd08d 100644 --- a/tests/slsa_analyzer/build_tool/test_maven.py +++ b/tests/slsa_analyzer/build_tool/test_maven.py @@ -21,11 +21,9 @@ Path(__file__).parent.joinpath("mock_repos", "maven_repos", "no_pom"), ], ) -def test_get_build_dirs(maven_tool: Maven, mock_repo: Path) -> None: +def test_get_build_dirs(snapshot: list, maven_tool: Maven, mock_repo: Path) -> None: """Test discovering build directories.""" - # Maven detection now relies on group/artifact validation, which is not - # provided by get_build_dirs(). - assert not list(maven_tool.get_build_dirs(str(mock_repo))) + assert list(maven_tool.get_build_dirs(str(mock_repo))) == snapshot @pytest.mark.parametrize( diff --git a/tests/slsa_analyzer/build_tool/test_pip.py b/tests/slsa_analyzer/build_tool/test_pip.py index fa767bcce..e0d153e87 100644 --- a/tests/slsa_analyzer/build_tool/test_pip.py +++ b/tests/slsa_analyzer/build_tool/test_pip.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the Pip build functions.""" @@ -113,7 +113,7 @@ def test_is_pip_deploy_command( ), [ ( - ["pip", "build"], + ["python", "-m", "build"], BuildLanguage.PYTHON, None, None, @@ -123,17 +123,6 @@ def test_is_pip_deploy_command( ["codeql-analysis.yaml"], True, ), - ( - ["python", "-m", "pip", "build"], - BuildLanguage.PYTHON, - None, - None, - ".github/workflows/release.yaml", - [{"key", "pass"}], - ["push"], - ["codeql-analysis.yaml"], - True, - ), ( ["python", "-m", "flit", "build"], BuildLanguage.PYTHON, From 361e6fca26dfc1a983ea75e5232242335574c4d5 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Tue, 24 Mar 2026 20:31:28 +1000 Subject: [PATCH 7/7] chore: update buildspec schema Signed-off-by: behnazh-w --- .../schemas/macaron_buildspec_schema.json | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/macaron/resources/schemas/macaron_buildspec_schema.json b/src/macaron/resources/schemas/macaron_buildspec_schema.json index f07921b34..d2e7f1255 100644 --- a/src/macaron/resources/schemas/macaron_buildspec_schema.json +++ b/src/macaron/resources/schemas/macaron_buildspec_schema.json @@ -67,10 +67,27 @@ "build_commands": { "type": "array", "items": { - "type": "array", - "items": { "type": "string" } + "type": "object", + "properties": { + "build_tool": { "type": "string" }, + "build_tool_version": { "type": ["string", "null"] }, + "build_config_path": { "type": "string" }, + "root_build_config_path": { "type": "string" }, + "command": { + "type": "array", + "items": { "type": "string" } + }, + "confidence_score": { "type": "number" } + }, + "required": [ + "build_tool", + "build_config_path", + "command", + "confidence_score" + ], + "additionalProperties": false }, - "description": "List of shell commands to build the project." 
+ "description": "List of build command objects and related metadata used to build the project." }, "test_commands": { "type": "array",