diff --git a/.gitignore b/.gitignore index 3a7241c941f5e6..f3372f15eb1bf4 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,7 @@ *.s *.so *.so.dbg +*.spdx.json *.su *.symtypes *.tab.[ch] diff --git a/Documentation/tools/index.rst b/Documentation/tools/index.rst index 80488e290e1062..6b0319b2d33456 100644 --- a/Documentation/tools/index.rst +++ b/Documentation/tools/index.rst @@ -12,6 +12,7 @@ more additions are needed here: rtla/index rv/index + sbom/sbom .. only:: subproject and html diff --git a/Documentation/tools/sbom/sbom.rst b/Documentation/tools/sbom/sbom.rst new file mode 100644 index 00000000000000..029b08b6ad8741 --- /dev/null +++ b/Documentation/tools/sbom/sbom.rst @@ -0,0 +1,206 @@ +.. SPDX-License-Identifier: GPL-2.0-only OR MIT +.. Copyright (C) 2025 TNG Technology Consulting GmbH + +KernelSbom +========== + +Introduction +------------ + +KernelSbom is a Python script ``scripts/sbom/sbom.py`` that can be +executed after a successful kernel build. When invoked, KernelSbom +analyzes all files involved in the build and generates Software Bill of +Materials (SBOM) documents in SPDX 3.0.1 format. +The generated SBOM documents capture: + +* **Final output artifacts**, typically the kernel image and modules +* **All source files** that contributed to the build with metadata + and licensing information +* **Details of the build process**, including intermediate artifacts + and the build commands linking source files to the final output + artifacts + +KernelSbom is originally developed in the +`KernelSbom repository `_. + +Requirements +------------ + +Python 3.10 or later. No libraries or other dependencies are required. + +Basic Usage +----------- + +Run the ``make sbom`` target. +For example:: + + $ make defconfig O=kernel_build + $ make sbom O=kernel_build -j$(nproc) + +This will trigger a kernel build. 
After all build outputs have been +generated, KernelSbom produces three SPDX documents in the root +directory of the object tree: + +* ``sbom-source.spdx.json`` + Describes all source files involved in the build and + associates each file with its corresponding license expression. + +* ``sbom-output.spdx.json`` + Captures all final build outputs (kernel image and ``.ko`` module files) + and includes build metadata such as environment variables and + a hash of the ``.config`` file used for the build. + +* ``sbom-build.spdx.json`` + Imports files from the source and output documents and describes every + intermediate build artifact. For each artifact, it records the exact + build command used and establishes the relationship between + input files and generated outputs. + +When invoking the sbom target, it is recommended to perform +out-of-tree builds using ``O=``. KernelSbom classifies files as +source files when they are located in the source tree and not in the +object tree. For in-tree builds, where the source and object trees are +the same directory, this distinction can no longer be made reliably. +In that case, KernelSbom does not generate a dedicated source SBOM. +Instead, source files are included in the build SBOM. + +Standalone Usage +---------------- + +KernelSbom can also be used as a standalone script to generate +SPDX documents for specific build outputs. For example, after a +successful x86 kernel build, KernelSbom can generate SPDX documents +for the ``bzImage`` kernel image:: + + $ SRCARCH=x86 python3 scripts/sbom/sbom.py \ + --src-tree . \ + --obj-tree ./kernel_build \ + --roots arch/x86/boot/bzImage \ + --generate-spdx \ + --generate-used-files \ + --prettify-json \ + --debug + +Note that when KernelSbom is invoked outside of the ``make`` process, +the environment variables used during compilation are not available and +therefore cannot be included in the generated SPDX documents. 
It is +recommended to set at least the ``SRCARCH`` environment variable to the +architecture for which the build was performed. + +For a full list of command-line options, run:: + + $ python3 scripts/sbom/sbom.py --help + +Output Format +------------- + +KernelSbom generates documents conforming to the +`SPDX 3.0.1 specification `_ +serialized as JSON-LD. + +To reduce file size, the output documents use the JSON-LD ``@context`` +to define custom prefixes for ``spdxId`` values. While this is compliant +with the SPDX specification, only a limited number of tools in the +current SPDX ecosystem support custom JSON-LD contexts. To use such +tools with the generated documents, the custom JSON-LD context must +be expanded before providing the documents. +See https://lists.spdx.org/g/Spdx-tech/message/6064 for more information. + +How it Works +------------ + +KernelSbom operates in two major phases: + +1. **Generate the cmd graph**, an acyclic directed dependency graph. +2. **Generate SPDX documents** based on the cmd graph. + +KernelSbom begins from the root artifacts specified by the user, e.g., +``arch/x86/boot/bzImage``. For each root artifact, it collects all +dependencies required to build that artifact. The dependencies come +from multiple sources: + +* **.cmd files**: The primary source is the ``.cmd`` file of the + generated artifact, e.g., ``arch/x86/boot/.bzImage.cmd``. These files + contain the exact command used to build the artifact and often include + an explicit list of input dependencies. By parsing the ``.cmd`` + file, the full list of dependencies can be obtained. + +* **.incbin statements**: The second source are include binary + ``.incbin`` statements in ``.S`` assembly files. + +* **Hardcoded dependencies**: Unfortunately, not all build dependencies + can be found via ``.cmd`` files and ``.incbin`` statements. Some build + dependencies are directly defined in Makefiles or Kbuild files. 
+ Parsing these files is considered too complex for the scope of this + project. Instead, the remaining gaps of the graph are filled using a + list of manually defined dependencies, see + ``scripts/sbom/sbom/cmd_graph/hardcoded_dependencies.py``. This list is + known to be incomplete. However, analysis of the cmd graph indicates a + ~99% completeness. For more information about the completeness analysis, + see `KernelSbom #95 `_. + +Given the list of dependency files, KernelSbom recursively processes +each file, expanding the dependency chain all the way to the version +controlled source files. The result is a complete dependency graph +where nodes represent files, and edges represent "file A was used to +build file B" relationships. + +Using the cmd graph, KernelSbom produces three SPDX documents. +For every file in the graph, KernelSbom: + +* Parses ``SPDX-License-Identifier`` headers, +* Computes file hashes, +* Estimates the file type based on extension and path, +* Records build relationships between files. + +Each root output file is additionally associated with an SPDX Package +element that captures version information, license data, and copyright. + +Advanced Usage +-------------- + +Including Kernel Modules +~~~~~~~~~~~~~~~~~~~~~~~~ + +The list of all ``.ko`` kernel modules produced during a build can be +extracted from the ``modules.order`` file within the object tree. +For example:: + + $ echo "arch/x86/boot/bzImage" > sbom-roots.txt + $ sed 's/\.o$/.ko/' ./kernel_build/modules.order >> sbom-roots.txt + +Then use the generated roots file:: + + $ SRCARCH=x86 python3 scripts/sbom/sbom.py \ + --src-tree . \ + --obj-tree ./kernel_build \ + --roots-file sbom-roots.txt \ + --generate-spdx + +Equal Source and Object Trees +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When the source tree and object tree are identical (for example, when +building in-tree), source files can no longer be reliably distinguished +from generated files. 
+In this scenario, KernelSbom does not produce a dedicated +``sbom-source.spdx.json`` document. Instead, both source files and build +artifacts are included together in ``sbom-build.spdx.json``, and +``sbom.used-files.txt`` lists all files referenced in the build document. + +Unknown Build Commands +~~~~~~~~~~~~~~~~~~~~~~ + +Because the kernel supports a wide range of configurations and versions, +KernelSbom may encounter build commands in ``.cmd`` files that it does +not yet support. By default, KernelSbom will fail if an unknown build +command is encountered. + +If you still wish to generate SPDX documents despite unsupported +commands, you can use the ``--do-not-fail-on-unknown-build-command`` +option. KernelSbom will continue and produce the documents, although +the resulting SBOM will be incomplete. + +This option should only be used when the missing portion of the +dependency graph is small and an incomplete SBOM is acceptable for +your use case. diff --git a/MAINTAINERS b/MAINTAINERS index f1b02058859739..decbab52cef135 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23365,6 +23365,12 @@ R: Marc Murphy S: Supported F: arch/arm/boot/dts/ti/omap/am335x-sancloud* +SBOM +M: Luis Augenstein +M: Maximilian Huber +S: Maintained +F: scripts/sbom/ + SC1200 WDT DRIVER M: Zwane Mwaikambo S: Maintained diff --git a/Makefile b/Makefile index 9d38125263fb09..46d4be490d7f7f 100644 --- a/Makefile +++ b/Makefile @@ -772,7 +772,7 @@ endif # in addition to whatever we do anyway. 
# Just "make" or "make all" shall build modules as well -ifneq ($(filter all modules nsdeps compile_commands.json clang-%,$(MAKECMDGOALS)),) +ifneq ($(filter all modules nsdeps compile_commands.json clang-% sbom,$(MAKECMDGOALS)),) KBUILD_MODULES := y endif @@ -1612,7 +1612,7 @@ CLEAN_FILES += vmlinux.symvers modules-only.symvers \ modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \ compile_commands.json rust/test \ rust-project.json .vmlinux.objs .vmlinux.export.c \ - .builtin-dtbs-list .builtin-dtb.S + .builtin-dtbs-list .builtin-dtb.S sbom-*.spdx.json # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ @@ -1728,6 +1728,7 @@ help: @echo '' @echo 'Tools:' @echo ' nsdeps - Generate missing symbol namespace dependencies' + @echo ' sbom - Generate Software Bill of Materials' @echo '' @echo 'Kernel selftest:' @echo ' kselftest - Build and run kernel selftest' @@ -2108,6 +2109,12 @@ nsdeps: export KBUILD_NSDEPS=1 nsdeps: modules $(Q)$(CONFIG_SHELL) $(srctree)/scripts/nsdeps +# Script to generate .spdx.json SBOM documents describing the build +# --------------------------------------------------------------------------- +PHONY += sbom +sbom: all + $(Q)$(MAKE) $(build)=scripts/sbom + # Clang Tooling # --------------------------------------------------------------------------- diff --git a/scripts/sbom/Makefile b/scripts/sbom/Makefile new file mode 100644 index 00000000000000..f14176ea6b55c4 --- /dev/null +++ b/scripts/sbom/Makefile @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +SBOM_SOURCE_FILE := $(objtree)/sbom-source.spdx.json +SBOM_BUILD_FILE := $(objtree)/sbom-build.spdx.json +SBOM_OUTPUT_FILE := $(objtree)/sbom-output.spdx.json +SBOM_ROOTS_FILE := $(objtree)/sbom-roots.txt + + +ifeq ($(srctree),$(objtree)) + SBOM_TARGETS := $(SBOM_BUILD_FILE) $(SBOM_OUTPUT_FILE) +else + SBOM_TARGETS := $(SBOM_SOURCE_FILE) $(SBOM_BUILD_FILE) 
$(SBOM_OUTPUT_FILE) +endif + +SBOM_DEPS := $(objtree)/$(KBUILD_IMAGE) $(objtree)/include/generated/autoconf.h +ifdef CONFIG_MODULES + SBOM_DEPS += $(objtree)/modules.order +endif + +$(SBOM_TARGETS) &: $(SBOM_DEPS) + $(Q)echo " GEN $(notdir $(SBOM_TARGETS))" + + $(Q)printf "%s\n" "$(KBUILD_IMAGE)" > $(SBOM_ROOTS_FILE) + $(Q)if [ "$(CONFIG_MODULES)" = "y" ]; then \ + sed 's/\.o$$/.ko/' $(objtree)/modules.order >> $(SBOM_ROOTS_FILE); \ + fi + + $(Q)$(PYTHON3) $(srctree)/scripts/sbom/sbom.py \ + --src-tree $(abspath $(srctree)) \ + --obj-tree $(abspath $(objtree)) \ + --roots-file $(SBOM_ROOTS_FILE) \ + --output-directory $(abspath $(objtree)) \ + --generate-spdx \ + --package-license "GPL-2.0 WITH Linux-syscall-note" \ + --package-version "$(KERNELVERSION)" + + $(Q)rm $(SBOM_ROOTS_FILE) + +$(obj)/: $(SBOM_TARGETS) diff --git a/scripts/sbom/sbom.py b/scripts/sbom/sbom.py new file mode 100644 index 00000000000000..426521ade460d8 --- /dev/null +++ b/scripts/sbom/sbom.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +""" +Compute software bill of materials in SPDX format describing a kernel build. 
def main():
    """
    Build the cmd graph of a kernel build and write the requested outputs.

    Steps:
      1. Read the configuration via ``get_config()`` and configure logging.
      2. Build the cmd graph for ``config.root_paths``.
      3. If ``config.generate_used_files`` is set, write the used files
         (paths relative to the source tree, one per line) to
         ``config.used_files_file_name`` in the output directory.
      4. If ``config.generate_spdx`` is set, build the SPDX graphs and save
         one JSON-LD document per graph.

    Output files are only written when no errors were collected or when
    ``config.write_output_on_error`` is set. After SPDX generation the
    collected warnings/errors are logged and the process exits with status 1
    if there is an error summary.
    """
    config = get_config()

    logging.basicConfig(
        level=logging.DEBUG if config.debug else logging.INFO,
        format="[%(levelname)s] %(message)s",
    )

    # Build cmd graph
    logging.debug("Start building cmd graph")
    start_time = time.time()
    cmd_graph = CmdGraph.create(config.root_paths, config)
    logging.debug(f"Built cmd graph in {time.time() - start_time} seconds")

    # Save used files document
    if config.generate_used_files:
        if config.src_tree == config.obj_tree:
            # The file name is interpolated directly; wrapping it in a tuple
            # would print "('name',)" in the log message.
            logging.info(
                f"Extracting all files from the cmd graph to {config.used_files_file_name} "
                "instead of only source files because source files cannot be "
                "reliably classified when the source and object trees are identical.",
            )
            used_files = [os.path.relpath(node.absolute_path, config.src_tree) for node in cmd_graph]
            logging.debug(f"Found {len(used_files)} files in cmd graph.")
        else:
            # Source files live in the source tree but not in the object tree.
            used_files = [
                os.path.relpath(node.absolute_path, config.src_tree)
                for node in cmd_graph
                if is_relative_to(node.absolute_path, config.src_tree)
                and not is_relative_to(node.absolute_path, config.obj_tree)
            ]
            logging.debug(f"Found {len(used_files)} source files in cmd graph")
        if not sbom_logging.has_errors() or config.write_output_on_error:
            used_files_path = os.path.join(config.output_directory, config.used_files_file_name)
            with open(used_files_path, "w", encoding="utf-8") as f:
                f.write("\n".join(str(file_path) for file_path in used_files))
            logging.debug(f"Successfully saved {used_files_path}")

    # NOTE(review): on this early return the warning/error summaries below are
    # never logged and the exit status stays 0 - confirm this is intended.
    if not config.generate_spdx:
        return

    # Build SPDX Documents
    logging.debug("Start generating SPDX graph based on cmd graph")
    start_time = time.time()

    # The real uuid is derived from the content of the SPDX graphs so that the
    # same SPDX document is always assigned the same uuid.
    placeholder_uuid = "00000000-0000-0000-0000-000000000000"
    spdx_id_base_namespace = f"{config.spdxId_prefix}{placeholder_uuid}/"
    spdx_id_generators = SpdxIdGeneratorCollection(
        base=SpdxIdGenerator(prefix="p", namespace=spdx_id_base_namespace),
        source=SpdxIdGenerator(prefix="s", namespace=f"{spdx_id_base_namespace}source/"),
        build=SpdxIdGenerator(prefix="b", namespace=f"{spdx_id_base_namespace}build/"),
        output=SpdxIdGenerator(prefix="o", namespace=f"{spdx_id_base_namespace}output/"),
    )

    spdx_graphs = build_spdx_graphs(
        cmd_graph,
        spdx_id_generators,
        config,
    )
    spdx_id_uuid = uuid.uuid5(
        uuid.NAMESPACE_URL,
        "".join(
            json.dumps(element.to_dict()) for spdx_graph in spdx_graphs.values() for element in spdx_graph.to_list()
        ),
    )
    logging.debug(f"Generated SPDX graph in {time.time() - start_time} seconds")

    # Report collected warnings and errors in case of failure
    warning_summary = sbom_logging.summarize_warnings()
    error_summary = sbom_logging.summarize_errors()

    if not sbom_logging.has_errors() or config.write_output_on_error:
        for kernel_sbom_kind, spdx_graph in spdx_graphs.items():
            spdx_graph_objects = spdx_graph.to_list()
            # Add warning and error summary to creation info comment
            creation_info = next(element for element in spdx_graph_objects if isinstance(element, CreationInfo))
            creation_info.comment = "\n".join([warning_summary, error_summary]).strip()
            # Replace placeholder uuid with real uuid for spdxIds
            spdx_document = next(element for element in spdx_graph_objects if isinstance(element, SpdxDocument))
            for namespace_map in spdx_document.namespaceMap:
                namespace_map.namespace = namespace_map.namespace.replace(placeholder_uuid, str(spdx_id_uuid))
            # Serialize SPDX graph to JSON-LD
            spdx_doc = JsonLdSpdxDocument(graph=spdx_graph_objects)
            save_path = os.path.join(config.output_directory, config.spdx_file_names[kernel_sbom_kind])
            spdx_doc.save(save_path, config.prettify_json)
            logging.debug(f"Successfully saved {save_path}")

    if warning_summary:
        logging.warning(warning_summary)
    if error_summary:
        logging.error(error_summary)
        if not config.write_output_on_error:
            logging.info(
                "Use --write-output-on-error to generate output documents even when errors occur. "
                "Note that in this case the generated SPDX documents may be incomplete."
            )
        sys.exit(1)


# Call main method
if __name__ == "__main__":
    main()
# First line of a .cmd file: "savedcmd_<target> := <command>" (older form: "cmd_<target> := ...").
SAVEDCMD_PATTERN = re.compile(r"^(saved)?cmd_.*?:=\s*(?P<full_command>.+)$")
# Second line of a full .cmd file: "source_<target> := <source file>".
SOURCE_PATTERN = re.compile(r"^source.*?:=\s*(?P<source_file>.+)$")


@dataclass
class CmdFile:
    """Parsed content of a `.cmd` file written during the kernel build."""

    cmd_file_path: "PathStr"
    savedcmd: str
    source: "PathStr | None" = None
    deps: list[str] = field(default_factory=list[str])
    make_rules: list[str] = field(default_factory=list[str])

    @classmethod
    def create(cls, cmd_file_path: "PathStr") -> "CmdFile | None":
        """
        Parse a .cmd file.

        Blank lines and comment lines (starting with '#') are ignored. The
        remaining lines are expected to have one of these shapes:

        1. Full cmd file
             line 0: the ``(saved)?cmd_...`` assignment holding the build command
             line 1: the ``source_...`` assignment holding the source file
             line 2: start of the ``deps_...`` assignment (no dependency on it)
             then:   one dependency per line, each ending with a backslash
             rest:   make rules
        2. Command-only cmd file: line 0 only.
        3. Single-dependency cmd file: line 0 plus one line naming the
           target and its single dependency.

        Args:
            cmd_file_path: absolute path to a .cmd file.

        Returns:
            The parsed cmd file, or None if the file does not start with a
            ``(saved)?cmd_`` assignment (including an empty file).
        """
        with open(cmd_file_path, "rt") as f:
            # Strip first so that indented comment lines are skipped as well.
            lines = [stripped for line in f if (stripped := line.strip()) and not stripped.startswith("#")]

        # savedcmd (an empty file has no first line to match against)
        match = SAVEDCMD_PATTERN.match(lines[0]) if lines else None
        if match is None:
            sbom_logging.error(
                "Skip parsing '{cmd_file_path}' because no 'savedcmd_' command was found.", cmd_file_path=cmd_file_path
            )
            return None
        savedcmd = match.group("full_command")

        # Command Only Cmd File
        if len(lines) == 1:
            return CmdFile(cmd_file_path, savedcmd)

        # Single Dependency Cmd File
        if len(lines) == 2:
            _, separator, dependency = lines[1].partition(":")
            # NOTE(review): a ':=' separator is tolerated in addition to ':' -
            # confirm against real single-dependency .cmd files.
            dep = dependency.removeprefix("=").strip()
            if not separator or not dep:
                sbom_logging.error(
                    "Skip parsing dependency {dep} because of unrecognized format",
                    dep=lines[1],
                )
                return CmdFile(cmd_file_path, savedcmd)
            return CmdFile(cmd_file_path, savedcmd, deps=[dep])

        # Full Cmd File
        # source
        source_match = SOURCE_PATTERN.match(lines[1])
        if source_match is None:
            sbom_logging.error(
                "Skip parsing '{cmd_file_path}' because no 'source_' entry was found.", cmd_file_path=cmd_file_path
            )
            return CmdFile(cmd_file_path, savedcmd)
        source = source_match.group("source_file")

        # deps
        # lines[2] holds the variable assignment but no actual dependency, so start at lines[3].
        deps: list[str] = []
        i = 3
        while i < len(lines) and lines[i].endswith("\\"):
            deps.append(lines[i][:-1].strip())
            i += 1

        # make_rules: everything after the backslash-continued dependency list
        make_rules = lines[i:]

        return CmdFile(cmd_file_path, savedcmd, source, deps, make_rules)

    def get_dependencies(
        self: "CmdFile", target_path: "PathStr", obj_tree: "PathStr", fail_on_unknown_build_command: bool
    ) -> "list[PathStr]":
        """
        Collect all dependencies required to build a target file from its cmd file.

        Inputs are taken from the saved build command and from the parsed
        ``deps`` list; '@' resolve files are expanded.

        Args:
            target_path: path to the target file relative to `obj_tree`.
            obj_tree: absolute path to the object tree.
            fail_on_unknown_build_command: Whether to fail if an unknown build command is encountered.

        Returns:
            list[PathStr]: dependency file paths relative to `obj_tree`.
        """
        input_files: list[PathStr] = [
            str(p) for p in parse_inputs_from_commands(self.savedcmd, fail_on_unknown_build_command)
        ]
        if self.deps:
            input_files += [str(p) for p in parse_cmd_file_deps(self.deps)]
        input_files = _expand_resolve_files(input_files, obj_tree)

        cmd_file_dependencies: list[PathStr] = []
        for input_file in input_files:
            # input files are either absolute or relative to the object tree
            if os.path.isabs(input_file):
                input_file = os.path.relpath(input_file, obj_tree)
            if input_file == target_path:
                # Skip the target file to prevent cycles. Some multi stage commands first create an
                # output and then pass it as input to the next command, e.g., objcopy.
                continue
            cmd_file_dependencies.append(input_file)

        return cmd_file_dependencies


def _expand_resolve_files(input_files: "list[PathStr]", obj_tree: "PathStr") -> "list[PathStr]":
    """
    Expand resolve files which reference additional files via '@' notation.

    Args:
        input_files: file paths relative to the object tree; a path starting with '@' refers to a
            file containing further file paths, each on a separate line.
        obj_tree: absolute path to the root of the object tree.

    Returns:
        list[PathStr]: flattened list of all input file paths, with nested '@' references resolved recursively.
    """
    expanded_input_files: list[PathStr] = []
    for input_file in input_files:
        if not input_file.startswith("@"):
            expanded_input_files.append(input_file)
            continue
        # Remove exactly the one '@' marker; lstrip("@") would also eat '@'
        # characters that belong to the file name itself.
        resolve_file_path = os.path.join(obj_tree, input_file.removeprefix("@"))
        with open(resolve_file_path, "rt") as f:
            resolve_file_content = [stripped for line in f if (stripped := line.strip())]
        expanded_input_files += _expand_resolve_files(resolve_file_content, obj_tree)
    return expanded_input_files
@dataclass
class IncbinDependency:
    """A child node that was pulled in by an `.incbin` statement, plus the statement text."""

    node: "CmdGraphNode"
    full_statement: str


class CmdGraphNodeConfig(Protocol):
    """Configuration attributes read while building cmd graph nodes."""

    obj_tree: "PathStr"
    src_tree: "PathStr"
    fail_on_unknown_build_command: bool


@dataclass
class CmdGraphNode:
    """A node in the cmd graph representing a single file and its dependencies."""

    # Absolute path to the file this node represents.
    absolute_path: "PathStr"

    # Parsed .cmd file describing how the file at absolute_path was built, or None if not available.
    cmd_file: "CmdFile | None" = None

    cmd_file_dependencies: list["CmdGraphNode"] = field(default_factory=list["CmdGraphNode"])
    incbin_dependencies: list[IncbinDependency] = field(default_factory=list[IncbinDependency])
    hardcoded_dependencies: list["CmdGraphNode"] = field(default_factory=list["CmdGraphNode"])

    @property
    def children(self) -> Iterator["CmdGraphNode"]:
        """
        Yield each direct dependency once.

        Order: cmd file dependencies, then incbin dependencies, then hardcoded
        dependencies; a node whose absolute path was already yielded is skipped.
        """
        yielded_paths: set[PathStr] = set()
        incbin_nodes = (dependency.node for dependency in self.incbin_dependencies)
        for child in chain(self.cmd_file_dependencies, incbin_nodes, self.hardcoded_dependencies):
            if child.absolute_path in yielded_paths:
                continue
            yielded_paths.add(child.absolute_path)
            yield child

    @classmethod
    def create(
        cls,
        target_path: "PathStr",
        config: CmdGraphNodeConfig,
        cache: "dict[PathStr, CmdGraphNode] | None" = None,
        depth: int = 0,
    ) -> "CmdGraphNode":
        """
        Recursively build the dependency graph below `target_path`.

        Dependencies come from the target's `.cmd` file, from the hardcoded
        dependency list and, for `.S` files, from `.incbin` statements.

        Args:
            target_path: Path to the target file relative to obj_tree.
            config: Config options.
            cache: Already created nodes keyed by absolute path; reusing them prevents cycles.
            depth: Internal parameter tracking the current recursion depth.

        Returns:
            CmdGraphNode: cmd graph node representing the target file.
        """
        if cache is None:
            cache = {}

        # Symlinks are resolved to their real location, all other paths are only normalized.
        joined_path = os.path.join(config.obj_tree, target_path)
        if os.path.islink(joined_path):
            target_path_absolute = os.path.realpath(joined_path)
        else:
            target_path_absolute = os.path.normpath(joined_path)

        known_node = cache.get(target_path_absolute)
        if known_node is not None:
            return known_node

        if depth == 0:
            logging.debug(f"Build node: {target_path}")

        cmd_file_path = _to_cmd_path(target_path_absolute)
        cmd_file = None
        if os.path.exists(cmd_file_path):
            cmd_file = CmdFile.create(cmd_file_path)
        node = CmdGraphNode(target_path_absolute, cmd_file)
        # Register the node before descending so that recursive lookups find it.
        cache[target_path_absolute] = node

        if not os.path.exists(target_path_absolute):
            # A missing file inside the object or source tree is an error, anything else a warning.
            inside_known_tree = is_relative_to(target_path_absolute, config.obj_tree) or is_relative_to(
                target_path_absolute, config.src_tree
            )
            report = sbom_logging.error if inside_known_tree else sbom_logging.warning
            report(
                "Skip parsing '{target_path_absolute}' because file does not exist",
                target_path_absolute=target_path_absolute,
            )
            return node

        # Child paths are always relative to the output tree.
        def _child(child_path: "PathStr") -> "CmdGraphNode":
            return CmdGraphNode.create(child_path, config, cache, depth + 1)

        hardcoded_paths = get_hardcoded_dependencies(target_path_absolute, config.obj_tree, config.src_tree)
        node.hardcoded_dependencies = [_child(path) for path in hardcoded_paths]

        if cmd_file is not None:
            cmd_file_paths = cmd_file.get_dependencies(
                target_path, config.obj_tree, config.fail_on_unknown_build_command
            )
            node.cmd_file_dependencies = [_child(path) for path in cmd_file_paths]

        if node.absolute_path.endswith(".S"):
            node.incbin_dependencies = [
                IncbinDependency(node=_child(statement.path), full_statement=statement.full_statement)
                for statement in parse_incbin_statements(node.absolute_path)
            ]

        return node


def _to_cmd_path(path: "PathStr") -> "PathStr":
    """Return the path of the `.cmd` file belonging to `path`: `<dir>/.<name>.cmd`."""
    file_name = os.path.basename(path)
    directory_prefix = path.removesuffix(file_name)
    return f"{directory_prefix}.{file_name}.cmd"


@dataclass
class CmdGraph:
    """Directed acyclic graph of build dependencies primarily inferred from .cmd files produced during kernel builds"""

    roots: list[CmdGraphNode] = field(default_factory=list[CmdGraphNode])

    @classmethod
    def create(cls, root_paths: "list[PathStr]", config: CmdGraphNodeConfig) -> "CmdGraph":
        """
        Recursively build a dependency graph starting from `root_paths`.

        Args:
            root_paths: List of paths to root outputs relative to obj_tree.
            config: Configuration options.

        Returns:
            CmdGraph: A graph of all build dependencies for the given root files.
        """
        # One cache shared by all roots so that common dependencies become the same node.
        shared_cache: dict[PathStr, CmdGraphNode] = {}
        root_nodes: list[CmdGraphNode] = []
        for root_path in root_paths:
            root_nodes.append(CmdGraphNode.create(root_path, config, shared_cache))
        return CmdGraph(root_nodes)

    def __iter__(self) -> Iterator[CmdGraphNode]:
        """Traverse the graph in breadth-first order, yielding each unique node."""
        seen_paths: set[PathStr] = set()
        pending: deque[CmdGraphNode] = deque(self.roots)
        while pending:
            current = pending.popleft()
            if current.absolute_path in seen_paths:
                continue

            seen_paths.add(current.absolute_path)
            pending.extend(current.children)
            yield current
# Match dependencies on config files
# Example match: "$(wildcard include/config/CONFIG_SOMETHING)"
CONFIG_PATTERN = re.compile(r"\$\(wildcard (include/config/[^)]+)\)")

# Match dependencies on the objtool binary
# Example match: "$(wildcard ./tools/objtool/objtool)"
OBJTOOL_PATTERN = re.compile(r"\$\(wildcard \./tools/objtool/objtool\)")

# Match any Makefile wildcard reference; the referenced path is captured as group "path"
# Example match: "$(wildcard path/to/file)"
WILDCARD_PATTERN = re.compile(r"\$\(wildcard (?P<path>[^)]+)\)")

# Match ordinary paths:
# - ^(\/)?: Optionally starts with a '/'
# - (([\w\-\., ]*)\/)*: Zero or more directory levels
# - [\w\-\., ]+$: Path component (file or directory)
# Example matches: "/foo/bar.c", "dir1/dir2/file.txt", "plainfile"
VALID_PATH_PATTERN = re.compile(r"^(\/)?(([\w\-\., ]*)\/)*[\w\-\., ]+$")


def parse_cmd_file_deps(deps: list[str]) -> "list[PathStr]":
    """
    Parse dependency strings of a .cmd file and return valid input file paths.

    Each dependency is stripped and then classified, in this order:
      * config or objtool wildcard references are dropped,
      * any other ``$(wildcard <path>)`` reference contributes ``<path>``,
      * an ordinary path is kept as is,
      * anything else is reported as an error and skipped.

    Args:
        deps: List of dependency strings as found in `.cmd` files.

    Returns:
        input_files: List of input file paths in the order they were given.
    """
    input_files: list[PathStr] = []
    for raw_dep in deps:
        dep = raw_dep.strip()
        if CONFIG_PATTERN.match(dep) or OBJTOOL_PATTERN.match(dep):
            # config paths like include/config/<CONFIG_NAME> and the objtool
            # binary should not be included in the graph
            continue
        wildcard_match = WILDCARD_PATTERN.match(dep)
        if wildcard_match:
            input_files.append(wildcard_match.group("path"))
        elif VALID_PATH_PATTERN.match(dep):
            input_files.append(dep)
        else:
            sbom_logging.error("Skip parsing dependency {dep} because of unrecognized format", dep=dep)
    return input_files
get_hardcoded_dependencies(path: PathStr, obj_tree: PathStr, src_tree: PathStr) -> list[PathStr]: + """ + Some files in the kernel build process are not tracked by the .cmd dependency mechanism. + Parsing these dependencies programmatically is too complex for the scope of this project. + Therefore, this function provides manually defined dependencies to be added to the build graph. + + Args: + path: absolute path to a file within the src tree or object tree. + obj_tree: absolute Path to the base directory of the object tree. + src_tree: absolute Path to the `linux` source directory. + + Returns: + list[PathStr]: A list of dependency file paths (relative to the object tree) required to build the file at the given path. + """ + if is_relative_to(path, obj_tree): + path = os.path.relpath(path, obj_tree) + elif is_relative_to(path, src_tree): + path = os.path.relpath(path, src_tree) + + if path not in HARDCODED_DEPENDENCIES: + return [] + + template_variables: dict[str, Callable[[], str | None]] = { + "arch": lambda: _get_arch(path), + } + + dependencies: list[PathStr] = [] + for dependency_template in HARDCODED_DEPENDENCIES[path]: + dependency = _evaluate_template(dependency_template, template_variables) + if dependency is None: + continue + if os.path.exists(os.path.join(obj_tree, dependency)): + dependencies.append(dependency) + elif os.path.exists(os.path.join(src_tree, dependency)): + dependencies.append(os.path.relpath(dependency, obj_tree)) + else: + sbom_logging.error( + "Skip hardcoded dependency '{dependency}' for '{path}' because the dependency lies neither in the src tree nor the object tree.", + dependency=dependency, + path=path, + ) + + return dependencies + + +def _evaluate_template(template: str, variables: dict[str, Callable[[], str | None]]) -> str | None: + for key, value_function in variables.items(): + template_key = "{" + key + "}" + if template_key in template: + value = value_function() + if value is None: + return None + template = 
def _get_arch(path: PathStr) -> str | None:
    """
    Return the value of `Environment.SRCARCH()`.

    If it is None, an error naming `path` is logged and None is returned.
    """
    srcarch = Environment.SRCARCH()
    if srcarch is None:
        sbom_logging.error(
            "Skipped architecture specific hardcoded dependency for '{path}' because the SRCARCH environment variable was not set.",
            path=path,
        )
        return None
    return srcarch


# The named group must be called "path": parse_incbin_statements reads match.group("path").
INCBIN_PATTERN = re.compile(r'\s*\.incbin\s+"(?P<path>[^"]+)"')
"""Regex pattern for matching `.incbin "<path>"` statements."""


@dataclass
class IncbinStatement:
    """A parsed `.incbin "<path>"` directive."""

    path: PathStr
    """path to the file referenced by the `.incbin` directive."""

    full_statement: str
    """Full `.incbin "<path>"` statement as it originally appeared in the file."""


def parse_incbin_statements(absolute_path: PathStr) -> list[IncbinStatement]:
    """
    Parses `.incbin` directives from an `.S` assembly file.

    Args:
        absolute_path: Absolute path to the `.S` assembly file.

    Returns:
        list[IncbinStatement]: Parsed `.incbin` statements in file order.
        `full_statement` is the matched text with surrounding whitespace stripped.
    """
    with open(absolute_path, "rt") as f:
        content = f.read()
    return [
        IncbinStatement(
            path=match.group("path"),
            full_statement=match.group(0).strip(),
        )
        for match in INCBIN_PATTERN.finditer(content)
    ]
class CmdParsingError(Exception):
    """Raised when a saved build command does not have the shape a parser expects."""

    def __init__(self, message: str):
        super().__init__(message)
        self.message = message


@dataclass
class Option:
    """A dash-prefixed token, with its value if one was attached."""

    name: str
    value: str | None = None


@dataclass
class Positional:
    """A token that does not start with '-' (the command itself or an argument)."""

    value: str


_SUBCOMMAND_PATTERN = re.compile(r"\$\$\(([^()]*)\)")
"""Pattern to match $$(...) blocks"""


def _tokenize_single_command(command: str, flag_options: list[str] | None = None) -> list[Union[Option, Positional]]:
    """
    Parse a shell command into a list of Options and Positionals.
    - Positional: the command and any positional arguments.
    - Options: handles flags and options with values provided as space-separated, or equals-sign
      (e.g., '--opt val', '--opt=val', '--flag').

    A dash-prefixed token is classified in this order:
    1. listed in `flag_options`           -> flag without value
    2. contains '='                       -> name/value split at the first '='
    3. followed by another dash token     -> flag without value
    4. followed by any other token        -> that token is consumed as the value

    Args:
        command: Command line string.
        flag_options: Options that are flags without values (e.g., '--verbose').

    Returns:
        List of `Option` and `Positional` objects in command order.

    Raises:
        CmdParsingError: If a trailing option is neither a known flag nor carries a value.
    """

    # Wrap all $$(...) blocks in double quotes to prevent shlex from splitting them.
    command_with_protected_subcommands = _SUBCOMMAND_PATTERN.sub(lambda m: f'"$$({m.group(1)})"', command)
    tokens = shlex.split(command_with_protected_subcommands)

    parsed: list[Option | Positional] = []
    i = 0
    while i < len(tokens):
        token = tokens[i]
        next_token = tokens[i + 1] if i + 1 < len(tokens) else None

        # Positional
        if not token.startswith("-"):
            parsed.append(Positional(token))
            i += 1
            continue

        # Explicitly declared flag (--flag)
        if flag_options and token in flag_options:
            parsed.append(Option(name=token))
            i += 1
            continue

        # Option with equals sign (--opt=val). Checked before the "next token is
        # an option" heuristic so the value is kept even when another option follows.
        if "=" in token:
            name, value = token.split("=", 1)
            parsed.append(Option(name=name, value=value))
            i += 1
            continue

        # Option without value because the next token is an option as well
        if next_token is not None and next_token.startswith("-"):
            parsed.append(Option(name=token))
            i += 1
            continue

        # Option with space-separated value (--opt val)
        if next_token is not None:
            parsed.append(Option(name=token, value=next_token))
            i += 2
            continue

        raise CmdParsingError(f"Unrecognized token: {token} in command {command}")

    return parsed


def _tokenize_single_command_positionals_only(command: str) -> list[str]:
    """
    Tokenize `command` and return its tokens, requiring that none of them is an option.

    Raises:
        CmdParsingError: If the command contains any dash-prefixed token.
    """
    command_parts = _tokenize_single_command(command)
    positionals = [p.value for p in command_parts if isinstance(p, Positional)]
    if len(positionals) != len(command_parts):
        raise CmdParsingError(
            f"Invalid command format: expected positional arguments only but got options in command {command}."
        )
    return positionals


def _parse_dd_command(command: str) -> list[PathStr]:
    """Return the `if=` operand of a `dd` command, or an empty list if there is none."""
    match = re.match(r"dd.*?if=(\S+)", command)
    if match:
        return [match.group(1)]
    return []


def _parse_cat_command(command: str) -> list[PathStr]:
    """Return every argument of a `cat` command as an input file."""
    positionals = _tokenize_single_command_positionals_only(command)
    # expect positionals to be ["cat", input1, input2, ...]
    return positionals[1:]
def _parse_compound_command(command: str) -> list[PathStr]:
    """
    Collect the input files of a `( ... ) > out` or `{ ... } > out` compound command.

    The text between the brackets is split into inner commands, and each one is
    handed to the first inner parser whose pattern matches at its start.
    If-blocks, inner commands without a matching parser, and inner commands whose
    parser raises CmdParsingError are logged as errors and skipped.

    Raises:
        CmdParsingError: If `command` does not have the bracketed-and-redirected shape.
    """

    def _parse_bin2c(inner: str) -> list[PathStr]:
        # bin2c reads stdin: the input is whatever follows the '<'
        redirected = inner.split("<")[1].strip()
        return [] if redirected == "/dev/null" else [redirected]

    inner_parsers: list[tuple[re.Pattern[str], Callable[[str], list[PathStr]]]] = [
        (re.compile(r"dd\b"), _parse_dd_command),
        (re.compile(r"cat.*?\|"), lambda c: _parse_cat_command(c.split("|")[0])),
        (re.compile(r"cat\b[^|>]*$"), _parse_cat_command),
        (re.compile(r"echo\b"), _parse_noop),
        (re.compile(r"\S+="), _parse_noop),
        (re.compile(r"printf\b"), _parse_noop),
        (re.compile(r"sed\b"), _parse_sed_command),
        (re.compile(r"(.*/)scripts/bin2c\s*<"), _parse_bin2c),
        (re.compile(r"^:$"), _parse_noop),
    ]

    compound = re.match(r"\s*[\(\{](.*)[\)\}]\s*>", command, re.DOTALL)
    if compound is None:
        raise CmdParsingError("No inner commands found for compound command")

    collected: list[PathStr] = []
    for inner_command in _split_commands(compound.group(1)):
        if isinstance(inner_command, IfBlock):
            sbom_logging.error(
                "Skip parsing inner command {inner_command} of compound command because IfBlock is not supported",
                inner_command=inner_command,
            )
            continue

        for pattern, candidate in inner_parsers:
            if pattern.match(inner_command):
                selected = candidate
                break
        else:
            sbom_logging.error(
                "Skip parsing inner command {inner_command} of compound command because no matching parser was found",
                inner_command=inner_command,
            )
            continue

        try:
            collected.extend(selected(inner_command))
        except CmdParsingError as e:
            sbom_logging.error(
                "Skip parsing inner command {inner_command} of compound command because of command parsing error: {error_message}",
                inner_command=inner_command,
                error_message=e.message,
            )
    return collected
def _parse_objcopy_command(command: str) -> list[PathStr]:
    """
    Return the input file of an `objcopy` command.

    Raises:
        CmdParsingError: If the command does not have 2 or 3 positional tokens.
    """
    command_parts = _tokenize_single_command(command, flag_options=["-S", "-w"])
    positionals = [part.value for part in command_parts if isinstance(part, Positional)]
    # expect positionals to be ['objcopy', input_file] or ['objcopy', input_file, output_file]
    if len(positionals) not in (2, 3):
        raise CmdParsingError(
            f"Invalid objcopy command format: expected 2 or 3 positional arguments, got {len(positionals)} ({positionals})"
        )
    return [positionals[1]]


def _parse_link_vmlinux_command(command: str) -> list[PathStr]:
    """
    For simplicity we do not parse the `scripts/link-vmlinux.sh` script.
    Instead the `vmlinux.a` dependency is just hardcoded for now.
    """
    return ["vmlinux.a"]


def _parse_noop(command: str) -> list[PathStr]:
    """
    No-op parser for commands with no input files (e.g., 'rm', 'true').
    Returns an empty list.
    """
    return []


def _parse_ar_command(command: str) -> list[PathStr]:
    """
    Return the members added by an `ar` command.

    Raises:
        CmdParsingError: If the command contains options or lacks the flags token.
    """
    positionals = _tokenize_single_command_positionals_only(command)
    # expect positionals to be ['ar', flags, output, input1, input2, ...]
    if len(positionals) < 2:
        # Report as a parsing error so callers' `except CmdParsingError` handles it.
        raise CmdParsingError(f"Invalid ar command format: missing flags in command {command}")
    flags = positionals[1]
    if "r" not in flags:
        # 'r' option indicates that new files are added to the archive.
        # If this option is missing we won't find any relevant input files.
        return []
    return positionals[3:]


def _parse_ar_piped_xargs_command(command: str) -> list[PathStr]:
    """
    Return the files that a `printf "<prefix>%s " f1 f2 ... | xargs ar ...` pipeline feeds to `ar`.

    Raises:
        CmdParsingError: If the printf part contains options or lacks the format token.
    """
    printf_command, _ = command.split("|", 1)
    positionals = _tokenize_single_command_positionals_only(printf_command.strip())
    # expect positionals to be ['printf', '{prefix_path}%s ', input1, input2, ...]
    if len(positionals) < 2:
        raise CmdParsingError(f"Invalid printf command format: missing format string in command {command}")
    # Remove the literal "%s" suffix. str.rstrip("%s ") would treat the argument as a
    # character set and also eat trailing 's'/'%' characters that belong to the prefix.
    prefix_path = positionals[1].rstrip(" ").removesuffix("%s")
    return [f"{prefix_path}{filename}" for filename in positionals[2:]]
def _parse_gcc_or_clang_command(command: str) -> list[PathStr]:
    """
    Return the inputs of a gcc/clang invocation.

    Compile mode: the last non-option token ending in `.c` or `.S`.
    Otherwise (link mode): every token ending in `.o`.
    """
    tokens = shlex.split(command)
    source = next(
        (token for token in reversed(tokens) if not token.startswith("-") and token.endswith((".c", ".S"))),
        None,
    )
    if source is not None:
        return [source]

    # linking mode: expect all .o files to be the inputs
    return [token for token in tokens if token.endswith(".o")]


def _parse_rustc_command(command: str) -> list[PathStr]:
    """
    Return the last non-option token ending in `.rs`.

    Raises:
        CmdParsingError: If no such token exists.
    """
    for token in reversed(shlex.split(command)):
        if token.endswith(".rs") and not token.startswith("-"):
            return [token]
    raise CmdParsingError("Could not find .rs input source file")


def _parse_rustdoc_command(command: str) -> list[PathStr]:
    """
    Return the last non-option token ending in `.rs`.

    Raises:
        CmdParsingError: If no such token exists.
    """
    for token in reversed(shlex.split(command)):
        if token.endswith(".rs") and not token.startswith("-"):
            return [token]
    raise CmdParsingError("Could not find .rs input source file")


def _parse_syscallhdr_command(command: str) -> list[PathStr]:
    """Return the input table of a `sh .../syscallhdr.sh` call (third positional token)."""
    tokens = _tokenize_single_command(command.strip(), flag_options=["--emit-nr"])
    # expect positionals to be ["sh", path/to/syscallhdr.sh, input, output]
    positionals = [token.value for token in tokens if isinstance(token, Positional)]
    return [positionals[2]]


def _parse_syscalltbl_command(command: str) -> list[PathStr]:
    """Return the input table of a `sh .../syscalltbl.sh` call (third positional token)."""
    tokens = _tokenize_single_command(command.strip())
    # expect positionals to be ["sh", path/to/syscalltbl.sh, input, output]
    positionals = [token.value for token in tokens if isinstance(token, Positional)]
    return [positionals[2]]


def _parse_mkcapflags_command(command: str) -> list[PathStr]:
    """Return the two inputs of a `sh .../mkcapflags.sh` call (fourth and fifth token)."""
    positionals = _tokenize_single_command_positionals_only(command)
    # expect positionals to be ["sh", path/to/mkcapflags.sh, output, input1, input2]
    first_input = positionals[3]
    second_input = positionals[4]
    return [first_input, second_input]
def _parse_orc_hash_command(command: str) -> list[PathStr]:
    """Return the redirected input of a `sh .../orc_hash.sh < input > output` call."""
    tokens = _tokenize_single_command_positionals_only(command)
    # expect tokens to be ["sh", path/to/orc_hash.sh, '<', input, '>', output]
    redirected_input = tokens[3]
    return [redirected_input]


def _parse_xen_hypercalls_command(command: str) -> list[PathStr]:
    """Return every token after the output of a `sh .../xen-hypercalls.sh` call."""
    tokens = _tokenize_single_command_positionals_only(command)
    # expect tokens to be ["sh", path/to/xen-hypercalls.sh, output, input1, input2, ...]
    return tokens[3:]


def _parse_gen_initramfs_command(command: str) -> list[PathStr]:
    """Return every positional token after the script of a `sh .../gen_initramfs.sh` call."""
    # expect positionals to be ["sh", path/to/gen_initramfs.sh, input1, input2, ...]
    positionals = [
        token.value for token in _tokenize_single_command(command) if isinstance(token, Positional)
    ]
    return positionals[2:]


def _parse_vdso2c_command(command: str) -> list[PathStr]:
    """Return the raw and the stripped input of a `vdso2c` call."""
    tokens = _tokenize_single_command_positionals_only(command)
    # expect tokens to be ['vdso2c', raw_input, stripped_input, output]
    raw_input = tokens[1]
    stripped_input = tokens[2]
    return [raw_input, stripped_input]


def _parse_ld_command(command: str) -> list[PathStr]:
    """
    Return every positional token after the linker name.

    The listed flags take no value; any other option is classified by the
    tokenizer's generic rules.
    """
    valueless_flags = [
        "-shared",
        "--no-undefined",
        "--eh-frame-hdr",
        "-Bsymbolic",
        "-r",
        "--no-ld-generated-unwind-info",
        "--no-dynamic-linker",
        "-pie",
        "--no-dynamic-linker--whole-archive",
        "--whole-archive",
        "--no-whole-archive",
        "--start-group",
        "--end-group",
    ]
    tokens = _tokenize_single_command(command=command.strip(), flag_options=valueless_flags)
    # expect positionals to be ["ld", input1, input2, ...]
    positionals = [token.value for token in tokens if isinstance(token, Positional)]
    return positionals[1:]
def _parse_sed_command(command: str) -> list[PathStr]:
    """Return the last token of a `sed` command as its input, unless it is `/dev/null`."""
    command_parts = shlex.split(command)
    # expect command parts to be ["sed", *, input]
    input_file = command_parts[-1]
    if input_file == "/dev/null":
        return []
    return [input_file]


def _parse_awk(command: str) -> list[PathStr]:
    """Return every positional token after `awk`."""
    command_parts = _tokenize_single_command(command)
    positionals = [p.value for p in command_parts if isinstance(p, Positional)]
    # expect positionals to be ["awk", input1, input2, ...]
    return positionals[1:]


def _parse_nm_piped_command(command: str) -> list[PathStr]:
    """Return the files listed by the `nm` stage of an `nm ... | ...` pipeline."""
    nm_command, _ = command.split("|", 1)
    command_parts = _tokenize_single_command(
        command=nm_command.strip(),
        # Flags must be spelled with their dash to match a token: with a bare "p",
        # "-p <file>" would have swallowed the file as the option's value.
        flag_options=["-p", "--defined-only"],
    )
    positionals = [p.value for p in command_parts if isinstance(p, Positional)]
    # expect positionals to be ["nm", input1, input2, ...]
    return positionals[1:]


def _parse_pnm_to_logo_command(command: str) -> list[PathStr]:
    """Return the last token of a `pnmtologo` command as its input."""
    command_parts = shlex.split(command)
    # expect command parts to be ["pnmtologo", <options>, input]
    return [command_parts[-1]]


def _parse_relacheck(command: str) -> list[PathStr]:
    """
    Return the checked file of a `relacheck` call.

    Raises:
        CmdParsingError: If the command contains options or has no input token.
    """
    positionals = _tokenize_single_command_positionals_only(command)
    # expect positionals to be ["relacheck", input, log_reference]
    if len(positionals) < 2:
        raise CmdParsingError(f"Invalid relacheck command format: missing input in command {command}")
    return [positionals[1]]


def _parse_perl_command(command: str) -> list[PathStr]:
    """
    Return the script run by a `perl` command.

    Raises:
        CmdParsingError: If the command contains options or has no script token.
    """
    positionals = _tokenize_single_command_positionals_only(command.strip())
    # expect positionals to be ["perl", input]
    if len(positionals) < 2:
        raise CmdParsingError(f"Invalid perl command format: missing input in command {command}")
    return [positionals[1]]


def _parse_strip_command(command: str) -> list[PathStr]:
    """Return every positional token after `strip`."""
    command_parts = _tokenize_single_command(command, flag_options=["--strip-debug"])
    positionals = [p.value for p in command_parts if isinstance(p, Positional)]
    # expect positionals to be ["strip", input1, input2, ...]
    return positionals[1:]
def _parse_mkpiggy_command(command: str) -> list[PathStr]:
    """
    Return the input of a `mkpiggy input > output` call.

    Raises:
        CmdParsingError: If the part before '>' contains options or has no input token.
    """
    mkpiggy_command, _ = command.split(">", 1)
    positionals = _tokenize_single_command_positionals_only(mkpiggy_command)
    # expect positionals to be ["mkpiggy", input]
    if len(positionals) < 2:
        raise CmdParsingError(f"Invalid mkpiggy command format: missing input in command {command}")
    return [positionals[1]]


def _parse_relocs_command(command: str) -> list[PathStr]:
    """Return the last token before the '>' of a redirected `relocs` call, else nothing."""
    if ">" not in command:
        # Only consider relocs commands that redirect output to a file.
        # If there's no redirection, we assume it produces no output file and therefore has no input we care about.
        return []
    relocs_command, _ = command.split(">", 1)
    command_parts = shlex.split(relocs_command)
    # expect command_parts to be ["relocs", options, input]
    return [command_parts[-1]]


def _parse_mk_elfconfig_command(command: str) -> list[PathStr]:
    """
    Return the redirected input of a `mk_elfconfig < input > output` call.

    Raises:
        CmdParsingError: If the command contains options or has fewer than three tokens.
    """
    positionals = _tokenize_single_command_positionals_only(command)
    # expect positionals to be ["mk_elfconfig", "<", input, ">", output]
    if len(positionals) < 3:
        raise CmdParsingError(f"Invalid mk_elfconfig command format: missing input in command {command}")
    return [positionals[2]]


def _parse_flex_command(command: str) -> list[PathStr]:
    """
    Return the last non-option token ending in `.l`.

    Raises:
        CmdParsingError: If no such token exists.
    """
    parts = shlex.split(command)
    # expect last positional argument ending in `.l` to be the input file
    for part in reversed(parts):
        if not part.startswith("-") and part.endswith(".l"):
            return [part]
    raise CmdParsingError("Could not find .l input source file in command")


def _parse_bison_command(command: str) -> list[PathStr]:
    """
    Return the last non-option token ending in `.y`.

    Raises:
        CmdParsingError: If no such token exists.
    """
    parts = shlex.split(command)
    # expect last positional argument ending in `.y` to be the input file
    for part in reversed(parts):
        if not part.startswith("-") and part.endswith(".y"):
            return [part]
    raise CmdParsingError("Could not find .y input source file in command")


def _parse_tools_build_command(command: str) -> list[PathStr]:
    """Return every token between the tool name and the trailing output."""
    positionals = _tokenize_single_command_positionals_only(command)
    # expect positionals to be ["tools/build", "input1", "input2", "input3", "output"]
    return positionals[1:-1]


def _parse_extract_cert_command(command: str) -> list[PathStr]:
    """
    Return the certificate source of an `extract-cert` call.

    An empty source argument (`extract-cert "" out`) yields no input.

    Raises:
        CmdParsingError: If the command has no source argument at all.
    """
    command_parts = shlex.split(command)
    # expect command parts to be [path/to/extract-cert, input, output]
    if len(command_parts) < 2:
        raise CmdParsingError(f"Invalid extract-cert command format: missing input in command {command}")
    input_file = command_parts[1]
    if not input_file:
        return []
    return [input_file]
def _parse_dtc_command(command: str) -> list[PathStr]:
    """
    Return the device tree source passed to `dtc`.

    Every `-Wno-*` token is treated as a value-less flag.

    Raises:
        CmdParsingError: If no positional token follows the `dtc` binary.
    """
    wno_flags = [command_part for command_part in shlex.split(command) if command_part.startswith("-Wno-")]
    command_parts = _tokenize_single_command(command, flag_options=wno_flags)
    positionals = [p.value for p in command_parts if isinstance(p, Positional)]
    # expect positionals to be [path/to/dtc, input]
    if len(positionals) < 2:
        raise CmdParsingError(f"Invalid dtc command format: missing input in command {command}")
    return [positionals[1]]


def _parse_bindgen_command(command: str) -> list[PathStr]:
    """Return every token of a `bindgen` command that ends in `.h`."""
    command_parts = shlex.split(command)
    header_file_input_paths = [part for part in command_parts if part.endswith(".h")]
    return header_file_input_paths


def _parse_gen_header(command: str) -> list[PathStr]:
    """
    Return the argument of the first `--xml` option of a `gen_header.py` call.

    Raises:
        CmdParsingError: If `--xml` is absent or is the last token.
    """
    command_parts = shlex.split(command)
    # expect command parts to be ["python3", path/to/gen_headers.py, ..., "--xml", input]
    try:
        xml_index = command_parts.index("--xml")
    except ValueError:
        # Surface as a parsing error instead of leaking StopIteration/IndexError.
        raise CmdParsingError(f"Could not find --xml option in command {command}") from None
    if xml_index + 1 >= len(command_parts):
        raise CmdParsingError(f"Missing value for --xml option in command {command}")
    return [command_parts[xml_index + 1]]
# Command parser registry
#
# Maps a pattern to the parser that extracts the input files of a matching command.
# ORDER MATTERS: lookups take the first entry whose pattern matches at the start of
# the command (`pattern.match`), so specific patterns must precede the general ones
# that would also match (e.g. "^sed.*?>" before "^sed\b", the piped "printf ... | xargs ar"
# form before plain "ar").
# Entries mapped to `_parse_noop` are recognised commands that contribute no input files.
SINGLE_COMMAND_PARSERS: list[tuple[re.Pattern[str], Callable[[str], list[PathStr]]]] = [
    # Compound commands
    (re.compile(r"\(.*?\)\s*>", re.DOTALL), _parse_compound_command),
    (re.compile(r"\{.*?\}\s*>", re.DOTALL), _parse_compound_command),
    # Standard Unix utilities and system tools
    (re.compile(r"^rm\b"), _parse_noop),
    (re.compile(r"^mkdir\b"), _parse_noop),
    (re.compile(r"^touch\b"), _parse_noop),
    # only the part before the first '|' / '>' is handed to the cat parser
    (re.compile(r"^cat\b.*?[\|>]"), lambda c: _parse_cat_command(c.split("|")[0].split(">")[0])),
    (re.compile(r"^echo[^|]*$"), _parse_noop),
    # sed with redirected output: parse the part before '>'; any other sed is ignored
    (re.compile(r"^sed.*?>"), lambda c: _parse_sed_command(c.split(">")[0])),
    (re.compile(r"^sed\b"), _parse_noop),
    # awk reading from '<': the input is the text between '<' and '>'
    (re.compile(r"^awk.*?<.*?>"), lambda c: [c.split("<")[1].split(">")[0]]),
    (re.compile(r"^awk.*?>"), lambda c: _parse_awk(c.split(">")[0])),
    (re.compile(r"^(/bin/)?true\b"), _parse_noop),
    (re.compile(r"^(/bin/)?false\b"), _parse_noop),
    (re.compile(r"^openssl\s+req.*?-new.*?-keyout"), _parse_noop),
    # Compilers and code generators
    # (C/LLVM toolchain, Rust, Flex/Bison, Bindgen, Perl, etc.)
    # the optional "([^\s]+-)?" prefix admits cross-toolchain names such as "<triplet>-gcc"
    (re.compile(r"^([^\s]+-)?(gcc|clang)\b"), _parse_gcc_or_clang_command),
    (re.compile(r"^([^\s]+-)?ld(\.bfd)?\b"), _parse_ld_command),
    (re.compile(r"^printf\b.*\| xargs ([^\s]+-)?ar\b"), _parse_ar_piped_xargs_command),
    (re.compile(r"^([^\s]+-)?ar\b"), _parse_ar_command),
    (re.compile(r"^([^\s]+-)?nm\b.*?\|"), _parse_nm_piped_command),
    (re.compile(r"^([^\s]+-)?objcopy\b"), _parse_objcopy_command),
    (re.compile(r"^([^\s]+-)?strip\b"), _parse_strip_command),
    (re.compile(r".*?rustc\b"), _parse_rustc_command),
    (re.compile(r".*?rustdoc\b"), _parse_rustdoc_command),
    (re.compile(r"^flex\b"), _parse_flex_command),
    (re.compile(r"^bison\b"), _parse_bison_command),
    (re.compile(r"^bindgen\b"), _parse_bindgen_command),
    (re.compile(r"^perl\b"), _parse_perl_command),
    # Kernel-specific build scripts and tools
    (re.compile(r"^(.*/)?link-vmlinux\.sh\b"), _parse_link_vmlinux_command),
    (re.compile(r"sh (.*/)?syscallhdr\.sh\b"), _parse_syscallhdr_command),
    (re.compile(r"sh (.*/)?syscalltbl\.sh\b"), _parse_syscalltbl_command),
    (re.compile(r"sh (.*/)?mkcapflags\.sh\b"), _parse_mkcapflags_command),
    (re.compile(r"sh (.*/)?orc_hash\.sh\b"), _parse_orc_hash_command),
    (re.compile(r"sh (.*/)?xen-hypercalls\.sh\b"), _parse_xen_hypercalls_command),
    (re.compile(r"sh (.*/)?gen_initramfs\.sh\b"), _parse_gen_initramfs_command),
    (re.compile(r"sh (.*/)?checkundef\.sh\b"), _parse_noop),
    (re.compile(r"(.*/)?vdso2c\b"), _parse_vdso2c_command),
    (re.compile(r"^(.*/)?mkpiggy.*?>"), _parse_mkpiggy_command),
    (re.compile(r"^(.*/)?relocs\b"), _parse_relocs_command),
    (re.compile(r"^(.*/)?mk_elfconfig.*?<.*?>"), _parse_mk_elfconfig_command),
    (re.compile(r"^(.*/)?tools/build\b"), _parse_tools_build_command),
    (re.compile(r"^(.*/)?certs/extract-cert"), _parse_extract_cert_command),
    (re.compile(r"^(.*/)?scripts/dtc/dtc\b"), _parse_dtc_command),
    (re.compile(r"^(.*/)?pnmtologo\b"), _parse_pnm_to_logo_command),
    (re.compile(r"^(.*/)?kernel/pi/relacheck"), _parse_relacheck),
    # mkregtable: the input is the second space-separated token
    (re.compile(r"^drivers/gpu/drm/radeon/mkregtable"), lambda c: [c.split(" ")[1]]),
    (re.compile(r"(.*/)?genheaders\b"), _parse_noop),
    (re.compile(r"^(.*/)?mkcpustr\s+>"), _parse_noop),
    # NOTE(review): unlike its neighbours the directory prefix is mandatory here
    # ("(.*/)" without "?") - confirm this is intended.
    (re.compile(r"^(.*/)polgen\b"), _parse_noop),
    (re.compile(r"make -f .*/arch/x86/Makefile\.postlink"), _parse_noop),
    (re.compile(r"^(.*/)?raid6/mktables\s+>"), _parse_noop),
    (re.compile(r"^(.*/)?objtool\b"), _parse_noop),
    (re.compile(r"^(.*/)?module/gen_test_kallsyms.sh"), _parse_noop),
    (re.compile(r"^(.*/)?gen_header.py"), _parse_gen_header),
    (re.compile(r"^(.*/)?scripts/rustdoc_test_gen"), _parse_noop),
]
# If Block pattern to match a simple, single-level if-then-fi block. Nested If blocks are not supported.
IF_BLOCK_PATTERN = re.compile(
    r"""
    ^if(.*?);\s*      # Match 'if <condition>;' (non-greedy)
    then(.*?);\s*     # Match 'then <statement>;' (non-greedy)
    fi\b              # Match 'fi'
    """,
    re.VERBOSE,
)


@dataclass
class IfBlock:
    """A single-level shell `if <condition>; then <then_statement>; fi` block."""

    condition: str
    then_statement: str


def _unwrap_outer_parentheses(s: str) -> str:
    """
    Strip `s` and remove every pair of parentheses that encloses the whole string.

    "((a; b))" becomes "a; b", while "(a) && (b)" is returned unchanged because
    its first '(' is closed before the end of the string.
    """
    s = s.strip()
    if not (s.startswith("(") and s.endswith(")")):
        return s

    count = 0
    for i, char in enumerate(s):
        if char == "(":
            count += 1
        elif char == ")":
            count -= 1
        # If count is 0 before the end, outer parentheses don't match
        if count == 0 and i != len(s) - 1:
            return s

    # outer parentheses do match, unwrap once
    return _unwrap_outer_parentheses(s[1:-1])


def _find_first_top_level_command_separator(
    commands: str, separators: list[str] | tuple[str, ...] = (";", "&&")
) -> tuple[int | None, int | None]:
    """
    Find the first separator that is not inside quotes, parentheses or curly braces.

    Args:
        commands: Command string to scan.
        separators: Separator strings to look for, tried in the given order at each position.

    Returns:
        (position, length) of the separator found, or (None, None) if there is none.
    """
    in_single_quote = False
    in_double_quote = False
    in_curly_braces = 0
    in_braces = 0
    for i, char in enumerate(commands):
        if char == "'" and not in_double_quote:
            # Toggle single quote state (unless inside double quotes)
            in_single_quote = not in_single_quote
        elif char == '"' and not in_single_quote:
            # Toggle double quote state (unless inside single quotes)
            in_double_quote = not in_double_quote

        if in_single_quote or in_double_quote:
            continue

        # Track nesting depth of {...} and (...)
        if char == "{":
            in_curly_braces += 1
        if char == "}":
            in_curly_braces -= 1

        if char == "(":
            in_braces += 1
        if char == ")":
            in_braces -= 1

        if in_curly_braces > 0 or in_braces > 0:
            continue

        # return found separator position and separator length
        for separator in separators:
            if commands[i : i + len(separator)] == separator:
                return i, len(separator)

    return None, None


def _split_commands(commands: str) -> list[str | IfBlock]:
    """
    Splits a string of command-line commands into individual parts.

    This function handles:
    - Top-level command separators (e.g., `;` and `&&`) to split multiple commands.
    - Conditional if-blocks, returning them as `IfBlock` instances.
    - Preserves the order of commands and trims whitespace.

    Args:
        commands (str): The raw command string.

    Returns:
        list[str | IfBlock]: A list of single commands or `IfBlock` objects.
    """
    single_commands: list[str | IfBlock] = []
    remaining_commands = _unwrap_outer_parentheses(commands)
    while remaining_commands:
        remaining_commands = remaining_commands.strip()

        # if block
        matched_if = IF_BLOCK_PATTERN.match(remaining_commands)
        if matched_if:
            condition, then_statement = matched_if.groups()
            single_commands.append(IfBlock(condition.strip(), then_statement.strip()))
            full_matched = matched_if.group(0)
            # drop the block and the separator that follows it
            remaining_commands = remaining_commands.removeprefix(full_matched).lstrip("; \n")
            continue

        # command until next separator
        separator_position, separator_length = _find_first_top_level_command_separator(remaining_commands)
        if separator_position is not None and separator_length is not None:
            single_commands.append(remaining_commands[:separator_position].strip())
            remaining_commands = remaining_commands[separator_position + separator_length :].strip()
            continue

        # single last command
        single_commands.append(remaining_commands)
        break

    return single_commands
def parse_inputs_from_commands(commands: str, fail_on_unknown_build_command: bool) -> list[PathStr]:
    """
    Extract input files referenced in a set of command-line commands.

    The commands are split into single commands; each is passed to the first
    parser in SINGLE_COMMAND_PARSERS whose pattern matches it. The `then` statement
    of an if-block is parsed recursively, but any inputs found there are only
    reported, not returned.

    Args:
        commands (str): Command line expression to parse.
        fail_on_unknown_build_command (bool): Whether to fail if an unknown build command is encountered. If False, errors are logged as warnings.

    Returns:
        list[PathStr]: List of input file paths required by the commands, each
        stripped of surrounding whitespace and trailing '/'.
    """
    # Problems are reported as errors or warnings depending on the strictness flag.
    report = sbom_logging.error if fail_on_unknown_build_command else sbom_logging.warning

    collected: list[PathStr] = []
    for single_command in _split_commands(commands):
        if isinstance(single_command, IfBlock):
            nested_inputs = parse_inputs_from_commands(single_command.then_statement, fail_on_unknown_build_command)
            if nested_inputs:
                report(
                    "Skipped parsing command {then_statement} because input files in IfBlock 'then' statement are not supported",
                    then_statement=single_command.then_statement,
                )
            continue

        for pattern, parser in SINGLE_COMMAND_PARSERS:
            if pattern.match(single_command):
                break
        else:
            report(
                "Skipped parsing command {single_command} because no matching parser was found",
                single_command=single_command,
            )
            continue

        try:
            collected.extend(parser(single_command))
        except CmdParsingError as e:
            report(
                "Skipped parsing command {single_command} because of command parsing error: {error_message}",
                single_command=single_command,
                error_message=e.message,
            )

    return [path.strip().rstrip("/") for path in collected]


class KernelSpdxDocumentKind(Enum):
    """The three kinds of SPDX documents: source, build and output."""

    SOURCE = "source"
    BUILD = "build"
    OUTPUT = "output"
directory.""" + + obj_tree: PathStr + """Absolute path to the build output directory.""" + + root_paths: list[PathStr] + """List of paths to root outputs (relative to obj_tree) to base the SBOM on.""" + + generate_spdx: bool + """Whether to generate SPDX SBOM documents. If False, no SPDX files are created.""" + + spdx_file_names: dict[KernelSpdxDocumentKind, str] + """If `generate_spdx` is True, defines the file names for each SPDX SBOM kind + (source, build, output) to store on disk.""" + + generate_used_files: bool + """Whether to generate a flat list of all source files used in the build. + If False, no used-files document is created.""" + + used_files_file_name: str + """If `generate_used_files` is True, specifies the file name for the used-files document.""" + + output_directory: PathStr + """Path to the directory where the generated output documents will be saved.""" + + debug: bool + """Whether to enable debug logging.""" + + fail_on_unknown_build_command: bool + """Whether to fail if an unknown build command is encountered in a .cmd file.""" + + write_output_on_error: bool + """Whether to write output documents even if errors occur.""" + + created: datetime + """Datetime to use for the SPDX created property of the CreationInfo element.""" + + spdxId_prefix: str + """Prefix to use for all SPDX element IDs.""" + + build_type: str + """SPDX buildType property to use for all Build elements.""" + + build_id: str | None + """SPDX buildId property to use for all Build elements.""" + + package_license: str + """License expression applied to all SPDX Packages.""" + + package_version: str | None + """Version string applied to all SPDX Packages.""" + + package_copyright_text: str | None + """Copyright text applied to all SPDX Packages.""" + + prettify_json: bool + """Whether to pretty-print generated SPDX JSON documents.""" + + +def _parse_cli_arguments() -> dict[str, Any]: + """ + Parse command-line arguments using argparse. 
+ + Returns: + Dictionary of parsed arguments. + """ + parser = argparse.ArgumentParser( + formatter_class=argparse.RawTextHelpFormatter, + description="Generate SPDX SBOM documents for kernel builds", + ) + parser.add_argument( + "--src-tree", + default="../linux", + help="Path to the kernel source tree (default: ../linux)", + ) + parser.add_argument( + "--obj-tree", + default="../linux/kernel_build", + help="Path to the build output directory (default: ../linux/kernel_build)", + ) + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument( + "--roots", + nargs="+", + default="arch/x86/boot/bzImage", + help="Space-separated list of paths relative to obj-tree for which the SBOM will be created.\n" + "Cannot be used together with --roots-file. (default: arch/x86/boot/bzImage)", + ) + group.add_argument( + "--roots-file", + help="Path to a file containing the root paths (one per line). Cannot be used together with --roots.", + ) + parser.add_argument( + "--generate-spdx", + action="store_true", + default=False, + help=( + "Whether to create sbom-source.spdx.json, sbom-build.spdx.json and " + "sbom-output.spdx.json documents (default: False)" + ), + ) + parser.add_argument( + "--generate-used-files", + action="store_true", + default=False, + help=( + "Whether to create the sbom.used-files.txt file, a flat list of all " + "source files used for the kernel build.\n" + "If src-tree and obj-tree are equal it is not possible to reliably " + "classify source files.\n" + "In this case sbom.used-files.txt will contain all files used for the " + "kernel build including all build artifacts. 
(default: False)" + ), + ) + parser.add_argument( + "--output-directory", + default=".", + help="Path to the directory where the generated output documents will be stored (default: .)", + ) + parser.add_argument( + "--debug", + action="store_true", + default=False, + help="Enable debug logs (default: False)", + ) + + # Error handling settings + parser.add_argument( + "--do-not-fail-on-unknown-build-command", + action="store_true", + default=False, + help=( + "Whether to fail if an unknown build command is encountered in a .cmd file.\n" + "If set to True, errors are logged as warnings instead. (default: False)" + ), + ) + parser.add_argument( + "--write-output-on-error", + action="store_true", + default=False, + help=( + "Write output documents even if errors occur. The resulting documents " + "may be incomplete.\n" + "A summary of warnings and errors can be found in the 'comment' property " + "of the CreationInfo element. (default: False)" + ), + ) + + # SPDX specific options + spdx_group = parser.add_argument_group("SPDX options", "Options for customizing SPDX document generation") + spdx_group.add_argument( + "--created", + default=None, + help=( + "The SPDX created property to use for the CreationInfo element in " + "ISO format (YYYY-MM-DD [HH:MM:SS]).\n" + "If not provided the last modification time of the first root output " + "is used. (default: None)" + ), + ) + spdx_group.add_argument( + "--spdxId-prefix", + default="urn:spdx.dev:", + help="The prefix to use for all spdxId properties. (default: urn:spdx.dev:)", + ) + spdx_group.add_argument( + "--build-type", + default="urn:spdx.dev:Kbuild", + help="The SPDX buildType property to use for all Build elements. (default: urn:spdx.dev:Kbuild)", + ) + spdx_group.add_argument( + "--build-id", + default=None, + help="The SPDX buildId property to use for all Build elements.\n" + "If not provided the spdxId of the high level Build element is used as the buildId. 
def get_config() -> KernelSbomConfig:
    """
    Parse command-line arguments and construct the configuration object.

    Root paths are taken from ``--roots`` or, if ``--roots-file`` is given,
    from that file (one path per line; surrounding whitespace is stripped and
    blank lines are ignored). The source tree, the object tree and all root
    paths are checked with ``_validate_path_arguments``.

    If ``--created`` is not given, the modification time of the first root
    artifact is used. If ``--package-copyright-text`` is not given and a
    ``COPYING`` file exists in the source tree, the content of that file is
    used as copyright text.

    Returns:
        KernelSbomConfig: Configuration object with all settings for SBOM generation.

    Raises:
        argparse.ArgumentTypeError: If ``--created`` is not a valid ISO date,
            or if ``--created`` is omitted and there is no root path to take
            the creation time from.
    """
    # Parse cli arguments
    args = _parse_cli_arguments()

    # Extract and validate cli arguments
    src_tree = os.path.realpath(args["src_tree"])
    obj_tree = os.path.realpath(args["obj_tree"])
    if args["roots_file"]:
        with open(args["roots_file"], "rt", encoding="utf-8") as f:
            # Ignore blank lines: an empty root would be joined to obj_tree,
            # i.e. point at the object tree itself, and pass the existence check.
            root_paths = [root for line in f if (root := line.strip())]
    else:
        root_paths = args["roots"]
    _validate_path_arguments(src_tree, obj_tree, root_paths)

    generate_spdx = args["generate_spdx"]
    generate_used_files = args["generate_used_files"]
    output_directory = os.path.realpath(args["output_directory"])
    debug = args["debug"]

    fail_on_unknown_build_command = not args["do_not_fail_on_unknown_build_command"]
    write_output_on_error = args["write_output_on_error"]

    if args["created"] is None:
        if not root_paths:
            # Without this guard the lookup below fails with a bare IndexError.
            raise argparse.ArgumentTypeError(
                "Cannot derive the creation time because no root paths were provided. "
                "Provide at least one root path or use '--created'."
            )
        created = datetime.fromtimestamp(os.path.getmtime(os.path.join(obj_tree, root_paths[0])))
    else:
        try:
            created = datetime.fromisoformat(args["created"])
        except ValueError as err:
            raise argparse.ArgumentTypeError(
                f"Invalid date format for argument '--created': '{args['created']}'. "
                "Expected ISO format (YYYY-MM-DD [HH:MM:SS])."
            ) from err
    spdxId_prefix = args["spdxId_prefix"]
    build_type = args["build_type"]
    build_id = args["build_id"]
    package_license = args["package_license"]
    package_version = args["package_version"]
    package_copyright_text: str | None = None
    if args["package_copyright_text"] is not None:
        package_copyright_text = args["package_copyright_text"]
    elif os.path.isfile(copying_path := os.path.join(src_tree, "COPYING")):
        with open(copying_path, "r", encoding="utf-8") as f:
            package_copyright_text = f.read()
    prettify_json = args["prettify_json"]

    # Hardcoded config
    spdx_file_names = {
        KernelSpdxDocumentKind.SOURCE: "sbom-source.spdx.json",
        KernelSpdxDocumentKind.BUILD: "sbom-build.spdx.json",
        KernelSpdxDocumentKind.OUTPUT: "sbom-output.spdx.json",
    }
    used_files_file_name = "sbom.used-files.txt"

    return KernelSbomConfig(
        src_tree=src_tree,
        obj_tree=obj_tree,
        root_paths=root_paths,
        generate_spdx=generate_spdx,
        spdx_file_names=spdx_file_names,
        generate_used_files=generate_used_files,
        used_files_file_name=used_files_file_name,
        output_directory=output_directory,
        debug=debug,
        fail_on_unknown_build_command=fail_on_unknown_build_command,
        write_output_on_error=write_output_on_error,
        created=created,
        spdxId_prefix=spdxId_prefix,
        build_type=build_type,
        build_id=build_id,
        package_license=package_license,
        package_version=package_version,
        package_copyright_text=package_copyright_text,
        prettify_json=prettify_json,
    )
+ """ + if not os.path.exists(src_tree): + raise argparse.ArgumentTypeError(f"--src-tree {src_tree} does not exist") + if not os.path.exists(obj_tree): + raise argparse.ArgumentTypeError(f"--obj-tree {obj_tree} does not exist") + for root_path in root_paths: + if not os.path.exists(os.path.join(obj_tree, root_path)): + raise argparse.ArgumentTypeError( + f"path to root artifact {os.path.join(obj_tree, root_path)} does not exist" + ) diff --git a/scripts/sbom/sbom/environment.py b/scripts/sbom/sbom/environment.py new file mode 100644 index 00000000000000..57d81214e47656 --- /dev/null +++ b/scripts/sbom/sbom/environment.py @@ -0,0 +1,168 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +import os + +KERNEL_BUILD_VARIABLES_ALLOWLIST = [ + "AFLAGS_KERNEL", + "AFLAGS_MODULE", + "AR", + "ARCH", + "ARCH_CORE", + "ARCH_DRIVERS", + "ARCH_LIB", + "AWK", + "BASH", + "BINDGEN", + "BITS", + "CC", + "CC_FLAGS_FPU", + "CC_FLAGS_NO_FPU", + "CFLAGS_GCOV", + "CFLAGS_KERNEL", + "CFLAGS_MODULE", + "CHECK", + "CHECKFLAGS", + "CLIPPY_CONF_DIR", + "CONFIG_SHELL", + "CPP", + "CROSS_COMPILE", + "CURDIR", + "GNUMAKEFLAGS", + "HOSTCC", + "HOSTCXX", + "HOSTPKG_CONFIG", + "HOSTRUSTC", + "INSTALLKERNEL", + "INSTALL_DTBS_PATH", + "INSTALL_HDR_PATH", + "INSTALL_PATH", + "KBUILD_AFLAGS", + "KBUILD_AFLAGS_KERNEL", + "KBUILD_AFLAGS_MODULE", + "KBUILD_BUILTIN", + "KBUILD_CFLAGS", + "KBUILD_CFLAGS_KERNEL", + "KBUILD_CFLAGS_MODULE", + "KBUILD_CHECKSRC", + "KBUILD_CLIPPY", + "KBUILD_CPPFLAGS", + "KBUILD_EXTMOD", + "KBUILD_EXTRA_WARN", + "KBUILD_HOSTCFLAGS", + "KBUILD_HOSTCXXFLAGS", + "KBUILD_HOSTLDFLAGS", + "KBUILD_HOSTLDLIBS", + "KBUILD_HOSTRUSTFLAGS", + "KBUILD_IMAGE", + "KBUILD_LDFLAGS", + "KBUILD_LDFLAGS_MODULE", + "KBUILD_LDS", + "KBUILD_MODULES", + "KBUILD_PROCMACROLDFLAGS", + "KBUILD_RUSTFLAGS", + "KBUILD_RUSTFLAGS_KERNEL", + "KBUILD_RUSTFLAGS_MODULE", + "KBUILD_USERCFLAGS", + "KBUILD_USERLDFLAGS", + "KBUILD_VERBOSE", + 
"KBUILD_VMLINUX_LIBS", + "KBZIP2", + "KCONFIG_CONFIG", + "KERNELDOC", + "KERNELRELEASE", + "KERNELVERSION", + "KGZIP", + "KLZOP", + "LC_COLLATE", + "LC_NUMERIC", + "LD", + "LDFLAGS_MODULE", + "LEX", + "LINUXINCLUDE", + "LZ4", + "LZMA", + "MAKE", + "MAKEFILES", + "MAKEFILE_LIST", + "MAKEFLAGS", + "MAKELEVEL", + "MAKEOVERRIDES", + "MAKE_COMMAND", + "MAKE_HOST", + "MAKE_TERMERR", + "MAKE_TERMOUT", + "MAKE_VERSION", + "MFLAGS", + "MODLIB", + "NM", + "NOSTDINC_FLAGS", + "O", + "OBJCOPY", + "OBJCOPYFLAGS", + "OBJDUMP", + "PAHOLE", + "PATCHLEVEL", + "PERL", + "PYTHON3", + "Q", + "RCS_FIND_IGNORE", + "READELF", + "REALMODE_CFLAGS", + "RESOLVE_BTFIDS", + "RETHUNK_CFLAGS", + "RETHUNK_RUSTFLAGS", + "RETPOLINE_CFLAGS", + "RETPOLINE_RUSTFLAGS", + "RETPOLINE_VDSO_CFLAGS", + "RUSTC", + "RUSTC_BOOTSTRAP", + "RUSTC_OR_CLIPPY", + "RUSTC_OR_CLIPPY_QUIET", + "RUSTDOC", + "RUSTFLAGS_KERNEL", + "RUSTFLAGS_MODULE", + "RUSTFMT", + "SRCARCH", + "STRIP", + "SUBLEVEL", + "SUFFIXES", + "TAR", + "UTS_MACHINE", + "VERSION", + "VPATH", + "XZ", + "YACC", + "ZSTD", + "building_out_of_srctree", + "cross_compiling", + "objtree", + "quiet", + "rust_common_flags", + "srcroot", + "srctree", + "sub_make_done", + "subdir", +] + + +class Environment: + """ + Read-only accessor for kernel build environment variables. 
PathStr = str
"""Filesystem path represented as a plain string for better performance than pathlib.Path."""


def is_relative_to(path: PathStr, base: PathStr) -> bool:
    """
    Check lexically whether ``path`` is ``base`` itself or located below ``base``.

    Both paths are normalized with ``os.path.normpath`` before they are
    compared, so trailing separators and ``.``/``..`` components do not
    influence the result. Symbolic links are not resolved.

    Args:
        path: Path to test.
        base: Directory the path is tested against.

    Returns:
        True if ``path`` equals ``base`` or lies below it, otherwise False.
        Paths that cannot be compared (e.g. one absolute and one relative)
        are reported as not relative instead of raising.
    """
    normalized_base = os.path.normpath(base)
    try:
        common = os.path.commonpath([os.path.normpath(path), normalized_base])
    except ValueError:
        # commonpath() refuses to compare absolute with relative paths
        # (and paths on different drives); such paths cannot be related.
        return False
    # commonpath() yields "" for purely relative paths without a shared prefix,
    # while normpath() spells the same location ".", so normalize before comparing.
    return os.path.normpath(common) == normalized_base
def error(msg_template: str, /, **kwargs: Any) -> None:
    """
    Log an error message including file, line, and function context.

    The location of the calling code is prepended to the message template
    before the template is handed to the error logger.

    Args:
        msg_template: Message template with ``str.format`` style placeholders.
        **kwargs: Values for the placeholders of ``msg_template``.
    """
    frame = inspect.currentframe()
    caller_frame = frame.f_back if frame else None
    info = inspect.getframeinfo(caller_frame) if caller_frame else None
    if info:
        location = f'File "{info.filename}", line {info.lineno}, in {info.function}\n'
        # The location becomes part of a template that the logger formats with
        # str.format(); literal braces (e.g. in a directory name) must be
        # escaped so they are not mistaken for placeholders.
        location = location.replace("{", "{{").replace("}", "}}")
        msg_template = location + msg_template
    _error_logger.log(msg_template, **kwargs)
@@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from .spdxId import SpdxId, SpdxIdGenerator +from .serialization import JsonLdSpdxDocument + +__all__ = ["JsonLdSpdxDocument", "SpdxId", "SpdxIdGenerator"] diff --git a/scripts/sbom/sbom/spdx/build.py b/scripts/sbom/sbom/spdx/build.py new file mode 100644 index 00000000000000..180a8f1e8bd3b8 --- /dev/null +++ b/scripts/sbom/sbom/spdx/build.py @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from dataclasses import dataclass, field +from sbom.spdx.core import DictionaryEntry, Element, Hash + + +@dataclass(kw_only=True) +class Build(Element): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Build/Classes/Build/""" + + type: str = field(init=False, default="build_Build") + build_buildType: str + build_buildId: str + build_environment: list[DictionaryEntry] = field(default_factory=list[DictionaryEntry]) + build_configSourceUri: list[str] = field(default_factory=list[str]) + build_configSourceDigest: list[Hash] = field(default_factory=list[Hash]) diff --git a/scripts/sbom/sbom/spdx/core.py b/scripts/sbom/sbom/spdx/core.py new file mode 100644 index 00000000000000..c5de9194bb8985 --- /dev/null +++ b/scripts/sbom/sbom/spdx/core.py @@ -0,0 +1,182 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any, Literal +from sbom.spdx.spdxId import SpdxId + +SPDX_SPEC_VERSION = "3.0.1" + +ExternalIdentifierType = Literal["email", "gitoid", "urlScheme"] +HashAlgorithm = Literal["sha256", "sha512"] +ProfileIdentifierType = Literal["core", "software", "build", "lite", "simpleLicensing"] +RelationshipType = Literal[ + "contains", + "generates", + "hasDeclaredLicense", + "hasInput", + "hasOutput", + "ancestorOf", + "hasDistributionArtifact", + 
"dependsOn", +] +RelationshipCompleteness = Literal["complete", "incomplete", "noAssertion"] + + +@dataclass +class SpdxObject: + def to_dict(self) -> dict[str, Any]: + def _to_dict(v: Any): + return v.to_dict() if hasattr(v, "to_dict") else v + + d: dict[str, Any] = {} + for field_name in self.__dataclass_fields__: + value = getattr(self, field_name) + if not value: + continue + + if isinstance(value, Element): + d[field_name] = value.spdxId + elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], Element): # type: ignore + value: list[Element] = value + d[field_name] = [v.spdxId for v in value] + else: + d[field_name] = [_to_dict(v) for v in value] if isinstance(value, list) else _to_dict(value) # type: ignore + return d + + +@dataclass(kw_only=True) +class IntegrityMethod(SpdxObject): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/IntegrityMethod/""" + + +@dataclass(kw_only=True) +class Hash(IntegrityMethod): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Hash/""" + + type: str = field(init=False, default="Hash") + hashValue: str + algorithm: HashAlgorithm + + +@dataclass(kw_only=True) +class Element(SpdxObject): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Element/""" + + type: str = field(init=False, default="Element") + spdxId: SpdxId + creationInfo: str = "_:creationinfo" + name: str | None = None + verifiedUsing: list[Hash] = field(default_factory=list[Hash]) + comment: str | None = None + + +@dataclass(kw_only=True) +class ExternalMap(SpdxObject): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/ExternalMap/""" + + type: str = field(init=False, default="ExternalMap") + externalSpdxId: SpdxId + + +@dataclass(kw_only=True) +class NamespaceMap(SpdxObject): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/NamespaceMap/""" + + type: str = field(init=False, default="NamespaceMap") + prefix: str + namespace: str + + +@dataclass(kw_only=True) +class 
ElementCollection(Element): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/ElementCollection/""" + + type: str = field(init=False, default="ElementCollection") + element: list[Element] = field(default_factory=list[Element]) + rootElement: list[Element] = field(default_factory=list[Element]) + profileConformance: list[ProfileIdentifierType] = field(default_factory=list[ProfileIdentifierType]) + + +@dataclass(kw_only=True) +class SpdxDocument(ElementCollection): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/SpdxDocument/""" + + type: str = field(init=False, default="SpdxDocument") + import_: list[ExternalMap] = field(default_factory=list[ExternalMap]) + namespaceMap: list[NamespaceMap] = field(default_factory=list[NamespaceMap]) + + def to_dict(self) -> dict[str, Any]: + return {("import" if k == "import_" else k): v for k, v in super().to_dict().items()} + + +@dataclass(kw_only=True) +class ExternalIdentifier(SpdxObject): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/ExternalIdentifier/""" + + type: str = field(init=False, default="ExternalIdentifier") + externalIdentifierType: ExternalIdentifierType + identifier: str + + +@dataclass(kw_only=True) +class Agent(Element): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Agent/""" + + type: str = field(init=False, default="Agent") + externalIdentifier: list[ExternalIdentifier] = field(default_factory=list[ExternalIdentifier]) + + +@dataclass(kw_only=True) +class SoftwareAgent(Agent): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/SoftwareAgent/""" + + type: str = field(init=False, default="SoftwareAgent") + + +@dataclass(kw_only=True) +class CreationInfo(SpdxObject): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/CreationInfo/""" + + type: str = field(init=False, default="CreationInfo") + id: SpdxId = "_:creationinfo" + specVersion: str = SPDX_SPEC_VERSION + createdBy: list[Agent] + created: str = 
class JsonLdSpdxDocument:
    """Represents an SPDX document in JSON-LD format for serialization."""

    context: list[str | dict[str, str]]
    graph: list[SpdxObject]

    def __init__(self, graph: list[SpdxObject]) -> None:
        """
        Initialize a JSON-LD SPDX document from a graph of SPDX objects.
        The graph must contain a single SpdxDocument element.

        The namespace map of the SpdxDocument element is moved into the
        JSON-LD context; afterwards the namespace map of the element is empty.

        Args:
            graph: List of SPDX objects representing the complete SPDX document.

        Raises:
            ValueError: If the graph does not contain an SpdxDocument element.
        """
        self.graph = graph
        spdx_document = next((element for element in graph if isinstance(element, SpdxDocument)), None)
        if spdx_document is None:
            # An explicit error instead of a StopIteration escaping from the constructor.
            raise ValueError("The SPDX graph does not contain an SpdxDocument element")
        self.context = [
            f"https://spdx.org/rdf/{SPDX_SPEC_VERSION}/spdx-context.jsonld",
            {namespaceMap.prefix: namespaceMap.namespace for namespaceMap in spdx_document.namespaceMap},
        ]
        # The prefixes now live in the context, so they are removed from the element.
        spdx_document.namespaceMap = []

    def to_dict(self) -> dict[str, Any]:
        """
        Convert the SPDX document to a dictionary representation suitable for JSON serialization.

        Returns:
            Dictionary with @context and @graph keys following JSON-LD format.
        """
        return {
            "@context": self.context,
            "@graph": [item.to_dict() for item in self.graph],
        }

    def save(self, path: PathStr, prettify: bool) -> None:
        """
        Save the SPDX document to a JSON file.

        Args:
            path: File path where the document will be saved.
            prettify: Whether to pretty-print the JSON with indentation.
                If False, the most compact separators are used.
        """
        with open(path, "w", encoding="utf-8") as f:
            if prettify:
                json.dump(self.to_dict(), f, indent=2)
            else:
                json.dump(self.to_dict(), f, separators=(",", ":"))
"""https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/Sbom/""" + + type: str = field(init=False, default="software_Sbom") + software_sbomType: list[SbomType] = field(default_factory=list[SbomType]) + + +@dataclass(kw_only=True) +class ContentIdentifier(IntegrityMethod): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/ContentIdentifier/""" + + type: str = field(init=False, default="software_ContentIdentifier") + software_contentIdentifierType: ContentIdentifierType + software_contentIdentifierValue: str + + +@dataclass(kw_only=True) +class SoftwareArtifact(Artifact): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/SoftwareArtifact/""" + + type: str = field(init=False, default="software_Artifact") + software_primaryPurpose: SoftwarePurpose | None = None + software_additionalPurpose: list[SoftwarePurpose] = field(default_factory=list[SoftwarePurpose]) + software_copyrightText: str | None = None + software_contentIdentifier: list[ContentIdentifier] = field(default_factory=list[ContentIdentifier]) + + +@dataclass(kw_only=True) +class Package(SoftwareArtifact): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/Package/""" + + type: str = field(init=False, default="software_Package") + name: str # type: ignore + software_packageVersion: str | None = None + software_downloadLocation: str | None = None + + +@dataclass(kw_only=True) +class File(SoftwareArtifact): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/File/""" + + type: str = field(init=False, default="software_File") + name: str # type: ignore + software_fileKind: FileKindType | None = None diff --git a/scripts/sbom/sbom/spdx/spdxId.py b/scripts/sbom/sbom/spdx/spdxId.py new file mode 100644 index 00000000000000..589e85c5f7064a --- /dev/null +++ b/scripts/sbom/sbom/spdx/spdxId.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from itertools import count 
+from typing import Iterator + +SpdxId = str + + +class SpdxIdGenerator: + _namespace: str + _prefix: str | None = None + _counter: Iterator[int] + + def __init__(self, namespace: str, prefix: str | None = None) -> None: + """ + Initialize the SPDX ID generator with a namespace. + + Args: + namespace: The full namespace to use for generated IDs. + prefix: Optional. If provided, generated IDs will use this prefix instead of the full namespace. + """ + self._namespace = namespace + self._prefix = prefix + self._counter = count(0) + + def generate(self) -> SpdxId: + return f"{f'{self._prefix}:' if self._prefix else self._namespace}{next(self._counter)}" + + @property + def prefix(self) -> str | None: + return self._prefix + + @property + def namespace(self) -> str: + return self._namespace diff --git a/scripts/sbom/sbom/spdx_graph/__init__.py b/scripts/sbom/sbom/spdx_graph/__init__.py new file mode 100644 index 00000000000000..3557b1d51bf939 --- /dev/null +++ b/scripts/sbom/sbom/spdx_graph/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from .build_spdx_graphs import build_spdx_graphs +from .spdx_graph_model import SpdxIdGeneratorCollection + +__all__ = ["build_spdx_graphs", "SpdxIdGeneratorCollection"] diff --git a/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py b/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py new file mode 100644 index 00000000000000..eecc5215644982 --- /dev/null +++ b/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py @@ -0,0 +1,82 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from datetime import datetime +from typing import Protocol + +import logging +from sbom.config import KernelSpdxDocumentKind +from sbom.cmd_graph import CmdGraph +from sbom.path_utils import PathStr +from sbom.spdx_graph.kernel_file import KernelFileCollection +from sbom.spdx_graph.spdx_graph_model import SpdxGraph, 
def build_spdx_graphs(
    cmd_graph: CmdGraph,
    spdx_id_generators: SpdxIdGeneratorCollection,
    config: SpdxGraphConfig,
) -> dict[KernelSpdxDocumentKind, SpdxGraph]:
    """
    Create the SPDX graphs (output, source, and build) for a cmd dependency graph.

    The output graph is created first, followed by the source graph and the
    build graph. A dedicated source graph is only created if the kernel file
    collection contains source files; otherwise an info message is logged and
    the result has no entry for the source document kind.

    Args:
        cmd_graph: The dependency graph of a kernel build.
        spdx_id_generators: Collection of SPDX ID generators.
        config: Configuration options.

    Returns:
        Dictionary of SPDX graphs
    """
    shared = SharedSpdxElements.create(spdx_id_generators.base, config.created)
    files = KernelFileCollection.create(cmd_graph, config.obj_tree, config.src_tree, spdx_id_generators)

    # The graphs are created in a fixed order (output, source, build).
    output_graph = SpdxOutputGraph.create(
        root_files=list(files.output.values()),
        shared_elements=shared,
        spdx_id_generators=spdx_id_generators,
        config=config,
    )
    graphs: dict[KernelSpdxDocumentKind, SpdxGraph] = {KernelSpdxDocumentKind.OUTPUT: output_graph}

    if not files.source:
        logging.info(
            "Skipped creating a dedicated source SBOM because source files cannot be "
            "reliably classified when the source and object trees are identical. "
            "Added source files to the build SBOM instead."
        )
    else:
        graphs[KernelSpdxDocumentKind.SOURCE] = SpdxSourceGraph.create(
            source_files=list(files.source.values()),
            shared_elements=shared,
            spdx_id_generators=spdx_id_generators,
        )

    graphs[KernelSpdxDocumentKind.BUILD] = SpdxBuildGraph.create(
        cmd_graph,
        files,
        shared,
        output_graph.high_level_build_element,
        spdx_id_generators,
    )
    return graphs
If file_location equals EXTERNAL, the + absolute path is used.""" + license_identifier: str | None + """SPDX license ID if file_location equals SOURCE_TREE or BOTH; otherwise None.""" + spdx_id_generator: SpdxIdGenerator + """Generator for the SPDX ID of the file element.""" + + _spdx_file_element: File | None = None + + @classmethod + def create( + cls, + absolute_path: PathStr, + obj_tree: PathStr, + src_tree: PathStr, + spdx_id_generators: SpdxIdGeneratorCollection, + is_output: bool, + ) -> "KernelFile": + is_in_obj_tree = is_relative_to(absolute_path, obj_tree) + is_in_src_tree = is_relative_to(absolute_path, src_tree) + + # file element name should be relative to output or src tree if possible + if not is_in_src_tree and not is_in_obj_tree: + file_element_name = str(absolute_path) + file_location = KernelFileLocation.EXTERNAL + spdx_id_generator = spdx_id_generators.build + elif is_in_src_tree and src_tree == obj_tree: + file_element_name = os.path.relpath(absolute_path, obj_tree) + file_location = KernelFileLocation.BOTH + spdx_id_generator = spdx_id_generators.output if is_output else spdx_id_generators.build + elif is_in_obj_tree: + file_element_name = os.path.relpath(absolute_path, obj_tree) + file_location = KernelFileLocation.OBJ_TREE + spdx_id_generator = spdx_id_generators.output if is_output else spdx_id_generators.build + else: + file_element_name = os.path.relpath(absolute_path, src_tree) + file_location = KernelFileLocation.SOURCE_TREE + spdx_id_generator = spdx_id_generators.source + + # parse spdx license identifier + license_identifier = ( + _parse_spdx_license_identifier(absolute_path) + if file_location == KernelFileLocation.SOURCE_TREE or file_location == KernelFileLocation.BOTH + else None + ) + + return KernelFile( + absolute_path, + file_location, + file_element_name, + license_identifier, + spdx_id_generator, + ) + + @property + def spdx_file_element(self) -> File: + if self._spdx_file_element is None: + self._spdx_file_element = 
@dataclass
class KernelFileCollection:
    """Kernel files of a build, grouped into source, build and output files."""

    source: dict[PathStr, KernelFile]
    build: dict[PathStr, KernelFile]
    output: dict[PathStr, KernelFile]

    @classmethod
    def create(
        cls,
        cmd_graph: CmdGraph,
        obj_tree: PathStr,
        src_tree: PathStr,
        spdx_id_generators: SpdxIdGeneratorCollection,
    ) -> "KernelFileCollection":
        """
        Create a KernelFile for every node of the cmd graph and sort it into a group.

        Root nodes become output files, non-root files located in the source
        tree become source files, and every other file becomes a build file.

        Args:
            cmd_graph: The dependency graph of a kernel build.
            obj_tree: Path of the object tree.
            src_tree: Path of the source tree.
            spdx_id_generators: Collection of SPDX ID generators.

        Returns:
            KernelFileCollection: The grouped files, keyed by absolute path.
        """
        collection = KernelFileCollection({}, {}, {})
        root_paths = {root.absolute_path for root in cmd_graph.roots}
        for node in cmd_graph:
            is_root = node.absolute_path in root_paths
            kernel_file = KernelFile.create(
                node.absolute_path,
                obj_tree,
                src_tree,
                spdx_id_generators,
                is_root,
            )
            if is_root:
                group = collection.output
            elif kernel_file.file_location == KernelFileLocation.SOURCE_TREE:
                group = collection.source
            else:
                group = collection.build
            group[kernel_file.absolute_path] = kernel_file

        return collection

    def to_dict(self) -> dict[PathStr, KernelFile]:
        """Merge all groups into one mapping (later groups win: source, build, output)."""
        merged: dict[PathStr, KernelFile] = {}
        for group in (self.source, self.build, self.output):
            merged.update(group)
        return merged


def _build_file_element(absolute_path: PathStr, name: str, spdx_id: SpdxId, file_location: KernelFileLocation) -> File:
    """
    Build the SPDX File element for a file.

    If the file exists, its SHA-256 hash and its gitoid are attached. If it
    does not exist, both lists stay empty and the problem is logged: as a
    warning for EXTERNAL files, as an error for every other location.

    Args:
        absolute_path: Absolute path of the file.
        name: Name of the File element.
        spdx_id: SPDX ID of the File element.
        file_location: Location of the file relative to the source/object trees.

    Returns:
        File: The SPDX File element.
    """
    hashes: list[Hash] = []
    content_ids: list[ContentIdentifier] = []
    if os.path.exists(absolute_path):
        hashes.append(Hash(algorithm="sha256", hashValue=_sha256(absolute_path)))
        content_ids.append(
            ContentIdentifier(
                software_contentIdentifierType="gitoid",
                software_contentIdentifierValue=_git_blob_oid(absolute_path),
            )
        )
    else:
        is_external = file_location == KernelFileLocation.EXTERNAL
        report = sbom_logging.warning if is_external else sbom_logging.error
        report(
            "Cannot compute hash for {absolute_path} because file does not exist.",
            absolute_path=absolute_path,
        )

    purpose = _get_primary_purpose(absolute_path)

    return File(
        spdxId=spdx_id,
        name=name,
        verifiedUsing=hashes,
        software_primaryPurpose=purpose,
        software_contentIdentifier=content_ids,
    )
sbom_logging.error( + "Cannot compute hash for {absolute_path} because file does not exist.", + absolute_path=absolute_path, + ) + + # primary purpose + primary_purpose = _get_primary_purpose(absolute_path) + + return File( + spdxId=spdx_id, + name=name, + verifiedUsing=verifiedUsing, + software_primaryPurpose=primary_purpose, + software_contentIdentifier=content_identifier, + ) + + +def _sha256(path: PathStr) -> str: + """Compute the SHA-256 hash of a file.""" + with open(path, "rb") as f: + data = f.read() + return hashlib.sha256(data).hexdigest() + + +def _git_blob_oid(file_path: str) -> str: + """ + Compute the Git blob object ID (SHA-1) for a file, like `git hash-object`. + + Args: + file_path: Path to the file. + + Returns: + SHA-1 hash (hex) of the Git blob object. + """ + with open(file_path, "rb") as f: + content = f.read() + header = f"blob {len(content)}\0".encode() + store = header + content + sha1_hash = hashlib.sha1(store).hexdigest() + return sha1_hash + + +# REUSE-IgnoreStart +SPDX_LICENSE_IDENTIFIER_PATTERN = re.compile(r"SPDX-License-Identifier:\s*(?P.*?)(?:\s*(\*/|$))") +# REUSE-IgnoreEnd + + +def _parse_spdx_license_identifier(absolute_path: str, max_lines: int = 5) -> str | None: + """ + Extracts the SPDX-License-Identifier from the first few lines of a source file. + + Args: + absolute_path: Path to the source file. + max_lines: Number of lines to scan from the top (default: 5). + + Returns: + The license identifier string (e.g., 'GPL-2.0-only') if found, otherwise None. 
+ """ + try: + with open(absolute_path, "r") as f: + for _ in range(max_lines): + match = SPDX_LICENSE_IDENTIFIER_PATTERN.search(f.readline()) + if match: + return match.group("id") + except (UnicodeDecodeError, OSError): + return None + return None + + +def _get_primary_purpose(absolute_path: PathStr) -> SoftwarePurpose | None: + def ends_with(suffixes: list[str]) -> bool: + return any(absolute_path.endswith(suffix) for suffix in suffixes) + + def includes_path_segments(path_segments: list[str]) -> bool: + return any(segment in absolute_path for segment in path_segments) + + # Source code + if ends_with([".c", ".h", ".S", ".s", ".rs", ".pl"]): + return "source" + + # Libraries + if ends_with([".a", ".so", ".rlib"]): + return "library" + + # Archives + if ends_with([".xz", ".cpio", ".gz", ".tar", ".zip"]): + return "archive" + + # Applications + if ends_with(["bzImage", "Image"]): + return "application" + + # Executables / machine code + if ends_with([".bin", ".elf", "vmlinux", "vmlinux.unstripped", "bpfilter_umh"]): + return "executable" + + # Kernel modules + if ends_with([".ko"]): + return "module" + + # Data files + if ends_with( + [ + ".tbl", + ".relocs", + ".rmeta", + ".in", + ".dbg", + ".x509", + ".pbm", + ".ppm", + ".dtb", + ".uc", + ".inc", + ".dts", + ".dtsi", + ".dtbo", + ".xml", + ".ro", + "initramfs_inc_data", + "default_cpio_list", + "x509_certificate_list", + "utf8data.c_shipped", + "blacklist_hash_list", + "x509_revocation_list", + "cpucaps", + "sysreg", + ] + ) or includes_path_segments(["drivers/gpu/drm/radeon/reg_srcs/"]): + return "data" + + # Configuration files + if ends_with([".pem", ".key", ".conf", ".config", ".cfg", ".bconf"]): + return "configuration" + + # Documentation + if ends_with([".md"]): + return "documentation" + + # Other / miscellaneous + if ends_with([".o", ".tmp"]): + return "other" + + sbom_logging.warning("Could not infer primary purpose for {absolute_path}", absolute_path=absolute_path) diff --git 
@dataclass(frozen=True)
class SharedSpdxElements:
    """SPDX elements that are shared across multiple documents."""

    agent: SoftwareAgent
    creation_info: CreationInfo

    @classmethod
    def create(cls, spdx_id_generator: SpdxIdGenerator, created: datetime) -> "SharedSpdxElements":
        """
        Creates shared SPDX elements used across multiple documents.

        Args:
            spdx_id_generator: Generator for creating SPDX IDs.
            created: SPDX 'created' property used for the creation info.
                A timezone-aware value is converted to UTC. A naive value is
                taken as already being in UTC.

        Returns:
            SharedSpdxElements with agent and creation info.
        """
        from datetime import timezone

        # The timestamp is written with a literal "Z" (UTC) suffix, so an aware
        # datetime in another timezone must be converted first. Otherwise the
        # local wall-clock time would be mislabeled as UTC.
        if created.utcoffset() is not None:
            created = created.astimezone(timezone.utc)

        agent = SoftwareAgent(
            spdxId=spdx_id_generator.generate(),
            name="KernelSbom",
        )
        creation_info = CreationInfo(createdBy=[agent], created=created.strftime("%Y-%m-%dT%H:%M:%SZ"))
        return cls(agent=agent, creation_info=creation_info)
@dataclass
class SpdxBuildGraph(SpdxGraph):
    """SPDX graph representing build dependencies connecting source files and
    distributable output files"""

    @classmethod
    def create(
        cls,
        cmd_graph: CmdGraph,
        kernel_files: KernelFileCollection,
        shared_elements: SharedSpdxElements,
        high_level_build_element: Build,
        spdx_id_generators: SpdxIdGeneratorCollection,
    ) -> "SpdxBuildGraph":
        """
        Create the SPDX build graph.

        If the collection contains dedicated source files, they are referenced
        from an external document. Otherwise the variant is used that includes
        the files (and their license information) directly in the build graph.

        Args:
            cmd_graph: The dependency graph of a kernel build.
            kernel_files: Collection of categorized kernel files involved in the build.
            shared_elements: SPDX elements shared across multiple documents.
            high_level_build_element: The high-level Build element referenced by the build graph.
            spdx_id_generators: Collection of generators for SPDX element IDs.

        Returns:
            SpdxBuildGraph: The SPDX build graph.
        """
        if len(kernel_files.source) > 0:
            return _create_spdx_build_graph(
                cmd_graph,
                kernel_files,
                shared_elements,
                high_level_build_element,
                spdx_id_generators,
            )
        else:
            return _create_spdx_build_graph_with_mixed_sources(
                cmd_graph,
                kernel_files,
                shared_elements,
                high_level_build_element,
                spdx_id_generators,
            )


def _create_spdx_build_graph(
    cmd_graph: CmdGraph,
    kernel_files: KernelFileCollection,
    shared_elements: SharedSpdxElements,
    high_level_build_element: Build,
    spdx_id_generators: SpdxIdGeneratorCollection,
) -> SpdxBuildGraph:
    """
    Creates an SPDX build graph where source and output files are referenced
    from external documents.

    Args:
        cmd_graph: The dependency graph of a kernel build.
        kernel_files: Collection of categorized kernel files involved in the build.
        shared_elements: SPDX elements shared across multiple documents.
        high_level_build_element: The high-level Build element referenced by the build graph.
        spdx_id_generators: Collection of generators for SPDX element IDs.

    Returns:
        SpdxBuildGraph: The SPDX build graph connecting source files and distributable output files.
    """
    # NOTE: SPDX IDs are handed out by generate() calls in statement order, so
    # reordering the statements below presumably changes which ID an element gets.

    # SpdxDocument
    build_spdx_document = SpdxDocument(
        spdxId=spdx_id_generators.build.generate(),
        profileConformance=["core", "software", "build"],
        # one namespace entry per generator that has a prefix
        namespaceMap=[
            NamespaceMap(prefix=generator.prefix, namespace=generator.namespace)
            for generator in [
                spdx_id_generators.build,
                spdx_id_generators.source,
                spdx_id_generators.output,
                spdx_id_generators.base,
            ]
            if generator.prefix is not None
        ],
    )

    # Sbom
    build_sbom = Sbom(
        spdxId=spdx_id_generators.build.generate(),
        software_sbomType=["build"],
    )

    # Object tree element: a directory File that "contains" the files of this
    # document; the relationship targets are filled in further below
    obj_tree_element = File(
        spdxId=spdx_id_generators.build.generate(),
        name="$(obj_tree)",
        software_fileKind="directory",
    )
    obj_tree_contains_relationship = Relationship(
        spdxId=spdx_id_generators.build.generate(),
        relationshipType="contains",
        from_=obj_tree_element,
        to=[],
    )

    # File elements
    build_file_elements = [file.spdx_file_element for file in kernel_files.build.values()]
    # the relationships need the elements of all files (source, build and output)
    file_relationships = _file_relationships(
        cmd_graph=cmd_graph,
        file_elements={key: file.spdx_file_element for key, file in kernel_files.to_dict().items()},
        high_level_build_element=high_level_build_element,
        spdx_id_generator=spdx_id_generators.build,
    )

    # Update relationships
    build_spdx_document.rootElement = [build_sbom]

    # source files, the high-level Build element and output files are not
    # elements of this document; they are listed as external imports
    build_spdx_document.import_ = [
        *(
            ExternalMap(externalSpdxId=file_element.spdx_file_element.spdxId)
            for file_element in kernel_files.source.values()
        ),
        ExternalMap(externalSpdxId=high_level_build_element.spdxId),
        *(ExternalMap(externalSpdxId=file.spdx_file_element.spdxId) for file in kernel_files.output.values()),
    ]

    build_sbom.rootElement = [obj_tree_element]
    build_sbom.element = [
        obj_tree_element,
        obj_tree_contains_relationship,
        *build_file_elements,
        *file_relationships,
    ]

    # the object tree contains the build files as well as the output files
    obj_tree_contains_relationship.to = [
        *build_file_elements,
        *(file.spdx_file_element for file in kernel_files.output.values()),
    ]

    # create Spdx graphs
    build_graph = SpdxBuildGraph(
        build_spdx_document,
        shared_elements.agent,
        shared_elements.creation_info,
        build_sbom,
    )
    return build_graph
def _create_spdx_build_graph_with_mixed_sources(
    cmd_graph: CmdGraph,
    kernel_files: KernelFileCollection,
    shared_elements: SharedSpdxElements,
    high_level_build_element: Build,
    spdx_id_generators: SpdxIdGeneratorCollection,
) -> SpdxBuildGraph:
    """
    Creates an SPDX build graph where only output files are referenced from
    an external document. Source files are included directly in the build graph.

    Because the license expressions of the included files are part of this
    document, it declares conformance to the simpleLicensing profile in
    addition to core, software and build.

    Args:
        cmd_graph: The dependency graph of a kernel build.
        kernel_files: Collection of categorized kernel files involved in the build.
        shared_elements: SPDX elements shared across multiple documents.
        high_level_build_element: The high-level Build element referenced by the build graph.
        spdx_id_generators: Collection of generators for SPDX element IDs.

    Returns:
        SpdxBuildGraph: The SPDX build graph connecting source files and distributable output files.
    """
    # NOTE: SPDX IDs are handed out by generate() calls in statement order, so
    # the order of the statements below must not be changed.

    # SpdxDocument
    build_spdx_document = SpdxDocument(
        spdxId=spdx_id_generators.build.generate(),
        # simpleLicensing is required for the LicenseExpression elements below
        profileConformance=["core", "software", "build", "simpleLicensing"],
        namespaceMap=[
            NamespaceMap(prefix=generator.prefix, namespace=generator.namespace)
            for generator in [
                spdx_id_generators.build,
                spdx_id_generators.output,
                spdx_id_generators.base,
            ]
            if generator.prefix is not None
        ],
    )

    # Sbom
    build_sbom = Sbom(
        spdxId=spdx_id_generators.build.generate(),
        software_sbomType=["build"],
    )

    # File elements
    build_file_elements = [file.spdx_file_element for file in kernel_files.build.values()]
    file_relationships = _file_relationships(
        cmd_graph=cmd_graph,
        file_elements={key: file.spdx_file_element for key, file in kernel_files.to_dict().items()},
        high_level_build_element=high_level_build_element,
        spdx_id_generator=spdx_id_generators.build,
    )

    # Source file license elements (sources are part of the build files here)
    source_file_license_identifiers, source_file_license_relationships = source_file_license_elements(
        list(kernel_files.build.values()), spdx_id_generators.build
    )

    # Update relationships
    build_spdx_document.rootElement = [build_sbom]
    root_file_elements = [file.spdx_file_element for file in kernel_files.output.values()]
    # the high-level Build element and the output files live in another document
    build_spdx_document.import_ = [
        ExternalMap(externalSpdxId=high_level_build_element.spdxId),
        *(ExternalMap(externalSpdxId=file.spdxId) for file in root_file_elements),
    ]

    build_sbom.rootElement = [*root_file_elements]
    build_sbom.element = [
        *build_file_elements,
        *source_file_license_identifiers,
        *source_file_license_relationships,
        *file_relationships,
    ]

    build_graph = SpdxBuildGraph(
        build_spdx_document,
        shared_elements.agent,
        shared_elements.creation_info,
        build_sbom,
    )
    return build_graph
def _file_relationships(
    cmd_graph: CmdGraph,
    file_elements: Mapping[PathStr, File],
    high_level_build_element: Build,
    spdx_id_generator: SpdxIdGenerator,
) -> list[Build | Relationship]:
    """
    Construct SPDX Build and Relationship elements representing dependency
    relationships in the cmd graph.

    For every node that has children, up to three kinds of elements are created:
    a Build element with hasInput/hasOutput relationships if the node has a
    .cmd file, one dependsOn relationship for its incbin dependencies and one
    dependsOn relationship for its hardcoded dependencies. All created Build
    elements are linked to the high-level Build element through a single
    ancestorOf relationship, which is the first element of the returned list.

    Args:
        cmd_graph: The dependency graph of a kernel build.
        file_elements: Mapping of filesystem paths (PathStr) to their
            corresponding SPDX File elements.
        high_level_build_element: The SPDX Build element representing the overall build process/root.
        spdx_id_generator: Generator for unique SPDX IDs.

    Returns:
        list[Build | Relationship]: List of SPDX Build and Relationship elements

    Raises:
        KeyError: If a node or one of its dependencies has no entry in file_elements.
    """
    # targets are appended below, one per created Build element
    high_level_build_ancestorOf_relationship = Relationship(
        spdxId=spdx_id_generator.generate(),
        relationshipType="ancestorOf",
        from_=high_level_build_element,
        completeness="complete",
        to=[],
    )

    # Create a relationship between each node (output file)
    # and its children (input files)
    build_and_relationship_elements: list[Build | Relationship] = [high_level_build_ancestorOf_relationship]
    for node in cmd_graph:
        # nodes without any child have no inputs to describe
        # NOTE(review): node.children is consumed with next() here and iterated
        # again below; this assumes each access yields a fresh iterator - confirm
        if next(node.children, None) is None:
            continue

        # .cmd file dependencies
        if node.cmd_file is not None:
            # build type and build id are inherited from the high-level Build
            # element; the saved command is recorded as the comment
            build_element = Build(
                spdxId=spdx_id_generator.generate(),
                build_buildType=high_level_build_element.build_buildType,
                build_buildId=high_level_build_element.build_buildId,
                comment=node.cmd_file.savedcmd,
            )
            hasInput_relationship = Relationship(
                spdxId=spdx_id_generator.generate(),
                relationshipType="hasInput",
                from_=build_element,
                to=[file_elements[child_node.absolute_path] for child_node in node.children],
            )
            hasOutput_relationship = Relationship(
                spdxId=spdx_id_generator.generate(),
                relationshipType="hasOutput",
                from_=build_element,
                to=[file_elements[node.absolute_path]],
            )
            build_and_relationship_elements += [
                build_element,
                hasInput_relationship,
                hasOutput_relationship,
            ]
            high_level_build_ancestorOf_relationship.to.append(build_element)

        # incbin dependencies
        if len(node.incbin_dependencies) > 0:
            # the full incbin statements are kept as the comment, one per line
            incbin_dependsOn_relationship = Relationship(
                spdxId=spdx_id_generator.generate(),
                relationshipType="dependsOn",
                comment="\n".join([incbin_dependency.full_statement for incbin_dependency in node.incbin_dependencies]),
                from_=file_elements[node.absolute_path],
                to=[
                    file_elements[incbin_dependency.node.absolute_path]
                    for incbin_dependency in node.incbin_dependencies
                ],
            )
            build_and_relationship_elements.append(incbin_dependsOn_relationship)

        # hardcoded dependencies
        if len(node.hardcoded_dependencies) > 0:
            hardcoded_dependency_relationship = Relationship(
                spdxId=spdx_id_generator.generate(),
                relationshipType="dependsOn",
                from_=file_elements[node.absolute_path],
                to=[file_elements[n.absolute_path] for n in node.hardcoded_dependencies],
            )
            build_and_relationship_elements.append(hardcoded_dependency_relationship)

    return build_and_relationship_elements
@dataclass
class SpdxGraph:
    """All objects that make up a single SPDX document."""

    spdx_document: SpdxDocument
    agent: SoftwareAgent
    creation_info: CreationInfo
    sbom: Sbom

    def to_list(self) -> list[SpdxObject]:
        """Flatten the graph: document, agent, creation info, sbom, then the sbom's elements."""
        objects: list[SpdxObject] = [
            self.spdx_document,
            self.agent,
            self.creation_info,
            self.sbom,
        ]
        objects.extend(self.sbom.element)
        return objects


@dataclass
class SpdxIdGeneratorCollection:
    """Bundles one SPDX ID generator per document type so that SPDX IDs stay globally unique."""

    base: SpdxIdGenerator
    source: SpdxIdGenerator
    build: SpdxIdGenerator
    output: SpdxIdGenerator
class SpdxOutputGraphConfig(Protocol):
    """Configuration values read when creating the SPDX output graph."""

    # object tree; the build's ".config" file is looked up in this directory
    obj_tree: PathStr
    # source tree; used to classify the ".config" file
    src_tree: PathStr
    # build type of the high-level Build element
    build_type: str
    # build id of the high-level Build element; if None, the SPDX ID of the
    # Build element is used instead
    build_id: str | None
    # license expression declared for every package
    package_license: str
    # version set on every package
    package_version: str | None
    # copyright text set on every package
    package_copyright_text: str | None


@dataclass
class SpdxOutputGraph(SpdxGraph):
    """SPDX graph representing distributable output files"""

    # Build element describing the overall build; exposed so that other
    # documents can reference it
    high_level_build_element: Build

    @classmethod
    def create(
        cls,
        root_files: list[KernelFile],
        shared_elements: SharedSpdxElements,
        spdx_id_generators: SpdxIdGeneratorCollection,
        config: SpdxOutputGraphConfig,
    ) -> "SpdxOutputGraph":
        """
        Create the SPDX output graph.

        For every root file a Package is created that has the file as its
        distribution artifact. All packages share one declared license
        expression. The high-level Build element has the root files as output.

        Args:
            root_files: List of distributable output files which act as roots
                of the dependency graph.
            shared_elements: Shared SPDX elements used across multiple documents.
            spdx_id_generators: Collection of SPDX ID generators.
            config: Configuration options.

        Returns:
            SpdxOutputGraph: The SPDX output graph.
        """
        # NOTE: SPDX IDs are handed out by generate() calls in statement order,
        # so reordering the statements below presumably changes which ID an
        # element gets.

        # SpdxDocument
        spdx_document = SpdxDocument(
            spdxId=spdx_id_generators.output.generate(),
            profileConformance=["core", "software", "build", "simpleLicensing"],
            # one namespace entry per generator that has a prefix
            namespaceMap=[
                NamespaceMap(prefix=generator.prefix, namespace=generator.namespace)
                for generator in [spdx_id_generators.output, spdx_id_generators.base]
                if generator.prefix is not None
            ],
        )

        # Sbom
        sbom = Sbom(
            spdxId=spdx_id_generators.output.generate(),
            software_sbomType=["build"],
        )

        # High-level Build elements
        # the .config file of the object tree is the config source of the build
        config_source_element = KernelFile.create(
            absolute_path=os.path.join(config.obj_tree, ".config"),
            obj_tree=config.obj_tree,
            src_tree=config.src_tree,
            spdx_id_generators=spdx_id_generators,
            is_output=True,
        ).spdx_file_element
        high_level_build_element, high_level_build_element_hasOutput_relationship = _high_level_build_elements(
            config.build_type,
            config.build_id,
            config_source_element,
            spdx_id_generators.output,
        )

        # Root file elements
        root_file_elements: list[File] = [file.spdx_file_element for file in root_files]

        # Package elements
        # one package per root file; the architecture comment is only set if
        # ARCH or SRCARCH is available from the environment
        package_elements = [
            Package(
                spdxId=spdx_id_generators.output.generate(),
                name=_get_package_name(file.name),
                software_packageVersion=config.package_version,
                software_copyrightText=config.package_copyright_text,
                originatedBy=[shared_elements.agent],
                comment=f"Architecture={arch}" if (arch := Environment.ARCH() or Environment.SRCARCH()) else None,
                software_primaryPurpose=file.software_primaryPurpose,
            )
            for file in root_file_elements
        ]
        # package_elements and root_file_elements have the same order, so zip
        # pairs each package with the file it was created from
        package_hasDistributionArtifact_file_relationships = [
            Relationship(
                spdxId=spdx_id_generators.output.generate(),
                relationshipType="hasDistributionArtifact",
                from_=package,
                to=[file],
            )
            for package, file in zip(package_elements, root_file_elements)
        ]
        # a single license expression is shared by all packages
        package_license_expression = LicenseExpression(
            spdxId=spdx_id_generators.output.generate(),
            simplelicensing_licenseExpression=config.package_license,
        )
        package_hasDeclaredLicense_relationships = [
            Relationship(
                spdxId=spdx_id_generators.output.generate(),
                relationshipType="hasDeclaredLicense",
                from_=package,
                to=[package_license_expression],
            )
            for package in package_elements
        ]

        # Update relationships
        spdx_document.rootElement = [sbom]

        sbom.rootElement = [*package_elements]
        sbom.element = [
            config_source_element,
            high_level_build_element,
            high_level_build_element_hasOutput_relationship,
            *root_file_elements,
            *package_elements,
            *package_hasDistributionArtifact_file_relationships,
            package_license_expression,
            *package_hasDeclaredLicense_relationships,
        ]

        # the relationship was created without targets; the root files are its outputs
        high_level_build_element_hasOutput_relationship.to = [*root_file_elements]

        output_graph = SpdxOutputGraph(
            spdx_document,
            shared_elements.agent,
            shared_elements.creation_info,
            sbom,
            high_level_build_element,
        )
        return output_graph
def _get_package_name(filename: str) -> str:
    """
    Derive the SPDX package name from a filename.

    The name is the basename of the file. For the kernel images bzImage and
    Image it is wrapped into a descriptive "Linux Kernel (...)" name.
    """
    basename = os.path.basename(filename)
    if basename in ("bzImage", "Image"):
        return f"Linux Kernel ({basename})"
    return basename
def _high_level_build_elements(
    build_type: str,
    build_id: str | None,
    config_source_element: File,
    spdx_id_generator: SpdxIdGenerator,
) -> tuple[Build, Relationship]:
    """
    Create the high-level Build element and its hasOutput relationship.

    Args:
        build_type: Build type of the Build element.
        build_id: Build id of the Build element. If None, the SPDX ID of the
            Build element itself is used as build id.
        config_source_element: File element of the build configuration; its
            SPDX ID and hashes are recorded as config source of the build.
        spdx_id_generator: Generator for unique SPDX IDs.

    Returns:
        Tuple of (Build element, hasOutput relationship). The relationship is
        returned without targets.
    """
    # generated up front because the ID may also serve as the build id
    build_spdxId = spdx_id_generator.generate()
    high_level_build_element = Build(
        spdxId=build_spdxId,
        build_buildType=build_type,
        build_buildId=build_id if build_id is not None else build_spdxId,
        # build variables without a (truthy) value are left out
        build_environment=[
            DictionaryEntry(key=key, value=value)
            for key, value in Environment.KERNEL_BUILD_VARIABLES().items()
            if value
        ],
        build_configSourceUri=[config_source_element.spdxId],
        build_configSourceDigest=config_source_element.verifiedUsing,
    )

    # NOTE(review): `to` is left empty here; presumably the caller fills in the outputs
    high_level_build_element_hasOutput_relationship = Relationship(
        spdxId=spdx_id_generator.generate(),
        relationshipType="hasOutput",
        from_=high_level_build_element,
        to=[],
    )
    return high_level_build_element, high_level_build_element_hasOutput_relationship


@dataclass
class SpdxSourceGraph(SpdxGraph):
    """SPDX graph representing source files"""

    @classmethod
    def create(
        cls,
        source_files: list[KernelFile],
        shared_elements: SharedSpdxElements,
        spdx_id_generators: SpdxIdGeneratorCollection,
    ) -> "SpdxSourceGraph":
        """
        Create the SPDX source graph.

        The graph consists of a "$(src_tree)" directory element that contains
        all source file elements, plus the license expressions declared by
        the source files.

        Args:
            source_files: List of files within the kernel source tree.
            shared_elements: Shared SPDX elements used across multiple documents.
            spdx_id_generators: Collection of SPDX ID generators.

        Returns:
            SpdxSourceGraph: The SPDX source graph.
        """
        # NOTE: SPDX IDs are handed out by generate() calls in statement order,
        # so reordering the statements below presumably changes which ID an
        # element gets.

        # SpdxDocument
        source_spdx_document = SpdxDocument(
            spdxId=spdx_id_generators.source.generate(),
            profileConformance=["core", "software", "simpleLicensing"],
            # one namespace entry per generator that has a prefix
            namespaceMap=[
                NamespaceMap(prefix=generator.prefix, namespace=generator.namespace)
                for generator in [spdx_id_generators.source, spdx_id_generators.base]
                if generator.prefix is not None
            ],
        )

        # Sbom
        source_sbom = Sbom(
            spdxId=spdx_id_generators.source.generate(),
            software_sbomType=["source"],
        )

        # Src Tree Elements
        # the relationship targets are filled in further below
        src_tree_element = File(
            spdxId=spdx_id_generators.source.generate(),
            name="$(src_tree)",
            software_fileKind="directory",
        )
        src_tree_contains_relationship = Relationship(
            spdxId=spdx_id_generators.source.generate(),
            relationshipType="contains",
            from_=src_tree_element,
            to=[],
        )

        # Source file elements
        source_file_elements: list[Element] = [file.spdx_file_element for file in source_files]

        # Source file license elements
        source_file_license_identifiers, source_file_license_relationships = source_file_license_elements(
            source_files, spdx_id_generators.source
        )

        # Update relationships
        source_spdx_document.rootElement = [source_sbom]
        source_sbom.rootElement = [src_tree_element]
        source_sbom.element = [
            src_tree_element,
            src_tree_contains_relationship,
            *source_file_elements,
            *source_file_license_identifiers,
            *source_file_license_relationships,
        ]
        src_tree_contains_relationship.to = source_file_elements

        source_graph = SpdxSourceGraph(
            source_spdx_document,
            shared_elements.agent,
            shared_elements.creation_info,
            source_sbom,
        )
        return source_graph
def source_file_license_elements(
    source_files: list[KernelFile], spdx_id_generator: SpdxIdGenerator
) -> tuple[list[LicenseExpression], list[Relationship]]:
    """
    Build the license elements for the given source files.

    One LicenseExpression is created per distinct license identifier, in order
    of first occurrence. Every file that has a license identifier is then linked
    to its expression by a hasDeclaredLicense relationship. Files without a
    license identifier are ignored.

    Args:
        source_files: List of files within the kernel source tree.
        spdx_id_generator: Generator for unique SPDX IDs.

    Returns:
        Tuple of (license expressions, hasDeclaredLicense relationships).
    """
    # First pass: one expression per distinct license identifier.
    expressions_by_identifier: dict[str, LicenseExpression] = {}
    for source_file in source_files:
        identifier = source_file.license_identifier
        if identifier is not None and identifier not in expressions_by_identifier:
            expressions_by_identifier[identifier] = LicenseExpression(
                spdxId=spdx_id_generator.generate(),
                simplelicensing_licenseExpression=identifier,
            )

    # Second pass: link every licensed file to its expression.
    relationships: list[Relationship] = []
    for source_file in source_files:
        identifier = source_file.license_identifier
        if identifier is None:
            continue
        relationships.append(
            Relationship(
                spdxId=spdx_id_generator.generate(),
                relationshipType="hasDeclaredLicense",
                from_=source_file.spdx_file_element,
                to=[expressions_by_identifier[identifier]],
            )
        )

    return (list(expressions_by_identifier.values()), relationships)
+import unittest + +from sbom.cmd_graph.savedcmd_parser import parse_inputs_from_commands +import sbom.sbom_logging as sbom_logging + + +class TestSavedCmdParser(unittest.TestCase): + def _assert_parsing(self, cmd: str, expected: str) -> None: + sbom_logging.init() + parsed = parse_inputs_from_commands(cmd, fail_on_unknown_build_command=False) + target = [] if expected == "" else expected.split(" ") + self.assertEqual(parsed, target) + errors = sbom_logging._error_logger.messages # type: ignore + self.assertEqual(errors, {}) + + # Compound command tests + def test_dd_cat(self): + cmd = "(dd if=arch/x86/boot/setup.bin bs=4k conv=sync status=none; cat arch/x86/boot/vmlinux.bin) >arch/x86/boot/bzImage" + expected = "arch/x86/boot/setup.bin arch/x86/boot/vmlinux.bin" + self._assert_parsing(cmd, expected) + + def test_manual_file_creation(self): + cmd = """{ symbase=__dtbo_overlay_bad_unresolved; echo '$(pound)include '; echo '.section .rodata,"a"'; echo '.balign STRUCT_ALIGNMENT'; echo ".global $${symbase}_begin"; echo "$${symbase}_begin:"; echo '.incbin "drivers/of/unittest-data/overlay_bad_unresolved.dtbo" '; echo ".global $${symbase}_end"; echo "$${symbase}_end:"; echo '.balign STRUCT_ALIGNMENT'; } > drivers/of/unittest-data/overlay_bad_unresolved.dtbo.S""" + expected = "" + self._assert_parsing(cmd, expected) + + def test_cat_xz_wrap(self): + cmd = "{ cat arch/x86/boot/compressed/vmlinux.bin | sh ../scripts/xz_wrap.sh; printf \\130\\064\\024\\000; } > arch/x86/boot/compressed/vmlinux.bin.xz" + expected = "arch/x86/boot/compressed/vmlinux.bin" + self._assert_parsing(cmd, expected) + + def test_printf_sed(self): + cmd = r"""{ printf 'static char tomoyo_builtin_profile[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- /dev/null; printf '\t"";\n'; printf 'static char tomoyo_builtin_exception_policy[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- 
../security/tomoyo/policy/exception_policy.conf.default; printf '\t"";\n'; printf 'static char tomoyo_builtin_domain_policy[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- /dev/null; printf '\t"";\n'; printf 'static char tomoyo_builtin_manager[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- /dev/null; printf '\t"";\n'; printf 'static char tomoyo_builtin_stat[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- /dev/null; printf '\t"";\n'; } > security/tomoyo/builtin-policy.h""" + expected = "../security/tomoyo/policy/exception_policy.conf.default" + self._assert_parsing(cmd, expected) + + def test_bin2c_echo(self): + cmd = """(echo "static char tomoyo_builtin_profile[] __initdata ="; ./scripts/bin2c security/tomoyo/builtin-policy.h""" + expected = "../security/tomoyo/policy/exception_policy.conf.default" + self._assert_parsing(cmd, expected) + + def test_cat_colon(self): + cmd = "{ cat init/modules.order; cat usr/modules.order; cat arch/x86/modules.order; cat arch/x86/boot/startup/modules.order; cat kernel/modules.order; cat certs/modules.order; cat mm/modules.order; cat fs/modules.order; cat ipc/modules.order; cat security/modules.order; cat crypto/modules.order; cat block/modules.order; cat io_uring/modules.order; cat lib/modules.order; cat arch/x86/lib/modules.order; cat drivers/modules.order; cat sound/modules.order; cat samples/modules.order; cat net/modules.order; cat virt/modules.order; cat arch/x86/pci/modules.order; cat arch/x86/power/modules.order; cat arch/x86/video/modules.order; :; } > modules.order" + expected = "init/modules.order usr/modules.order arch/x86/modules.order arch/x86/boot/startup/modules.order kernel/modules.order certs/modules.order mm/modules.order fs/modules.order ipc/modules.order security/modules.order crypto/modules.order block/modules.order io_uring/modules.order lib/modules.order arch/x86/lib/modules.order drivers/modules.order 
sound/modules.order samples/modules.order net/modules.order virt/modules.order arch/x86/pci/modules.order arch/x86/power/modules.order arch/x86/video/modules.order" + self._assert_parsing(cmd, expected) + + def test_cat_zstd(self): + cmd = "{ cat arch/x86/boot/compressed/vmlinux.bin arch/x86/boot/compressed/vmlinux.relocs | zstd -22 --ultra; printf \\340\\362\\066\\003; } > arch/x86/boot/compressed/vmlinux.bin.zst" + expected = "arch/x86/boot/compressed/vmlinux.bin arch/x86/boot/compressed/vmlinux.relocs" + self._assert_parsing(cmd, expected) + + # cat command tests + def test_cat_redirect(self): + cmd = "cat ../fs/unicode/utf8data.c_shipped > fs/unicode/utf8data.c" + expected = "../fs/unicode/utf8data.c_shipped" + self._assert_parsing(cmd, expected) + + def test_cat_piped(self): + cmd = "cat arch/x86/boot/compressed/vmlinux.bin arch/x86/boot/compressed/vmlinux.relocs | gzip -n -f -9 > arch/x86/boot/compressed/vmlinux.bin.gz" + expected = "arch/x86/boot/compressed/vmlinux.bin arch/x86/boot/compressed/vmlinux.relocs" + self._assert_parsing(cmd, expected) + + # sed command tests + def test_sed(self): + cmd = "sed -n 's/.*define *BLIST_\\([A-Z0-9_]*\\) *.*/BLIST_FLAG_NAME(\\1),/p' ../include/scsi/scsi_devinfo.h > drivers/scsi/scsi_devinfo_tbl.c" + expected = "../include/scsi/scsi_devinfo.h" + self._assert_parsing(cmd, expected) + + # awk command tests + def test_awk(self): + cmd = "awk -f ../arch/arm64/tools/gen-cpucaps.awk ../arch/arm64/tools/cpucaps > arch/arm64/include/generated/asm/cpucap-defs.h" + expected = "../arch/arm64/tools/cpucaps" + self._assert_parsing(cmd, expected) + + def test_awk_with_input_redirection(self): + cmd = "awk -v N=1 -f ../lib/raid6/unroll.awk < ../lib/raid6/int.uc > lib/raid6/int1.c" + expected = "../lib/raid6/int.uc" + self._assert_parsing(cmd, expected) + + # openssl command tests + def test_openssl(self): + cmd = "openssl req -new -nodes -utf8 -sha256 -days 36500 -batch -x509 -config certs/x509.genkey -outform PEM -out 
certs/signing_key.pem -keyout certs/signing_key.pem 2>&1" + expected = "" + self._assert_parsing(cmd, expected) + + # gcc/clang command tests + def test_gcc(self): + cmd = ( + "gcc -Wp,-MMD,arch/x86/pci/.i386.o.d -nostdinc -I../arch/x86/include -I./arch/x86/include/generated -I../include -I./include -I../arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I../include/uapi -I./include/generated/uapi -include ../include/linux/compiler-version.h -include ../include/linux/kconfig.h -include ../include/linux/compiler_types.h -D__KERNEL__ -fmacro-prefix-map=../= -Werror -std=gnu11 -fshort-wchar -funsigned-char -fno-common -fno-PIE -fno-strict-aliasing -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -fcf-protection=branch -fno-jump-tables -m64 -falign-jumps=1 -falign-loops=1 -mno-80387 -mno-fp-ret-in-387 -mpreferred-stack-boundary=3 -mskip-rax-setup -march=x86-64 -mtune=generic -mno-red-zone -mcmodel=kernel -mstack-protector-guard-reg=gs -mstack-protector-guard-symbol=__ref_stack_chk_guard -Wno-sign-compare -fno-asynchronous-unwind-tables -mindirect-branch=thunk-extern -mindirect-branch-register -mindirect-branch-cs-prefix -mfunction-return=thunk-extern -fno-jump-tables -fpatchable-function-entry=16,16 -fno-delete-null-pointer-checks -O2 -fno-allow-store-data-races -fstack-protector-strong -fomit-frame-pointer -fno-stack-clash-protection -falign-functions=16 -fno-strict-overflow -fno-stack-check -fconserve-stack -fno-builtin-wcslen -Wall -Wextra -Wundef -Werror=implicit-function-declaration -Werror=implicit-int -Werror=return-type -Werror=strict-prototypes -Wno-format-security -Wno-trigraphs -Wno-frame-address -Wno-address-of-packed-member -Wmissing-declarations -Wmissing-prototypes -Wframe-larger-than=2048 -Wno-main -Wvla-larger-than=1 -Wno-pointer-sign -Wcast-function-type -Wno-array-bounds -Wno-stringop-overflow -Wno-alloc-size-larger-than -Wimplicit-fallthrough=5 -Werror=date-time -Werror=incompatible-pointer-types -Werror=designated-init -Wenum-conversion 
-Wunused -Wno-unused-but-set-variable -Wno-unused-const-variable -Wno-packed-not-aligned -Wno-format-overflow -Wno-format-truncation -Wno-stringop-truncation -Wno-override-init -Wno-missing-field-initializers -Wno-type-limits -Wno-shift-negative-value -Wno-maybe-uninitialized -Wno-sign-compare -Wno-unused-parameter -I../arch/x86/pci -Iarch/x86/pci -DKBUILD_MODFILE=" + "arch/x86/pci/i386" + " -DKBUILD_BASENAME=" + "i386" + " -DKBUILD_MODNAME=" + "i386" + " -D__KBUILD_MODNAME=kmod_i386 -c -o arch/x86/pci/i386.o ../arch/x86/pci/i386.c " + ) + expected = "../arch/x86/pci/i386.c" + self._assert_parsing(cmd, expected) + + def test_gcc_linking(self): + cmd = "gcc -o arch/x86/tools/relocs arch/x86/tools/relocs_32.o arch/x86/tools/relocs_64.o arch/x86/tools/relocs_common.o" + expected = "arch/x86/tools/relocs_32.o arch/x86/tools/relocs_64.o arch/x86/tools/relocs_common.o" + self._assert_parsing(cmd, expected) + + def test_gcc_without_compile_flag(self): + cmd = "gcc -Wp,-MMD,arch/x86/boot/compressed/.mkpiggy.d -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu11 -I ../scripts/include -I../tools/include -I arch/x86/boot/compressed -o arch/x86/boot/compressed/mkpiggy ../arch/x86/boot/compressed/mkpiggy.c" + expected = "../arch/x86/boot/compressed/mkpiggy.c" + self._assert_parsing(cmd, expected) + + def test_clang(self): + cmd = """clang -Wp,-MMD,arch/x86/entry/.entry_64_compat.o.d -nostdinc -I../arch/x86/include -I./arch/x86/include/generated -I../include -I./include -I../arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I../include/uapi -I./include/generated/uapi -include ../include/linux/compiler-version.h -include ../include/linux/kconfig.h -D__KERNEL__ --target=x86_64-linux-gnu -fintegrated-as -Werror=unknown-warning-option -Werror=ignored-optimization-argument -Werror=option-ignored -Werror=unused-command-line-argument -fmacro-prefix-map=../= -Werror -D__ASSEMBLY__ -fno-PIE -m64 -I../arch/x86/entry -Iarch/x86/entry 
-DKBUILD_MODFILE='"arch/x86/entry/entry_64_compat"' -DKBUILD_MODNAME='"entry_64_compat"' -D__KBUILD_MODNAME=kmod_entry_64_compat -c -o arch/x86/entry/entry_64_compat.o ../arch/x86/entry/entry_64_compat.S""" + expected = "../arch/x86/entry/entry_64_compat.S" + self._assert_parsing(cmd, expected) + + # ld command tests + def test_ld(self): + cmd = 'ld -o arch/x86/entry/vdso/vdso64.so.dbg -shared --hash-style=both --build-id=sha1 --no-undefined --eh-frame-hdr -Bsymbolic -z noexecstack -m elf_x86_64 -soname linux-vdso.so.1 -z max-page-size=4096 -T arch/x86/entry/vdso/vdso.lds arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o; if readelf -rW arch/x86/entry/vdso/vdso64.so.dbg | grep -v _NONE | grep -q " R_\w*_"; then (echo >&2 "arch/x86/entry/vdso/vdso64.so.dbg: dynamic relocations are not supported"; rm -f arch/x86/entry/vdso/vdso64.so.dbg; /bin/false); fi' # type: ignore + expected = "arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o" + self._assert_parsing(cmd, expected) + + def test_ld_whole_archive(self): + cmd = "ld -m elf_x86_64 -z noexecstack -r -o vmlinux.o --whole-archive vmlinux.a --no-whole-archive --start-group --end-group" + expected = "vmlinux.a" + self._assert_parsing(cmd, expected) + + def test_ld_with_at_symbol(self): + cmd = "ld.lld -m elf_x86_64 -z noexecstack -r -o fs/efivarfs/efivarfs.o @fs/efivarfs/efivarfs.mod ; ./tools/objtool/objtool --hacks=jump_label --hacks=noinstr --hacks=skylake --ibt --orc --retpoline --rethunk --static-call --uaccess --prefix=16 --link --module fs/efivarfs/efivarfs.o" + expected = "@fs/efivarfs/efivarfs.mod" + self._assert_parsing(cmd, expected) + + def test_ld_if_objdump(self): + cmd = """ld -o arch/x86/entry/vdso/vdso64.so.dbg -shared --hash-style=both --build-id=sha1 
--eh-frame-hdr -Bsymbolic -z noexecstack -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined -z max-page-size=4096 -T arch/x86/entry/vdso/vdso.lds arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vsgx.o && sh ./arch/x86/entry/vdso/checkundef.sh 'nm' 'arch/x86/entry/vdso/vdso64.so.dbg'; if objdump -R arch/x86/entry/vdso/vdso64.so.dbg | grep -E -h "R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE| R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE"; then (echo >&2 "arch/x86/entry/vdso/vdso64.so.dbg: dynamic relocations are not supported"; rm -f arch/x86/entry/vdso/vdso64.so.dbg; /bin/false); fi""" + expected = "arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vsgx.o" + self._assert_parsing(cmd, expected) + + # printf | xargs ar command tests + def test_ar_printf(self): + cmd = 'rm -f built-in.a; printf "./%s " init/built-in.a usr/built-in.a arch/x86/built-in.a arch/x86/boot/startup/built-in.a kernel/built-in.a certs/built-in.a mm/built-in.a fs/built-in.a ipc/built-in.a security/built-in.a crypto/built-in.a block/built-in.a io_uring/built-in.a lib/built-in.a arch/x86/lib/built-in.a drivers/built-in.a sound/built-in.a net/built-in.a virt/built-in.a arch/x86/pci/built-in.a arch/x86/power/built-in.a arch/x86/video/built-in.a | xargs ar cDPrST built-in.a' + expected = "./init/built-in.a ./usr/built-in.a ./arch/x86/built-in.a ./arch/x86/boot/startup/built-in.a ./kernel/built-in.a ./certs/built-in.a ./mm/built-in.a ./fs/built-in.a ./ipc/built-in.a ./security/built-in.a ./crypto/built-in.a ./block/built-in.a ./io_uring/built-in.a ./lib/built-in.a ./arch/x86/lib/built-in.a ./drivers/built-in.a ./sound/built-in.a ./net/built-in.a ./virt/built-in.a ./arch/x86/pci/built-in.a ./arch/x86/power/built-in.a ./arch/x86/video/built-in.a" + self._assert_parsing(cmd, expected) + + def test_ar_printf_nested(self): + cmd = 'rm -f 
arch/x86/pci/built-in.a; printf "arch/x86/pci/%s " i386.o init.o mmconfig_64.o direct.o mmconfig-shared.o fixup.o acpi.o legacy.o irq.o common.o early.o bus_numa.o amd_bus.o | xargs ar cDPrST arch/x86/pci/built-in.a' + expected = "arch/x86/pci/i386.o arch/x86/pci/init.o arch/x86/pci/mmconfig_64.o arch/x86/pci/direct.o arch/x86/pci/mmconfig-shared.o arch/x86/pci/fixup.o arch/x86/pci/acpi.o arch/x86/pci/legacy.o arch/x86/pci/irq.o arch/x86/pci/common.o arch/x86/pci/early.o arch/x86/pci/bus_numa.o arch/x86/pci/amd_bus.o" + self._assert_parsing(cmd, expected) + + # ar command tests + def test_ar_reordering(self): + cmd = "rm -f vmlinux.a; ar cDPrST vmlinux.a built-in.a lib/lib.a arch/x86/lib/lib.a; ar mPiT $$(ar t vmlinux.a | sed -n 1p) vmlinux.a $$(ar t vmlinux.a | grep -F -f ../scripts/head-object-list.txt)" + expected = "built-in.a lib/lib.a arch/x86/lib/lib.a" + self._assert_parsing(cmd, expected) + + def test_ar_default(self): + cmd = "rm -f lib/lib.a; ar cDPrsT lib/lib.a lib/argv_split.o lib/bug.o lib/buildid.o lib/clz_tab.o lib/cmdline.o lib/cpumask.o lib/ctype.o lib/dec_and_lock.o lib/decompress.o lib/decompress_bunzip2.o lib/decompress_inflate.o lib/decompress_unlz4.o lib/decompress_unlzma.o lib/decompress_unlzo.o lib/decompress_unxz.o lib/decompress_unzstd.o lib/dump_stack.o lib/earlycpio.o lib/extable.o lib/flex_proportions.o lib/idr.o lib/iomem_copy.o lib/irq_regs.o lib/is_single_threaded.o lib/klist.o lib/kobject.o lib/kobject_uevent.o lib/logic_pio.o lib/maple_tree.o lib/memcat_p.o lib/nmi_backtrace.o lib/objpool.o lib/plist.o lib/radix-tree.o lib/ratelimit.o lib/rbtree.o lib/seq_buf.o lib/siphash.o lib/string.o lib/sys_info.o lib/timerqueue.o lib/union_find.o lib/vsprintf.o lib/win_minmax.o lib/xarray.o" + expected = "lib/argv_split.o lib/bug.o lib/buildid.o lib/clz_tab.o lib/cmdline.o lib/cpumask.o lib/ctype.o lib/dec_and_lock.o lib/decompress.o lib/decompress_bunzip2.o lib/decompress_inflate.o lib/decompress_unlz4.o lib/decompress_unlzma.o 
lib/decompress_unlzo.o lib/decompress_unxz.o lib/decompress_unzstd.o lib/dump_stack.o lib/earlycpio.o lib/extable.o lib/flex_proportions.o lib/idr.o lib/iomem_copy.o lib/irq_regs.o lib/is_single_threaded.o lib/klist.o lib/kobject.o lib/kobject_uevent.o lib/logic_pio.o lib/maple_tree.o lib/memcat_p.o lib/nmi_backtrace.o lib/objpool.o lib/plist.o lib/radix-tree.o lib/ratelimit.o lib/rbtree.o lib/seq_buf.o lib/siphash.o lib/string.o lib/sys_info.o lib/timerqueue.o lib/union_find.o lib/vsprintf.o lib/win_minmax.o lib/xarray.o" + self._assert_parsing(cmd, expected) + + def test_ar_llvm(self): + cmd = "llvm-ar mPiT $$(llvm-ar t vmlinux.a | sed -n 1p) vmlinux.a $$(llvm-ar t vmlinux.a | grep -F -f ../scripts/head-object-list.txt)" + expected = "" + self._assert_parsing(cmd, expected) + + # nm command tests + def test_nm(self): + cmd = """llvm-nm -p --defined-only rust/core.o | awk '$$2~/(T|R|D|B)/ && $$3!~/__(pfx|cfi|odr_asan)/ { printf "EXPORT_SYMBOL_RUST_GPL(%s);\n",$$3 }' > rust/exports_core_generated.h""" + expected = "rust/core.o" + self._assert_parsing(cmd, expected) + + def test_nm_vmlinux(self): + cmd = r"nm vmlinux | sed -n -e 's/^\([0-9a-fA-F]*\) [ABbCDGRSTtVW] \(_text\|__start_rodata\|__bss_start\|_end\)$/#define VO_\2 _AC(0x\1,UL)/p' > arch/x86/boot/voffset.h" + expected = "vmlinux" + self._assert_parsing(cmd, expected) + + # objcopy command tests + def test_objcopy(self): + cmd = "objcopy --remove-section='.rel*' --remove-section=!'.rel*.dyn' vmlinux.unstripped vmlinux" + expected = "vmlinux.unstripped" + self._assert_parsing(cmd, expected) + + def test_objcopy_llvm(self): + cmd = "llvm-objcopy --remove-section='.rel*' --remove-section=!'.rel*.dyn' vmlinux.unstripped vmlinux" + expected = "vmlinux.unstripped" + self._assert_parsing(cmd, expected) + + # strip command tests + def test_strip(self): + cmd = "strip --strip-debug -o drivers/firmware/efi/libstub/mem.stub.o drivers/firmware/efi/libstub/mem.o" + expected = "drivers/firmware/efi/libstub/mem.o" + 
self._assert_parsing(cmd, expected) + + # rustc command tests + def test_rustc(self): + cmd = """OBJTREE=/workspace/linux/kernel_build rustc -Zbinary_dep_depinfo=y -Astable_features -Dnon_ascii_idents -Dunsafe_op_in_unsafe_fn -Wmissing_docs -Wrust_2018_idioms -Wclippy::all -Wclippy::as_ptr_cast_mut -Wclippy::as_underscore -Wclippy::cast_lossless -Wclippy::ignored_unit_patterns -Wclippy::mut_mut -Wclippy::needless_bitwise_bool -Aclippy::needless_lifetimes -Wclippy::no_mangle_with_rust_abi -Wclippy::ptr_as_ptr -Wclippy::ptr_cast_constness -Wclippy::ref_as_ptr -Wclippy::undocumented_unsafe_blocks -Wclippy::unnecessary_safety_comment -Wclippy::unnecessary_safety_doc -Wrustdoc::missing_crate_level_docs -Wrustdoc::unescaped_backticks -Cpanic=abort -Cembed-bitcode=n -Clto=n -Cforce-unwind-tables=n -Ccodegen-units=1 -Csymbol-mangling-version=v0 -Crelocation-model=static -Zfunction-sections=n -Wclippy::float_arithmetic --target=./scripts/target.json -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 -Zcf-protection=branch -Zno-jump-tables -Ctarget-cpu=x86-64 -Ztune-cpu=generic -Cno-redzone=y -Ccode-model=kernel -Zfunction-return=thunk-extern -Zpatchable-function-entry=16,16 -Copt-level=2 -Cdebug-assertions=n -Coverflow-checks=y -Dwarnings @./include/generated/rustc_cfg --edition=2021 --cfg no_fp_fmt_parse --emit=dep-info=rust/.core.o.d --emit=obj=rust/core.o --emit=metadata=rust/libcore.rmeta --crate-type rlib -L./rust --crate-name core /usr/lib/rust-1.84/lib/rustlib/src/rust/library/core/src/lib.rs --sysroot=/dev/null ;llvm-objcopy --redefine-sym __addsf3=__rust__addsf3 --redefine-sym __eqsf2=__rust__eqsf2 --redefine-sym __extendsfdf2=__rust__extendsfdf2 --redefine-sym __gesf2=__rust__gesf2 --redefine-sym __lesf2=__rust__lesf2 --redefine-sym __ltsf2=__rust__ltsf2 --redefine-sym __mulsf3=__rust__mulsf3 --redefine-sym __nesf2=__rust__nesf2 --redefine-sym __truncdfsf2=__rust__truncdfsf2 --redefine-sym __unordsf2=__rust__unordsf2 --redefine-sym 
__adddf3=__rust__adddf3 --redefine-sym __eqdf2=__rust__eqdf2 --redefine-sym __ledf2=__rust__ledf2 --redefine-sym __ltdf2=__rust__ltdf2 --redefine-sym __muldf3=__rust__muldf3 --redefine-sym __unorddf2=__rust__unorddf2 --redefine-sym __muloti4=__rust__muloti4 --redefine-sym __multi3=__rust__multi3 --redefine-sym __udivmodti4=__rust__udivmodti4 --redefine-sym __udivti3=__rust__udivti3 --redefine-sym __umodti3=__rust__umodti3 rust/core.o""" + expected = "/usr/lib/rust-1.84/lib/rustlib/src/rust/library/core/src/lib.rs rust/core.o" + self._assert_parsing(cmd, expected) + + # rustdoc command tests + def test_rustdoc(self): + cmd = """OBJTREE=/workspace/linux/kernel_build rustdoc --test --edition=2021 -Zbinary_dep_depinfo=y -Astable_features -Dnon_ascii_idents -Dunsafe_op_in_unsafe_fn -Wmissing_docs -Wrust_2018_idioms -Wunreachable_pub -Wclippy::all -Wclippy::as_ptr_cast_mut -Wclippy::as_underscore -Wclippy::cast_lossless -Wclippy::ignored_unit_patterns -Wclippy::mut_mut -Wclippy::needless_bitwise_bool -Aclippy::needless_lifetimes -Wclippy::no_mangle_with_rust_abi -Wclippy::ptr_as_ptr -Wclippy::ptr_cast_constness -Wclippy::ref_as_ptr -Wclippy::undocumented_unsafe_blocks -Wclippy::unnecessary_safety_comment -Wclippy::unnecessary_safety_doc -Wrustdoc::missing_crate_level_docs -Wrustdoc::unescaped_backticks -Cpanic=abort -Cembed-bitcode=n -Clto=n -Cforce-unwind-tables=n -Ccodegen-units=1 -Csymbol-mangling-version=v0 -Crelocation-model=static -Zfunction-sections=n -Wclippy::float_arithmetic --target=aarch64-unknown-none -Ctarget-feature="-neon" -Cforce-unwind-tables=n -Zbranch-protection=pac-ret -Copt-level=2 -Cdebug-assertions=y -Coverflow-checks=y -Dwarnings -Cforce-frame-pointers=y -Zsanitizer=kernel-address -Zsanitizer-recover=kernel-address -Cllvm-args=-asan-mapping-offset=0xdfff800000000000 -Cpasses=sancov-module -Cllvm-args=-sanitizer-coverage-level=3 -Cllvm-args=-sanitizer-coverage-trace-pc -Cllvm-args=-sanitizer-coverage-trace-compares @./include/generated/rustc_cfg 
-L./rust --extern ffi --extern pin_init --extern kernel --extern build_error --extern macros --extern bindings --extern uapi --no-run --crate-name kernel -Zunstable-options --sysroot=/dev/null --test-builder ./scripts/rustdoc_test_builder ../rust/kernel/lib.rs >/dev/null""" + expected = "../rust/kernel/lib.rs" + self._assert_parsing(cmd, expected) + + def test_rustdoc_test_gen(self): + cmd = "./scripts/rustdoc_test_gen" + expected = "" + self._assert_parsing(cmd, expected) + + # flex command tests + def test_flex(self): + cmd = "flex -oscripts/kconfig/lexer.lex.c -L ../scripts/kconfig/lexer.l" + expected = "../scripts/kconfig/lexer.l" + self._assert_parsing(cmd, expected) + + # bison command tests + def test_bison(self): + cmd = "bison -o scripts/kconfig/parser.tab.c --defines=scripts/kconfig/parser.tab.h -t -l ../scripts/kconfig/parser.y" + expected = "../scripts/kconfig/parser.y" + self._assert_parsing(cmd, expected) + + # bindgen command tests + def test_bindgen(self): + cmd = ( + "bindgen ../rust/bindings/bindings_helper.h " + "--blocklist-type __kernel_s?size_t --blocklist-type __kernel_ptrdiff_t " + "--opaque-type xregs_state --opaque-type desc_struct --no-doc-comments " + "--rust-target 1.68 --use-core --with-derive-default -o rust/bindings/bindings_generated.rs " + "-- -Wp,-MMD,rust/bindings/.bindings_generated.rs.d -nostdinc -I../arch/x86/include " + "-include ../include/linux/compiler-version.h -D__KERNEL__ -fintegrated-as -fno-builtin -DMODULE; " + "sed -Ei 's/pub const RUST_CONST_HELPER_([a-zA-Z0-9_]*)/pub const \\1/g' rust/bindings/bindings_generated.rs" + ) + expected = "../rust/bindings/bindings_helper.h ../include/linux/compiler-version.h" + self._assert_parsing(cmd, expected) + + # perl command tests + def test_perl(self): + cmd = "perl ../lib/crypto/x86/poly1305-x86_64-cryptogams.pl > lib/crypto/x86/poly1305-x86_64-cryptogams.S" + expected = "../lib/crypto/x86/poly1305-x86_64-cryptogams.pl" + self._assert_parsing(cmd, expected) + + # 
link-vmlinux.sh command tests + def test_link_vmlinux(self): + cmd = '../scripts/link-vmlinux.sh "ld" "-m elf_x86_64 -z noexecstack" "-z max-page-size=0x200000 --build-id=sha1 --orphan-handling=error --emit-relocs --discard-none" "vmlinux.unstripped"; true' + expected = "vmlinux.a" + self._assert_parsing(cmd, expected) + + def test_link_vmlinux_postlink(self): + cmd = '../scripts/link-vmlinux.sh "ld" "-m elf_x86_64 -z noexecstack --no-warn-rwx-segments" "--emit-relocs --discard-none -z max-page-size=0x200000 --build-id=sha1 -X --orphan-handling=error"; make -f ../arch/x86/Makefile.postlink vmlinux' + expected = "vmlinux.a" + self._assert_parsing(cmd, expected) + + # syscallhdr.sh command tests + def test_syscallhdr(self): + cmd = "sh ../scripts/syscallhdr.sh --abis common,64 --emit-nr ../arch/x86/entry/syscalls/syscall_64.tbl arch/x86/include/generated/uapi/asm/unistd_64.h" + expected = "../arch/x86/entry/syscalls/syscall_64.tbl" + self._assert_parsing(cmd, expected) + + # syscalltbl.sh command tests + def test_syscalltbl(self): + cmd = "sh ../scripts/syscalltbl.sh --abis common,64 ../arch/x86/entry/syscalls/syscall_64.tbl arch/x86/include/generated/asm/syscalls_64.h" + expected = "../arch/x86/entry/syscalls/syscall_64.tbl" + self._assert_parsing(cmd, expected) + + # mkcapflags.sh command tests + def test_mkcapflags(self): + cmd = "sh ../arch/x86/kernel/cpu/mkcapflags.sh arch/x86/kernel/cpu/capflags.c ../arch/x86/kernel/cpu/../../include/asm/cpufeatures.h ../arch/x86/kernel/cpu/../../include/asm/vmxfeatures.h ../arch/x86/kernel/cpu/mkcapflags.sh FORCE" + expected = "../arch/x86/kernel/cpu/../../include/asm/cpufeatures.h ../arch/x86/kernel/cpu/../../include/asm/vmxfeatures.h" + self._assert_parsing(cmd, expected) + + # orc_hash.sh command tests + def test_orc_hash(self): + cmd = "mkdir -p arch/x86/include/generated/asm/; sh ../scripts/orc_hash.sh < ../arch/x86/include/asm/orc_types.h > arch/x86/include/generated/asm/orc_hash.h" + expected = 
"../arch/x86/include/asm/orc_types.h" + self._assert_parsing(cmd, expected) + + # xen-hypercalls.sh command tests + def test_xen_hypercalls(self): + cmd = "sh '../scripts/xen-hypercalls.sh' arch/x86/include/generated/asm/xen-hypercalls.h ../include/xen/interface/xen-mca.h ../include/xen/interface/xen.h ../include/xen/interface/xenpmu.h" + expected = "../include/xen/interface/xen-mca.h ../include/xen/interface/xen.h ../include/xen/interface/xenpmu.h" + self._assert_parsing(cmd, expected) + + # gen_initramfs.sh command tests + def test_gen_initramfs(self): + cmd = "sh ../usr/gen_initramfs.sh -o usr/initramfs_data.cpio -l usr/.initramfs_data.cpio.d ../usr/default_cpio_list" + expected = "../usr/default_cpio_list" + self._assert_parsing(cmd, expected) + + # vdso2c command tests + def test_vdso2c(self): + cmd = "arch/x86/entry/vdso/vdso2c arch/x86/entry/vdso/vdso64.so.dbg arch/x86/entry/vdso/vdso64.so arch/x86/entry/vdso/vdso-image-64.c" + expected = "arch/x86/entry/vdso/vdso64.so.dbg arch/x86/entry/vdso/vdso64.so" + self._assert_parsing(cmd, expected) + + # mkpiggy command tests + def test_mkpiggy(self): + cmd = "arch/x86/boot/compressed/mkpiggy arch/x86/boot/compressed/vmlinux.bin.gz > arch/x86/boot/compressed/piggy.S" + expected = "arch/x86/boot/compressed/vmlinux.bin.gz" + self._assert_parsing(cmd, expected) + + # relocs command tests + def test_relocs(self): + cmd = "arch/x86/tools/relocs vmlinux.unstripped > arch/x86/boot/compressed/vmlinux.relocs;arch/x86/tools/relocs --abs-relocs vmlinux.unstripped" + expected = "vmlinux.unstripped" + self._assert_parsing(cmd, expected) + + def test_relocs_with_realmode(self): + cmd = ( + "arch/x86/tools/relocs --realmode arch/x86/realmode/rm/realmode.elf > arch/x86/realmode/rm/realmode.relocs" + ) + expected = "arch/x86/realmode/rm/realmode.elf" + self._assert_parsing(cmd, expected) + + # mk_elfconfig command tests + def test_mk_elfconfig(self): + cmd = "scripts/mod/mk_elfconfig < scripts/mod/empty.o > scripts/mod/elfconfig.h" 
+ expected = "scripts/mod/empty.o" + self._assert_parsing(cmd, expected) + + # tools/build command tests + def test_build(self): + cmd = "arch/x86/boot/tools/build arch/x86/boot/setup.bin arch/x86/boot/vmlinux.bin arch/x86/boot/zoffset.h arch/x86/boot/bzImage" + expected = "arch/x86/boot/setup.bin arch/x86/boot/vmlinux.bin arch/x86/boot/zoffset.h" + self._assert_parsing(cmd, expected) + + # extract-cert command tests + def test_extract_cert(self): + cmd = 'certs/extract-cert "" certs/signing_key.x509' + expected = "" + self._assert_parsing(cmd, expected) + + # dtc command tests + def test_dtc_cat(self): + cmd = "./scripts/dtc/dtc -o drivers/of/empty_root.dtb -b 0 -i../drivers/of/ -i../scripts/dtc/include-prefixes -Wno-unique_unit_address -Wno-unit_address_vs_reg -Wno-avoid_unnecessary_addr_size -Wno-alias_paths -Wno-graph_child_address -Wno-simple_bus_reg -d drivers/of/.empty_root.dtb.d.dtc.tmp drivers/of/.empty_root.dtb.dts.tmp ; cat drivers/of/.empty_root.dtb.d.pre.tmp drivers/of/.empty_root.dtb.d.dtc.tmp > drivers/of/.empty_root.dtb.d" + expected = "drivers/of/.empty_root.dtb.dts.tmp drivers/of/.empty_root.dtb.d.pre.tmp drivers/of/.empty_root.dtb.d.dtc.tmp" + self._assert_parsing(cmd, expected) + + # pnmtologo command tests + def test_pnmtologo(self): + cmd = "drivers/video/logo/pnmtologo -t clut224 -n logo_linux_clut224 -o drivers/video/logo/logo_linux_clut224.c ../drivers/video/logo/logo_linux_clut224.ppm" + expected = "../drivers/video/logo/logo_linux_clut224.ppm" + self._assert_parsing(cmd, expected) + + # relacheck command tests + def test_relacheck(self): + cmd = "arch/arm64/kernel/pi/relacheck arch/arm64/kernel/pi/idreg-override.pi.o arch/arm64/kernel/pi/idreg-override.o" + expected = "arch/arm64/kernel/pi/idreg-override.pi.o" + self._assert_parsing(cmd, expected) + + # mkregtable command tests + def test_mkregtable(self): + cmd = "drivers/gpu/drm/radeon/mkregtable ../drivers/gpu/drm/radeon/reg_srcs/r100 > drivers/gpu/drm/radeon/r100_reg_safe.h" + 
expected = "../drivers/gpu/drm/radeon/reg_srcs/r100" + self._assert_parsing(cmd, expected) + + # genheaders command tests + def test_genheaders(self): + cmd = "security/selinux/genheaders security/selinux/flask.h security/selinux/av_permissions.h" + expected = "" + self._assert_parsing(cmd, expected) + + # mkcpustr command tests + def test_mkcpustr(self): + cmd = "arch/x86/boot/mkcpustr > arch/x86/boot/cpustr.h" + expected = "" + self._assert_parsing(cmd, expected) + + # polgen command tests + def test_polgen(self): + cmd = "scripts/ipe/polgen/polgen security/ipe/boot_policy.c" + expected = "" + self._assert_parsing(cmd, expected) + + # gen_header.py command tests + def test_gen_header(self): + cmd = "mkdir -p drivers/gpu/drm/msm/generated && python3 ../drivers/gpu/drm/msm/registers/gen_header.py --no-validate --rnn ../drivers/gpu/drm/msm/registers --xml ../drivers/gpu/drm/msm/registers/adreno/a2xx.xml c-defines > drivers/gpu/drm/msm/generated/a2xx.xml.h" + expected = "../drivers/gpu/drm/msm/registers/adreno/a2xx.xml" + self._assert_parsing(cmd, expected) + + +if __name__ == "__main__": + unittest.main() diff --git a/scripts/sbom/tests/spdx_graph/__init__.py b/scripts/sbom/tests/spdx_graph/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/scripts/sbom/tests/spdx_graph/test_kernel_file.py b/scripts/sbom/tests/spdx_graph/test_kernel_file.py new file mode 100644 index 00000000000000..bc44e7a97d2a99 --- /dev/null +++ b/scripts/sbom/tests/spdx_graph/test_kernel_file.py @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +import unittest +from pathlib import Path +import tempfile +from sbom.spdx_graph.kernel_file import _parse_spdx_license_identifier # type: ignore + + +class TestKernelFile(unittest.TestCase): + def setUp(self): + self.tmpdir = tempfile.TemporaryDirectory() + self.src_tree = Path(self.tmpdir.name) + + def tearDown(self): + self.tmpdir.cleanup() + + def 
test_parse_spdx_license_identifier(self): + # REUSE-IgnoreStart + test_cases: list[tuple[str, str | None]] = [ + ("/* SPDX-License-Identifier: MIT*/", "MIT"), + ("// SPDX-License-Identifier: GPL-2.0-only", "GPL-2.0-only"), + ("/* SPDX-License-Identifier: GPL-2.0-or-later OR MIT */", "GPL-2.0-or-later OR MIT"), + ("/* SPDX-License-Identifier: Apache-2.0 */\n extra text", "Apache-2.0"), + ("int main() { return 0; }", None), + ] + # REUSE-IgnoreEnd + + for i, (file_content, expected_identifier) in enumerate(test_cases): + file_path = self.src_tree / f"file_{i}.c" + file_path.write_text(file_content) + self.assertEqual(_parse_spdx_license_identifier(str(file_path)), expected_identifier)