diff --git a/.github/scripts/sbom/expand-custom-context.sh b/.github/scripts/sbom/expand-custom-context.sh index 7c2d10f7..b354a42c 100755 --- a/.github/scripts/sbom/expand-custom-context.sh +++ b/.github/scripts/sbom/expand-custom-context.sh @@ -24,17 +24,24 @@ fi spdx_documents=("$@") for spdx_document in "${spdx_documents[@]}"; do - # Extract SPDX context URL and read JSON into variable + # Extract SPDX context URL spdx_context=$(jq -r '.["@context"][0]' "$spdx_document") - spdx_json=$(cat "$spdx_document") + + # Create a temporary sed script + sed_script=$(mktemp) # Replace prefixes with full namespaces while IFS=$'\t' read -r prefix namespace; do + # Escape special characters for sed namespace_escaped=$(printf '%s\n' "$namespace" | sed 's/[[\.*^$()+?{|]/\\&/g') - spdx_json=$(echo "$spdx_json" | sed "s|\"${prefix}:|\"${namespace_escaped}|g") + echo "s|\"${prefix}:|\"${namespace_escaped}|g" >> "$sed_script" done < <(jq -r '.["@context"][1] | to_entries[] | "\(.key)\t\(.value)"' "$spdx_document") # Update @context and write to file output_file="$(dirname "$spdx_document")/${output_prefix}$(basename "$spdx_document")" - echo "$spdx_json" | jq --arg ctx "$spdx_context" '.["@context"] = $ctx' > "$output_file" + sed -f "$sed_script" "$spdx_document" | + jq --arg ctx "$spdx_context" '.["@context"] = $ctx' > "$output_file" + + # Clean up temporary sed script + rm "$sed_script" done diff --git a/sbom/Makefile b/sbom/Makefile index 38268ee1..f14176ea 100644 --- a/sbom/Makefile +++ b/sbom/Makefile @@ -18,26 +18,23 @@ ifdef CONFIG_MODULES SBOM_DEPS += $(objtree)/modules.order endif -sbom: $(SBOM_TARGETS) - @: - $(SBOM_TARGETS) &: $(SBOM_DEPS) - @echo " GEN $(notdir $(SBOM_TARGETS))" + $(Q)echo " GEN $(notdir $(SBOM_TARGETS))" - @printf "%s\n" "$(KBUILD_IMAGE)" > $(SBOM_ROOTS_FILE) - @if [ "$(CONFIG_MODULES)" = "y" ]; then \ + $(Q)printf "%s\n" "$(KBUILD_IMAGE)" > $(SBOM_ROOTS_FILE) + $(Q)if [ "$(CONFIG_MODULES)" = "y" ]; then \ sed 's/\.o$$/.ko/' $(objtree)/modules.order 
>> $(SBOM_ROOTS_FILE); \ fi - @python3 sbom.py \ - --src-tree $(srctree) \ - --obj-tree $(objtree) \ - --roots-file $(SBOM_ROOTS_FILE) \ - --output-directory $(objtree) \ - --generate-spdx \ - --package-license "GPL-2.0 WITH Linux-syscall-note" \ - --package-version "$(KERNELVERSION)" + $(Q)$(PYTHON3) $(srctree)/scripts/sbom/sbom.py \ + --src-tree $(abspath $(srctree)) \ + --obj-tree $(abspath $(objtree)) \ + --roots-file $(SBOM_ROOTS_FILE) \ + --output-directory $(abspath $(objtree)) \ + --generate-spdx \ + --package-license "GPL-2.0 WITH Linux-syscall-note" \ + --package-version "$(KERNELVERSION)" - @rm $(SBOM_ROOTS_FILE) + $(Q)rm $(SBOM_ROOTS_FILE) -.PHONY: sbom +$(obj)/: $(SBOM_TARGETS) diff --git a/sbom/README b/sbom/sbom.rst similarity index 56% rename from sbom/README rename to sbom/sbom.rst index 5d157a1f..029b08b6 100644 --- a/sbom/README +++ b/sbom/sbom.rst @@ -1,7 +1,5 @@ - +.. SPDX-License-Identifier: GPL-2.0-only OR MIT +.. Copyright (C) 2025 TNG Technology Consulting GmbH KernelSbom ========== @@ -9,21 +7,21 @@ KernelSbom Introduction ------------ -The kernel `CONFIG_SBOM` option enables the KernelSbom tool, -a Python script `sbom.py` that runs during the kernel build process -after all build outputs have been generated. -KernelSbom analyzes every file involved in the build and produces -Software Bill of Materials (SBOM) documents in SPDX 3.0.1 format. +KernelSbom is a Python script ``scripts/sbom/sbom.py`` that can be +executed after a successful kernel build. When invoked, KernelSbom +analyzes all files involved in the build and generates Software Bill of +Materials (SBOM) documents in SPDX 3.0.1 format. 
The generated SBOM documents capture: -- **Final output artifacts**, typically the kernel image and modules -- **All source files** that contributed to the build with metadata + +* **Final output artifacts**, typically the kernel image and modules +* **All source files** that contributed to the build with metadata and licensing information -- **Details of the build process**, including intermediate artifacts +* **Details of the build process**, including intermediate artifacts and the build commands linking source files to the final output artifacts KernelSbom is originally developed in the -[KernelSbom repository](https://github.com/TNG/KernelSbom). +`KernelSbom repository <https://github.com/TNG/KernelSbom>`_. Requirements ------------ @@ -33,33 +31,33 @@ Python 3.10 or later. No libraries or other dependencies are required. Basic Usage ----------- -Enable the `CONFIG_SBOM` option and start a kernel build. -For example: +Run the ``make sbom`` target. +For example:: $ make defconfig O=kernel_build - $ scripts/config --file kernel_build/.config --enable SBOM - $ make O=kernel_build -j$(nproc) + $ make sbom O=kernel_build -j$(nproc) -After all build outputs have been generated, KernelSbom produces three -SPDX documents in the root directory of the object tree: +This will trigger a kernel build. After all build outputs have been +generated, KernelSbom produces three SPDX documents in the root +directory of the object tree: -- `sbom-source.spdx.json` +* ``sbom-source.spdx.json`` Describes all source files involved in the build and associates each file with its corresponding license expression. -- `sbom-output.spdx.json` - Captures all final build outputs (kernel image and `.ko` module files) +* ``sbom-output.spdx.json`` + Captures all final build outputs (kernel image and ``.ko`` module files) and includes build metadata such as environment variables and - a hash of the `.config` file used for the build. + a hash of the ``.config`` file used for the build.
-- `sbom-build.spdx.json` +* ``sbom-build.spdx.json`` Imports files from the source and output documents and describes every intermediate build artifact. For each artifact, it records the exact build command used and establishes the relationship between input files and generated outputs. -When enabling the KernelSbom tool, it is recommended to perform -out-of-tree builds using `O=`. KernelSbom classifies files as +When invoking the sbom target, it is recommended to perform +out-of-tree builds using ``O=``. KernelSbom classifies files as source files when they are located in the source tree and not in the object tree. For in-tree builds, where the source and object trees are the same directory, this distinction can no longer be made reliably. @@ -72,9 +70,9 @@ Standalone Usage KernelSbom can also be used as a standalone script to generate SPDX documents for specific build outputs. For example, after a successful x86 kernel build, KernelSbom can generate SPDX documents -for the `bzImage` kernel image: +for the ``bzImage`` kernel image:: - $ SRCARCH=x86 python3 tools/sbom/sbom.py \ + $ SRCARCH=x86 python3 scripts/sbom/sbom.py \ --src-tree . \ --obj-tree ./kernel_build \ --roots arch/x86/boot/bzImage \ @@ -83,25 +81,25 @@ for the `bzImage` kernel image: --prettify-json \ --debug -Note that when KernelSbom is invoked outside of the `make` process, +Note that when KernelSbom is invoked outside of the ``make`` process, the environment variables used during compilation are not available and therefore cannot be included in the generated SPDX documents. It is -recommended to set at least the `SRCARCH` environment variable to the +recommended to set at least the ``SRCARCH`` environment variable to the architecture for which the build was performed. 
-For a full list of command-line options, run: +For a full list of command-line options, run:: - $ python3 tools/sbom/sbom.py --help + $ python3 scripts/sbom/sbom.py --help Output Format ------------- KernelSbom generates documents conforming to the -[SPDX 3.0.1 specification](https://spdx.github.io/spdx-spec/v3.0.1/) +`SPDX 3.0.1 specification <https://spdx.github.io/spdx-spec/v3.0.1/>`_ serialized as JSON-LD. -To reduce file size, the output documents use the JSON-LD `@context` -to define custom prefixes for `spdxId` values. While this is compliant +To reduce file size, the output documents use the JSON-LD ``@context`` +to define custom prefixes for ``spdxId`` values. While this is compliant with the SPDX specification, only a limited number of tools in the current SPDX ecosystem support custom JSON-LD contexts. To use such tools with the generated documents, the custom JSON-LD context must @@ -112,33 +110,34 @@ How it Works ------------ KernelSbom operates in two major phases: + 1. **Generate the cmd graph**, an acyclic directed dependency graph. 2. **Generate SPDX documents** based on the cmd graph. KernelSbom begins from the root artifacts specified by the user, e.g., -`arch/x86/boot/bzImage`. For each root artifact, it collects all +``arch/x86/boot/bzImage``. For each root artifact, it collects all dependencies required to build that artifact. The dependencies come from multiple sources: -- **`.cmd` files**: The primary source is the `.cmd` file of the - generated artifact, e.g., `arch/x86/boot/.bzImage.cmd`. These files +* **.cmd files**: The primary source is the ``.cmd`` file of the + generated artifact, e.g., ``arch/x86/boot/.bzImage.cmd``. These files contain the exact command used to build the artifact and often include - an explicit list of input dependencies. By parsing the `.cmd` file, - the full list of dependencies can be obtained. + an explicit list of input dependencies. By parsing the ``.cmd`` + file, the full list of dependencies can be obtained.
-- **`.incbin` statements**: The second source are include binary - `.incbin` statements in `.S` assembly files. +* **.incbin statements**: The second source are include binary + ``.incbin`` statements in ``.S`` assembly files. -- **Hardcoded dependencies**: Unfortunately, not all build dependencies - can be found via `.cmd` files and `.incbin` statements. Some build +* **Hardcoded dependencies**: Unfortunately, not all build dependencies + can be found via ``.cmd`` files and ``.incbin`` statements. Some build dependencies are directly defined in Makefiles or Kbuild files. Parsing these files is considered too complex for the scope of this project. Instead, the remaining gaps of the graph are filled using a list of manually defined dependencies, see - `sbom/cmd_graph/hardcoded_dependencies.py`. This list is known to be - incomplete. However, analysis of the cmd graph indicates a ~99% - completeness. For more information about the completeness analysis, - see [KernelSbom #95](https://github.com/TNG/KernelSbom/issues/95). + ``scripts/sbom/sbom/cmd_graph/hardcoded_dependencies.py``. This list is + known to be incomplete. However, analysis of the cmd graph indicates a + ~99% completeness. For more information about the completeness analysis, + see `KernelSbom #95 <https://github.com/TNG/KernelSbom/issues/95>`_. Given the list of dependency files, KernelSbom recursively processes each file, expanding the dependency chain all the way to the version @@ -149,10 +148,10 @@ build file B" relationships. Using the cmd graph, KernelSbom produces three SPDX documents. For every file in the graph, KernelSbom: -- Parses `SPDX-License-Identifier` headers, -- Computes file hashes, -- Estimates the file type based on extension and path, -- Records build relationships between files. +* Parses ``SPDX-License-Identifier`` headers, +* Computes file hashes, +* Estimates the file type based on extension and path, +* Records build relationships between files.
Each root output file is additionally associated with an SPDX Package element that captures version information, license data, and copyright. @@ -161,45 +160,44 @@ Advanced Usage -------------- Including Kernel Modules ------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~ -The list of all `.ko` kernel modules produced during a build can be -extracted from the `modules.order` file within the object tree. -For example: +The list of all ``.ko`` kernel modules produced during a build can be +extracted from the ``modules.order`` file within the object tree. +For example:: $ echo "arch/x86/boot/bzImage" > sbom-roots.txt $ sed 's/\.o$/.ko/' ./kernel_build/modules.order >> sbom-roots.txt -Then use the generated roots file: +Then use the generated roots file:: - $ SRCARCH=x86 python3 tools/sbom/sbom.py \ + $ SRCARCH=x86 python3 scripts/sbom/sbom.py \ --src-tree . \ --obj-tree ./kernel_build \ --roots-file sbom-roots.txt \ --generate-spdx - Equal Source and Object Trees ------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When the source tree and object tree are identical (for example, when building in-tree), source files can no longer be reliably distinguished from generated files. In this scenario, KernelSbom does not produce a dedicated -`sbom-source.spdx.json` document. Instead, both source files and build -artifacts are included together in `sbom-build.spdx.json`, and -`sbom.used-files.txt` lists all files referenced in the build document. +``sbom-source.spdx.json`` document. Instead, both source files and build +artifacts are included together in ``sbom-build.spdx.json``, and +``sbom.used-files.txt`` lists all files referenced in the build document. Unknown Build Commands ----------------------- +~~~~~~~~~~~~~~~~~~~~~~ Because the kernel supports a wide range of configurations and versions, -KernelSbom may encounter build commands in `.cmd` files that it does +KernelSbom may encounter build commands in ``.cmd`` files that it does not yet support. 
By default, KernelSbom will fail if an unknown build command is encountered. If you still wish to generate SPDX documents despite unsupported -commands, you can use the `--do-not-fail-on-unknown-build-command` +commands, you can use the ``--do-not-fail-on-unknown-build-command`` option. KernelSbom will continue and produce the documents, although the resulting SBOM will be incomplete. diff --git a/sbom/sbom/environment.py b/sbom/sbom/environment.py index f3a54bd6..57d81214 100644 --- a/sbom/sbom/environment.py +++ b/sbom/sbom/environment.py @@ -152,8 +152,12 @@ class Environment: """ @classmethod - def KERNEL_BUILD_VARIABLES(cls) -> dict[str, str | None]: - return {name: os.getenv(name) for name in KERNEL_BUILD_VARIABLES_ALLOWLIST} + def KERNEL_BUILD_VARIABLES(cls) -> dict[str, str]: + return { + name: value.strip() + for name in KERNEL_BUILD_VARIABLES_ALLOWLIST + if (value := os.getenv(name)) is not None and value.strip() + } @classmethod def ARCH(cls) -> str | None: