Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 21 additions & 15 deletions sbom/sbom.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,21 @@
from sbom.cmd_graph import CmdGraph


def _exit_with_summary(write_output_on_error: bool = False) -> None:
    """Log the collected warning and error summaries and exit with status 1 on errors.

    The warning summary is logged at warning level when it is non-empty.
    If the error summary is non-empty, it is logged at error level and the
    process terminates via ``sys.exit(1)``; otherwise the function returns
    normally.

    Args:
        write_output_on_error: When False, an additional info message points
            the user to the ``--write-output-on-error`` option before exiting.
    """
    # NOTE(review): nesting reconstructed from a whitespace-stripped source; the
    # caller's `return` right after this call implies it only exits on errors — confirm.
    warnings_text = sbom_logging.summarize_warnings()
    errors_text = sbom_logging.summarize_errors()

    if warnings_text:
        logging.warning(warnings_text)

    # Nothing went wrong: hand control back to the caller.
    if not errors_text:
        return

    logging.error(errors_text)
    if not write_output_on_error:
        logging.info(
            "Use --write-output-on-error to generate output documents even when errors occur. "
            "Note that in this case the generated documents may be incomplete."
        )
    sys.exit(1)


def main():
# Read config
config = get_config()
Expand Down Expand Up @@ -62,6 +77,7 @@ def main():
logging.debug(f"Successfully saved {used_files_path}")

if config.generate_spdx is False:
_exit_with_summary(config.write_output_on_error)
return

# Build SPDX Documents
Expand Down Expand Up @@ -92,16 +108,15 @@ def main():
)
logging.debug(f"Generated SPDX graph in {time.time() - start_time} seconds")

# Report collected warnings and errors in case of failure
warning_summary = sbom_logging.summarize_warnings()
error_summary = sbom_logging.summarize_errors()

if not sbom_logging.has_errors() or config.write_output_on_error:
for kernel_sbom_kind, spdx_graph in spdx_graphs.items():
spdx_graph_objects = spdx_graph.to_list()
# Add warning and error summary to creation info comment
creation_info = next(element for element in spdx_graph_objects if isinstance(element, CreationInfo))
creation_info.comment = "\n".join([warning_summary, error_summary]).strip()
creation_info.comment = "\n".join([
sbom_logging.summarize_warnings(),
sbom_logging.summarize_errors(),
]).strip()
# Replace Placeholder uuid with real uuid for spdxIds
spdx_document = next(element for element in spdx_graph_objects if isinstance(element, SpdxDocument))
for namespaceMap in spdx_document.namespaceMap:
Expand All @@ -112,16 +127,7 @@ def main():
spdx_doc.save(save_path, config.prettify_json)
logging.debug(f"Successfully saved {save_path}")

if warning_summary:
logging.warning(warning_summary)
if error_summary:
logging.error(error_summary)
if not config.write_output_on_error:
logging.info(
"Use --write-output-on-error to generate output documents even when errors occur. "
"Note that in this case the generated SPDX documents may be incomplete."
)
sys.exit(1)
_exit_with_summary(config.write_output_on_error)


# Call main method
Expand Down
10 changes: 5 additions & 5 deletions sbom/sbom/cmd_graph/cmd_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def create(cls, cmd_file_path: PathStr) -> "CmdFile | None":
Returns:
cmd_file (CmdFile): Parsed cmd file.
"""
with open(cmd_file_path, "rt") as f:
with open(cmd_file_path, "rt", encoding="utf-8") as f:
lines = [line.strip() for line in f.readlines() if line.strip() != "" and not line.startswith("#")]

# savedcmd
Expand Down Expand Up @@ -128,8 +128,8 @@ def get_dependencies(
# Skip target file to prevent cycles. This is necessary because some multi stage commands first create an output and then pass it as input to the next command, e.g., objcopy.
continue
cmd_file_dependencies.append(input_file)

return cmd_file_dependencies
unique_cmd_file_dependencies = list(dict.fromkeys(cmd_file_dependencies))
return unique_cmd_file_dependencies


def _expand_resolve_files(input_files: list[PathStr], obj_tree: PathStr) -> list[PathStr]:
Expand All @@ -149,14 +149,14 @@ def _expand_resolve_files(input_files: list[PathStr], obj_tree: PathStr) -> list
if not input_file.startswith("@"):
expanded_input_files.append(input_file)
continue
resolve_file_path = os.path.join(obj_tree, input_file.lstrip("@"))
resolve_file_path = os.path.join(obj_tree, input_file.removeprefix("@"))
if not os.path.exists(resolve_file_path):
sbom_logging.error(
"Skip resolving '{resolve_file_path}' because the response file does not exist.",
resolve_file_path=resolve_file_path,
)
continue
with open(resolve_file_path, "rt") as f:
with open(resolve_file_path, "rt", encoding="utf-8") as f:
resolve_file_content = [line_stripped for line in f.readlines() if (line_stripped := line.strip())]
expanded_input_files += _expand_resolve_files(resolve_file_content, obj_tree)
return expanded_input_files
2 changes: 1 addition & 1 deletion sbom/sbom/cmd_graph/incbin_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def parse_incbin_statements(absolute_path: PathStr) -> list[IncbinStatement]:
Returns:
list[IncbinStatement]: Parsed `.incbin` statements.
"""
with open(absolute_path, "rt") as f:
with open(absolute_path, "rt", encoding="utf-8") as f:
content = f.read()
return [
IncbinStatement(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def _parse_nm_piped_command(command: str) -> list[PathStr]:
nm_command, _ = command.split("|", 1)
command_parts = tokenize_single_command(
command=nm_command.strip(),
flag_options=["p", "--defined-only"],
flag_options=["-p", "--defined-only"],
)
positionals = [p.value for p in command_parts if isinstance(p, Positional)]
# expect positionals to be ["nm", input1, input2, ...]
Expand Down
12 changes: 8 additions & 4 deletions sbom/sbom/cmd_graph/savedcmd_parser/command_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,20 @@ def _unwrap_outer_parentheses(s: str) -> str:
def _find_first_top_level_command_separator(
commands: str, separators: list[str] = [";", "&&"]
) -> tuple[int | None, int | None]:
def is_escaped(index: int) -> bool:
preceding = commands[:index]
return (len(preceding) - len(preceding.rstrip("\\"))) % 2 == 1

in_single_quote = False
in_double_quote = False
in_curly_braces = 0
in_braces = 0
for i, char in enumerate(commands):
if char == "'" and not in_double_quote:
# Toggle single quote state (unless inside double quotes)
if char == "'" and not in_double_quote and not is_escaped(i):
# Toggle single quote state (unless inside double quotes or escaped)
in_single_quote = not in_single_quote
elif char == '"' and not in_single_quote:
# Toggle double quote state (unless inside single quotes)
elif char == '"' and not in_single_quote and not is_escaped(i):
# Toggle double quote state (unless inside single quotes or escaped)
in_double_quote = not in_double_quote

if in_single_quote or in_double_quote:
Expand Down
7 changes: 3 additions & 4 deletions sbom/sbom/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,8 @@ def _parse_cli_arguments(parser: argparse.ArgumentParser) -> dict[str, Any]:
group.add_argument(
"--roots",
nargs="+",
default="arch/x86/boot/bzImage",
help="Space-separated list of paths relative to obj-tree for which the SBOM will be created.\n"
"Cannot be used together with --roots-file. (default: arch/x86/boot/bzImage)",
"Cannot be used together with --roots-file.",
)
group.add_argument(
"--roots-file",
Expand Down Expand Up @@ -233,7 +232,7 @@ def get_config() -> KernelSbomConfig:
obj_tree = os.path.realpath(args["obj_tree"])
root_paths = []
if args["roots_file"]:
with open(args["roots_file"], "rt") as f:
with open(args["roots_file"], "rt", encoding="utf-8") as f:
root_paths = [root.strip() for root in f.readlines()]
if len(root_paths) == 0:
parser.error("--roots-file must contain at least one path")
Expand Down Expand Up @@ -262,7 +261,7 @@ def get_config() -> KernelSbomConfig:
if args["package_copyright_text"] is not None:
package_copyright_text = args["package_copyright_text"]
elif os.path.isfile(copying_path := os.path.join(src_tree, "COPYING")):
with open(copying_path, "r") as f:
with open(copying_path, "r", encoding="utf-8") as f:
package_copyright_text = f.read()
prettify_json = args["prettify_json"]

Expand Down
60 changes: 32 additions & 28 deletions sbom/sbom/sbom_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,61 +6,65 @@
from typing import Literal


MessageTemplate = str


class MessageLogger:
"""Logger that prints the first occurrence of each message immediately
and keeps track of repeated messages for a final summary."""
"""Logger that surpresses repeated messages and stores a summary of all logged messages."""

messages: dict[str, list[str]]
repeated_logs_limit: int
_messages: dict[MessageTemplate, list[str]]
_message_counts: dict[MessageTemplate, int]
_repeated_logs_limit: int
"""Maximum number of repeated messages of the same type to log before suppressing further output."""

def __init__(self, level: Literal["error", "warning"], repeated_logs_limit: int = 3) -> None:
self._level = level
self.messages = {}
self.repeated_logs_limit = repeated_logs_limit
self._messages = {}
self._message_counts = {}
self._repeated_logs_limit = repeated_logs_limit

def log(self, template: str, /, **kwargs: str) -> None:
"""Log a message based on a template and optional variables."""
def log(self, template: MessageTemplate, /, **kwargs: str) -> None:
"""Log a message based on a template and optional variables. Example: `log("Missing {path}", path=str(p))`."""
message = template
for key, value in kwargs.items():
message = message.replace("{" + key + "}", value)
if template not in self.messages:
self.messages[template] = []
if len(self.messages[template]) < self.repeated_logs_limit:
if template not in self._messages:
self._messages[template] = []
self._message_counts[template] = 0
self._message_counts[template] += 1
if self._message_counts[template] <= self._repeated_logs_limit:
if self._level == "error":
logging.error(message)
elif self._level == "warning":
logging.warning(message)
self.messages[template].append(message)
self._messages[template].append(message)

def get_summary(self) -> str:
"""Return summary of collected messages."""
if len(self.messages) == 0:
if len(self._messages) == 0:
return ""
summary: list[str] = [f"Summarize {self._level}s:"]
for msgs in self.messages.values():
for i, msg in enumerate(msgs):
if i < self.repeated_logs_limit:
summary.append(msg)
continue
summary.append(
f"... (Found {len(msgs) - i} more {'instances' if (len(msgs) - i) != 1 else 'instance'} of this {self._level})"
)
break
for template, messages in self._messages.items():
for message in messages:
summary.append(message)
n_suppressed_messages = self._message_counts[template] - self._repeated_logs_limit
if n_suppressed_messages > 0:
instances = "instance" if n_suppressed_messages == 1 else "instances"
summary.append(f"... (Found {n_suppressed_messages} more {instances} of this {self._level})")
return "\n".join(summary)

def has_messages(self) -> bool:
    """Return True if at least one message template has been counted by this logger."""
    # An empty count table means nothing has been logged yet.
    return bool(self._message_counts)


_warning_logger: MessageLogger
_error_logger: MessageLogger


def warning(msg_template: str, /, **kwargs: str) -> None:
"""Log a warning message."""
def warning(msg_template: MessageTemplate, /, **kwargs: str) -> None:
_warning_logger.log(msg_template, **kwargs)


def error(msg_template: str, /, **kwargs: str) -> None:
"""Log an error message including file, line, and function context."""
def error(msg_template: MessageTemplate, /, **kwargs: str) -> None:
frame = inspect.currentframe()
caller_frame = frame.f_back if frame else None
info = inspect.getframeinfo(caller_frame) if caller_frame else None
Expand All @@ -78,7 +82,7 @@ def summarize_errors() -> str:


def has_errors() -> bool:
return len(_error_logger.messages) > 0
return _error_logger.has_messages()


def init() -> None:
Expand Down
4 changes: 2 additions & 2 deletions sbom/sbom/spdx/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Copyright (C) 2025 TNG Technology Consulting GmbH

from dataclasses import dataclass, field
from datetime import datetime, timezone

from typing import Any, Literal
from sbom.spdx.spdxId import SpdxId

Expand Down Expand Up @@ -133,7 +133,7 @@ class CreationInfo(SpdxObject):
id: SpdxId = "_:creationinfo"
specVersion: str = SPDX_SPEC_VERSION
createdBy: list[Agent]
created: str = field(default_factory=lambda: datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"))
created: str
comment: str | None = None

def to_dict(self) -> dict[str, Any]:
Expand Down
2 changes: 1 addition & 1 deletion sbom/sbom/spdx_graph/kernel_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def _parse_spdx_license_identifier(absolute_path: str, max_bytes: int = 512) ->
The license identifier string (e.g., 'GPL-2.0-only') if found, otherwise None.
"""
try:
with open(absolute_path, "r") as f:
with open(absolute_path, "r", encoding="utf-8") as f:
match = SPDX_LICENSE_IDENTIFIER_PATTERN.search(f.read(max_bytes))
if match:
return match.group("id")
Expand Down
27 changes: 13 additions & 14 deletions sbom/sbom/spdx_graph/spdx_build_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,6 @@ def _file_relationships(
# and its children (input files)
build_and_relationship_elements: list[Build | Relationship] = [high_level_build_ancestorOf_relationship]
for node in cmd_graph:
if next(node.children, None) is None:
continue

# .cmd file dependencies
if node.cmd_file is not None:
build_element = Build(
Expand All @@ -273,23 +270,25 @@ def _file_relationships(
build_buildId=high_level_build_element.build_buildId,
comment=node.cmd_file.savedcmd,
)
hasInput_relationship = Relationship(
spdxId=spdx_id_generator.generate(),
relationshipType="hasInput",
from_=build_element,
to=[file_elements[child_node.absolute_path] for child_node in node.children],
)
build_and_relationship_elements.append(build_element)

if node.cmd_file_dependencies:
hasInput_relationship = Relationship(
spdxId=spdx_id_generator.generate(),
relationshipType="hasInput",
from_=build_element,
to=[file_elements[dep.absolute_path] for dep in node.cmd_file_dependencies],
)
build_and_relationship_elements.append(hasInput_relationship)

hasOutput_relationship = Relationship(
spdxId=spdx_id_generator.generate(),
relationshipType="hasOutput",
from_=build_element,
to=[file_elements[node.absolute_path]],
)
build_and_relationship_elements += [
build_element,
hasInput_relationship,
hasOutput_relationship,
]
build_and_relationship_elements.append(hasOutput_relationship)

high_level_build_ancestorOf_relationship.to.append(build_element)

# incbin dependencies
Expand Down
6 changes: 3 additions & 3 deletions sbom/tests/cmd_graph/test_savedcmd_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def _assert_parsing(self, cmd: str, expected: str, registry: CommandParserRegist
parsed = parse_inputs_from_commands(cmd, fail_on_unknown_build_command=False, registry=registry)
target = [] if expected == "" else expected.split(" ")
self.assertEqual(parsed, target)
errors = sbom_logging._error_logger.messages # type: ignore
errors = sbom_logging._error_logger._message_counts # type: ignore
self.assertEqual(errors, {})

# Compound command tests
Expand Down Expand Up @@ -133,14 +133,14 @@ def test_clang(self):

# ld command tests
def test_ld(self):
cmd = 'ld -o arch/x86/entry/vdso/vdso64.so.dbg -shared --hash-style=both --build-id=sha1 --no-undefined --eh-frame-hdr -Bsymbolic -z noexecstack -m elf_x86_64 -soname linux-vdso.so.1 -z max-page-size=4096 -T arch/x86/entry/vdso/vdso.lds arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o; if readelf -rW arch/x86/entry/vdso/vdso64.so.dbg | grep -v _NONE | grep -q " R_\w*_"; then (echo >&2 "arch/x86/entry/vdso/vdso64.so.dbg: dynamic relocations are not supported"; rm -f arch/x86/entry/vdso/vdso64.so.dbg; /bin/false); fi' # type: ignore
cmd = r'ld -o arch/x86/entry/vdso/vdso64.so.dbg -shared --hash-style=both --build-id=sha1 --no-undefined --eh-frame-hdr -Bsymbolic -z noexecstack -m elf_x86_64 -soname linux-vdso.so.1 -z max-page-size=4096 -T arch/x86/entry/vdso/vdso.lds arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o; if readelf -rW arch/x86/entry/vdso/vdso64.so.dbg | grep -v _NONE | grep -q " R_\w*_"; then (echo >&2 "arch/x86/entry/vdso/vdso64.so.dbg: dynamic relocations are not supported"; rm -f arch/x86/entry/vdso/vdso64.so.dbg; /bin/false); fi'
expected = "arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o"
self._assert_parsing(cmd, expected)

def test_ld_with_env_override(self):
with patch.dict(os.environ, {"LD": "some-tool ld"}):
registry = CommandParserRegistry.create()
cmd = 'ld -o arch/x86/entry/vdso/vdso64.so.dbg -shared --hash-style=both --build-id=sha1 --no-undefined --eh-frame-hdr -Bsymbolic -z noexecstack -m elf_x86_64 -soname linux-vdso.so.1 -z max-page-size=4096 -T arch/x86/entry/vdso/vdso.lds arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o; if readelf -rW arch/x86/entry/vdso/vdso64.so.dbg | grep -v _NONE | grep -q " R_\w*_"; then (echo >&2 "arch/x86/entry/vdso/vdso64.so.dbg: dynamic relocations are not supported"; rm -f arch/x86/entry/vdso/vdso64.so.dbg; /bin/false); fi' # type: ignore
cmd = r'ld -o arch/x86/entry/vdso/vdso64.so.dbg -shared --hash-style=both --build-id=sha1 --no-undefined --eh-frame-hdr -Bsymbolic -z noexecstack -m elf_x86_64 -soname linux-vdso.so.1 -z max-page-size=4096 -T arch/x86/entry/vdso/vdso.lds arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o; if readelf -rW arch/x86/entry/vdso/vdso64.so.dbg | grep -v _NONE | grep -q " R_\w*_"; then (echo >&2 "arch/x86/entry/vdso/vdso64.so.dbg: dynamic relocations are not supported"; rm -f arch/x86/entry/vdso/vdso64.so.dbg; /bin/false); fi'
expected = "arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o"
self._assert_parsing(cmd, expected, registry)
self._assert_parsing(f"some-tool {cmd}", expected, registry)
Expand Down
2 changes: 2 additions & 0 deletions sbom/tests/spdx_graph/test_kernel_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def test_parse_spdx_license_identifier(self):
test_cases: list[tuple[str, str | None]] = [
("/* SPDX-License-Identifier: MIT*/", "MIT"),
("// SPDX-License-Identifier: GPL-2.0-only", "GPL-2.0-only"),
("# SPDX-License-Identifier: GPL-2.0-only", "GPL-2.0-only"),
("#!/bin/bash\n# SPDX-License-Identifier: GPL-2.0-only", "GPL-2.0-only"),
("/* SPDX-License-Identifier: GPL-2.0-or-later OR MIT */", "GPL-2.0-or-later OR MIT"),
("/* SPDX-License-Identifier: Apache-2.0 */\n extra text", "Apache-2.0"),
("<!-- SPDX-License-Identifier: GPL-2.0 -->", "GPL-2.0"),
Expand Down
Loading