diff --git a/.gitignore b/.gitignore
index 14a974130f..2b5ea3826b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,3 +55,4 @@ local
 
 # This is only created when packaging for external redistribution
 /thirdparty/
+.history/
\ No newline at end of file
diff --git a/docs/built-in-pipelines.rst b/docs/built-in-pipelines.rst
index 55baefa6df..092e42221a 100644
--- a/docs/built-in-pipelines.rst
+++ b/docs/built-in-pipelines.rst
@@ -274,3 +274,11 @@ Fetch Scores (addon)
 .. autoclass:: scanpipe.pipelines.fetch_scores.FetchScores()
     :members:
     :member-order: bysource
+
+.. _pipeline_analyze_nixpkg:
+
+Analyze Nixpkg
+--------------
+.. autoclass:: scanpipe.pipelines.analyze_nixpkg.NixpkgClarity()
+    :members:
+    :member-order: bysource
diff --git a/pyproject.toml b/pyproject.toml
index 966bb60c36..a0adab733d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -157,6 +157,8 @@ resolve_dependencies = "scanpipe.pipelines.resolve_dependencies:ResolveDependenc
 scan_codebase = "scanpipe.pipelines.scan_codebase:ScanCodebase"
 scan_for_virus = "scanpipe.pipelines.scan_for_virus:ScanForVirus"
 scan_single_package = "scanpipe.pipelines.scan_single_package:ScanSinglePackage"
+nixpkgs_license_audit = "scanpipe.pipelines.nixpkgs_license_audit:NixpkgsLicenseAudit"
+analyze_nixpkg = "scanpipe.pipelines.analyze_nixpkg:NixpkgClarity"
 
 [tool.setuptools.packages.find]
 where = ["."]
diff --git a/scancodeio/static/main.css b/scancodeio/static/main.css
index a51269fcbe..0291ed835c 100644
--- a/scancodeio/static/main.css
+++ b/scancodeio/static/main.css
@@ -391,12 +391,6 @@ progress.file-upload::before {
 #message-list th#column-severity {
   min-width: 110px;
 }
-th#column-vulnerability_id {
-  min-width: 220px;
-}
-th#column-summary {
-  width: 40%;
-}
 .menu.is-info .is-active {
   background-color: #3e8ed0;
 }
@@ -607,3 +601,33 @@ body.full-screen #resource-viewer .message-header {
   background-color: var(--bulma-background);
   border-radius: var(--bulma-radius);
 }
+
+.dropdown-content.is-scrollable {
+  max-height: 320px;
+  overflow-y: auto;
+  background-color: var(--bulma-scheme-main-ter);
+  scrollbar-width: thin; /* Firefox */
+  scrollbar-color: var(--bulma-border) var(--bulma-scheme-main-ter); /* Firefox */
+}
+
+.dropdown-content.is-scrollable::-webkit-scrollbar {
+  width: 8px;
+}
+.dropdown-content.is-scrollable::-webkit-scrollbar-track {
+  background: var(--bulma-scheme-main-ter);
+}
+.dropdown-content.is-scrollable::-webkit-scrollbar-thumb {
+  background-color: var(--bulma-border);
+  border-radius: 6px;
+}
+
+[data-theme="dark"] .dropdown-content.is-scrollable {
+  background-color: var(--bulma-scheme-main-bis);
+  scrollbar-color: var(--bulma-grey) var(--bulma-scheme-main-bis);
+}
+[data-theme="dark"] .dropdown-content.is-scrollable::-webkit-scrollbar-track {
+  background: var(--bulma-scheme-main-bis);
+}
+[data-theme="dark"] .dropdown-content.is-scrollable::-webkit-scrollbar-thumb {
+  background-color: var(--bulma-grey);
+}
\ No newline at end of file
diff --git a/scanpipe/pipelines/analyze_nixpkg.py b/scanpipe/pipelines/analyze_nixpkg.py
new file mode 100644
index 0000000000..9f868cb9eb
--- /dev/null
+++ b/scanpipe/pipelines/analyze_nixpkg.py
@@ -0,0 +1,219 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/aboutcode-org/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+
+"""
+NixpkgClarity: Analyze a single Nix package (nixpkg).
+
+Goals:
+- Be aware of common nixpkgs conventions and community practices for license and origin metadata
+- Normalize declared license fields (including lists and license set references)
+- Determine source origins (homepage, vcs URL) and fetch sources when provided
+- Perform a standard ScanCode scan and emit a summary for clarity
+
+Inputs (recommended):
+- A nixpkg meta JSON file named "nixpkg_meta.json" placed in project inputs directory
+  Example keys: {"name", "version", "homepage", "license", "src"}
+  - "license" may be a string, a list of strings, or objects; we normalize
+  - "src" may be a URL (archive) or VCS URL (e.g., GitHub)
+- Optionally, source archives or pre-fetched directories can be provided as inputs
+
+Outputs:
+- Project output JSON file "nixpkg_clarity_summary.json" summarizing origin and license
+- Standard ScanCode results available through the UI
+"""
+
+from __future__ import annotations
+
+import json
+import shutil
+from pathlib import Path
+from typing import Any, Dict, Iterable, List, Optional, Union
+from urllib.parse import urlparse
+
+from scanpipe import pipes
+from scanpipe.pipelines import Pipeline
+from scanpipe.pipes import scancode
+from scanpipe.pipes.input import copy_inputs
+
+try:
+    # fetchcode is optional but available in this project dependencies
+    from fetchcode import fetch
+except Exception:  # pragma: no cover - keep pipeline usable without fetchcode
+    fetch = None
+
+
+def _normalize_license(value: Any) -> List[str]:
+    """Return the sorted, de-duplicated license labels found in nixpkgs meta.
+
+    Nixpkgs conventions:
+    - license can be a single license OR a list
+    - license entries can be strings (e.g., "mit"), SPDX ids ("MIT"),
+      or objects referring to nixpkgs license sets (e.g., {"spdxId": "MIT", "shortName": "mit"})
+    - some entries may be placeholders like "unfree"; keep them verbatim
+    """
+
+    def to_label(item: Any) -> Optional[str]:
+        """Return the label for one license entry, or None if it has none."""
+        if isinstance(item, dict):
+            # Prefer SPDX id if present, fallback to shortName or fullName
+            item = (
+                item.get("spdxId")
+                or item.get("spdx")
+                or item.get("shortName")
+                or item.get("fullName")
+                or item.get("name")
+            )
+        if item is None:
+            return None
+        # Stringify and strip every entry so that the labels stay hashable
+        # and sortable whatever the JSON value type is.
+        return str(item).strip()
+
+    if value is None:
+        return []
+    items = value if isinstance(value, (list, tuple)) else [value]
+    labels = (to_label(item) for item in items)
+    return sorted({label for label in labels if label})
+
+
+def _read_nixpkg_meta(inputs_dir: Path) -> Dict[str, Any]:
+    """Load the nixpkg_meta.json object found in `inputs_dir`.
+
+    Return an empty dict when the file is missing, unreadable, not valid JSON,
+    or when its top-level value is not a JSON object.
+    """
+    meta_path = inputs_dir / "nixpkg_meta.json"
+    try:
+        meta = json.loads(meta_path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # OSError: missing or unreadable file; ValueError: bad encoding or JSON.
+        return {}
+    # Callers use `meta.get(...)`, so anything but a JSON object is rejected.
+    return meta if isinstance(meta, dict) else {}
+
+
+class NixpkgClarity(Pipeline):
+    """Pipeline to analyze a single nixpkg with origin and license clarity."""
+
+    results_url = "/project/{slug}/packages/?compliance_alert=warning"
+
+    @classmethod
+    def steps(cls):
+        return (
+            cls.copy_inputs_to_codebase_directory,
+            cls.fetch_sources_if_any,
+            cls.extract_archives,
+            cls.collect_and_create_codebase_resources,
+            cls.flag_empty_files,
+            cls.flag_ignored_resources,
+            cls.scan_for_application_packages,
+            cls.scan_for_files,
+            cls.collect_and_create_license_detections,
+            cls.emit_nixpkg_clarity_summary,
+        )
+
+    def copy_inputs_to_codebase_directory(self):
+        """Copy the project inputs to the codebase directory."""
+        copy_inputs(self.project.inputs("*"), self.project.codebase_path)
+
+    def fetch_sources_if_any(self):
+        """Fetch the nixpkg meta 'src' URL into the codebase when possible.
+
+        Nothing is fetched when the meta has no 'src' (or 'source') string or
+        when fetchcode is not installed. Failures are recorded as project
+        errors and do not stop the pipeline.
+        """
+        meta = _read_nixpkg_meta(self.project.input_path)
+        src = meta.get("src") or meta.get("source")
+        if not src or not isinstance(src, str) or fetch is None:
+            return
+
+        try:
+            self.log(f"Fetching sources from: {src}")
+            dest_dir = self.project.codebase_path / "nixpkg-src"
+            dest_dir.mkdir(parents=True, exist_ok=True)
+            # fetchcode.fetch() only accepts the URL and stores the content in
+            # a temporary file: move it under dest_dir, named after the URL
+            # path so that the archive extension is kept for extraction.
+            response = fetch(src)
+            filename = Path(urlparse(src).path).name or "nixpkg-src-download"
+            shutil.move(response.location, str(dest_dir / filename))
+        except Exception as e:
+            self.add_error(e)
+
+    def collect_and_create_codebase_resources(self):
+        """Collect and create codebase resources."""
+        pipes.collect_and_create_codebase_resources(self.project)
+
+    def scan_for_application_packages(self):
+        """Scan the codebase for application packages."""
+        scancode.scan_for_application_packages(
+            self.project, progress_logger=self.log
+        )
+
+    def scan_for_files(self):
+        """Scan the codebase files."""
+        scancode.scan_for_files(self.project, progress_logger=self.log)
+
+    def collect_and_create_license_detections(self):
+        """Collect and create the license detections of the project."""
+        scancode.collect_and_create_license_detections(project=self.project)
+
+    def emit_nixpkg_clarity_summary(self):
+        """Emit a JSON summary derived from nixpkg meta and scan results."""
+        meta = _read_nixpkg_meta(self.project.input_path)
+
+        # Derive best-effort origin URLs from common conventions
+        origin_urls: List[str] = []
+        for key in ("homepage", "url", "repository", "src", "source"):
+            val = meta.get(key)
+            if isinstance(val, str) and val:
+                origin_urls.append(val)
+
+        summary: Dict[str, Any] = {
+            "nixpkg": {
+                "name": meta.get("name") or meta.get("pname") or "",
+                "version": meta.get("version") or meta.get("rev") or "",
+            },
+            "origin": {
+                "homepage": meta.get("homepage") or meta.get("url") or "",
+                "source": meta.get("src") or meta.get("source") or "",
+                "candidates": origin_urls,
+            },
+            "license": {
+                "declared": _normalize_license(meta.get("license")),
+            },
+        }
+
+        # Optionally include a quick package overview
+        try:
+            from scanpipe.models import DiscoveredPackage
+
+            pkgs = DiscoveredPackage.objects.project(self.project).order_by(
+                "type", "namespace", "name", "version"
+            )
+            summary["packages"] = [
+                {
+                    "purl": p.package_url,
+                    "name": p.name,
+                    "version": p.version,
+                    "declared_spdx": p.get_declared_license_expression_spdx(),
+                }
+                for p in pkgs
+            ]
+        except Exception as e:
+            # Best effort: keep the summary minimal but record the failure.
+            self.add_error(e)
+
+        output_file = self.project.get_output_file_path(
+            "nixpkg_clarity_summary", "json"
+        )
+        try:
+            output_file.write_text(json.dumps(summary, indent=2), encoding="utf-8")
+            self.log(f"Nixpkg clarity summary written to: {output_file}")
+        except Exception as e:
+            self.add_error(e)
diff --git a/scanpipe/pipelines/nixpkgs_license_audit.py b/scanpipe/pipelines/nixpkgs_license_audit.py
new file mode 100644
index 0000000000..0a724e9f1d
--- /dev/null
+++ b/scanpipe/pipelines/nixpkgs_license_audit.py
@@ -0,0 +1,192 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/aboutcode-org/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+
+import csv
+import json
+from collections import defaultdict
+from pathlib import Path
+
+from licensedcode.cache import get_licensing
+
+from scanpipe import pipes
+from scanpipe.pipelines import Pipeline
+from scanpipe.pipes import scancode
+from scanpipe.pipes.input import copy_inputs
+
+
+class NixpkgsLicenseAudit(Pipeline):
+    """
+    Scan the codebase, collect license detections, and audit package-level
+    declared vs detected licenses, with a focus on nixpkgs metadata correctness.
+
+    What it does:
+    - Copies inputs, extracts archives, and scans for packages/files
+    - Aggregates detected SPDX license keys across resources per package
+    - Compares with each package's declared SPDX license expression
+    - Flags mismatches and emits a CSV report in the project output directory
+
+    Notes:
+    - If you have nixpkgs-specific metadata (e.g., expected licenses), place a
+      mapping file in the project input directory named
+      "nixpkgs_licenses.json" mapping a package PURL to an expected SPDX
+      expression. This audit uses the package declared expression by default,
+      and will use the mapping file when available to override.
+    - MatchCode-based code matching can be run separately to enrich packages
+      before auditing if configured, using the MatchToMatchCode pipeline.
+    """
+
+    results_url = "/project/{slug}/packages/?compliance_alert=warning"
+
+    @classmethod
+    def steps(cls):
+        return (
+            cls.copy_inputs_to_codebase_directory,
+            cls.extract_archives,
+            cls.collect_and_create_codebase_resources,
+            cls.flag_empty_files,
+            cls.flag_ignored_resources,
+            cls.scan_for_application_packages,
+            cls.scan_for_files,
+            cls.collect_and_create_license_detections,
+            cls.audit_nixpkgs_licenses,
+        )
+
+    def copy_inputs_to_codebase_directory(self):
+        """Copy the project inputs to the codebase directory."""
+        copy_inputs(self.project.inputs("*"), self.project.codebase_path)
+
+    def collect_and_create_codebase_resources(self):
+        """Collect and create codebase resources."""
+        pipes.collect_and_create_codebase_resources(self.project)
+
+    def scan_for_application_packages(self):
+        """Scan the codebase for application packages."""
+        scancode.scan_for_application_packages(
+            self.project, progress_logger=self.log
+        )
+
+    def scan_for_files(self):
+        """Scan the codebase files."""
+        scancode.scan_for_files(self.project, progress_logger=self.log)
+
+    def collect_and_create_license_detections(self):
+        """Collect and create the license detections of the project."""
+        scancode.collect_and_create_license_detections(project=self.project)
+
+    def _load_expected_map(self):
+        """Optionally load a PURL->SPDX expression mapping from inputs."""
+        expected = {}
+        for input_src in self.project.inputsources.all():
+            if input_src.filename != "nixpkgs_licenses.json":
+                continue
+            if not input_src.exists():
+                continue
+            try:
+                data = json.loads(input_src.path.read_text(encoding="utf-8"))
+            except Exception as e:
+                self.add_error(e)
+                continue
+            if isinstance(data, dict):
+                expected.update({str(k): str(v) for k, v in data.items()})
+                self.log("Loaded nixpkgs_licenses.json overrides")
+        return expected
+
+    def _symbols_from_expression(self, expression_spdx):
+        """Return a set of SPDX license keys parsed from an SPDX expression."""
+        if not expression_spdx:
+            return set()
+        try:
+            symbols = get_licensing().license_symbols(expression_spdx)
+            return {sym.key for sym in symbols}
+        except Exception:
+            # An expression that cannot be parsed contributes no key.
+            return set()
+
+    @staticmethod
+    def _compare_license_sets(basis_set, detected_set):
+        """Return the comparison label for basis vs detected license keys."""
+        if not basis_set and not detected_set:
+            return "no-declared-and-no-detected"
+        if not detected_set:
+            return "no-detected"
+        if not basis_set:
+            return "no-declared"
+        if basis_set == detected_set:
+            return "match"
+        if basis_set.issubset(detected_set):
+            return "declared-subset-of-detected"
+        if detected_set.issubset(basis_set):
+            return "detected-subset-of-declared"
+        return "different"
+
+    def audit_nixpkgs_licenses(self):
+        """Compare declared vs detected licenses per package and export CSV."""
+        from scanpipe.models import DiscoveredPackage
+
+        expected_map = self._load_expected_map()
+
+        # Prefetch to reduce queries
+        packages = (
+            DiscoveredPackage.objects.project(self.project)
+            .prefetch_related("codebase_resources")
+            .order_by("type", "namespace", "name", "version")
+        )
+
+        output_file = self.project.get_output_file_path(
+            "nixpkgs_license_audit", "csv"
+        )
+
+        fieldnames = [
+            "purl",
+            "name",
+            "version",
+            "declared_spdx",
+            "expected_spdx",
+            "detected_spdx_keys",
+            "files_with_detections",
+            "comparison",
+        ]
+
+        with output_file.open("w", newline="", encoding="utf-8") as fh:
+            writer = csv.DictWriter(fh, fieldnames=fieldnames)
+            writer.writeheader()
+
+            # Single pass: the prefetched resources of each package provide
+            # both the detected keys and the count of files with detections.
+            for pkg in packages.iterator(chunk_size=2000):
+                detected_set = set()
+                files_with_detections = 0
+                for res in pkg.codebase_resources.all():
+                    expr = getattr(res, "detected_license_expression_spdx", "")
+                    if not expr:
+                        continue
+                    files_with_detections += 1
+                    detected_set |= self._symbols_from_expression(expr)
+
+                declared_spdx = pkg.get_declared_license_expression_spdx()
+                expected_spdx = expected_map.get(pkg.package_url, "")
+                # A mapping file entry overrides the declared expression.
+                basis_set = self._symbols_from_expression(
+                    expected_spdx or declared_spdx
+                )
+
+                writer.writerow(
+                    {
+                        "purl": pkg.package_url,
+                        "name": pkg.name,
+                        "version": pkg.version,
+                        "declared_spdx": declared_spdx,
+                        "expected_spdx": expected_spdx,
+                        "detected_spdx_keys": " ".join(sorted(detected_set)),
+                        "files_with_detections": files_with_detections,
+                        "comparison": self._compare_license_sets(
+                            basis_set, detected_set
+                        ),
+                    }
+                )
+
+        self.log(f"Nixpkgs license audit written to: {output_file}")
diff --git a/scanpipe/templates/scanpipe/dropdowns/filter_dropdown.html b/scanpipe/templates/scanpipe/dropdowns/filter_dropdown.html
index e153c1953e..a521e2c3ee 100644
--- a/scanpipe/templates/scanpipe/dropdowns/filter_dropdown.html
+++ b/scanpipe/templates/scanpipe/dropdowns/filter_dropdown.html
@@ -13,7 +13,7 @@